If you need to keep backups of an EC2 instance, you can set up a cron job that takes a snapshot of its volume every hour, and then run a second Python script that removes the snapshots you no longer need.
Here is the script for creating snapshots:
import boto
import iso8601
import sys
import datetime
from boto.ec2.connection import EC2Connection
# NOTE(review): the credentials below are placeholders -- change them to your
# own, and avoid committing real keys to source control.
AWS_ACCESS_KEY_ID = 'AAAAAAAAAAAAAATQ'
AWS_SECRET_ACCESS_KEY = 'MAAAAAAAAAAAAAAAAAAAAn'
AWS_ACCOUNT_ID = "322222222226"

# Description attached to every snapshot this script creates.
SNAPSHOT_DESCRIPTION = 'Hourly automatic backup'


def parse_arguments(argv):
    """Return (volume_id, log_location) taken from *argv*.

    *argv* is expected to look like sys.argv: the program name followed by
    exactly two arguments.  Returns None when the argument count is wrong so
    the caller can print the usage text.
    """
    if len(argv) != 3:
        return None
    return argv[1], argv[2]


def format_log_entry(start_time, end_time, status, volume_id):
    """Build the single tab-separated line appended to the log per run."""
    return ('start:\t' + str(start_time) +
            '\tend:\t' + str(end_time) +
            '\tstatus: ' + str(status) +
            ' for backup of volume ' + volume_id + '\n')


def append_to_log(log_location, entry):
    """Append *entry* to the log file, closing it even if the write fails."""
    with open(log_location, 'a') as log_file:
        log_file.write(entry)


def main(argv):
    """Create one snapshot of the given volume and record the outcome."""
    parsed = parse_arguments(argv)
    if parsed is None:
        # sys.exit(<str>) prints the text to stderr and exits with status 1,
        # so cron (or any caller) can tell that the run did not happen.
        sys.exit(
            "You need to supply exactly 2 arguments. The first is the AWS "
            "volume_id and the second is the log path\n"
            "For example:\n"
            "    python %s vol-12345678 /var/log/ec2_snapshots.log" % argv[0]
        )
    volume_id, log_location = parsed

    start_time = datetime.datetime.now()
    conn = EC2Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    try:
        status = conn.create_snapshot(volume_id,
                                      description=SNAPSHOT_DESCRIPTION)
    except Exception as exc:
        # Leave a trace of the failed backup in the log, then let the error
        # propagate so the process still ends with a traceback and a
        # non-zero exit status.
        append_to_log(log_location,
                      format_log_entry(start_time, datetime.datetime.now(),
                                       'FAILED (%s)' % exc, volume_id))
        raise
    append_to_log(log_location,
                  format_log_entry(start_time, datetime.datetime.now(),
                                   status, volume_id))


if __name__ == '__main__':
    main(sys.argv)
Here is the script for cleaning the snapshot list and keeping only the ones that you need:
import boto
import iso8601
import sys
import datetime
from boto.ec2.connection import EC2Connection
# Status labels written to the log for every snapshot.
TO_BE_KEPT_FOR_HOURLY = 'TO BE KEPT FOR HOURLY'
TO_BE_KEPT_FOR_DAILY = 'TO BE KEPT FOR DAILY'
TO_BE_KEPT_FOR_WEEKLY = 'TO BE KEPT FOR WEEKLY'
TO_BE_DELETED = 'TO BE DELETED'

# Accepted values of the third command-line argument.
DRY_RUN = 'dry'
LIVE_RUN = 'live'

AWS_ACCESS_KEY_ID = 'ABCDEFGHIJKLMONQ'  # CHANGE TO YOUR OWN
AWS_SECRET_ACCESS_KEY = 'MASASDFASDFASDFSADFASDFASDASDrn'  # CHANGE TO YOUR OWN
AWS_ACCOUNT_ID = "31232312316"  # CHANGE TO YOUR OWN

# Retention policy: every snapshot is kept for NUM_HOURLY hours; before that,
# one per calendar day for NUM_DAILY days; before that, one per ISO week for
# NUM_WEEKLY weeks.  Anything older is deleted.
NUM_HOURLY = 24 * 7
NUM_DAILY = 7 * 5
NUM_WEEKLY = 30


def retention_cutoffs(now):
    """Return the (last_hourly, last_daily, last_weekly) cut-off datetimes.

    Each cut-off is the oldest instant still covered by that retention tier,
    measured back from *now*.
    """
    last_hourly = now - datetime.timedelta(hours=NUM_HOURLY)
    last_daily = last_hourly - datetime.timedelta(days=NUM_DAILY)
    last_weekly = last_daily - datetime.timedelta(days=7 * NUM_WEEKLY)
    return last_hourly, last_daily, last_weekly


def to_naive_utc(moment):
    """Convert *moment* to a naive datetime expressed in UTC.

    Aware datetimes are shifted by their UTC offset before the tzinfo is
    dropped; naive datetimes are returned unchanged.
    """
    offset = moment.utcoffset()
    if offset is None:
        return moment
    return (moment - offset).replace(tzinfo=None)


def describe_snapshot(snap, time_start, now):
    """Build the working record for one snapshot.

    *time_start* and *now* must both be naive UTC datetimes.  The age fields
    are whole days / weeks / hours (floor division) and are only used for
    logging; 'status' is filled in later by classify_snapshots().
    """
    age = now - time_start
    return {
        'snap_object': snap,
        'time_start': time_start,
        'days_delta': age.days,
        'weeks_delta': age.days // 7,
        'hours_delta': age.days * 24 + age.seconds // 3600,
        'status': 'NOT DEFINED',
    }


def classify_snapshots(easy_snaps, last_hourly, last_daily, last_weekly):
    """Set 'status' on every record and return the records newest first.

    Records are visited from newest to oldest, so within the daily and
    weekly tiers the newest snapshot of each calendar day / ISO week is the
    one that is kept.  The tiers form a single elif chain so that a snapshot
    lying exactly on a cut-off always receives a definite status.
    """
    kept_days = set()
    kept_weeks = set()
    ordered = sorted(easy_snaps, key=lambda k: k['time_start'], reverse=True)
    for easy_snap in ordered:
        time_start = easy_snap['time_start']
        if time_start > last_hourly:
            status = TO_BE_KEPT_FOR_HOURLY
        elif time_start > last_daily:
            day_key = time_start.date()
            if day_key in kept_days:
                status = TO_BE_DELETED
            else:
                kept_days.add(day_key)
                status = TO_BE_KEPT_FOR_DAILY
        elif time_start > last_weekly:
            # (ISO year, ISO week number) identifies the week.
            week_key = tuple(time_start.isocalendar()[:2])
            if week_key in kept_weeks:
                status = TO_BE_DELETED
            else:
                kept_weeks.add(week_key)
                status = TO_BE_KEPT_FOR_WEEKLY
        else:
            status = TO_BE_DELETED
        easy_snap['status'] = status
    return ordered


def format_snapshot_line(easy_snap):
    """Return the tab-separated log line describing one snapshot record."""
    snap = easy_snap['snap_object']
    fields = [
        'start_time: ' + str(easy_snap['time_start']),
        'hours_delta: ' + str(easy_snap['hours_delta']),
        'days_delta: ' + str(easy_snap['days_delta']),
        'weeks_delta: ' + str(easy_snap['weeks_delta']),
        'volume_id: ' + str(snap.volume_id),
        'snapshot_id: ' + str(snap.id),
        'status: ' + str(easy_snap['status']),
    ]
    return '\t'.join(fields) + '\t\n'


def main(argv):
    """Apply the retention policy to all snapshots of one volume.

    Every decision is appended to the log file.  Snapshots are only deleted
    when the third argument is exactly 'live'; any other value is treated as
    a dry run.
    """
    if len(argv) != 4:
        # sys.exit(<str>) prints to stderr and exits with status 1.
        sys.exit(
            "You need to supply exactly 3 arguments. The first is the AWS "
            "volume_id, the second is the log path, and the 3rd is whether "
            "this is a dry run or not\n"
            "The last argument should be either 'dry' or 'live'."
        )
    volume_id = argv[1]
    log_location = argv[2]
    dry_run_status = LIVE_RUN if argv[3] == LIVE_RUN else DRY_RUN

    # Snapshot start times are handled as naive UTC below, so "now" has to be
    # UTC as well; using local time would shift every retention window by
    # the host's UTC offset.
    now = datetime.datetime.utcnow()
    last_hourly, last_daily, last_weekly = retention_cutoffs(now)

    conn = EC2Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    all_snaps = conn.get_all_snapshots(owner=AWS_ACCOUNT_ID)
    # Keep just the snapshots for the volume of interest
    snaps = [snap for snap in all_snaps if snap.volume_id == volume_id]

    with open(log_location, 'a') as log_file:
        if not snaps:
            log_file.write('\n\nNOW: ' + str(now) +
                           '\tDid not find any snapshots for volume_id: ' +
                           volume_id)
            return

        easy_snap_array = [
            describe_snapshot(
                snap, to_naive_utc(iso8601.parse_date(snap.start_time)), now)
            for snap in snaps
        ]

        log_file.write('\n\nNOW: ' + str(now) + '*************' +
                       dry_run_status + '********************\n\n')
        log_file.write('LAST_HOURLY:\t\t' + str(last_hourly) + '\n')
        log_file.write('LAST_DAILY:\t\t' + str(last_daily) + '\n')
        log_file.write('LAST_WEEKLY:\t\t' + str(last_weekly) + '\n')

        for easy_snap in classify_snapshots(easy_snap_array, last_hourly,
                                            last_daily, last_weekly):
            log_file.write(format_snapshot_line(easy_snap))

        if dry_run_status == LIVE_RUN:
            # delete all the snapshots marked for deletion
            for easy_snap in easy_snap_array:
                if easy_snap['status'] == TO_BE_DELETED:
                    snapshot_id = easy_snap['snap_object'].id
                    result = conn.delete_snapshot(snapshot_id)
                    log_file.write("Deleted snapshot with id: " + snapshot_id +
                                   "\tRETURN STATUS: " + str(result) + "\n")


if __name__ == '__main__':
    main(sys.argv)