three99

web development

Python Grandfather Father Son Snapshots - Backup for EC2

| Comments

If you need to keep backups of your EC2 instance, you can set up a cron job that takes a snapshot of its volume every hour, and then run a second Python script that removes the snapshots you no longer need.

Here is the script for creating snapshots:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import boto
import iso8601
import sys
import datetime
from boto.ec2.connection import EC2Connection


# Hourly snapshot script: creates one EBS snapshot of the given volume and
# appends a one-line result record to a log file.
#
# Usage: python <script> <volume_id> <log_path>

# NOTE(review): placeholder credentials; replace with your own and prefer
# keeping real keys out of source control.
AWS_ACCESS_KEY_ID = 'AAAAAAAAAAAAAATQ'
AWS_SECRET_ACCESS_KEY = 'MAAAAAAAAAAAAAAAAAAAAn'
AWS_ACCOUNT_ID = "322222222226"

# Description attached to every snapshot this script creates.
SNAPSHOT_DESCRIPTION = 'Hourly automatic backup'


def format_log_line(start_time, end_time, status, volume_id):
    """Return the tab-separated log record for one snapshot attempt.

    start_time / end_time / status are converted with str(); volume_id is
    expected to be a string.  The record ends with a newline.
    """
    return (
        'start:\t' + str(start_time) +
        '\tend:\t' + str(end_time) +
        '\tstatus: ' + str(status) +
        ' for backup of volume ' + volume_id + '\n'
    )


def append_to_log(log_location, line):
    """Append line to the log file, closing it even if the write fails."""
    with open(log_location, 'a+') as log:
        log.write(line)


def main(argv):
    """Create a snapshot of argv[1] and log the outcome to argv[2].

    Returns the process exit status: 0 on success, 1 on a usage error.
    Any exception raised by the snapshot call is logged and then re-raised.
    """
    start_time = datetime.datetime.now()

    if len(argv) != 3:
        script_name = argv[0] if argv else 'create_snapshot.py'
        print("You need to supply exactly 2 arguments. The first is the AWS volume_id and the second is the log path")
        print("For example:")
        print("  python " + script_name + " vol-12345678 /var/log/ec2_snapshots.log")
        # Non-zero so cron / wrappers can detect the misconfiguration.
        return 1

    volume_id = argv[1]
    log_location = argv[2]

    conn = EC2Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

    try:
        rval = conn.create_snapshot(volume_id, description=SNAPSHOT_DESCRIPTION)
    except Exception as err:
        # Record the failure so a broken backup is visible in the log, then
        # let the error propagate so the process still exits non-zero.
        append_to_log(
            log_location,
            format_log_line(start_time, datetime.datetime.now(),
                            'FAILED: ' + str(err), volume_id),
        )
        raise

    append_to_log(
        log_location,
        format_log_line(start_time, datetime.datetime.now(), rval, volume_id),
    )
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Here is the script for cleaning the snapshot list and keeping only the ones that you need:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import boto
import iso8601
import sys
import datetime
from boto.ec2.connection import EC2Connection

# Grandfather-father-son pruning of EBS snapshots for one volume.
#
# Retention policy (newest to oldest):
#   * every snapshot from the last NUM_HOURLY hours is kept;
#   * for the NUM_DAILY days before that, the newest snapshot of each
#     calendar day is kept;
#   * for the NUM_WEEKLY weeks before that, the newest snapshot of each
#     ISO week is kept;
#   * everything older is deleted.
#
# Usage: python <script> <volume_id> <log_path> <dry|live>
# Nothing is deleted unless the third argument is exactly 'live'.

TO_BE_KEPT_FOR_HOURLY = 'TO BE KEPT FOR HOURLY'
TO_BE_KEPT_FOR_DAILY = 'TO BE KEPT FOR DAILY'
TO_BE_KEPT_FOR_WEEKLY = 'TO BE KEPT FOR WEEKLY'
TO_BE_DELETED = 'TO BE DELETED'
NOT_DEFINED = 'NOT DEFINED'
DRY_RUN = 'dry'
LIVE_RUN = 'live'

# NOTE(review): placeholder credentials; prefer keeping real keys out of
# source control.
AWS_ACCESS_KEY_ID = 'ABCDEFGHIJKLMONQ'                    # CHANGE  TO YOUR OWN
AWS_SECRET_ACCESS_KEY = 'MASASDFASDFASDFSADFASDFASDASDrn' # CHANGE  TO YOUR OWN
AWS_ACCOUNT_ID = "31232312316"                            # CHANGE  TO YOUR OWN

NUM_HOURLY = 24 * 7   # hours covered by the hourly tier
NUM_DAILY = 7 * 5     # days covered by the daily tier
NUM_WEEKLY = 30       # weeks covered by the weekly tier


def retention_cutoffs(now, num_hourly=NUM_HOURLY, num_daily=NUM_DAILY,
                      num_weekly=NUM_WEEKLY):
    """Return (last_hourly, last_daily, last_weekly) cut-off datetimes.

    Each tier starts where the previous, younger tier ends.
    """
    last_hourly = now - datetime.timedelta(hours=num_hourly)
    last_daily = last_hourly - datetime.timedelta(days=num_daily)
    last_weekly = last_daily - datetime.timedelta(days=7 * num_weekly)
    return last_hourly, last_daily, last_weekly


def to_naive_utc(moment):
    """Return moment as a naive datetime expressed in UTC.

    Aware datetimes are shifted by their UTC offset before the tzinfo is
    dropped; naive datetimes are returned unchanged.
    """
    offset = moment.utcoffset()
    if offset is None:
        return moment
    return moment.replace(tzinfo=None) - offset


def build_snapshot_record(snap, time_start, now):
    """Return the bookkeeping dict for one snapshot.

    time_start and now must both be naive datetimes in the same timezone.
    """
    age = now - time_start
    return {
        'snap_object': snap,
        'time_start': time_start,
        'days_delta': age.days,
        # Floor division keeps these integers on both Python 2 and 3.
        'weeks_delta': age.days // 7,
        'hours_delta': age.days * 24 + age.seconds // 3600,
        'status': NOT_DEFINED,
    }


def classify_snapshots(records, last_hourly, last_daily, last_weekly):
    """Set record['status'] on every record and return them newest-first.

    A snapshot exactly on a cut-off falls into the younger (kept) tier, so
    every record ends up with a definite status.
    """
    kept_days = set()
    kept_weeks = set()
    ordered = sorted(records, key=lambda rec: rec['time_start'], reverse=True)

    # Newest-first iteration means the first snapshot seen for a given day
    # or week is the most recent one, and that is the one that is kept.
    for record in ordered:
        started = record['time_start']
        if started >= last_hourly:
            record['status'] = TO_BE_KEPT_FOR_HOURLY
        elif started >= last_daily:
            day_key = str(started.date())
            if day_key in kept_days:
                record['status'] = TO_BE_DELETED
            else:
                kept_days.add(day_key)
                record['status'] = TO_BE_KEPT_FOR_DAILY
        elif started >= last_weekly:
            iso_year, iso_week = started.isocalendar()[:2]
            week_key = str(iso_year) + '-' + str(iso_week)
            if week_key in kept_weeks:
                record['status'] = TO_BE_DELETED
            else:
                kept_weeks.add(week_key)
                record['status'] = TO_BE_KEPT_FOR_WEEKLY
        else:
            record['status'] = TO_BE_DELETED

    return ordered


def format_snapshot_line(record):
    """Return the tab-separated log line describing one classified record."""
    snap = record['snap_object']
    fields = (
        ('start_time: ', record['time_start']),
        ('hours_delta: ', record['hours_delta']),
        ('days_delta: ', record['days_delta']),
        ('weeks_delta: ', record['weeks_delta']),
        ('volume_id: ', snap.volume_id),
        ('snapshot_id: ', snap.id),
        ('status: ', record['status']),
    )
    return ''.join(label + str(value) + '\t' for label, value in fields) + '\n'


def main(argv):
    """Classify (and, in live mode, delete) the snapshots of one volume.

    argv is [script, volume_id, log_path, mode].  Returns the process exit
    status: 0 on success, 1 on a usage error or if any deletion failed.
    """
    if len(argv) != 4:
        print("You need to supply exactly 3 arguments. The first is the AWS volume_id, the second is the log path, and the 3rd is whether this is a dry run or not")
        print("The last argument should be either 'dry' or 'live'.")
        return 1

    volume_id = argv[1]
    log_location = argv[2]
    # Anything other than an explicit 'live' is treated as a dry run.
    run_mode = LIVE_RUN if argv[3] == LIVE_RUN else DRY_RUN

    # Snapshot start times are normalised to naive UTC below, so "now" must
    # be UTC too; local time would skew every window by the host's offset.
    now = datetime.datetime.utcnow()
    last_hourly, last_daily, last_weekly = retention_cutoffs(now)

    conn = EC2Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

    # Keep just the snapshots for the volume of interest.
    snaps = [
        snap for snap in conn.get_all_snapshots(owner=AWS_ACCOUNT_ID)
        if snap.volume_id == volume_id
    ]

    failures = 0
    with open(log_location, 'a+') as log:
        if not snaps:
            log.write('\n\nNOW: ' + str(now) +
                      '\tDid not find any snapshots for volume_id: ' + volume_id)
            return 0

        records = [
            build_snapshot_record(
                snap, to_naive_utc(iso8601.parse_date(snap.start_time)), now)
            for snap in snaps
        ]

        log.write('\n\nNOW: ' + str(now) + '*************' + run_mode + '********************\n\n')
        log.write('LAST_HOURLY:\t\t' + str(last_hourly) + '\n')
        log.write('LAST_DAILY:\t\t' + str(last_daily) + '\n')
        log.write('LAST_WEEKLY:\t\t' + str(last_weekly) + '\n')

        ordered = classify_snapshots(records, last_hourly, last_daily, last_weekly)
        for record in ordered:
            log.write(format_snapshot_line(record))

        if run_mode != LIVE_RUN:
            return 0

        # Delete all the snapshots marked for deletion.
        for record in ordered:
            if record['status'] != TO_BE_DELETED:
                continue
            snapshot_id = record['snap_object'].id
            try:
                result = conn.delete_snapshot(snapshot_id)
            except boto.exception.EC2ResponseError as err:
                # One failed deletion must not stop the rest of the clean-up.
                failures += 1
                log.write("Failed to delete snapshot with id: " + snapshot_id + "\tERROR: " + str(err) + "\n")
                continue
            log.write("Deleted snapshot with id: " + snapshot_id + "\tRETURN STATUS: " + str(result) + "\n")

    return 1 if failures else 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Comments