99

Python grandfather father son snapshots - backup for EC2

If you need to keep backups of an EC2 instance's volume, you can set up a cron job that takes a snapshot every hour, and then run a second Python script that removes the snapshots you no longer need.

Here is the script for creating snapshots:

"""Take one EC2 snapshot of a volume and append the outcome to a log file.

Usage::

    python <this_script> <volume_id> <log_path>

Meant to be started from cron (for example once an hour).  One tab-separated
line is appended to ``log_path`` per run, whether the snapshot request
succeeded or raised.
"""
import boto
import iso8601
import sys
import datetime
from boto.ec2.connection import EC2Connection


# NOTE(review): these are placeholders.  Avoid committing real keys to a
# script; consider supplying them through boto's own configuration instead
# (confirm the supported mechanisms against the boto documentation).
AWS_ACCESS_KEY_ID = 'AAAAAAAAAAAAAATQ'
AWS_SECRET_ACCESS_KEY = 'MAAAAAAAAAAAAAAAAAAAAn'
AWS_ACCOUNT_ID = "322222222226"

# Description attached to every snapshot created by this script.
SNAPSHOT_DESCRIPTION = 'Hourly automatic backup'


def _usage(program):
    """Return the usage text shown when the argument count is wrong."""
    return (
        "You need to supply exactly 2 arguments. The first is the AWS "
        "volume_id and the second is the log path\n"
        "For example:\n"
        "    python " + program + " vol-12345678 /var/log/ec2_snapshots.log"
    )


def _append_log_line(log_location, start_time, status, volume_id):
    """Append one result line to the log; the end time is taken here."""
    end_time = str(datetime.datetime.now())
    line = ('start:\t' + start_time + '\tend:\t' + end_time +
            '\tstatus: ' + status + ' for backup of volume ' + volume_id +
            '\n')
    # The context manager closes the file even if the write fails.
    with open(log_location, 'a+') as log:
        log.write(line)


def main(argv=None):
    """Create a snapshot of ``argv[1]`` and log the result to ``argv[2]``.

    ``argv`` defaults to ``sys.argv``.  Exits with a non-zero status and a
    usage message on stderr when the argument count is wrong.  Any exception
    raised by the snapshot request is logged and then re-raised.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        # sys.exit(<str>) prints to stderr and exits with status 1, so cron
        # and shell callers can detect the misuse.
        sys.exit(_usage(argv[0] if argv else '<script>'))

    volume_id = argv[1]
    log_location = argv[2]
    start_time = str(datetime.datetime.now())

    conn = EC2Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    try:
        rval = conn.create_snapshot(volume_id,
                                    description=SNAPSHOT_DESCRIPTION)
    except Exception as exc:
        # Leave a trace of the failed backup in the log, then let the error
        # propagate so the process still fails loudly.
        _append_log_line(log_location, start_time,
                         'FAILED (' + str(exc) + ')', volume_id)
        raise
    _append_log_line(log_location, start_time, str(rval), volume_id)


if __name__ == '__main__':
    main()

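Here is the script for pruning the snapshot list so that only the snapshots you need are kept. It takes the volume ID, a log path, and either `dry` (only log what would be deleted) or `live` (actually delete). It keeps every recent snapshot, then one snapshot per day, then one per week, and marks everything older for deletion: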

"""Prune the EC2 snapshots of one volume (grandfather-father-son retention).

Usage::

    python <this_script> <volume_id> <log_path> <dry|live>

Every snapshot of ``volume_id`` owned by ``AWS_ACCOUNT_ID`` is classified by
age and the decision is appended to the log file.  Only when the third
argument is exactly ``live`` are the snapshots marked for deletion actually
deleted; any other value is treated as a dry run.

All time arithmetic is done in naive UTC: snapshot start times are converted
to UTC and "now" is taken with ``utcnow()``, so the retention windows do not
depend on the time zone of the machine running the script.
"""
import sys
import datetime

TO_BE_KEPT_FOR_HOURLY = 'TO BE KEPT FOR HOURLY'
TO_BE_KEPT_FOR_DAILY = 'TO BE KEPT FOR DAILY'
TO_BE_KEPT_FOR_WEEKLY = 'TO BE KEPT FOR WEEKLY'
TO_BE_DELETED = 'TO BE DELETED'
NOT_DEFINED = 'NOT DEFINED'
DRY_RUN = 'dry'
LIVE_RUN = 'live'

# NOTE(review): these are placeholders.  Avoid committing real keys to a
# script; consider supplying them through boto's own configuration instead
# (confirm the supported mechanisms against the boto documentation).
AWS_ACCESS_KEY_ID = 'ABCDEFGHIJKLMONQ'                      # CHANGE  TO YOUR OWN
AWS_SECRET_ACCESS_KEY = 'MASASDFASDFASDFSADFASDFASDASDrn'   # CHANGE  TO YOUR OWN
AWS_ACCOUNT_ID = "31232312316"                              # CHANGE  TO YOUR OWN

NUM_HOURLY = 24 * 7   # hours during which every snapshot is kept
NUM_DAILY = 7 * 5     # further days during which one snapshot per day is kept
NUM_WEEKLY = 30       # further weeks during which one snapshot per week is kept

USAGE = (
    "You need to supply exactly 3 arguments. The first is the AWS volume_id, "
    "the second is the log path, and the 3rd is whether this is a dry run "
    "or not\n"
    "The last argument should be either 'dry' or 'live'."
)


def retention_cutoffs(now):
    """Return ``(last_hourly, last_daily, last_weekly)`` measured from *now*.

    Each cutoff is the oldest instant still covered by that tier; the tiers
    are stacked, so the daily window starts where the hourly one ends, and the
    weekly window starts where the daily one ends.
    """
    last_hourly = now - datetime.timedelta(hours=NUM_HOURLY)
    last_daily = last_hourly - datetime.timedelta(days=NUM_DAILY)
    last_weekly = last_daily - datetime.timedelta(days=7 * NUM_WEEKLY)
    return last_hourly, last_daily, last_weekly


def to_naive_utc(moment):
    """Return *moment* as a naive datetime expressed in UTC.

    An aware datetime is shifted by its UTC offset before the tzinfo is
    dropped; a naive datetime is returned unchanged (it is taken to be UTC
    already).
    """
    offset = moment.utcoffset()
    if offset is not None:
        moment = moment - offset
    return moment.replace(tzinfo=None)


def describe_snapshot(snap, time_start, now):
    """Build the bookkeeping dict for one snapshot.

    *snap* is stored untouched under ``'snap_object'``; *time_start* and *now*
    must both be naive UTC datetimes.  The ``*_delta`` entries are whole
    numbers (floor division) describing the snapshot's age.
    """
    age = now - time_start
    return {
        'snap_object': snap,
        'time_start': time_start,
        'days_delta': age.days,
        'weeks_delta': age.days // 7,
        'hours_delta': age.days * 24 + age.seconds // 3600,
        'status': NOT_DEFINED,
    }


def _keep_first(seen, bucket, keep_status):
    """Return *keep_status* the first time *bucket* is seen, else deletion."""
    if bucket in seen:
        return TO_BE_DELETED
    seen.add(bucket)
    return keep_status


def classify_snapshots(easy_snap_array, last_hourly, last_daily, last_weekly):
    """Set ``'status'`` on every entry and return the entries newest first.

    * started at or after *last_hourly*: kept (hourly tier);
    * otherwise at or after *last_daily*: the newest snapshot of each calendar
      day is kept, the others are marked for deletion;
    * otherwise at or after *last_weekly*: the newest snapshot of each ISO
      week is kept, the others are marked for deletion;
    * anything older is marked for deletion.

    A snapshot lying exactly on a cutoff belongs to the younger tier, so every
    entry always receives a definite status.
    """
    days_seen = set()
    weeks_seen = set()
    # Newest first, so the first snapshot met in a day/week bucket is the most
    # recent one of that bucket and is the one that survives.
    newest_first = sorted(easy_snap_array,
                          key=lambda entry: entry['time_start'],
                          reverse=True)
    for easy_snap in newest_first:
        started = easy_snap['time_start']
        if started >= last_hourly:
            status = TO_BE_KEPT_FOR_HOURLY
        elif started >= last_daily:
            status = _keep_first(days_seen, started.date(),
                                 TO_BE_KEPT_FOR_DAILY)
        elif started >= last_weekly:
            iso_year_and_week = tuple(started.isocalendar()[:2])
            status = _keep_first(weeks_seen, iso_year_and_week,
                                 TO_BE_KEPT_FOR_WEEKLY)
        else:
            status = TO_BE_DELETED
        easy_snap['status'] = status
    return newest_first


def format_snapshot_line(easy_snap):
    """Return the tab-separated log line describing one classified entry."""
    snap = easy_snap['snap_object']
    fields = [
        ('start_time', easy_snap['time_start']),
        ('hours_delta', easy_snap['hours_delta']),
        ('days_delta', easy_snap['days_delta']),
        ('weeks_delta', easy_snap['weeks_delta']),
        ('volume_id', snap.volume_id),
        ('snapshot_id', snap.id),
        ('status', easy_snap['status']),
    ]
    return ''.join('%s: %s\t' % field for field in fields) + '\n'


def main(argv=None):
    """Classify, log and (in live mode) delete the snapshots of one volume.

    ``argv`` defaults to ``sys.argv`` and must be
    ``[program, volume_id, log_path, 'dry' | 'live']``.  Exits with a non-zero
    status and a usage message on stderr when the argument count is wrong.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        # sys.exit(<str>) prints to stderr and exits with status 1.
        sys.exit(USAGE)

    # Third-party imports are deferred to here so that the pure helpers above
    # can be imported (and unit-tested) without boto/iso8601 installed.
    import boto  # noqa: F401 -- kept from the original script
    import iso8601
    from boto.ec2.connection import EC2Connection

    volume_id = argv[1]
    log_location = argv[2]
    # Anything other than the literal 'live' is a dry run, so a typo can
    # never trigger deletions.
    dry_run_status = LIVE_RUN if argv[3] == LIVE_RUN else DRY_RUN

    now = datetime.datetime.utcnow()
    last_hourly, last_daily, last_weekly = retention_cutoffs(now)

    conn = EC2Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    all_snaps = conn.get_all_snapshots(owner=AWS_ACCOUNT_ID)
    # Keep just the snapshots for the volume of interest.
    snaps = [snap for snap in all_snaps if snap.volume_id == volume_id]

    # The context manager closes the log even if an API call below raises.
    with open(log_location, 'a+') as log:
        if not snaps:
            log.write('\n\nNOW: ' + str(now) +
                      '\tDid not find any snapshots for volume_id: ' +
                      volume_id + '\n')
            return

        easy_snap_array = [
            describe_snapshot(
                snap, to_naive_utc(iso8601.parse_date(snap.start_time)), now)
            for snap in snaps
        ]

        log.write('\n\nNOW: ' + str(now) + '*************' + dry_run_status +
                  '********************\n\n')
        log.write('LAST_HOURLY:\t\t' + str(last_hourly) + '\n')
        log.write('LAST_DAILY:\t\t' + str(last_daily) + '\n')
        log.write('LAST_WEEKLY:\t\t' + str(last_weekly) + '\n')

        for easy_snap in classify_snapshots(
                easy_snap_array, last_hourly, last_daily, last_weekly):
            log.write(format_snapshot_line(easy_snap))

        if dry_run_status == LIVE_RUN:
            # Delete all the snapshots marked for deletion.
            for easy_snap in easy_snap_array:
                if easy_snap['status'] != TO_BE_DELETED:
                    continue
                snapshot_id = easy_snap['snap_object'].id
                result = conn.delete_snapshot(snapshot_id)
                log.write("Deleted snapshot with id: " + snapshot_id +
                          "\tRETURN STATUS: " + str(result) + "\n")


if __name__ == '__main__':
    main()


comments powered by Disqus