[Server-devel] [PATCH] ds_backup.py: Implement server availability check

martin.langhoff at gmail.com martin.langhoff at gmail.com
Tue Jun 24 20:18:22 EDT 2008


From: Martin Langhoff <martin at laptop.org>

Add relevant checks for server availability. If the server
gives us a temporary unavailability error (503) we will
retry with an exponential backoff while holding on to
the (local) lock.
---
 client/ds_backup.py |   70 +++++++++++++++++++++++++++++----------------------
 1 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/client/ds_backup.py b/client/ds_backup.py
index dabcf39..60d0be2 100755
--- a/client/ds_backup.py
+++ b/client/ds_backup.py
@@ -19,7 +19,8 @@
 
 import os
 import sha
-import urllib
+import urllib2
+from urllib2 import URLError, HTTPError
 import os.path
 import tempfile
 import time
@@ -38,30 +39,19 @@ class TransferError(BackupError): pass
 class NoPriorBackups(BackupError): pass
 class BulkRestoreUnavailable(BackupError): pass
 
-def find_last_backup(server, xo_serial):
-    try:
-        ret = urllib.urlopen(server + '/last/%s' % xo_serial).read()
-        return ret.split(',', 1)
-    except IOError, e:
-        if e[1] == 404:
-            raise ProtocolVersionError(server)
-        elif e[1] == 403:
-            raise RefusedByServerError(server)
-        elif e[1] == 503:
-            raise ServerTooBusyError(server)
-
-def find_restore_path(server, xo_serial):
+def check_server_available(server, xo_serial):
+
     try:
-        ret = urllib.urlopen(server + '/restore/%s' % xo_serial).read()
-        if ret == '0':
-            raise NoPriorBackups(server)
-        else:
-            return ret
-    except IOError, e:
-        if e[1] == 500:
-            raise BulkRestoreUnavailable(server)
-        elif e[1] == 503:
-            raise ServerTooBusyError(server)
+        ret = urllib2.urlopen(server + '/available/%s' % xo_serial).read()
+        return 200
+    except HTTPError, e:
+        # server is there, but did not fulfill the request
+        #  expect 404, 403, 503 as e.code
+        return e.code
+    except URLError, e:
+        # log it?
+        # print e.reason
+        return -1
 
 def rsync_to_xs(from_path, to_path, keyfile, user):
 
@@ -71,7 +61,7 @@ def rsync_to_xs(from_path, to_path, keyfile, user):
     if not re.compile('/$').search(from_path):
         from_path = from_path + '/'
 
-    ssh = '/usr/bin/ssh -F /dev/null -o "PasswordAuthentication no" -i "%s" -l "%s"' \
+    ssh = '/usr/bin/ssh -F /dev/null -o "PasswordAuthentication no" -o "StrictHostKeyChecking no" -i "%s" -l "%s"' \
         % (keyfile, user)
     rsync = "/usr/bin/rsync -az --partial --delete --timeout=160 -e '%s' '%s' '%s' " % \
             (ssh, from_path, to_path)
@@ -129,8 +119,28 @@ if __name__ == "__main__":
     ds_path = env.get_profile_path('datastore')
     pk_path = os.path.join(env.get_profile_path(), 'owner.key')
 
-    # TODO: Check backup server availability
-    # if ping_xs():
-    rsync_to_xs(ds_path, 'schoolserver:datastore', pk_path, sn)
-    # this marks success to the controlling script...
-    os.system('touch ~/.sugar/default/ds_backup-done')
+    # Check backup server availability.
+    # On 503 ("too busy") apply exponential back-off
+    # over up to 6 attempts. Combined with the staggered sleep
+    # in ds_backup.sh, this should keep thundering herds
+    # under control. We are also holding a flock to prevent
+    # local races.
+    # With range(1,7) the longest single sleep is 64 minutes (126 minutes in total).
+    for n in range(1,7):
+        sstatus = check_server_available(backup_url, sn)
+        if (sstatus == 200):
+            # cleared to run
+            rsync_to_xs(ds_path, 'schoolserver:datastore', pk_path, sn)
+            # this marks success to the controlling script...
+            os.system('touch ~/.sugar/default/ds_backup-done')
+            exit(0)
+        elif (sstatus == 503):
+            # exponential backoff
+            time.sleep(60 * 2**n)
+        elif (sstatus == -1):
+            # could not connect - XS is not there
+            exit(1)
+        else:
+            # 500, 404, 403, or other unexpected value
+            exit(1)
+
-- 
1.5.6.dirty



More information about the Server-devel mailing list