- add backup core and command line tool

author olivier <>

Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)

committer olivier <>

Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
author olivier <>
Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
committer olivier <>
Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
diff --git a/telemeta/backup/__init__.py b/telemeta/backup/__init__.py

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..80878dbf0533429b472b52f61561b65cbf47b3dd 100644 (file)
--- a/telemeta/backup/__init__.py
+++ b/telemeta/backup/__init__.py
@@ -0,0 +1 @@
+from telemeta.backup.core import BackupBuilder
diff --git a/telemeta/backup/core.py b/telemeta/backup/core.py

new file mode 100644 (file)

index 0000000..d200d7a
--- /dev/null
+++ b/telemeta/backup/core.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Samalyse SARL
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
+#
+# Author: Olivier Guilyardi <olivier@samalyse.com>
+
+import os
+import libxml2
+from xml.dom import getDOMImplementation, Node
+import shutil
+import md5
+from django.conf import settings
+from telemeta.models import MediaItem
+
+class BackupBuilder(object):
+    """Provide backup-related features"""
+
+    def __get_file_md5(self, path):
+        "Compute the MD5 hash of a file (Python version of md5sum)"
+        file = open(path, "rb")
+        hash = md5.new()
+        while True:
+            buffer = file.read(0x100000)
+            if len(buffer) == 0:
+                break
+            hash.update(buffer)
+
+        file.close()            
+        return hash.hexdigest()
+
+    def __get_media_filename(self, item):
+        return item.id + ".wav"
+
+    def store_collection(self, collection, dest_dir):
+        """Serialize and store a collection with related items and media 
+        files into a subdirectory of the provided directory
+        """
+        coll_dir = dest_dir + "/" + collection.id
+        os.mkdir(coll_dir)
+
+        xml = self.collection_to_xml(collection)
+        file = open(coll_dir + "/collection.xml", "wb")
+        file.write(xml.encode("utf-8"))
+        file.close()
+
+        if collection.has_mediafile():
+            md5_file = open(coll_dir + "/MD5SUM", "wb")
+
+            items = collection.items.all()
+            for item in items:
+                if item.file:
+                    dst_basename = self.__get_media_filename(item)
+                    dst = coll_dir + "/" + dst_basename
+                    shutil.copyfile(settings.MEDIA_ROOT + "/" + item.file, dst)
+                    hash = self.__get_file_md5(dst)
+                    md5_file.write(hash + "  " + dst_basename + "\n")
+
+            md5_file.close()
+
+    def collection_to_xml(self, collection):
+        """Return a string containing the XML representation of a collection 
+        and related items
+        """
+        impl = getDOMImplementation()
+        doc = impl.createDocument(None, "telemeta", None)
+        coll_node = collection.to_dom().documentElement
+        doc.documentElement.appendChild(coll_node)
+        items_node_name = MediaItem.get_dom_element_name() + "List"
+        items_node = doc.createElement(items_node_name)
+        coll_node.appendChild(items_node)
+
+        items = collection.items.all()
+        for item in items:
+            if item.file:
+                item.file = self.__get_media_filename(item)
+            items_node.appendChild(item.to_dom().documentElement)
+        doc.normalize()
+
+        # libxml2 has prettier output than xml.dom:
+        tree = libxml2.parseDoc(doc.toxml(encoding="utf-8"))
+        return unicode(tree.serialize(encoding="utf-8", format=1), "utf-8")
+
+        
+
diff --git a/telemeta/bin/telemeta-backup b/telemeta/bin/telemeta-backup

new file mode 100755 (executable)

index 0000000..4d226d8
--- /dev/null
+++ b/telemeta/bin/telemeta-backup
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Samalyse SARL
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
+#
+# Author: Olivier Guilyardi <olivier@samalyse.com>
+
+import os
+import sys
+import time
+from django.core.management import setup_environ
+
+def print_usage(toolname):
+    print "Usage: " + toolname + " <project_dir> <backup_dir>"
+    print "  project_dir: the directory of the Django project which hosts Telemeta"
+    print "  backup_dir: the destination backup folder (must exist)"
+
+def write_readme(dest_dir, coll_num):
+    readme = open(dest_dir + "/" + "README", "w")
+    timestr = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
+    readme.write("Telemeta Backup\n\n")
+    readme.write("- date: " + timestr + "\n")
+    readme.write("- number of collections: " + str(coll_num) + "\n\n")
+    readme.close()
+
+def backup(dest_dir):
+    from telemeta.models import MediaCollection
+    from telemeta.backup import BackupBuilder
+
+    builder = BackupBuilder()
+
+    collections = MediaCollection.objects.order_by('id')
+    count = collections.count()
+
+    print "Writing README file..",
+    write_readme(dest_dir, count)
+    print "Done."
+
+    i = 0
+    for collection in collections:
+        if i % 100 == 0:
+            set_dir = dest_dir + ("/collections-%d-%d" % (i+1, i+100))
+            os.mkdir(set_dir)
+        i += 1
+        print "Processing collection %d/%d (%d%%) with id: %s.. " \
+            % (i, count, i*100/count, collection.id),
+        sys.stdout.flush()
+        builder.store_collection(collection, set_dir)
+        print "Done"
+
+def run():
+    if len(sys.argv) != 3:
+        print_usage(os.path.basename(sys.argv[0]))
+        sys.exit(1)
+    else:
+        project_dir = sys.argv[1]
+        backup_dir = sys.argv[2]
+        sys.path.append(project_dir)
+        import settings
+        setup_environ(settings)
+        backup(backup_dir)
+        
+if __name__ == '__main__':
+    run()
diff --git a/telemeta/models.py b/telemeta/models.py

index 8593862ba99f52c2bdd83d2b5b935d9ea76dfc34..1b3a4fd9d11048515a96397736221da7505b7026 100644 (file)
--- a/telemeta/models.py
+++ b/telemeta/models.py
@@ -24,7 +24,7 @@ media_id_regex = r'[0-9A-Za-z._:%?-]+'
  class MediaModel(Component):
      pass
  
-class MediaCore:
+class MediaCore(object):
      def to_dict(self):  
          "Return model fields as a dict of name/value pairs"
          fields_dict = {}
@@ -32,18 +32,23 @@ class MediaCore:
              fields_dict[field.name] = getattr(self, field.name)
          return fields_dict
  
+    def get_dom_element_name(cls):
+        clsname = cls.__name__
+        return clsname[0].lower() + clsname[1:]
+    get_dom_element_name = classmethod(get_dom_element_name)
+
      def to_dom(self):
          "Return the DOM representation of this media object"
          impl = getDOMImplementation()
-        clsname = self.__class__.__name__
-        root = clsname[0].lower() + clsname[1:]
+        root = self.get_dom_element_name()
          doc = impl.createDocument(None, root, None)
          top = doc.documentElement
-        top.setAttribute("pk", self.id)
+        top.setAttribute("id", self.id)
          fields = self.to_dict()
          for name, value in fields.iteritems():
              element = doc.createElement(name)
-            element.appendChild(doc.createTextNode(str(value)))
+            value = unicode(str(value), "utf-8")
+            element.appendChild(doc.createTextNode(value))
              top.appendChild(element)
          return doc
author	olivier <>
	Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
committer	olivier <>
	Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
telemeta/backup/__init__.py		patch \| blob \| history
telemeta/backup/core.py	[new file with mode: 0644]	patch \| blob
telemeta/bin/telemeta-backup	[new file with mode: 0755]	patch \| blob
telemeta/models.py		patch \| blob \| history