git.parisson.com Git - telemeta.git/commitdiff
- add backup core and command line tool
author olivier <>
Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
committer olivier <>
Mon, 21 May 2007 18:02:32 +0000 (18:02 +0000)
- consolidate MediaCore.to_dom() and fix encoding

telemeta/backup/__init__.py
telemeta/backup/core.py [new file with mode: 0644]
telemeta/bin/telemeta-backup [new file with mode: 0755]
telemeta/models.py

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..80878dbf0533429b472b52f61561b65cbf47b3dd 100644 (file)
@@ -0,0 +1 @@
+from telemeta.backup.core import BackupBuilder
diff --git a/telemeta/backup/core.py b/telemeta/backup/core.py
new file mode 100644 (file)
index 0000000..d200d7a
--- /dev/null
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Samalyse SARL
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
+#
+# Author: Olivier Guilyardi <olivier@samalyse.com>
+
+import os
+import libxml2
+from xml.dom import getDOMImplementation, Node
+import shutil
+import md5
+from django.conf import settings
+from telemeta.models import MediaItem
+
+class BackupBuilder(object):
+    """Provide backup-related features
+
+    A collection is backed up as a directory holding a ``collection.xml``
+    file and, when the collection has media files, a copy of each item's
+    media file plus an ``MD5SUM`` manifest.
+    """
+
+    def __get_file_md5(self, path):
+        """Compute the MD5 hash of a file (Python version of md5sum)
+
+        The file is read in 1 MiB chunks so that large media files are
+        never loaded into memory at once. Returns the hexadecimal digest.
+        """
+        source = open(path, "rb")
+        try:
+            digest = md5.new()
+            while True:
+                chunk = source.read(0x100000)
+                if not chunk:
+                    break
+                digest.update(chunk)
+        finally:
+            # Release the handle even when a read fails
+            source.close()
+        return digest.hexdigest()
+
+    def __get_media_filename(self, item):
+        """Return the file name used for an item's media file in the backup
+
+        NOTE(review): the ".wav" extension is hard-coded; presumably all
+        media files are WAV -- confirm against the upload code.
+        """
+        return item.id + ".wav"
+
+    def store_collection(self, collection, dest_dir):
+        """Serialize and store a collection with related items and media 
+        files into a subdirectory of the provided directory
+
+        The subdirectory is named after the collection id and is created 
+        with os.mkdir(), so OSError is raised if it already exists.
+        """
+        coll_dir = dest_dir + "/" + collection.id
+        os.mkdir(coll_dir)
+
+        xml = self.collection_to_xml(collection)
+        xml_file = open(coll_dir + "/collection.xml", "wb")
+        try:
+            xml_file.write(xml.encode("utf-8"))
+        finally:
+            xml_file.close()
+
+        if collection.has_mediafile():
+            md5_file = open(coll_dir + "/MD5SUM", "wb")
+            try:
+                for item in collection.items.all():
+                    if item.file:
+                        dst_basename = self.__get_media_filename(item)
+                        dst = coll_dir + "/" + dst_basename
+                        shutil.copyfile(settings.MEDIA_ROOT + "/" + item.file, dst)
+                        # Hash the copy (not the source) so that the manifest
+                        # describes what was actually written to the backup
+                        digest = self.__get_file_md5(dst)
+                        # Same line format as md5sum: "<hash>  <filename>"
+                        md5_file.write(digest + "  " + dst_basename + "\n")
+            finally:
+                # Close the manifest even when a copy or hash fails
+                md5_file.close()
+
+    def collection_to_xml(self, collection):
+        """Return a string containing the XML representation of a collection 
+        and related items
+
+        The result is a unicode string. The root element is "telemeta"; it
+        contains the collection element, which in turn contains a list 
+        element holding one element per item.
+        """
+        impl = getDOMImplementation()
+        doc = impl.createDocument(None, "telemeta", None)
+        coll_node = collection.to_dom().documentElement
+        doc.documentElement.appendChild(coll_node)
+        items_node_name = MediaItem.get_dom_element_name() + "List"
+        items_node = doc.createElement(items_node_name)
+        coll_node.appendChild(items_node)
+
+        for item in collection.items.all():
+            if item.file:
+                # Make the XML reference the file name used inside the backup.
+                # The item is only modified in memory; it is never saved here.
+                item.file = self.__get_media_filename(item)
+            items_node.appendChild(item.to_dom().documentElement)
+        doc.normalize()
+
+        # libxml2 has prettier output than xml.dom:
+        tree = libxml2.parseDoc(doc.toxml(encoding="utf-8"))
+        try:
+            return unicode(tree.serialize(encoding="utf-8", format=1), "utf-8")
+        finally:
+            # libxml2 documents are not garbage collected by Python and 
+            # must be released explicitly
+            tree.freeDoc()
+
+        
+
diff --git a/telemeta/bin/telemeta-backup b/telemeta/bin/telemeta-backup
new file mode 100755 (executable)
index 0000000..4d226d8
--- /dev/null
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Samalyse SARL
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
+#
+# Author: Olivier Guilyardi <olivier@samalyse.com>
+
+import os
+import sys
+import time
+from django.core.management import setup_environ
+
+def print_usage(toolname):
+    "Print the command line help, using toolname as the program name"
+    print "Usage: %s <project_dir> <backup_dir>" % toolname
+    for help_line in (
+            "  project_dir: the directory of the Django project which hosts Telemeta",
+            "  backup_dir: the destination backup folder (must exist)"):
+        print help_line
+
+def write_readme(dest_dir, coll_num):
+    """Write a README file summarizing the backup into dest_dir
+
+    The file holds the current date (UTC, RFC 2822 style) and the number 
+    of collections, coll_num. An existing README is overwritten.
+    """
+    # gmtime() is used so that the literal "+0000" offset is correct
+    timestr = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
+    readme = open(dest_dir + "/" + "README", "w")
+    try:
+        readme.write("Telemeta Backup\n\n")
+        readme.write("- date: " + timestr + "\n")
+        readme.write("- number of collections: " + str(coll_num) + "\n\n")
+    finally:
+        # Release the handle even when a write fails
+        readme.close()
+
+def backup(dest_dir):
+    """Back up every collection into dest_dir
+
+    A README file is written first. Collections are then processed in id 
+    order and stored in sets of 100, each set in its own subdirectory 
+    named "collections-<first>-<last>". Progress is printed to stdout.
+    """
+    from telemeta.models import MediaCollection
+    from telemeta.backup import BackupBuilder
+
+    builder = BackupBuilder()
+
+    collections = MediaCollection.objects.order_by('id')
+    count = collections.count()
+
+    print "Writing README file..",
+    write_readme(dest_dir, count)
+    print "Done."
+
+    for index, collection in enumerate(collections):
+        # Start a new set directory every 100 collections
+        if index % 100 == 0:
+            set_dir = dest_dir + "/collections-%d-%d" % (index + 1, index + 100)
+            os.mkdir(set_dir)
+        rank = index + 1
+        print "Processing collection %d/%d (%d%%) with id: %s.. " \
+            % (rank, count, rank*100/count, collection.id),
+        # The line above has no trailing newline: flush so that it shows 
+        # up before the (possibly long) copy starts
+        sys.stdout.flush()
+        builder.store_collection(collection, set_dir)
+        print "Done"
+
+def run():
+    """Command line entry point
+
+    Expects exactly two arguments: the Django project directory and the 
+    backup directory. Prints the usage and exits with status 1 otherwise.
+    """
+    args = sys.argv
+    if len(args) != 3:
+        print_usage(os.path.basename(args[0]))
+        sys.exit(1)
+
+    project_dir, backup_dir = args[1], args[2]
+    # The project's settings module is imported from the project directory
+    sys.path.append(project_dir)
+    import settings
+    setup_environ(settings)
+    backup(backup_dir)
+        
+# Only run the backup when executed as a script, not when imported
+if __name__ == '__main__':
+    run()
index 8593862ba99f52c2bdd83d2b5b935d9ea76dfc34..1b3a4fd9d11048515a96397736221da7505b7026 100644 (file)
@@ -24,7 +24,7 @@ media_id_regex = r'[0-9A-Za-z._:%?-]+'
 class MediaModel(Component):
     pass
 
-class MediaCore:
+class MediaCore(object):
     def to_dict(self):  
         "Return model fields as a dict of name/value pairs"
         fields_dict = {}
@@ -32,18 +32,23 @@ class MediaCore:
             fields_dict[field.name] = getattr(self, field.name)
         return fields_dict
 
+    def get_dom_element_name(cls):
+        clsname = cls.__name__
+        return clsname[0].lower() + clsname[1:]
+    get_dom_element_name = classmethod(get_dom_element_name)
+
     def to_dom(self):
         "Return the DOM representation of this media object"
         impl = getDOMImplementation()
-        clsname = self.__class__.__name__
-        root = clsname[0].lower() + clsname[1:]
+        root = self.get_dom_element_name()
         doc = impl.createDocument(None, root, None)
         top = doc.documentElement
-        top.setAttribute("pk", self.id)
+        top.setAttribute("id", self.id)
         fields = self.to_dict()
         for name, value in fields.iteritems():
             element = doc.createElement(name)
-            element.appendChild(doc.createTextNode(str(value)))
+            value = unicode(str(value), "utf-8")
+            element.appendChild(doc.createTextNode(value))
             top.appendChild(element)
         return doc