From 7f21fab16bc489eada7e70221e1c55ac6384fcfe Mon Sep 17 00:00:00 2001
From: olivier <>
Date: Mon, 21 May 2007 18:02:32 +0000
Subject: [PATCH] - add backup core and command line tool - consolidate MediaCore.to_dom() and fix encoding

---
Reviewer notes: the blob ids on the "index" lines below are those of the
original submission and were not recomputed for the revised content.

 telemeta/backup/__init__.py  |   1 +
 telemeta/backup/core.py      | 110 +++++++++++++++++++++++++++++++++++
 telemeta/bin/telemeta-backup |  86 +++++++++++++++++++++++++++
 telemeta/models.py           |  16 +++--
 4 files changed, 208 insertions(+), 5 deletions(-)
 create mode 100644 telemeta/backup/core.py
 create mode 100755 telemeta/bin/telemeta-backup

diff --git a/telemeta/backup/__init__.py b/telemeta/backup/__init__.py
index e69de29b..80878dbf 100644
--- a/telemeta/backup/__init__.py
+++ b/telemeta/backup/__init__.py
@@ -0,0 +1 @@
+from telemeta.backup.core import BackupBuilder
diff --git a/telemeta/backup/core.py b/telemeta/backup/core.py
new file mode 100644
index 00000000..d200d7a0
--- /dev/null
+++ b/telemeta/backup/core.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Samalyse SARL
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
+#
+# Author: Olivier Guilyardi
+
+import os
+import libxml2
+from xml.dom import getDOMImplementation, Node
+import shutil
+import md5
+from django.conf import settings
+from telemeta.models import MediaItem
+
+class BackupBuilder(object):
+    """Provide backup-related features"""
+
+    def __get_file_md5(self, path):
+        "Compute the MD5 hash of a file (Python version of md5sum)"
+        digest = md5.new()
+        stream = open(path, "rb")
+        try:
+            # Read in 1 MiB chunks so that large media files are never
+            # loaded in memory at once
+            while True:
+                chunk = stream.read(0x100000)
+                if not chunk:
+                    break
+                digest.update(chunk)
+        finally:
+            # Release the file handle even if reading fails
+            stream.close()
+        return digest.hexdigest()
+
+    def __get_media_filename(self, item):
+        "Return the name under which the media file of an item is backed up"
+        return item.id + ".wav"
+
+    def store_collection(self, collection, dest_dir):
+        """Serialize and store a collection with related items and media
+        files into a subdirectory of the provided directory
+
+        The subdirectory is named after the collection id and must not
+        already exist. It receives a collection.xml file and, if the
+        collection has media files, a copy of each of them along with a
+        MD5SUM index.
+        """
+        coll_dir = dest_dir + "/" + collection.id
+        os.mkdir(coll_dir)
+
+        xml = self.collection_to_xml(collection)
+        xml_file = open(coll_dir + "/collection.xml", "wb")
+        try:
+            xml_file.write(xml.encode("utf-8"))
+        finally:
+            xml_file.close()
+
+        if collection.has_mediafile():
+            md5_file = open(coll_dir + "/MD5SUM", "wb")
+            try:
+                for item in collection.items.all():
+                    if item.file:
+                        dst_basename = self.__get_media_filename(item)
+                        dst = coll_dir + "/" + dst_basename
+                        shutil.copyfile(settings.MEDIA_ROOT + "/" + item.file,
+                                        dst)
+                        # Hash the copy, not the source, so that the index
+                        # describes what was actually written
+                        digest = self.__get_file_md5(dst)
+                        md5_file.write(digest + " " + dst_basename + "\n")
+            finally:
+                # Do not leak the handle if a copy fails half-way
+                md5_file.close()
+
+    def collection_to_xml(self, collection):
+        """Return a string containing the XML representation of a collection
+        and related items
+
+        The returned value is a unicode string. Items which have a media
+        file are listed with their backup filename as file reference.
+        """
+        impl = getDOMImplementation()
+        doc = impl.createDocument(None, "telemeta", None)
+        coll_node = collection.to_dom().documentElement
+        doc.documentElement.appendChild(coll_node)
+        items_node_name = MediaItem.get_dom_element_name() + "List"
+        items_node = doc.createElement(items_node_name)
+        coll_node.appendChild(items_node)
+
+        items = collection.items.all()
+        for item in items:
+            if item.file:
+                # Only the in-memory instance is altered, it is never saved
+                item.file = self.__get_media_filename(item)
+            items_node.appendChild(item.to_dom().documentElement)
+        doc.normalize()
+
+        # libxml2 has prettier output than xml.dom:
+        tree = libxml2.parseDoc(doc.toxml(encoding="utf-8"))
+        try:
+            return unicode(tree.serialize(encoding="utf-8", format=1), "utf-8")
+        finally:
+            # libxml2 documents are not garbage collected, they must be
+            # freed explicitly
+            tree.freeDoc()
diff --git a/telemeta/bin/telemeta-backup b/telemeta/bin/telemeta-backup
new file mode 100755
index 00000000..4d226d8a
--- /dev/null
+++ b/telemeta/bin/telemeta-backup
@@ -0,0 +1,86 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Samalyse SARL
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
+#
+# Author: Olivier Guilyardi
+
+import os
+import sys
+import time
+from django.core.management import setup_environ
+
+def print_usage(toolname):
+    "Print the command line help of the tool"
+    print "Usage: " + toolname + " <project_dir> <backup_dir>"
+    print " project_dir: the directory of the Django project which hosts Telemeta"
+    print " backup_dir: the destination backup folder (must exist)"
+
+def write_readme(dest_dir, coll_num):
+    "Write a README file with the backup date and number of collections"
+    timestr = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
+    readme = open(dest_dir + "/" + "README", "w")
+    try:
+        readme.write("Telemeta Backup\n\n")
+        readme.write("- date: " + timestr + "\n")
+        readme.write("- number of collections: " + str(coll_num) + "\n\n")
+    finally:
+        readme.close()
+
+def backup(dest_dir):
+    """Backup all collections into dest_dir, which must exist
+
+    Collections are processed by ascending id and stored in sets of 100,
+    each set in its own collections-N-M subdirectory.
+    """
+    # Deferred imports: run() sets up the Django environment first
+    from telemeta.models import MediaCollection
+    from telemeta.backup import BackupBuilder
+
+    builder = BackupBuilder()
+
+    collections = MediaCollection.objects.order_by('id')
+    count = collections.count()
+
+    print "Writing README file..",
+    sys.stdout.flush()
+    write_readme(dest_dir, count)
+    print "Done."
+
+    i = 0
+    for collection in collections:
+        if i % 100 == 0:
+            set_dir = dest_dir + ("/collections-%d-%d" % (i+1, i+100))
+            os.mkdir(set_dir)
+        i += 1
+        print "Processing collection %d/%d (%d%%) with id: %s.. " \
+            % (i, count, i*100/count, collection.id),
+        sys.stdout.flush()
+        builder.store_collection(collection, set_dir)
+        print "Done"
+
+def run():
+    "Parse the command line, set up the Django environment and run the backup"
+    if len(sys.argv) != 3:
+        print_usage(os.path.basename(sys.argv[0]))
+        sys.exit(1)
+
+    project_dir = sys.argv[1]
+    backup_dir = sys.argv[2]
+    if not os.path.isdir(backup_dir):
+        # Fail early with a clear message rather than with a traceback
+        sys.stderr.write("Error: no such directory: " + backup_dir + "\n")
+        sys.exit(1)
+
+    sys.path.append(project_dir)
+    import settings
+    setup_environ(settings)
+    backup(backup_dir)
+
+if __name__ == '__main__':
+    run()
diff --git a/telemeta/models.py b/telemeta/models.py
index 8593862b..1b3a4fd9 100644
--- a/telemeta/models.py
+++ b/telemeta/models.py
@@ -24,7 +24,7 @@ media_id_regex = r'[0-9A-Za-z._:%?-]+'
 class MediaModel(Component):
     pass
 
-class MediaCore:
+class MediaCore(object):
     def to_dict(self):
         "Return model fields as a dict of name/value pairs"
         fields_dict = {}
@@ -32,18 +32,24 @@ class MediaCore:
             fields_dict[field.name] = getattr(self, field.name)
         return fields_dict
 
+    def get_dom_element_name(cls):
+        clsname = cls.__name__
+        return clsname[0].lower() + clsname[1:]
+    get_dom_element_name = classmethod(get_dom_element_name)
+
     def to_dom(self):
         "Return the DOM representation of this media object"
         impl = getDOMImplementation()
-        clsname = self.__class__.__name__
-        root = clsname[0].lower() + clsname[1:]
+        root = self.get_dom_element_name()
         doc = impl.createDocument(None, root, None)
         top = doc.documentElement
-        top.setAttribute("pk", self.id)
+        top.setAttribute("id", self.id)
         fields = self.to_dict()
         for name, value in fields.iteritems():
             element = doc.createElement(name)
-            element.appendChild(doc.createTextNode(str(value)))
+            if not isinstance(value, unicode):
+                value = unicode(str(value), "utf-8")
+            element.appendChild(doc.createTextNode(value))
             top.appendChild(element)
         return doc
 
-- 
2.39.5