import os
 import libxml2
-from xml.dom import getDOMImplementation, Node
+from xml.dom.minidom import getDOMImplementation, Node
 import shutil
 import md5
 from django.conf import settings
 from telemeta.models import MediaItem
 
-class BackupBuilder(object):
+class CollectionSerializer(object):
     """Provide backup-related features"""
 
+    def __init__(self, collection):
+        self.collection = collection
+
     def __get_file_md5(self, path):
         "Compute the MD5 hash of a file (Python version of md5sum)"
         file = open(path, "rb")
         hash = md5.new()
         while True:
-            buffer = file.read(0x100000)
+            buffer = file.read(0x10000)
             if len(buffer) == 0:
                 break
             hash.update(buffer)
-
         file.close()            
         return hash.hexdigest()
 
     def __get_media_filename(self, item):
         return item.id + ".wav"
 
-    def store_collection(self, collection, dest_dir):
-        """Serialize and store a collection with related items and media 
+    def store(self, dest_dir):
+        """Serialize and store the collection with related items and media 
         files into a subdirectory of the provided directory
         """
-        coll_dir = dest_dir + "/" + collection.id
+        coll_dir = dest_dir + "/" + self.collection.id
         os.mkdir(coll_dir)
 
-        xml = self.collection_to_xml(collection)
+        xml = self.get_xml()
         file = open(coll_dir + "/collection.xml", "wb")
         file.write(xml.encode("utf-8"))
         file.close()
 
-        if collection.has_mediafile():
+        if self.collection.has_mediafile():
             md5_file = open(coll_dir + "/MD5SUM", "wb")
 
-            items = collection.items.all()
+            items = self.collection.items.all()
             for item in items:
                 if item.file:
                     dst_basename = self.__get_media_filename(item)
 
             md5_file.close()
 
-    def collection_to_xml(self, collection):
-        """Return a string containing the XML representation of a collection 
+    def get_xml(self):
+        """Return a string containing the XML representation of the collection 
         and related items
         """
         impl = getDOMImplementation()
         doc = impl.createDocument(None, "telemeta", None)
-        coll_node = collection.to_dom().documentElement
-        doc.documentElement.appendChild(coll_node)
+        coll_doc = self.collection.to_dom()
+        coll_node = doc.documentElement.appendChild(coll_doc.documentElement)
+        coll_doc.unlink()
         items_node_name = MediaItem.get_dom_element_name() + "List"
         items_node = doc.createElement(items_node_name)
         coll_node.appendChild(items_node)
 
-        items = collection.items.all()
+        items = self.collection.items.all()
         for item in items:
             if item.file:
                 item.file = self.__get_media_filename(item)
-            items_node.appendChild(item.to_dom().documentElement)
+            item_doc = item.to_dom()
+            items_node.appendChild(item_doc.documentElement)
+            item_doc.unlink()
         doc.normalize()
 
         # libxml2 has prettier output than xml.dom:
         tree = libxml2.parseDoc(doc.toxml(encoding="utf-8"))
-        return unicode(tree.serialize(encoding="utf-8", format=1), "utf-8")
+        doc.unlink()
+        xml = unicode(tree.serialize(encoding="utf-8", format=1), "utf-8")
+        tree.free()
 
+        return xml
         
 
 
 
 def backup(dest_dir):
     from telemeta.models import MediaCollection
-    from telemeta.backup import BackupBuilder
-
-    builder = BackupBuilder()
+    from telemeta.backup import CollectionSerializer
 
     collections = MediaCollection.objects.order_by('id')
     count = collections.count()
         print "Processing collection %d/%d (%d%%) with id: %s.. " \
             % (i, count, i*100/count, collection.id),
         sys.stdout.flush()
-        builder.store_collection(collection, set_dir)
+        serializer = CollectionSerializer(collection)
+        serializer.store(set_dir)
         print "Done"
 
 def run():