git.parisson.com Git - telemeta-data.git/commitdiff
migration: consolidation, print grouped stats
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Wed, 4 Mar 2009 19:15:26 +0000 (19:15 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Wed, 4 Mar 2009 19:15:26 +0000 (19:15 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@70 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/migrate.py
trunk/import/migration/tasks/__init__.py [new file with mode: 0644]
trunk/import/migration/tasks/core.py
trunk/import/migration/tasks/enums.py
trunk/import/migration/tasks/ethnic.py
trunk/import/migration/tasks/geoethno.py
trunk/import/migration/tasks/publishers.py
trunk/import/migration/tasks/reset.py

index b48a66e81717e5dcdf79fc3881f7b55e4132671c..f16569f6d9a1f0f2797ccb63c071952310533036 100644 (file)
@@ -3,6 +3,7 @@ from telemeta.core import ComponentManager
 import sys
 import ConfigParser
 import MySQLdb
+import time
 
 from tasks.api import IDataMigrator, IDataInitializer
 import tasks
@@ -12,7 +13,7 @@ class Migrator(Component):
     migrators     = ExtensionPoint(IDataMigrator)
 
     def run_tasks(self, tasks, type, only_task = None):
-        ntasks = 0
+        done = []
         task = None
         for t in tasks:
             if only_task:
@@ -22,37 +23,53 @@ class Migrator(Component):
                 task = t
 
             if task:                
-                print "Runinng %s %s\n" % (task.get_name(), type)
-                task.setup(cfg, self.src_db, self.target_db)
+                sys.stdout.write("Runinng %s %s" % (task.get_name(), type))
+                start = time.time()
+                task.setup(self.cfg, self.src_db, self.target_db)
                 task.process()
-                ntasks += 1
+                sys.stdout.write("\t(%.2fs)\n" % (time.time() - start))
+                done.append(task)
                 if only_task:
                     break
-        return ntasks                    
+
+        return done
         
 
-    def run(self, config, only_task = None):
-        self.cfg = config
-        self.src_db = MySQLdb.connect(user=cfg.get('src', 'user'), 
-                   host=cfg.get('src', 'host'), 
-                   db=cfg.get('src', 'name'), 
-                   passwd=cfg.get('src', 'pass'),
-                   charset='utf8')
+    def setup(self, inifile):
+        self.cfg = ConfigParser.ConfigParser()
+        self.cfg.read(inifile)
 
-        self.target_db = MySQLdb.connect(user=cfg.get('target', 'user'), 
-                   host=cfg.get('target', 'host'), 
-                   db=cfg.get('target', 'name'), 
-                   passwd=cfg.get('target', 'pass'),
+        self.src_db = MySQLdb.connect(user=self.cfg.get('src', 'user'), 
+                   host=self.cfg.get('src', 'host'), 
+                   db=self.cfg.get('src', 'name'), 
+                   passwd=self.cfg.get('src', 'pass'),
                    charset='utf8')
 
+        self.target_db = MySQLdb.connect(user=self.cfg.get('target', 'user'), 
+                   host=self.cfg.get('target', 'host'), 
+                   db=self.cfg.get('target', 'name'), 
+                   passwd=self.cfg.get('target', 'pass'),
+                   charset='utf8')
 
-        ntasks  = self.run_tasks(self.initializers, "initializer", only_task = only_task)
-        ntasks += self.run_tasks(self.migrators, "migrator", only_task = only_task)
+    def run(self, only_task = None):
+        self.done = []
+        self.done.extend(self.run_tasks(self.initializers, "initializer", only_task = only_task))
+        self.done.extend(self.run_tasks(self.migrators, "migrator", only_task = only_task))
 
-        if only_task and not ntasks:
+        if only_task and not len(self.done):
             raise "No such task: %s" % only_task
-                    
-        
+
+    def print_stats(self):
+        init = False
+        for task in self.done:
+            if len(task.stats):
+                if not init:
+                    print 'Statistics:'
+                    init = True
+                print '%s:' % task.get_name()
+                for s in task.stats:
+                    print '\t%s:\t%s' % (s, task.stats[s])
+            
     
 if __name__ == '__main__':
     if len(sys.argv) != 2 and len(sys.argv) != 3:
@@ -64,11 +81,9 @@ if __name__ == '__main__':
     else:
         only_task = None
 
-    cfg = ConfigParser.ConfigParser()
-    cfg.read(sys.argv[1])
-
     cmpmgr = ComponentManager()
     migrator = Migrator(cmpmgr)
-    migrator.run(cfg, only_task)
-
+    migrator.setup(sys.argv[1])
+    migrator.run(only_task)
+    migrator.print_stats()
         
diff --git a/trunk/import/migration/tasks/__init__.py b/trunk/import/migration/tasks/__init__.py
new file mode 100644 (file)
index 0000000..af6b6d9
--- /dev/null
@@ -0,0 +1,5 @@
+import reset
+import enums
+import geoethno
+import ethnic
+import publishers
index 3ff5e51dc517a52ca3e1313461ac7628344e28bb..481ab20635f0fec844238e47c7e7db7de779aa38 100644 (file)
@@ -1,3 +1,20 @@
+import sys
+from telemeta.core import *
+
+class DataMigrationTask(Component):
+    
+    def setup(self, cfg, src_db, target_db):
+        self.target_db = target_db
+        self.target_cursor = target_db.cursor()
+        self.src_db = src_db
+        self.src_cursor = src_db.cursor()
+        self.src_db_name = cfg.get('src', 'name')
+        self.target_db_name = cfg.get('target', 'name')
+        self.stats = {}
+
+    def step(self):
+        sys.stdout.write('.')
+        sys.stdout.flush()
 
 class GroupedItemsManager(object):
 
index 87c94cba5bd9de7527008f35578fea30b5db5084..6cf32b7ccaf3f4364de097e13386f80fd7e32863 100644 (file)
@@ -2,8 +2,9 @@
 
 from telemeta.core import *
 from api import IDataMigrator
+from core import DataMigrationTask
 
-class SimpleEnumMigrator(Component):
+class SimpleEnumMigrator(DataMigrationTask):
 
     implements(IDataMigrator)
 
@@ -20,13 +21,8 @@ class SimpleEnumMigrator(Component):
         'FormStyl generi':'generic_styles'
     }
 
-    def setup(self, cfg, src_db, target_db):
-        self.target_cursor = target_db.cursor()
-        self.src_db_name = cfg.get('src', 'name')
-
     def get_name(self):
         return "enumerations"
-
     
     def process(self):
         for src in self.map:
index 349de6b5b7b167b41cd7bc61d201bab90eecf379..13f5b7159020d45f1b8d849795dfd6bde86cbebd 100644 (file)
@@ -2,16 +2,14 @@
 
 from telemeta.core import *
 from api import IDataMigrator
-from core import GroupedItemsManager
+from core import GroupedItemsManager, DataMigrationTask
 
-class EthnicGroupsMigrator(Component):
+class EthnicGroupsMigrator(DataMigrationTask):
 
     implements(IDataMigrator)
 
     def setup(self, cfg, src_db, target_db):
-        self.target_db = target_db
-        self.target_cursor = target_db.cursor()
-        self.src_cursor = src_db.cursor()
+        DataMigrationTask.setup(self, cfg, src_db, target_db)
         self.data = GroupedItemsManager()
 
     def get_name(self):
@@ -52,7 +50,10 @@ class EthnicGroupsMigrator(Component):
     def process(self):
         self.extract()
         self.insert()
-        print "Ethnic groups/aliases: %d/%d\n" % (len(self.data.groups), self.data.count_items())
+        self.stats = {
+            'groups':   len(self.data.groups),
+            'aliases':  self.data.count_items()
+        }
                     
         
 
index bc51b0eee6eea0d19f55f28fa891651b77c7dc69..3b2326d953a8458918b67d99cca14e622071abb6 100644 (file)
@@ -2,8 +2,9 @@ from telemeta.core import *
 import sys
 import xml.dom.minidom as dom
 from api import IDataMigrator
+from core import DataMigrationTask
 
-class GeoEthnoImporter(Component):
+class GeoEthnoImporter(DataMigrationTask):
 
     implements(IDataMigrator)
 
@@ -13,8 +14,8 @@ class GeoEthnoImporter(Component):
     nhistoric_names = 0
 
     def setup(self, cfg, src_db, target_db):
-        self.db = target_db
-        self.cursor = self.db.cursor()
+        DataMigrationTask.setup(self, cfg, src_db, target_db)
+        self.cursor = self.target_cursor
         self.dom = dom.parse(cfg.get('geoethno', 'xml_file'))
         self.known_types = []
 
@@ -67,9 +68,8 @@ class GeoEthnoImporter(Component):
             self.nhistoric_names += 1
                     
 
-        if self.nlocations % 1000 == 0:
-            sys.stdout.write('.')
-            sys.stdout.flush()
+#        if self.nlocations % 1000 == 0:
+#            self.step()
         
     def add_aliases(self, name, items):
         for alias in items:
@@ -126,18 +126,20 @@ class GeoEthnoImporter(Component):
                 self.add_aliases(name, self.flatten_node_list(aliasNodes))
                 self.process_children(n, name)
 
-        self.path.pop()                
+        self.path.pop()       
+        if len(self.path) == 1:
+            self.step()
 
     def process(self):
         self.path = []
         self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '')
-        sys.stdout.write('\nGeoethno import result:\n')
-        sys.stdout.write('  types: %d\n' % len(self.known_types))
-        sys.stdout.write('  locations: %d\n' % self.nlocations)
-        sys.stdout.write('  relations: %d\n' % self.nrelations)
-        sys.stdout.write('  aliases: %d\n' % self.naliases)
-        sys.stdout.write('  historical names: %d\n' % self.nhistoric_names)
-
+        self.stats = {
+            'types':            len(self.known_types),
+            'locations':        self.nlocations,
+            'relations':        self.nrelations,
+            'aliases':          self.naliases,
+            'historical names': self.nhistoric_names
+        }
 
     def warn(self, msg):
         print u"\nWarning: %s: %s\n" % ("/".join(self.path), msg)
index f4caaa5fd0e6e38496e594f29d25de86a91d1c29..fab36f1cea73a7d79efadd6951e486cdc5311f36 100644 (file)
@@ -2,17 +2,15 @@
 
 from telemeta.core import *
 from api import IDataMigrator
-from core import GroupedItemsManager
+from core import GroupedItemsManager, DataMigrationTask
 
-class PublishersMigrator(Component):
+class PublishersMigrator(DataMigrationTask):
     groups = {}
 
     implements(IDataMigrator)
 
     def setup(self, cfg, src_db, target_db):
-        self.target_db = target_db
-        self.target_cursor = target_db.cursor()
-        self.src_cursor = src_db.cursor()
+        DataMigrationTask.setup(self, cfg, src_db, target_db)
         self.data = GroupedItemsManager()
 
     def get_name(self):
@@ -39,5 +37,8 @@ class PublishersMigrator(Component):
     def process(self):
         self.extract()
         self.insert()
-        print "Publishers/Collections: %d/%d\n" % (len(self.data.groups), self.data.count_items())
+        self.stats = {
+            'publishers': len(self.data.groups),
+            'collections': self.data.count_items()
+        }
                     
index c2fcce618f4744858584233515ba8a68dd7d1ec0..c2a4a674869d7a357d268653d9c53d497ce291b4 100644 (file)
@@ -2,8 +2,9 @@
 
 from telemeta.core import *
 from api import IDataInitializer
+from core import DataMigrationTask
 
-class DatabaseResetMigrator(Component):
+class DatabaseResetMigrator(DataMigrationTask):
 
     implements(IDataInitializer)
 
@@ -42,9 +43,6 @@ class DatabaseResetMigrator(Component):
         'revisions'
     ]
 
-    def setup(self, cfg, src_db, target_db):
-        self.target_cursor = target_db.cursor()
-
     def get_name(self):
         return "reset"