From 6584a82168064b705e2676082c3c725da78e61c5 Mon Sep 17 00:00:00 2001 From: olivier Date: Wed, 4 Mar 2009 19:15:26 +0000 Subject: [PATCH] migration: consolidation, print grouped stats git-svn-id: http://svn.parisson.org/svn/crem@70 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/migrate.py | 67 +++++++++++++--------- trunk/import/migration/tasks/__init__.py | 5 ++ trunk/import/migration/tasks/core.py | 17 ++++++ trunk/import/migration/tasks/enums.py | 8 +-- trunk/import/migration/tasks/ethnic.py | 13 +++-- trunk/import/migration/tasks/geoethno.py | 30 +++++----- trunk/import/migration/tasks/publishers.py | 13 +++-- trunk/import/migration/tasks/reset.py | 6 +- 8 files changed, 97 insertions(+), 62 deletions(-) create mode 100644 trunk/import/migration/tasks/__init__.py diff --git a/trunk/import/migration/migrate.py b/trunk/import/migration/migrate.py index b48a66e..f16569f 100644 --- a/trunk/import/migration/migrate.py +++ b/trunk/import/migration/migrate.py @@ -3,6 +3,7 @@ from telemeta.core import ComponentManager import sys import ConfigParser import MySQLdb +import time from tasks.api import IDataMigrator, IDataInitializer import tasks @@ -12,7 +13,7 @@ class Migrator(Component): migrators = ExtensionPoint(IDataMigrator) def run_tasks(self, tasks, type, only_task = None): - ntasks = 0 + done = [] task = None for t in tasks: if only_task: @@ -22,37 +23,53 @@ class Migrator(Component): task = t if task: - print "Runinng %s %s\n" % (task.get_name(), type) - task.setup(cfg, self.src_db, self.target_db) + sys.stdout.write("Runinng %s %s" % (task.get_name(), type)) + start = time.time() + task.setup(self.cfg, self.src_db, self.target_db) task.process() - ntasks += 1 + sys.stdout.write("\t(%.2fs)\n" % (time.time() - start)) + done.append(task) if only_task: break - return ntasks + + return done - def run(self, config, only_task = None): - self.cfg = config - self.src_db = MySQLdb.connect(user=cfg.get('src', 'user'), - host=cfg.get('src', 'host'), - db=cfg.get('src', 'name'), - passwd=cfg.get('src', 'pass'), - charset='utf8') + def setup(self, inifile): + self.cfg = ConfigParser.ConfigParser() + self.cfg.read(inifile) - self.target_db = MySQLdb.connect(user=cfg.get('target', 'user'), - host=cfg.get('target', 'host'), - db=cfg.get('target', 'name'), - passwd=cfg.get('target', 'pass'), + self.src_db = MySQLdb.connect(user=self.cfg.get('src', 'user'), + host=self.cfg.get('src', 'host'), + db=self.cfg.get('src', 'name'), + passwd=self.cfg.get('src', 'pass'), charset='utf8') + self.target_db = MySQLdb.connect(user=self.cfg.get('target', 'user'), + host=self.cfg.get('target', 'host'), + db=self.cfg.get('target', 'name'), + passwd=self.cfg.get('target', 'pass'), + charset='utf8') - ntasks = self.run_tasks(self.initializers, "initializer", only_task = only_task) - ntasks += self.run_tasks(self.migrators, "migrator", only_task = only_task) + def run(self, only_task = None): + self.done = [] + self.done.extend(self.run_tasks(self.initializers, "initializer", only_task = only_task)) + self.done.extend(self.run_tasks(self.migrators, "migrator", only_task = only_task)) - if only_task and not ntasks: + if only_task and not len(self.done): raise "No such task: %s" % only_task - - + + def print_stats(self): + init = False + for task in self.done: + if len(task.stats): + if not init: + print 'Statistics:' + init = True + print '%s:' % task.get_name() + for s in task.stats: + print '\t%s:\t%s' % (s, task.stats[s]) + if __name__ == '__main__': if len(sys.argv) != 2 and len(sys.argv) != 3: @@ -64,11 +81,9 @@ if __name__ == '__main__': else: only_task = None - cfg = ConfigParser.ConfigParser() - cfg.read(sys.argv[1]) - cmpmgr = ComponentManager() migrator = Migrator(cmpmgr) - migrator.run(cfg, only_task) - + migrator.setup(sys.argv[1]) + migrator.run(only_task) + migrator.print_stats() diff --git a/trunk/import/migration/tasks/__init__.py b/trunk/import/migration/tasks/__init__.py new file mode 100644 index 0000000..af6b6d9 --- /dev/null +++ b/trunk/import/migration/tasks/__init__.py @@ -0,0 +1,5 @@ +import reset +import enums +import geoethno +import ethnic +import publishers diff --git a/trunk/import/migration/tasks/core.py b/trunk/import/migration/tasks/core.py index 3ff5e51..481ab20 100644 --- a/trunk/import/migration/tasks/core.py +++ b/trunk/import/migration/tasks/core.py @@ -1,3 +1,20 @@ +import sys +from telemeta.core import * + +class DataMigrationTask(Component): + + def setup(self, cfg, src_db, target_db): + self.target_db = target_db + self.target_cursor = target_db.cursor() + self.src_db = src_db + self.src_cursor = src_db.cursor() + self.src_db_name = cfg.get('src', 'name') + self.target_db_name = cfg.get('target', 'name') + self.stats = {} + + def step(self): + sys.stdout.write('.') + sys.stdout.flush() class GroupedItemsManager(object): diff --git a/trunk/import/migration/tasks/enums.py b/trunk/import/migration/tasks/enums.py index 87c94cb..6cf32b7 100644 --- a/trunk/import/migration/tasks/enums.py +++ b/trunk/import/migration/tasks/enums.py @@ -2,8 +2,9 @@ from telemeta.core import * from api import IDataMigrator +from core import DataMigrationTask -class SimpleEnumMigrator(Component): +class SimpleEnumMigrator(DataMigrationTask): implements(IDataMigrator) @@ -20,13 +21,8 @@ class SimpleEnumMigrator(Component): 'FormStyl generi':'generic_styles' } - def setup(self, cfg, src_db, target_db): - self.target_cursor = target_db.cursor() - self.src_db_name = cfg.get('src', 'name') - def get_name(self): return "enumerations" - def process(self): for src in self.map: diff --git a/trunk/import/migration/tasks/ethnic.py b/trunk/import/migration/tasks/ethnic.py index 349de6b..13f5b71 100644 --- a/trunk/import/migration/tasks/ethnic.py +++ b/trunk/import/migration/tasks/ethnic.py @@ -2,16 +2,14 @@ from telemeta.core import * from api import IDataMigrator -from core import GroupedItemsManager +from core import GroupedItemsManager, DataMigrationTask -class EthnicGroupsMigrator(Component): +class EthnicGroupsMigrator(DataMigrationTask): implements(IDataMigrator) def setup(self, cfg, src_db, target_db): - self.target_db = target_db - self.target_cursor = target_db.cursor() - self.src_cursor = src_db.cursor() + DataMigrationTask.setup(self, cfg, src_db, target_db) self.data = GroupedItemsManager() def get_name(self): @@ -52,7 +50,10 @@ class EthnicGroupsMigrator(Component): def process(self): self.extract() self.insert() - print "Ethnic groups/aliases: %d/%d\n" % (len(self.data.groups), self.data.count_items()) + self.stats = { + 'groups': len(self.data.groups), + 'aliases': self.data.count_items() + } diff --git a/trunk/import/migration/tasks/geoethno.py b/trunk/import/migration/tasks/geoethno.py index bc51b0e..3b2326d 100644 --- a/trunk/import/migration/tasks/geoethno.py +++ b/trunk/import/migration/tasks/geoethno.py @@ -2,8 +2,9 @@ from telemeta.core import * import sys import xml.dom.minidom as dom from api import IDataMigrator +from core import DataMigrationTask -class GeoEthnoImporter(Component): +class GeoEthnoImporter(DataMigrationTask): implements(IDataMigrator) @@ -13,8 +14,8 @@ class GeoEthnoImporter(Component): nhistoric_names = 0 def setup(self, cfg, src_db, target_db): - self.db = target_db - self.cursor = self.db.cursor() + DataMigrationTask.setup(self, cfg, src_db, target_db) + self.cursor = self.target_cursor self.dom = dom.parse(cfg.get('geoethno', 'xml_file')) self.known_types = [] @@ -67,9 +68,8 @@ class GeoEthnoImporter(Component): self.nhistoric_names += 1 - if self.nlocations % 1000 == 0: - sys.stdout.write('.') - sys.stdout.flush() +# if self.nlocations % 1000 == 0: +# self.step() def add_aliases(self, name, items): for alias in items: @@ -126,18 +126,20 @@ class GeoEthnoImporter(Component): self.add_aliases(name, self.flatten_node_list(aliasNodes)) self.process_children(n, name) - self.path.pop() + self.path.pop() + if len(self.path) == 1: + self.step() def process(self): self.path = [] self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '') - sys.stdout.write('\nGeoethno import result:\n') - sys.stdout.write(' types: %d\n' % len(self.known_types)) - sys.stdout.write(' locations: %d\n' % self.nlocations) - sys.stdout.write(' relations: %d\n' % self.nrelations) - sys.stdout.write(' aliases: %d\n' % self.naliases) - sys.stdout.write(' historical names: %d\n' % self.nhistoric_names) - + self.stats = { + 'types': len(self.known_types), + 'locations': self.nlocations, + 'relations': self.nrelations, + 'aliases': self.naliases, + 'historical names': self.nhistoric_names + } def warn(self, msg): print u"\nWarning: %s: %s\n" % ("/".join(self.path), msg) diff --git a/trunk/import/migration/tasks/publishers.py b/trunk/import/migration/tasks/publishers.py index f4caaa5..fab36f1 100644 --- a/trunk/import/migration/tasks/publishers.py +++ b/trunk/import/migration/tasks/publishers.py @@ -2,17 +2,15 @@ from telemeta.core import * from api import IDataMigrator -from core import GroupedItemsManager +from core import GroupedItemsManager, DataMigrationTask -class PublishersMigrator(Component): +class PublishersMigrator(DataMigrationTask): groups = {} implements(IDataMigrator) def setup(self, cfg, src_db, target_db): - self.target_db = target_db - self.target_cursor = target_db.cursor() - self.src_cursor = src_db.cursor() + DataMigrationTask.setup(self, cfg, src_db, target_db) self.data = GroupedItemsManager() def get_name(self): @@ -39,5 +37,8 @@ class PublishersMigrator(Component): def process(self): self.extract() self.insert() - print "Publishers/Collections: %d/%d\n" % (len(self.data.groups), self.data.count_items()) + self.stats = { + 'publishers': len(self.data.groups), + 'collections': self.data.count_items() + } diff --git a/trunk/import/migration/tasks/reset.py b/trunk/import/migration/tasks/reset.py index c2fcce6..c2a4a67 100644 --- a/trunk/import/migration/tasks/reset.py +++ b/trunk/import/migration/tasks/reset.py @@ -2,8 +2,9 @@ from telemeta.core import * from api import IDataInitializer +from core import DataMigrationTask -class DatabaseResetMigrator(Component): +class DatabaseResetMigrator(DataMigrationTask): implements(IDataInitializer) @@ -42,9 +43,6 @@ class DatabaseResetMigrator(Component): 'revisions' ] - def setup(self, cfg, src_db, target_db): - self.target_cursor = target_db.cursor() - def get_name(self): return "reset" -- 2.39.5