From 354f55c5a897645ec95ac6dccc1258c58a475e42 Mon Sep 17 00:00:00 2001 From: olivier Date: Mon, 6 Jul 2009 21:19:39 +0000 Subject: [PATCH] migration: improve output, write to logfile git-svn-id: http://svn.parisson.org/svn/crem@113 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/migrate.py | 15 +-- trunk/import/migration/migration.ini.default | 3 + trunk/import/migration/tasks/api.py | 4 +- trunk/import/migration/tasks/core.py | 115 ++++++++++++++++--- trunk/import/migration/tasks/geoethno.py | 43 ++++--- 5 files changed, 135 insertions(+), 45 deletions(-) diff --git a/trunk/import/migration/migrate.py b/trunk/import/migration/migrate.py index ae7bee7..d2be87d 100644 --- a/trunk/import/migration/migrate.py +++ b/trunk/import/migration/migrate.py @@ -92,16 +92,18 @@ class MigrationManager(Component): passwd = self.cfg.get('target', 'pass'), charset = 'utf8') - self.logger = Logger() + self.logger = Logger(self.cfg.get('report', 'filename')) def run(self, only_task = None): """Run all tasks or a single one identified by its name with only_task""" self.done = [] - self.done.extend(self.run_tasks(self.initializers, "initializer", only_task = only_task)) - self.done.extend(self.run_tasks(self.migrators, "migrator", only_task = only_task)) - - if only_task and not len(self.done): - raise "No such task: %s" % only_task + try: + self.done.extend(self.run_tasks(self.initializers, "initializer", only_task = only_task)) + self.done.extend(self.run_tasks(self.migrators, "migrator", only_task = only_task)) + if only_task and not len(self.done): + raise "No such task: %s" % only_task + except KeyboardInterrupt: + self.logger.interrupt() def list_tasks(self): """Generator listing available tasks as DataMigrationTask instances""" @@ -142,5 +144,4 @@ if __name__ == '__main__': manager.setup(sys.argv[1]) manager.run(only_task) - manager.print_stats() diff --git a/trunk/import/migration/migration.ini.default b/trunk/import/migration/migration.ini.default index ee87cc5..f162ddb 100644 --- a/trunk/import/migration/migration.ini.default +++ b/trunk/import/migration/migration.ini.default @@ -12,3 +12,6 @@ host=127.0.0.1 [geoethno] xml_file=data/geoEthnoTelemeta.xml + +[report] +filename=migration.log diff --git a/trunk/import/migration/tasks/api.py b/trunk/import/migration/tasks/api.py index 32b370f..a84a12e 100644 --- a/trunk/import/migration/tasks/api.py +++ b/trunk/import/migration/tasks/api.py @@ -45,7 +45,9 @@ class IDataMigrationTask(Interface): def process(): """Run the migration task""" - + + def get_stats(): + """Return statistics as a dictionary of keyword/value pairs""" class IDataInitializer(IDataMigrationTask): """Initialization migration tasks interface""" diff --git a/trunk/import/migration/tasks/core.py b/trunk/import/migration/tasks/core.py index 1e84e2a..c7584b0 100644 --- a/trunk/import/migration/tasks/core.py +++ b/trunk/import/migration/tasks/core.py @@ -67,6 +67,9 @@ class DataMigrationTask(Component): def warn(self, msg): self.logger.warn(msg) + def get_stats(self): + return self.stats + class DataMigrator(DataMigrationTask): def build_assignments(self, map): @@ -218,42 +221,88 @@ class Logger(object): CRED = CESC + "31m" CBLUE = CESC + "34m" CCYAN = CESC + "36m" - CRESET = CESC + "0m" + CCURSOR + CDEFAULT = CESC + "0m" + CRESET = CDEFAULT + CCURSOR + CUP = CESC + "1A" + CREVERSENL= "\r" + CUP + + def __init__(self, report_filename = None): + self.report_filename = report_filename def warn(self, message): self.warnings.append(message) - if self.need_newline: - sys.stdout.write("\n") - sys.stdout.write("%s\n" % message) - self.need_newline = False + #if self.need_newline: + # sys.stdout.write("\n") + #sys.stdout.write("%s\n" % message.encode('utf8')) + #self.need_newline = False + + def color(self, msg, color = None): + if color: + color = self.CBOLD + color + else: + color = self.CDEFAULT + stdout.write(color + self.CNOCURSOR + msg) def color1(self, msg): - stdout.write(self.CBOLD + self.CGREEN + msg + self.CRESET) + self.color(msg, self.CGREEN) def color2(self, msg): - stdout.write(self.CBOLD + self.CCYAN + msg + self.CRESET) + self.color(msg, self.CCYAN) def color3(self, msg): - stdout.write(self.CBOLD + self.CRED + msg + self.CRESET) + self.color(msg, self.CRED) - def print_progress(self, ratio, start = False): + def print_progress(self, ratio, end = False): msg = "Running %s %s..." % (self.task.get_name(), self.task.get_type()) msg = "%-46s" % msg + barwidth = 10 p = "" - r = int(round(ratio * 10)) + r = int(round(ratio * barwidth)) for i in range(0, r): p += '=' - for i in range(r, 10): + for i in range(r, barwidth): p += ' ' self.color1("\r" + msg) self.color2(" [%s] %d%%" % (p, ratio * 100)) - self.color3(" warnings: %d" % len(self.warnings)) + if end: + self.color2(" (%.2fs)" % (time.time() - self.start_time)) + + self.color("\n%s" % self.task.__doc__) + #self.color3(" warnings: %d" % len(self.warnings)) + stats = self.task.get_stats() + if len(self.warnings): + stats['warnings'] = len(self.warnings) + if stats: + stdout.write("\n%s" % self.fmt_stats(stats)) + stdout.write("\n") + if not end: + stdout.write(self.CREVERSENL + self.CREVERSENL) + if stats: + stdout.write(self.CREVERSENL) sys.stdout.flush() self.need_newline = True + def _print_progress(self, ratio, end = False): + self.color3("\r[%d%%]" % (ratio * 100)) + #if end: + # self.color2(" (%.2fs)" % (time.time() - self.start_time)) + + #self.color("\n%s" % self.task.__doc__) + #self.color3(" warnings: %d" % len(self.warnings)) + stats = self.task.get_stats() + if len(self.warnings): + stats['warnings'] = len(self.warnings) + if stats: + stdout.write(" %s" % self.fmt_stats(stats)) + + if end: + stdout.write("\n") + sys.stdout.flush() + self.need_newline = True + def start(self, task, count = 1): self.start_time = time.time() @@ -261,11 +310,42 @@ class Logger(object): self.count = count self.position = 0 self.warnings = [] - self.print_progress(0, True) + #msg = "Running %s %s: %s\n" % (self.task.get_name(), self.task.get_type(), self.task.__doc__) + #self.color1(msg) + self.print_progress(0) + if self.report_filename: + self.report_file = open(self.report_filename, "a") + else: + self.report_file = None + + def fmt_stats(self, stats, colors = True): + if colors: + fmt = [k + ": " + self.CBOLD + self.CRED + str(stats[k]) + self.CDEFAULT for k in stats] + else: + fmt = [k + ": " + str(stats[k]) for k in stats] + + return ", ".join(fmt) + + def write_report(self, file): + file.write("========================================================================\n") + file.write("Task: %s\n" % self.task.get_name()) + file.write("Description: %s\n" % self.task.__doc__) + runtime = "%.2fs" % (self.end_time - self.start_time) + file.write("Finished: %s (%s)\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), runtime)) + stats = self.task.get_stats() + if stats: + file.write("Stats: %s\n" % self.fmt_stats(stats, False)) + if self.warnings: + file.write("Warnings:\n") + for w in self.warnings: + file.write("%s\n" % w.encode("utf8").strip()) + file.write("\n") def end(self): - self.print_progress(1) - self.color2(" (%.2fs)\n" % (time.time() - self.start_time)) + self.end_time = time.time() + self.print_progress(1, True) + if self.report_file: + self.write_report(self.report_file) self.need_newline = False self.task = None @@ -276,3 +356,8 @@ class Logger(object): self.position = position self.print_progress(float(self.position) / self.count) + def interrupt(self): + self.color3("Interrupted\n") + stdout.write(self.CRESET) + + diff --git a/trunk/import/migration/tasks/geoethno.py b/trunk/import/migration/tasks/geoethno.py index e1a5433..a3074f0 100644 --- a/trunk/import/migration/tasks/geoethno.py +++ b/trunk/import/migration/tasks/geoethno.py @@ -44,11 +44,6 @@ class GeoEthnoImporter(DataMigrator): implements(IDataMigrator) - nlocations = 0 - nrelations = 0 - naliases = 0 - nhistoric_names = 0 - def setup(self, cfg, src_db, target_db, logger): super(GeoEthnoImporter, self).setup(cfg, src_db, target_db, logger) self.cursor = self.target_cursor @@ -99,26 +94,26 @@ class GeoEthnoImporter(DataMigrator): self.register_type(type) - self.nlocations += self.replace("INSERT INTO locations "+ - "(name, type, complete_type_id, current_name, is_authoritative) "+ - "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1)) + self.stats['locations'] += self.replace("INSERT INTO locations "+ + "(name, type, complete_type_id, current_name, is_authoritative) "+ + "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1)) if (len(parentName)): - self.nrelations += self.replace("INSERT INTO location_relations "+ - "(location_name, parent_location_name) "+ - "VALUE (%s, %s)", (name, parentName)) + self.stats['relations'] += self.replace("INSERT INTO location_relations "+ + "(location_name, parent_location_name) "+ + "VALUE (%s, %s)", (name, parentName)) for hname in historic_names: - self.nhistoric_names += self.replace("INSERT INTO locations "+ - "(name, type, complete_type_id, current_name, is_authoritative) "+ - "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1)) + self.stats['historical names'] += self.replace("INSERT INTO locations "+ + "(name, type, complete_type_id, current_name, is_authoritative) "+ + "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1)) def add_aliases(self, name, items): for alias in items: - self.naliases += self.replace("INSERT INTO location_aliases "+ - "(location_name, alias, is_authoritative) "+ - "VALUES (%s, %s, %s)", (name, alias, 1)) + self.stats['aliases'] += self.replace("INSERT INTO location_aliases "+ + "(location_name, alias, is_authoritative) "+ + "VALUES (%s, %s, %s)", (name, alias, 1)) @@ -180,14 +175,18 @@ class GeoEthnoImporter(DataMigrator): def process(self): self.path = [] self.start(len(self.dom.getElementsByTagName('TERMES-SPECIFIQUES'))) - self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '') + self.target("DELETE FROM locations") + self.target("DELETE FROM location_relations") + self.target("DELETE FROM location_aliases") + self.target("DELETE FROM location_types") self.stats = { 'types': len(self.known_types), - 'locations': self.nlocations, - 'relations': self.nrelations, - 'aliases': self.naliases, - 'historical names': self.nhistoric_names + 'locations': 0, + 'relations': 0, + 'aliases': 0, + 'historical names': 0 } + self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '') self.end() -- 2.39.5