git.parisson.com Git - telemeta-data.git/commitdiff
migration: improve output, write to logfile
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 6 Jul 2009 21:19:39 +0000 (21:19 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 6 Jul 2009 21:19:39 +0000 (21:19 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@113 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/migrate.py
trunk/import/migration/migration.ini.default
trunk/import/migration/tasks/api.py
trunk/import/migration/tasks/core.py
trunk/import/migration/tasks/geoethno.py

index ae7bee723c254570f7317926b52caf950be786a4..d2be87de0b743c2bcf08887b30e69fb283bd22ec 100644 (file)
@@ -92,16 +92,18 @@ class MigrationManager(Component):
             passwd  = self.cfg.get('target', 'pass'),
             charset = 'utf8')
 
-        self.logger = Logger()            
+        self.logger = Logger(self.cfg.get('report', 'filename'))            
 
     def run(self, only_task = None):
         """Run all tasks or a single one identified by its name with only_task"""
         self.done = []
-        self.done.extend(self.run_tasks(self.initializers, "initializer", only_task = only_task))
-        self.done.extend(self.run_tasks(self.migrators, "migrator", only_task = only_task))
-
-        if only_task and not len(self.done):
-            raise "No such task: %s" % only_task
+        try:
+            self.done.extend(self.run_tasks(self.initializers, "initializer", only_task = only_task))
+            self.done.extend(self.run_tasks(self.migrators, "migrator", only_task = only_task))
+            if only_task and not len(self.done):
+                raise "No such task: %s" % only_task
+        except KeyboardInterrupt:
+            self.logger.interrupt()
 
     def list_tasks(self):
         """Generator listing available tasks as DataMigrationTask instances"""
@@ -142,5 +144,4 @@ if __name__ == '__main__':
 
     manager.setup(sys.argv[1])
     manager.run(only_task)
-    manager.print_stats()
         
index ee87cc5410809e5002595b49b9243d7400d4e9f4..f162ddbc6dcdb963d90a49abe90e034ebf93a8f5 100644 (file)
@@ -12,3 +12,6 @@ host=127.0.0.1
 
 [geoethno]
 xml_file=data/geoEthnoTelemeta.xml
+
+[report]
+filename=migration.log
index 32b370fae16e21825e2455a83210e9671524c06b..a84a12e778633214b84a340562f5b8b2bc3f1a89 100644 (file)
@@ -45,7 +45,9 @@ class IDataMigrationTask(Interface):
 
     def process():
         """Run the migration task"""
-            
+           
+    def get_stats():
+        """Return statistics as a dictionary of keyword/value pairs"""
 
 class IDataInitializer(IDataMigrationTask):
     """Initialization migration tasks interface"""
index 1e84e2a47eb217de7b31c608663086d6dfa7c637..c7584b035b8f081a75b6655a36a2255838919876 100644 (file)
@@ -67,6 +67,9 @@ class DataMigrationTask(Component):
     def warn(self, msg):
         self.logger.warn(msg)
 
+    def get_stats(self):
+        return self.stats
+
 class DataMigrator(DataMigrationTask):
 
     def build_assignments(self, map):
@@ -218,42 +221,88 @@ class Logger(object):
     CRED      = CESC + "31m"
     CBLUE     = CESC + "34m"
     CCYAN     = CESC + "36m"
-    CRESET    = CESC + "0m" + CCURSOR
+    CDEFAULT  = CESC + "0m"
+    CRESET    = CDEFAULT + CCURSOR
+    CUP       = CESC + "1A" 
+    CREVERSENL= "\r" + CUP
+
+    def __init__(self, report_filename = None):
+        self.report_filename = report_filename
 
     def warn(self, message):
         self.warnings.append(message)
-        if self.need_newline:
-            sys.stdout.write("\n")
-        sys.stdout.write("%s\n" % message)
-        self.need_newline = False
+        #if self.need_newline:
+        #    sys.stdout.write("\n")
+        #sys.stdout.write("%s\n" % message.encode('utf8'))
+        #self.need_newline = False
+
+    def color(self, msg, color = None):
+        if color:
+            color = self.CBOLD + color
+        else:
+            color = self.CDEFAULT
+        stdout.write(color + self.CNOCURSOR + msg)
 
     def color1(self, msg):
-        stdout.write(self.CBOLD + self.CGREEN + msg + self.CRESET)
+        self.color(msg, self.CGREEN)
 
     def color2(self, msg):
-        stdout.write(self.CBOLD + self.CCYAN + msg + self.CRESET)
+        self.color(msg, self.CCYAN)
 
     def color3(self, msg):
-        stdout.write(self.CBOLD + self.CRED + msg + self.CRESET)
+        self.color(msg, self.CRED)
 
-    def print_progress(self, ratio, start = False):
+    def print_progress(self, ratio, end = False):
         msg = "Running %s %s..." % (self.task.get_name(), self.task.get_type())
         msg = "%-46s" % msg
 
+        barwidth = 10
         p = ""
-        r = int(round(ratio * 10))
+        r = int(round(ratio * barwidth))
         for i in range(0, r):
             p += '='
-        for i in range(r, 10):
+        for i in range(r, barwidth):
             p += ' '
 
         self.color1("\r" + msg)
         self.color2(" [%s] %d%%" % (p, ratio * 100))
-        self.color3(" warnings: %d" % len(self.warnings))
+        if end:
+            self.color2(" (%.2fs)" % (time.time() - self.start_time))
+            
+        self.color("\n%s" % self.task.__doc__)
+        #self.color3(" warnings: %d" % len(self.warnings))
+        stats = self.task.get_stats()
+        if len(self.warnings):
+            stats['warnings'] = len(self.warnings)
+        if stats:    
+            stdout.write("\n%s" % self.fmt_stats(stats))
+        stdout.write("\n")
+        if not end:
+            stdout.write(self.CREVERSENL + self.CREVERSENL)
+            if stats:
+                stdout.write(self.CREVERSENL)
 
         sys.stdout.flush()
         self.need_newline = True
         
+    def _print_progress(self, ratio, end = False):
+        self.color3("\r[%d%%]" % (ratio * 100))
+        #if end:
+        #    self.color2(" (%.2fs)" % (time.time() - self.start_time))
+            
+        #self.color("\n%s" % self.task.__doc__)
+        #self.color3(" warnings: %d" % len(self.warnings))
+        stats = self.task.get_stats()
+        if len(self.warnings):
+            stats['warnings'] = len(self.warnings)
+        if stats:    
+            stdout.write(" %s" % self.fmt_stats(stats))
+
+        if end:
+            stdout.write("\n")
+        sys.stdout.flush()
+        self.need_newline = True
+        
 
     def start(self, task, count = 1):
         self.start_time = time.time()
@@ -261,11 +310,42 @@ class Logger(object):
         self.count  = count
         self.position = 0
         self.warnings = []
-        self.print_progress(0, True)
+        #msg = "Running %s %s: %s\n" % (self.task.get_name(), self.task.get_type(), self.task.__doc__)
+        #self.color1(msg)
+        self.print_progress(0)
+        if self.report_filename:
+            self.report_file = open(self.report_filename, "a")
+        else:
+            self.report_file = None
+
+    def fmt_stats(self, stats, colors = True):
+        if colors:
+            fmt = [k + ": " + self.CBOLD + self.CRED + str(stats[k]) + self.CDEFAULT for k in stats]
+        else:
+            fmt = [k + ": " + str(stats[k]) for k in stats]
+            
+        return ", ".join(fmt)
+
+    def write_report(self, file):
+        file.write("========================================================================\n")
+        file.write("Task:        %s\n" % self.task.get_name())
+        file.write("Description: %s\n" % self.task.__doc__)
+        runtime = "%.2fs" % (self.end_time - self.start_time)
+        file.write("Finished:    %s (%s)\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), runtime))
+        stats = self.task.get_stats()
+        if stats:
+            file.write("Stats:       %s\n" % self.fmt_stats(stats, False))
+        if self.warnings:
+            file.write("Warnings:\n")
+            for w in self.warnings:
+                file.write("%s\n" % w.encode("utf8").strip())
+        file.write("\n")
 
     def end(self):
-        self.print_progress(1)
-        self.color2(" (%.2fs)\n" % (time.time() - self.start_time))
+        self.end_time = time.time()
+        self.print_progress(1, True)
+        if self.report_file:
+            self.write_report(self.report_file)
         self.need_newline = False
         self.task = None
 
@@ -276,3 +356,8 @@ class Logger(object):
             self.position = position
         self.print_progress(float(self.position) / self.count)
 
+    def interrupt(self):
+        self.color3("Interrupted\n")
+        stdout.write(self.CRESET)
+        
+
index e1a543334adfd60172324d4f9805282b25439fc7..a3074f0691db2be5024690ec77be962e5ec06b07 100644 (file)
@@ -44,11 +44,6 @@ class GeoEthnoImporter(DataMigrator):
 
     implements(IDataMigrator)
 
-    nlocations = 0
-    nrelations = 0
-    naliases = 0
-    nhistoric_names = 0
-
     def setup(self, cfg, src_db, target_db, logger):
         super(GeoEthnoImporter, self).setup(cfg, src_db, target_db, logger)
         self.cursor = self.target_cursor
@@ -99,26 +94,26 @@ class GeoEthnoImporter(DataMigrator):
 
         self.register_type(type)
 
-        self.nlocations += self.replace("INSERT INTO locations "+
-                                        "(name, type, complete_type_id, current_name, is_authoritative) "+
-                                        "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1))
+        self.stats['locations'] += self.replace("INSERT INTO locations "+
+                                                "(name, type, complete_type_id, current_name, is_authoritative) "+
+                                                "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1))
 
         if (len(parentName)):
-            self.nrelations += self.replace("INSERT INTO location_relations "+
-                                            "(location_name, parent_location_name) "+
-                                            "VALUE (%s, %s)", (name, parentName))
+            self.stats['relations'] += self.replace("INSERT INTO location_relations "+
+                                                   "(location_name, parent_location_name) "+
+                                                   "VALUE (%s, %s)", (name, parentName))
 
         for hname in historic_names:
-            self.nhistoric_names += self.replace("INSERT INTO locations "+
-                                                 "(name, type, complete_type_id, current_name, is_authoritative) "+
-                                                 "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1))
+            self.stats['historical names'] += self.replace("INSERT INTO locations "+
+                                                           "(name, type, complete_type_id, current_name, is_authoritative) "+
+                                                           "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1))
                     
 
     def add_aliases(self, name, items):
         for alias in items:
-            self.naliases += self.replace("INSERT INTO location_aliases "+
-                                          "(location_name, alias, is_authoritative) "+
-                                          "VALUES (%s, %s, %s)", (name, alias, 1))
+            self.stats['aliases'] += self.replace("INSERT INTO location_aliases "+
+                                                  "(location_name, alias, is_authoritative) "+
+                                                  "VALUES (%s, %s, %s)", (name, alias, 1))
                 
 
 
@@ -180,14 +175,18 @@ class GeoEthnoImporter(DataMigrator):
     def process(self):
         self.path = []
         self.start(len(self.dom.getElementsByTagName('TERMES-SPECIFIQUES')))
-        self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '')
+        self.target("DELETE FROM locations")
+        self.target("DELETE FROM location_relations")
+        self.target("DELETE FROM location_aliases")
+        self.target("DELETE FROM location_types")
         self.stats = {
             'types':            len(self.known_types),
-            'locations':        self.nlocations,
-            'relations':        self.nrelations,
-            'aliases':          self.naliases,
-            'historical names': self.nhistoric_names
+            'locations':        0,
+            'relations':        0,
+            'aliases':          0,
+            'historical names': 0
         }
+        self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '')
         self.end()