passwd = self.cfg.get('target', 'pass'),
charset = 'utf8')
- self.logger = Logger()
+ self.logger = Logger(self.cfg.get('report', 'filename'))
def run(self, only_task=None):
    """Run all tasks, or only the one named by only_task.

    Initializers are run first, then migrators; whatever run_tasks()
    returns for each group is accumulated in self.done.

    Raises ValueError when only_task is given and nothing was run for it.
    A KeyboardInterrupt is not propagated: it is reported through
    self.logger.interrupt() and the method then returns normally.
    """
    self.done = []
    try:
        self.done.extend(self.run_tasks(self.initializers, "initializer", only_task=only_task))
        self.done.extend(self.run_tasks(self.migrators, "migrator", only_task=only_task))
        if only_task and not self.done:
            # A bare string is not a valid exception; raise a real
            # exception object carrying the same message.
            raise ValueError("No such task: %s" % only_task)
    except KeyboardInterrupt:
        self.logger.interrupt()
def list_tasks(self):
"""Generator listing available tasks as DataMigrationTask instances"""
manager.setup(sys.argv[1])
manager.run(only_task)
- manager.print_stats()
[geoethno]
xml_file=data/geoEthnoTelemeta.xml
+
+[report]
+filename=migration.log
def process():
"""Run the migration task"""
-
+
+ def get_stats():
+ """Return statistics as a dictionary of keyword/value pairs"""
class IDataInitializer(IDataMigrationTask):
"""Initialization migration tasks interface"""
def warn(self, msg):
    """Forward a warning message to this task's logger."""
    task_logger = self.logger
    task_logger.warn(msg)
def get_stats(self):
    """Return this task's statistics (the object stored in self.stats)."""
    current = self.stats
    return current
+
class DataMigrator(DataMigrationTask):
def build_assignments(self, map):
CRED = CESC + "31m"
CBLUE = CESC + "34m"
CCYAN = CESC + "36m"
- CRESET = CESC + "0m" + CCURSOR
+ CDEFAULT = CESC + "0m"
+ CRESET = CDEFAULT + CCURSOR
+ CUP = CESC + "1A"
+ CREVERSENL= "\r" + CUP
+
def __init__(self, report_filename=None):
    """Create a logger.

    report_filename -- path of the report file that start() opens in
                       append mode; with None (the default) no report
                       file is opened
    """
    self.report_filename = report_filename
def warn(self, message):
    """Record a warning for the current task.

    The message is only collected in self.warnings; nothing is written to
    the console here.
    """
    self.warnings += [message]
+
def color(self, msg, color=None):
    """Write msg to stdout, preceded by a colour escape sequence.

    color -- escape sequence selecting the colour; it is prefixed with
             CBOLD. When omitted (or empty), CDEFAULT is used instead.

    CNOCURSOR is emitted between the colour sequence and the text on
    every call.
    """
    prefix = self.CBOLD + color if color else self.CDEFAULT
    stdout.write(prefix + self.CNOCURSOR + msg)
def color1(self, msg):
    """Write msg through color() using the CGREEN sequence."""
    green = self.CGREEN
    self.color(msg, green)
def color2(self, msg):
    """Write msg through color() using the CCYAN sequence."""
    cyan = self.CCYAN
    self.color(msg, cyan)
def color3(self, msg):
    """Write msg through color() using the CRED sequence."""
    red = self.CRED
    self.color(msg, red)
def print_progress(self, ratio, end=False):
    """Redraw the multi-line progress display for the current task.

    ratio -- completed fraction of the task (1 means finished)
    end   -- True for the final redraw: the elapsed time is appended and
             the cursor is left below the display instead of being moved
             back up for the next redraw

    The display consists of a status line (task name and type, a progress
    bar and a percentage), the task's docstring and, when there is
    anything to show, one line of statistics.
    """
    msg = "Running %s %s..." % (self.task.get_name(), self.task.get_type())
    msg = "%-46s" % msg
    barwidth = 10
    filled = int(round(ratio * barwidth))
    bar = '=' * filled + ' ' * (barwidth - filled)
    self.color1("\r" + msg)
    self.color2(" [%s] %d%%" % (bar, ratio * 100))
    if end:
        self.color2(" (%.2fs)" % (time.time() - self.start_time))

    self.color("\n%s" % self.task.__doc__)
    # Work on a copy: get_stats() may hand back the task's own dictionary,
    # and the warning count belongs to the display, not to the task.
    stats = dict(self.task.get_stats() or {})
    if self.warnings:
        stats['warnings'] = len(self.warnings)
    if stats:
        stdout.write("\n%s" % self.fmt_stats(stats))
    stdout.write("\n")
    if not end:
        # Move back up by one CREVERSENL per newline written above, so the
        # next redraw overwrites this one.
        stdout.write(self.CREVERSENL + self.CREVERSENL)
        if stats:
            stdout.write(self.CREVERSENL)
    sys.stdout.flush()
    self.need_newline = True
def _print_progress(self, ratio, end=False):
    """Compact, single-line variant of the progress display.

    ratio -- completed fraction of the task (1 means finished)
    end   -- when True, a newline is written after the line

    Writes the percentage followed by the formatted statistics, if any.
    """
    self.color3("\r[%d%%]" % (ratio * 100))
    # Work on a copy: get_stats() may hand back the task's own dictionary,
    # and the warning count belongs to the display, not to the task.
    stats = dict(self.task.get_stats() or {})
    if self.warnings:
        stats['warnings'] = len(self.warnings)
    if stats:
        stdout.write(" %s" % self.fmt_stats(stats))

    if end:
        stdout.write("\n")
    sys.stdout.flush()
    self.need_newline = True
+
def start(self, task, count=1):
    """Begin tracking a task.

    task  -- the task about to run; the progress display and the report
             query it for its name, type, docstring and statistics
    count -- total number of steps, used as the divisor when computing
             the progress ratio

    Resets the position and the collected warnings, draws the initial
    progress display and, when a report filename was configured, opens
    the report file in append mode.
    """
    self.start_time = time.time()
    # print_progress() reads self.task, so bind it before the first redraw.
    self.task = task
    self.count = count
    self.position = 0
    self.warnings = []
    self.print_progress(0)
    if self.report_filename:
        self.report_file = open(self.report_filename, "a")
    else:
        self.report_file = None
+
def fmt_stats(self, stats, colors=True):
    """Format statistics as a comma-separated "key: value" string.

    stats  -- mapping of statistic name to value
    colors -- when True, every value is wrapped in CBOLD + CRED ... CDEFAULT
    """
    if colors:
        before, after = self.CBOLD + self.CRED, self.CDEFAULT
    else:
        before = after = ""
    return ", ".join(key + ": " + before + str(stats[key]) + after for key in stats)
+
+ def write_report(self, file):
+ file.write("========================================================================\n")
+ file.write("Task: %s\n" % self.task.get_name())
+ file.write("Description: %s\n" % self.task.__doc__)
+ runtime = "%.2fs" % (self.end_time - self.start_time)
+ file.write("Finished: %s (%s)\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), runtime))
+ stats = self.task.get_stats()
+ if stats:
+ file.write("Stats: %s\n" % self.fmt_stats(stats, False))
+ if self.warnings:
+ file.write("Warnings:\n")
+ for w in self.warnings:
+ file.write("%s\n" % w.encode("utf8").strip())
+ file.write("\n")
def end(self):
    """Finish the current task.

    Records the end time, draws the final progress display, writes the
    report section when a report file is open, then closes that file and
    clears the current task.
    """
    self.end_time = time.time()
    self.print_progress(1, True)
    if self.report_file:
        try:
            self.write_report(self.report_file)
        finally:
            # start() opens a new handle for every task; close it here so
            # the report is flushed and handles do not pile up.
            self.report_file.close()
            self.report_file = None
    self.need_newline = False
    self.task = None
self.position = position
self.print_progress(float(self.position) / self.count)
def interrupt(self):
    """Report an interruption and write the CRESET sequence to stdout."""
    notice = "Interrupted\n"
    self.color3(notice)
    stdout.write(self.CRESET)
+
+
implements(IDataMigrator)
- nlocations = 0
- nrelations = 0
- naliases = 0
- nhistoric_names = 0
-
def setup(self, cfg, src_db, target_db, logger):
super(GeoEthnoImporter, self).setup(cfg, src_db, target_db, logger)
self.cursor = self.target_cursor
self.register_type(type)
- self.nlocations += self.replace("INSERT INTO locations "+
- "(name, type, complete_type_id, current_name, is_authoritative) "+
- "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1))
+ self.stats['locations'] += self.replace("INSERT INTO locations "+
+ "(name, type, complete_type_id, current_name, is_authoritative) "+
+ "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1))
if (len(parentName)):
- self.nrelations += self.replace("INSERT INTO location_relations "+
- "(location_name, parent_location_name) "+
- "VALUE (%s, %s)", (name, parentName))
+ self.stats['relations'] += self.replace("INSERT INTO location_relations "+
+ "(location_name, parent_location_name) "+
+ "VALUE (%s, %s)", (name, parentName))
for hname in historic_names:
- self.nhistoric_names += self.replace("INSERT INTO locations "+
- "(name, type, complete_type_id, current_name, is_authoritative) "+
- "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1))
+ self.stats['historical names'] += self.replace("INSERT INTO locations "+
+ "(name, type, complete_type_id, current_name, is_authoritative) "+
+ "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1))
def add_aliases(self, name, items):
    """Store every alias in items for the location called name.

    One statement is issued per alias through self.replace(); whatever
    each call returns is added to the 'aliases' statistic.
    """
    statement = ("INSERT INTO location_aliases "
                 "(location_name, alias, is_authoritative) "
                 "VALUES (%s, %s, %s)")
    for alias in items:
        self.stats['aliases'] += self.replace(statement, (name, alias, 1))
def process(self):
    """Re-import the locations from the loaded XML document.

    The number of TERMES-SPECIFIQUES elements is passed to start(), the
    four location tables are emptied, the statistics counters are reset,
    and the children of the first GEOETHNO element are processed before
    end() is called.
    """
    self.path = []
    self.start(len(self.dom.getElementsByTagName('TERMES-SPECIFIQUES')))
    self.target("DELETE FROM locations")
    self.target("DELETE FROM location_relations")
    self.target("DELETE FROM location_aliases")
    self.target("DELETE FROM location_types")
    self.stats = {
        'types': len(self.known_types),
        'locations': 0,
        'relations': 0,
        'aliases': 0,
        'historical names': 0
    }
    self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '')
    # Re-count after the import: the value stored above was taken before
    # any element was processed, so types registered during processing
    # (presumably via register_type()) would otherwise be missing.
    self.stats['types'] = len(self.known_types)
    self.end()