from telemeta.core import *
from api import IDataMigrator
from core import DataMigrator, EnumMapper
+import re
+import sys
class ItemsCopyMigrator(DataMigrator):
"""Perform a preliminary raw copy of the item table"""
def get_name(self):
+ """Return this migrator's name, the string "items:locations"."""
return "items:locations"
- def is_descendant_of(self, location, ascendant, ascendant_type):
+ def is_descendant_of(self, location, ascendant, ascendant_type = None):
+ """Tell whether `ascendant` appears among the ancestors of `location`.
+
+ Parent rows (name, type) are read from location_relations joined with
+ locations. Each parent is compared with `ascendant`; when it does not
+ match, the search continues recursively from that parent.
+ When `ascendant_type` is falsy the parent's type is not checked.
+ Returns True as soon as a match is found, False otherwise.
+
+ NOTE(review): the query parameters and the row-fetching loop are not
+ visible in this excerpt -- confirm that `location` is the bound value.
+ """
self.target("SELECT l.name, l.type FROM location_relations AS r "
"INNER JOIN locations AS l ON r.parent_location_name = l.name "
"WHERE r.location_name = %s",
if not row:
break
parent_name, parent_type = row
- if parent_name == ascendant and parent_type == ascendant_type:
+ # With no ascendant_type given, a matching name alone is enough.
+ if parent_name == ascendant and ((not ascendant_type) or parent_type == ascendant_type):
return True
else:
if self.is_descendant_of(parent_name, ascendant, ascendant_type):
return True
return False
- def find_location(self, name_or_alias, type):
- self.target("SELECT name FROM locations AS l INNER JOIN location_aliases AS a ON l.name = a.location_name "
- "WHERE l.type = %s AND (l.name LIKE %s OR a.alias LIKE %s)",
- (type, name_or_alias, name_or_alias))
+ def find_location(self, name_or_alias, type = None):
+ """Look up a location by its name or by one of its aliases.
+
+ name_or_alias -- text compared with SQL LIKE against l.name and a.alias,
+ so '%' and '_' in it act as wildcards
+ type -- when truthy, only locations of that type are searched
+
+ Returns the first fetched (name, type) row when the cursor reports
+ rows, else (None, None). This replaces the previous contract, which
+ returned the bare name or None, so callers must unpack two values.
+
+ NOTE(review): the INNER JOIN only yields locations that have at least
+ one row in location_aliases -- confirm that every location has one.
+ """
+ select = "SELECT name, type FROM locations AS l INNER JOIN location_aliases AS a ON l.name = a.location_name"
+ if type:
+ self.target(select + " WHERE l.type = %s AND (l.name LIKE %s OR a.alias LIKE %s)",
+ (type, name_or_alias, name_or_alias))
+ else:
+ self.target(select + " WHERE l.name LIKE %s OR a.alias LIKE %s",
+ (name_or_alias, name_or_alias))
+
if self.target_cursor.rowcount:
- return self.target_cursor.fetchone()[0]
- return None
+ return self.target_cursor.fetchone()
+ return (None, None)
def concat(self, locality, country, continent = None):
+ """Return the collected pieces joined with '-' as a unicode string.
+
+ NOTE(review): the statements that fill `pieces` from locality, country
+ and continent are not visible in this excerpt.
+ """
pieces = []
return u'-'.join(pieces)
+    def parse_location_str(self, str):
+        """Split a free-form location string into individual place names.
+
+        Asterisks are dropped, runs of spaces are collapsed to one space and
+        the text is cut on ',', ';' or '/' (spaces around a separator are
+        swallowed). Empty pieces are discarded, so blank or missing input
+        yields an empty list rather than [''].
+
+        str -- the raw text, or None. The name shadows the builtin; it is
+               kept unchanged for compatibility with existing callers.
+
+        Returns a list of non-empty place names in their original order.
+        """
+        if not str:
+            return []
+        # Remove '*' before stripping so that "* Paris" or "Paris *" does not
+        # leave a stray leading/trailing space behind.
+        cleaned = re.sub(r" +", " ", str.replace("*", "").strip())
+        return [piece for piece in re.split(r" *[,;/] *", cleaned) if piece]
+
+    def find_location_by_sequence(self, sequence, matched = None, unmatched = None):
+        """Resolve a sequence of place names to the most specific known location.
+
+        Names are looked up in order with find_location(). A name is accepted
+        when it is the first one resolved, or when is_descendant_of() reports
+        it as a descendant of the location accepted so far. At the first name
+        that is unknown or fails that test, matching stops for good.
+
+        sequence  -- iterable of place names (process() passes continent,
+                     country and locality pieces, in that order)
+        matched   -- optional list; every accepted location name is appended
+        unmatched -- optional list; the name that stopped the match and all
+                     names after it are appended
+
+        Returns (name, type) of the last accepted location, or (None, None)
+        when nothing was accepted.
+        """
+        found = None
+        found_type = None
+        broken = False
+        for name in sequence:
+            if not broken:
+                location, location_type = self.find_location(name)
+                # An unknown name ends the match outright: it must neither be
+                # recorded in `matched` nor be passed to is_descendant_of().
+                if location and (not found or self.is_descendant_of(location, found)):
+                    found = location
+                    found_type = location_type
+                    if matched is not None:
+                        matched.append(found)
+                else:
+                    broken = True
+            if broken and unmatched is not None:
+                unmatched.append(name)
+
+        return (found, found_type)
+
def process(self):
+ """Map the legacy continent/country/locality text of each item to a location.
+
+ All media_items rows are first reset (location_name NULL, empty
+ location_comment). For every (oldcode, continent, country, locality)
+ row, the three texts are split into place names and resolved with
+ find_location_by_sequence(). The resolved location and the names that
+ could not be matched (joined with ", ") are written back to the item
+ whose old_code equals oldcode. self.stats counts the outcome per row
+ under 'continent', 'country', 'fullmap', 'nomap' or 'empty'.
+
+ Raises ValueError when a resolved location has an unexpected type.
+
+ NOTE(review): the query and loop that produce `row` are not visible
+ in this excerpt.
+ """
self.target("UPDATE media_items SET location_name = NULL, location_comment = ''")
if not row:
break
oldcode, continent, country, locality = row
- continent = continent.strip()
- country = country.strip()
- locality = locality.strip()
-
- comment = ''
- location = self.find_location(continent, "continent")
- if location:
- c = self.find_location(country, "country")
- if c and self.is_descendant_of(c, location, 'continent'):
- location = c
- l = self.find_location(locality, "other")
- if l and self.is_descendant_of(l, location, 'country'):
- location = l
+ sequence = []
+ sequence.extend(self.parse_location_str(continent))
+ sequence.extend(self.parse_location_str(country))
+ sequence.extend(self.parse_location_str(locality))
+ # Blank fields split into empty strings; drop them so that an item with
+ # no location text at all is counted as 'empty' instead of 'nomap'.
+ sequence = [piece for piece in sequence if piece]
+
+ if sequence:
+ matched = []
+ unmatched = []
+ location, location_type = self.find_location_by_sequence(sequence, matched, unmatched)
+ if location:
+ if location_type == 'continent':
+ self.stats['continent'] += 1
+ elif location_type == 'country':
+ self.stats['country'] += 1
+ elif location_type == 'other':
self.stats['fullmap'] += 1
+ #sys.stdout.write("fullmap: %s\n" % str(matched))
else:
- comment = locality
- self.stats['country'] += 1
+ # String exceptions are rejected by Python 2.6+; raise a real exception.
+ raise ValueError("Undetermined location type: %s" % location_type)
else:
- comment = self.concat(locality, country)
- self.stats['continent'] += 1
- else:
- comment = self.concat(locality, country, continent)
-
- if location or comment:
+ self.stats['nomap'] += 1
+
+ comment = ", ".join(unmatched)
self.target("UPDATE media_items SET location_name = %s, location_comment = %s WHERE old_code = %s",
(location, comment, oldcode))
- elif not comment:
- self.stats['empty'] += 1
else:
- self.stats['nomap'] += 1
+ self.stats['empty'] += 1
self.step()