git.parisson.com Git - telemeta-data.git/commitdiff
migration: add ethnic groups and publishers migrators
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Wed, 4 Mar 2009 16:46:38 +0000 (16:46 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Wed, 4 Mar 2009 16:46:38 +0000 (16:46 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@66 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/core.py [new file with mode: 0644]
trunk/import/migration/ethnic.py [new file with mode: 0644]
trunk/import/migration/migrate.py
trunk/import/migration/publishers.py [new file with mode: 0644]

diff --git a/trunk/import/migration/core.py b/trunk/import/migration/core.py
new file mode 100644 (file)
index 0000000..3ff5e51
--- /dev/null
@@ -0,0 +1,50 @@
+
+class GroupedItemsManager(object):
+    """Collect named groups and, for each group, a list of unique items.
+
+    ``groups`` maps each group name to the list of items attached to it.
+    Names are stripped of surrounding whitespace before use; empty and
+    ``None`` names are silently ignored.
+    """
+
+    def __init__(self):
+        self.groups = {}
+
+    @staticmethod
+    def _clean(value):
+        """Return ``value`` stripped of surrounding whitespace ('' for None)."""
+        if value is None:
+            return ''
+        return value.strip()
+
+    def append_group(self, group):
+        """Register ``group`` with an empty item list unless already known."""
+        group = self._clean(group)
+        if group:
+            self.groups.setdefault(group, [])
+
+    def append_item(self, group, item, detect_group=False):
+        """Attach ``item`` to ``group``, skipping duplicates and blank values.
+
+        With ``detect_group`` set, whichever of the two values is already a
+        registered group is used as the group and the other one as the item
+        (``group`` wins when both are registered); the pair is dropped when
+        neither is registered.  Without it, ``group`` is used as given and is
+        registered on the fly if it is not known yet.
+        """
+        group = self._clean(group)
+        item = self._clean(item)
+        if not group or not item:
+            return
+
+        if detect_group and group not in self.groups:
+            if item not in self.groups:
+                return
+            # The second value is the registered group: swap the roles.
+            group, item = item, group
+
+        # setdefault avoids a KeyError when the group was never registered.
+        items = self.groups.setdefault(group, [])
+        if item not in items:
+            items.append(item)
+
+    def count_items(self):
+        """Return the total number of items across all groups."""
+        return sum(len(items) for items in self.groups.values())
diff --git a/trunk/import/migration/ethnic.py b/trunk/import/migration/ethnic.py
new file mode 100644 (file)
index 0000000..349de6b
--- /dev/null
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+from telemeta.core import *
+from api import IDataMigrator
+from core import GroupedItemsManager
+
+class EthnicGroupsMigrator(Component):
+    """Migrate ethnic groups and their aliases from the source to the target database."""
+
+    implements(IDataMigrator)
+
+    def setup(self, cfg, src_db, target_db):
+        """Keep the target connection, open a cursor on each database and start with empty data."""
+        self.target_db = target_db
+        self.target_cursor = target_db.cursor()
+        self.src_cursor = src_db.cursor()
+        self.data = GroupedItemsManager()
+
+    def get_name(self):
+        """Return the identifier of this migrator."""
+        return "ethnicgroups"
+
+    def _source_rows(self, query):
+        """Execute ``query`` on the source cursor and yield rows until ``fetchone()`` returns a false value."""
+        self.src_cursor.execute(query)
+        row = self.src_cursor.fetchone()
+        while row:
+            yield row
+            row = self.src_cursor.fetchone()
+
+    def extract(self):
+        """Load group names from ``Phono`` and ``Ethnie``, then alias pairs from ``Alias_Ethnie``."""
+        group_queries = (
+            "SELECT DISTINCT Ethnie_GrSocial FROM Phono WHERE Ethnie_GrSocial <> ''",
+            "SELECT DISTINCT Ethnie FROM Ethnie WHERE Ethnie <> ''",
+        )
+        for query in group_queries:
+            for row in self._source_rows(query):
+                self.data.append_group(row[0])
+
+        alias_query = ("SELECT t1.Alias, t2.Alias FROM Alias_Ethnie AS t1 INNER JOIN Alias_Ethnie AS t2 "+
+                       "ON t1.Numero = t2.Numero WHERE t1.Alias <> t2.Alias")
+        # Each row pairs two different aliases sharing one Numero; detect_group
+        # lets the manager decide which of the two is a registered group.
+        for row in self._source_rows(alias_query):
+            self.data.append_item(row[0], row[1], detect_group=True)
+
+    def insert(self):
+        """Insert every group into ``ethnic_groups`` and its aliases into ``ethnic_group_aliases``."""
+        for name, aliases in self.data.groups.items():
+            self.target_cursor.execute("INSERT INTO ethnic_groups (name) VALUES(%s)", (name,))
+            # Id of the row just inserted, used as the parent key for its aliases.
+            group_id = self.target_db.insert_id()
+            for alias in aliases:
+                self.target_cursor.execute("INSERT INTO ethnic_group_aliases (ethnic_group_id, name) "+
+                                           "VALUES(%s, %s)", (group_id, alias))
+
+    def process(self):
+        """Run extraction then insertion and print the group/alias counts."""
+        self.extract()
+        self.insert()
+        print "Ethnic groups/aliases: %d/%d\n" % (len(self.data.groups), self.data.count_items())
+                    
+        
+
+
+            
+            
+        
+
+
+
diff --git a/trunk/import/migration/migrate.py b/trunk/import/migration/migrate.py
index 4bb5fd9abfbcdc2e920b0cc3e2ac7c6803bc1260..74ebe27d98ddb06cf18d5721c6b62c11a3cdec29 100644 (file)
@@ -9,6 +9,8 @@ from api import IDataMigrator
 import reset
 import enums
 import geoethno
+import ethnic
+import publishers
 
 class Migrator(Component):
     data_migrators = ExtensionPoint(IDataMigrator)
diff --git a/trunk/import/migration/publishers.py b/trunk/import/migration/publishers.py
new file mode 100644 (file)
index 0000000..f4caaa5
--- /dev/null
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+from telemeta.core import *
+from api import IDataMigrator
+from core import GroupedItemsManager
+
+class PublishersMigrator(Component):
+    """Migrate publishers and their collections from the source to the target database."""
+
+    # Not read or written by any method of this class; kept so the class
+    # attribute stays available to anything that may reference it.
+    groups = {}
+
+    implements(IDataMigrator)
+
+    def setup(self, cfg, src_db, target_db):
+        """Keep the target connection, open a cursor on each database and start with empty data."""
+        self.target_db = target_db
+        self.target_cursor = target_db.cursor()
+        self.src_cursor = src_db.cursor()
+        self.data = GroupedItemsManager()
+
+    def get_name(self):
+        """Return the identifier of this migrator."""
+        return "publishers"
+
+    def extract(self):
+        """Collect each distinct (Editeur, Collect_Serie) pair of ``Support`` as publisher/collection."""
+        self.src_cursor.execute("SELECT DISTINCT Editeur, Collect_Serie FROM Support WHERE Editeur <> ''")
+        while True:
+            row = self.src_cursor.fetchone()
+            if not row:
+                break
+            publisher, collection = row[0], row[1]
+            self.data.append_group(publisher)
+            # The query only filters Editeur, so Collect_Serie may be NULL
+            # (None).  Pass '' instead so the manager skips the item rather
+            # than failing on None.strip().
+            if collection is None:
+                collection = ''
+            self.data.append_item(publisher, collection)
+
+    def insert(self):
+        """Insert every publisher into ``publishers`` and its collections into ``publisher_collections``."""
+        for publisher in self.data.groups:
+            self.target_cursor.execute("INSERT INTO publishers (value) VALUES(%s)", (publisher,))
+            # Id of the row just inserted, used as the parent key for its collections.
+            publisher_id = self.target_db.insert_id()
+            for collection in self.data.groups[publisher]:
+                self.target_cursor.execute("INSERT INTO publisher_collections (publisher_id, value) "+
+                                           "VALUES(%s, %s)", (publisher_id, collection))
+
+    def process(self):
+        """Run extraction then insertion and print the publisher/collection counts."""
+        self.extract()
+        self.insert()
+        print "Publishers/Collections: %d/%d\n" % (len(self.data.groups), self.data.count_items())
+