]> git.parisson.com Git - telemeta-data.git/commitdiff
add collections recording year migrator
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 9 Jun 2009 15:31:34 +0000 (15:31 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 9 Jun 2009 15:31:34 +0000 (15:31 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@91 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/tasks/collections.py

index a0aaf099c3d34203f0c76f8d5368487570b388ee..2d3978203aa886d5efeb19f1456af8c51494d6d2 100644 (file)
@@ -37,6 +37,7 @@ from core import DataMigrator
 from _mysql_exceptions import IntegrityError
 from MySQLdb.constants.ER import DUP_ENTRY
 import re
+from datetime import date
 
 class CollectionsCopyMigrator(DataMigrator):
     """Perform a preliminary raw copy of the collection table"""
@@ -384,4 +385,71 @@ class CollectionsPublishersMapper(DataMigrator):
         self.target_cursor.execute("SELECT COUNT(*) FROM media_collections WHERE publisher_collection_id IS NOT NULL")
         self.stats['with_collection'] = self.target_cursor.fetchone()[0]
 
+class CollectionsYearConverter(DataMigrator):
+    """Convert collection recording years.
+
+    Reads the free-form ``Annee_Enr`` column of the source ``Support`` table
+    and writes the parsed result into the ``recorded_from_year`` and
+    ``recorded_to_year`` columns of the target ``media_collections`` table,
+    matching rows on ``old_code``. Years that cannot be parsed or validated
+    are written as 0.
+    """
+
+    implements(IDataMigrator)
+
+    # Placeholder values: one of 'N', 'n', '1', '2' or '?', optionally
+    # preceded by a sign. They carry no usable year.
+    _UNSIGNIFICANT_RE = re.compile('^[+-]?[Nn12?]$')
+
+    # Two numbers of 2 to 4 digits split by one separator, with an optional
+    # leading '<', an optional apostrophe before the second number and an
+    # optional trailing punctuation character.
+    _RANGE_RE = re.compile(r'^<?([0-9]{2,4})[;/,>-]\'?([0-9]{2,4})[\';/,>?-]?$')
+
+    # A single number of 2 to 4 digits with optional trailing punctuation.
+    _SINGLE_RE = re.compile(r'^([0-9]{2,4})[;/,>?-]?$')
+
+    def get_name(self):
+        """Return the name identifying this migrator."""
+        return "collections:date"
+
+    def parse_year(self, year):
+        """Normalize an integer year, returning 0 when it is not acceptable.
+
+        Values below 100 are taken as two-digit years and shifted by 1900.
+        Values between 1900 and the current year (inclusive) are returned
+        unchanged. Anything else yields 0.
+        """
+        if year < 100:
+            return year + 1900
+        if 1900 <= year <= date.today().year:
+            return year
+
+        return 0
+
+    def process(self):
+        """Parse every source recording year and update the target table.
+
+        Fills ``self.stats`` with one counter per outcome: ``total`` rows
+        read, ``empty`` values, ``unsignificant`` placeholders, ``unparsed``
+        values matching no known pattern, ``migrated`` values yielding a
+        usable year or range, and ``incoherent`` values whose parsed years
+        are both invalid or form an inverted range.
+        """
+        self.stats = {
+            'total':          0,
+            'unsignificant':  0,
+            'unparsed':       0,
+            'migrated':       0,
+            'empty':          0,
+            'incoherent':     0
+        }
+
+        self.src_cursor.execute("SELECT Cote, Annee_Enr FROM Support")
+
+        while True:
+            row = self.src_cursor.fetchone()
+            if not row:
+                break
+
+            old_code = row[0]
+            year_str = row[1]
+
+            # 0 is what gets written when no usable year can be extracted.
+            from_year = 0
+            to_year   = 0
+
+            self.stats['total'] += 1
+            if not year_str:
+                # Covers '' as well as NULL (None); passing None to a regex
+                # match would raise TypeError and abort the migration.
+                self.stats['empty'] += 1
+            elif self._UNSIGNIFICANT_RE.match(year_str):
+                self.stats['unsignificant'] += 1
+            else:
+                # Try the "from-to" range form first, then a single year.
+                match = self._RANGE_RE.match(year_str)
+                if not match:
+                    match = self._SINGLE_RE.match(year_str)
+
+                if match:
+                    from_year = self.parse_year(int(match.group(1)))
+                    if match.lastindex > 1:
+                        to_year = self.parse_year(int(match.group(2)))
+                    else:
+                        to_year = from_year
+
+                    # Explicit parentheses: 'and' binds tighter than 'or', so
+                    # without them any non-zero from_year counted as migrated
+                    # even when the range was inverted.
+                    if (from_year or to_year) and (not to_year or to_year >= from_year):
+                        self.stats['migrated'] += 1
+                    else:
+                        # NOTE(review): incoherent values are only counted;
+                        # the parsed years are still written below. Confirm
+                        # whether they should be reset to 0 instead.
+                        self.stats['incoherent'] += 1
+
+                else:
+                    self.stats['unparsed'] += 1
+
+            self.target_cursor.execute("UPDATE media_collections SET recorded_from_year = %s, "
+                                       "recorded_to_year = %s WHERE old_code = %s", (from_year, to_year, old_code))