git.parisson.com Git - telemeta.git/commitdiff
the geocoder now works by parsing the Geonames main data file ; removing geopy dependency
author olivier <>
Wed, 17 Feb 2010 12:42:12 +0000 (12:42 +0000)
committer olivier <>
Wed, 17 Feb 2010 12:42:12 +0000 (12:42 +0000)
INSTALL
telemeta/management/commands/telemeta-geocode.py

diff --git a/INSTALL b/INSTALL
index 3f517d3409ad042652e5ca3c8db8ff9a69cbe2dd..586842231e8b56517773a088df666c08e764b3f6 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -14,7 +14,7 @@ The following third party applications or libraries are required:
                libsndfile1 (>= 1.0.17), python-numpy, python-ctypes (>= 1.0.1),
                python-scikits-audiolab (>= 0.10), python-setuptools (>= 0.6b3),
                python-support (>= 0.3), python-scipy, lame (>= 3.98.2),
-               python-docutils (>= 0.5), geopy >= 0.93 (http://code.google.com/p/geopy)
+               python-docutils (>= 0.5)
 
 :optional:     ecasound, festival, par2
 
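diff --git a/telemeta/management/commands/telemeta-geocode.py b/telemeta/management/commands/telemeta-geocode.py
index d9d666b802cc830c6cd4ad3a9e009fac5665f36c..166bc4082f48d81af85b819ed4b6a28bc7b05df6 100644 (file)
--- a/telemeta/management/commands/telemeta-geocode.py
+++ b/telemeta/management/commands/telemeta-geocode.py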
@@ -1,39 +1,54 @@
 from optparse import make_option
 from django.conf import settings
-from django.core.management.base import NoArgsCommand
+from django.core.management.base import BaseCommand, CommandError
 from telemeta.models import Location
 from telemeta.util.unaccent import unaccent
-import geopy
 import logging
+import codecs
 
-class Command(NoArgsCommand):
-    help = "Update Telemeta Locations latitudes and longitudes (currently only countries)"
+class Command(BaseCommand):
+    help = "Geocode Telemeta countries from a local Geonames data file"
+    args = "path to geoname's allCountries.txt"
 
-    def handle_noargs(self, **options):
+    def handle(self, datafile=None, *args, **options):
 
-        geocoder = geopy.geocoders.Google(settings.TELEMETA_GMAP_KEY)
-        logging.getLogger().setLevel(logging.WARNING)
+        if not datafile:
+            raise CommandError("Please provide the %s" % self.args)
 
+        try:
+            datafile = codecs.open(datafile, 'r', 'utf-8')
+        except IOError:
+            raise CommandError("Unable to open %s" % datafile)
+            
         locations = Location.objects.filter(type=Location.COUNTRY)
+        i = 0
+        geocoded = 0
         total = locations.count()
-        processed = 0
-        success = 0
-        for location in locations:
-            try:
-                r = geocoder.geocode(unaccent(unicode(location)), exactly_one=False)
-                try:
-                    place, (lat, lng) = r.next()
-                    location.latitude = lat
-                    location.longitude = lng
-                    location.save()
-                    success += 1
-                except StopIteration:
-                    pass
-            except ValueError, e: 
-                print "Failed on %s: %s" % (unaccent(unicode(location)), e.message)
-
-            processed += 1
-            if processed % 20 == 0 or processed == total:
-                print "Processed %d/%d locations (success: %d, failures: %d)" \
-                      % (processed, total, success, processed - success)
+        for line in datafile:
+            (geonameid, name, asciiname, alternatenames, latitude, longitude, feature_class,
+             feature_code, country_code, cc2, admin1_code, admin2_code, admin3_code,
+             admin4_code, population, elevation, gtopo30, timezone, modification_date) = line.strip().split("\t")
+           
+            if feature_class == 'A':
+                names = [asciiname.lower()]
+                if alternatenames:
+                    names.extend([unaccent(n).lower() for n in alternatenames.split(',')])
+
+                for l in locations:
+                    if unaccent(l.name).lower() in names:
+                        l.latitude = float(latitude)
+                        l.longitude = float(longitude)
+                        l.save()
+                        geocoded += 1
+
+            i += 1
+
+            if i % 200000 == 0:
+                print "Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, i)
+
+            if total == geocoded:
+                break
+
+        print "Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, i)
+        datafile.close()