From: olivier <> Date: Wed, 17 Feb 2010 12:42:12 +0000 (+0000) Subject: the geocoder now works by parsing the Geonames main data file ; removing geopy dependency X-Git-Tag: 1.1~516 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=86850cca58b855081fa7cf0b1c553cdb2937c706;p=telemeta.git the geocoder now works by parsing the Geonames main data file ; removing geopy dependency --- diff --git a/INSTALL b/INSTALL index 3f517d34..58684223 100644 --- a/INSTALL +++ b/INSTALL @@ -14,7 +14,7 @@ The following third party applications or libraries are required: libsndfile1 (>= 1.0.17), python-numpy, python-ctypes (>= 1.0.1), python-scikits-audiolab (>= 0.10), python-setuptools (>= 0.6b3), python-support (>= 0.3), python-scipy, lame (>= 3.98.2), - python-docutils (>= 0.5), geopy >= 0.93 (http://code.google.com/p/geopy) + python-docutils (>= 0.5) :optional: ecasound, festival, par2 diff --git a/telemeta/management/commands/telemeta-geocode.py b/telemeta/management/commands/telemeta-geocode.py index d9d666b8..166bc408 100644 --- a/telemeta/management/commands/telemeta-geocode.py +++ b/telemeta/management/commands/telemeta-geocode.py @@ -1,39 +1,54 @@ from optparse import make_option from django.conf import settings -from django.core.management.base import NoArgsCommand +from django.core.management.base import BaseCommand, CommandError from telemeta.models import Location from telemeta.util.unaccent import unaccent -import geopy import logging +import codecs -class Command(NoArgsCommand): - help = "Update Telemeta Locations latitudes and longitudes (currently only countries)" +class Command(BaseCommand): + help = "Geocode Telemeta countries from a local Geonames data file" + args = "path to geoname's allCountries.txt" - def handle_noargs(self, **options): + def handle(self, datafile=None, *args, **options): - geocoder = geopy.geocoders.Google(settings.TELEMETA_GMAP_KEY) - logging.getLogger().setLevel(logging.WARNING) + if not datafile: + raise CommandError("Please provide the %s" % self.args) + try: + datafile = codecs.open(datafile, 'r', 'utf-8') + except IOError: + raise CommandError("Unable to open %s" % datafile) + locations = Location.objects.filter(type=Location.COUNTRY) + i = 0 + geocoded = 0 total = locations.count() - processed = 0 - success = 0 - for location in locations: - try: - r = geocoder.geocode(unaccent(unicode(location)), exactly_one=False) - try: - place, (lat, lng) = r.next() - location.latitude = lat - location.longitude = lng - location.save() - success += 1 - except StopIteration: - pass - except ValueError, e: - print "Failed on %s: %s" % (unaccent(unicode(location)), e.message) - - processed += 1 - if processed % 20 == 0 or processed == total: - print "Processed %d/%d locations (success: %d, failures: %d)" \ - % (processed, total, success, processed - success) + for line in datafile: + (geonameid, name, asciiname, alternatenames, latitude, longitude, feature_class, + feature_code, country_code, cc2, admin1_code, admin2_code, admin3_code, + admin4_code, population, elevation, gtopo30, timezone, modification_date) = line.strip().split("\t") + + if feature_class == 'A': + names = [asciiname.lower()] + if alternatenames: + names.extend([unaccent(n).lower() for n in alternatenames.split(',')]) + + for l in locations: + if unaccent(l.name).lower() in names: + l.latitude = float(latitude) + l.longitude = float(longitude) + l.save() + geocoded += 1 + + i += 1 + + if i % 200000 == 0: + print "Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, i) + + if total == geocoded: + break + + print "Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, i) + datafile.close()