From: Stefan Huber Date: Fri, 17 Jan 2014 15:21:26 +0000 (+0100) Subject: Adding command -g for geocode fetch and save X-Git-Url: https://git.sthu.org/?a=commitdiff_plain;h=13d822db0d8d28c2dd9bfa2a5a344024fd387e2c;p=dvrdb.git Adding command -g for geocode fetch and save --- diff --git a/dvr-managedb b/dvr-managedb index a47d956..001ce39 100755 --- a/dvr-managedb +++ b/dvr-managedb @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import bs4 import getopt import os @@ -12,12 +11,95 @@ import urllib.parse import urllib.error import json import gzip - - -geocodeProviders = [ - "Google", - "Bing" - ] +import time + + +class GoogleGeolocation: + """A geoloction using google's service.""" + + def name(self): + """Return name of provider""" + return "Google" + + def getLocation(self, address): + """Get (lat, lon) pair for given address.""" + + urlargs = {} + urlargs['address'] = address + urlargs['sensor'] = "false" + urlparam = urllib.parse.urlencode(urlargs) + + url = "http://maps.googleapis.com/maps/api/geocode/json?" + urlparam + try: + response = urllib.request.urlopen(url).read().decode('utf-8') + data = json.loads(response) + loc = data['results'][0]['geometry']['location'] + return loc['lat'], loc['lng'] + except urllib.error.URLError as e: + print(e, file=sys.stderr) + return None + except IndexError: + return None + + +class BingGeolocation: + """A geoloction using google's service.""" + + def name(self): + """Return name of provider""" + return "Bing" + + def getLocation(self, address): + """Get (lat, lon) pair for given address.""" + + urlargs = {} + urlargs['q'] = address + urlargs['key'] = "Ap0fqhkPL7lSRlB79pq74ZmW-i91HHd6y8MsQ--0GoD7MzCp8v9SOQJJuXG8sguW" + urlargs['o'] = "json" + urlparam = urllib.parse.urlencode(urlargs) + + url = "http://dev.virtualearth.net/REST/v1/Locations?" + urlparam + try: + response = urllib.request.urlopen(url).read().decode('utf-8') + data = json.loads(response) + loc = data['resourceSets'][0]['resources'][0]['point']['coordinates'] + return loc + except urllib.error.URLError as e: + print(e, file=sys.stderr) + return None + except IndexError: + return None + + +class GeolocationProviders(object): + """Manages a list of geolocation providers.""" + + providers = [ + GoogleGeolocation(), + BingGeolocation() + ] + + @staticmethod + def isNameValid(name): + """Is provider of given name known?""" + return name.lower() in [p.name().lower() for p in GeolocationProviders.providers] + + @staticmethod + def isIndexValid(idx): + """Is provider of given index known?""" + return 0 <= idx and idx < len(GeolocationProviders.providers) + + @staticmethod + def getByIndex(idx): + """Get provider by index""" + assert(GeolocationProviders.isIndexValid(idx)) + return GeolocationProviders.providers[idx] + + @staticmethod + def getIndexByName(name): + """Get provider by its name""" + assert(GeolocationProviders.isNameValid(name)) + return [p.name().lower() for p in GeolocationProviders.providers].index(name.lower()) class Database: @@ -88,7 +170,8 @@ class Database: """Query database with given where statement. Return a list od IDs.""" stmt = "SELECT DISTINCT dvrtable.id FROM dvrtable, registrations, purposes" stmt += " WHERE dvrtable.id=registrations.dvrid AND dvrtable.id=purposes.dvrid" - stmt += " AND " + wherestmt + if len(wherestmt.strip()) > 0: + stmt += " AND " + wherestmt c = self.conn.cursor() c.execute(stmt) @@ -108,6 +191,16 @@ class Database: c.execute("SELECT * FROM registrations WHERE dvrid=?", (dvrid,)) return c.fetchone() + def get_address(self, dvrid): + """Return the address of given DVR-ID, if any.""" + c = self.conn.cursor() + c.execute("SELECT address FROM registrations WHERE dvrid=?", (dvrid,)) + res = c.fetchone() + + if res is None: + return None + return res[0] + def add_registration(self, dvrid, name, address): """Add a registration for the given DVR-ID.""" c = self.conn.cursor() @@ -137,12 +230,21 @@ class Database: c.execute("SELECT * FROM geolocations WHERE dvrid=?", (dvrid,)) return c.fetchall() - def add_geolocation(self, dvrid, lat, lon, provider): + def get_geolocation(self, dvrid, provider): + """Return geolocation of given DVR-ID and provider.""" + assert(GeolocationProviders.isIndexValid(provider)) + + c = self.conn.cursor() + c.execute("SELECT * FROM geolocations WHERE dvrid=? AND provider=?", \ + (dvrid, provider)) + return c.fetchone() + + def add_geolocation(self, dvrid, provider, lat, lon): """Add a geolocatoin for a given DVR-ID.""" - assert(0 <= provider and provider < len(geocodeProviders)) + assert(GeolocationProviders.isIndexValid(provider)) c = self.conn.cursor() - c.execute("INSERT INTO geolocation VALUES (?, ?, ?, ?)", \ + c.execute("INSERT INTO geolocations VALUES (?, ?, ?, ?)", \ (dvrid, provider, lat, lon)) c.close() self.conn.commit() @@ -174,7 +276,7 @@ def printDataset(db, id): geolocs = db.get_geolocations(id) for loc in geolocs: - p = geocodeProviders[loc[1]] + p = GeolocationProviders.getByIndex(loc[1]).name() print(" Coordinates: lat %f, lon %f (%s)" % (loc[2], loc[3], p)) purposes = db.get_purposes(id) @@ -190,6 +292,43 @@ def printDataset(db, id): return True +def processGeolocation(db, query, provider): + """Fetch and add geolocations for IDs selected by query.""" + + ids = db.query(query) + for id in ids: + print("Fetching location for ID %d" % id) + + loc = db.get_geolocation(id, provider) + if loc is not None: + print(" Location already known.") + continue + + address = db.get_address(id) + if address is None: + print(" No address given.") + continue + print(" Address:", address) + + p = GeolocationProviders.getByIndex(provider) + + for i in range(1, 3): + loc = p.getLocation(address) + + if loc is None: + print(" Could not get location. Retry...") + time.sleep(1) + continue + break + + if loc is None: + print(" Giving up. :(") + continue + + print(" Got location:", loc) + db.add_geolocation(id, provider, loc[0], loc[1]) + + def processQuery(db, query): """Process query for given ID.""" @@ -292,6 +431,7 @@ USAGE: {0} [OPTIONS] COMMAND [ARGS...] {0} -d FILE -a PATH [PATH...] {0} -d FILE -q COND -l + {0} -d FILE -q COND -g PROVIDER [PROVIDER...] {0} -h COMMAND: @@ -301,6 +441,8 @@ COMMAND: a directory, read all files ending with ".html" or ".html.gz". -h Print this help text. -l List the selected datasets. + -g Fetch and store geolocation of selected datasets from given + provider, if not existent. Provider is either 'google' or 'bing'. OPTIONS: -d FILE Use given sqlite3 database. @@ -315,10 +457,10 @@ if __name__ == "__main__": cmd = None try: - opts, args = getopt.getopt(sys.argv[1:], "ad:hq:l") + opts, args = getopt.getopt(sys.argv[1:], "ad:ghq:l") for opt, arg in opts: - if opt in ["-a", "-l"]: + if opt in ["-a", "-l", "-g"]: cmd = opt elif opt == "-d": dbfn = arg @@ -350,10 +492,22 @@ if __name__ == "__main__": for arg in args: processAdd(db, arg) - if cmd == "-l": + if cmd in ["-l", "-g"]: if query is None: print("No query option given.", file=sys.stderr) sys.exit(os.EX_USAGE) - processQuery(db, query) + + if cmd == "-l": + processQuery(db, query) + + if cmd == "-g": + providers = GeolocationProviders + for arg in args: + + if not providers.isNameValid(arg): + print("Unknown provider '%s'." % arg, file=sys.stderr) + sys.exit(os.EX_USAGE) + + processGeolocation(db, query, providers.getIndexByName(arg)) sys.exit(os.EX_OK)