]> git.sthu.org Git - dvrdb.git/commitdiff
Adding command -g for geocode fetch and save
authorStefan Huber <shuber@sthu.org>
Fri, 17 Jan 2014 15:21:26 +0000 (16:21 +0100)
committerStefan Huber <shuber@sthu.org>
Fri, 17 Jan 2014 17:41:16 +0000 (18:41 +0100)
dvr-managedb

index a47d956183aa2182b690cd8bc94dec5fd48716d6..001ce394eddb2640e726f2120727f860df9772ba 100755 (executable)
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 
-
 import bs4
 import getopt
 import os
@@ -12,12 +11,95 @@ import urllib.parse
 import urllib.error
 import json
 import gzip
-
-
-geocodeProviders = [
-        "Google",
-        "Bing"
-        ]
+import time
+
+
+class GoogleGeolocation:
+    """A geolocation provider using Google's geocoding service."""
+
+    def name(self):
+        """Return name of provider"""
+        return "Google"
+
+    def getLocation(self, address):
+        """Get (lat, lon) pair for given address."""
+
+        urlargs = {}
+        urlargs['address'] = address
+        urlargs['sensor'] = "false"
+        urlparam = urllib.parse.urlencode(urlargs)
+
+        url = "http://maps.googleapis.com/maps/api/geocode/json?" + urlparam
+        try:
+            response = urllib.request.urlopen(url).read().decode('utf-8')
+            data = json.loads(response)
+            loc = data['results'][0]['geometry']['location']
+            return loc['lat'], loc['lng']
+        except urllib.error.URLError as e:
+            print(e, file=sys.stderr)
+            return None
+        except IndexError:
+            return None
+
+
+class BingGeolocation:
+    """A geolocation provider using Bing's Locations REST service."""
+
+    def name(self):
+        """Return name of provider"""
+        return "Bing"
+
+    def getLocation(self, address):
+        """Get (lat, lon) pair for given address."""
+
+        urlargs = {}
+        urlargs['q'] = address
+        urlargs['key'] = "Ap0fqhkPL7lSRlB79pq74ZmW-i91HHd6y8MsQ--0GoD7MzCp8v9SOQJJuXG8sguW"
+        urlargs['o'] = "json"
+        urlparam = urllib.parse.urlencode(urlargs)
+
+        url = "http://dev.virtualearth.net/REST/v1/Locations?" + urlparam
+        try:
+            response = urllib.request.urlopen(url).read().decode('utf-8')
+            data = json.loads(response)
+            loc = data['resourceSets'][0]['resources'][0]['point']['coordinates']
+            return loc
+        except urllib.error.URLError as e:
+            print(e, file=sys.stderr)
+            return None
+        except IndexError:
+            return None
+
+
+class GeolocationProviders(object):
+    """Manages a list of geolocation providers."""
+
+    providers = [
+            GoogleGeolocation(),
+            BingGeolocation()
+            ]
+
+    @staticmethod
+    def isNameValid(name):
+        """Is provider of given name known?"""
+        return name.lower() in [p.name().lower() for p in GeolocationProviders.providers]
+
+    @staticmethod
+    def isIndexValid(idx):
+        """Is provider of given index known?"""
+        return 0 <= idx and idx < len(GeolocationProviders.providers)
+
+    @staticmethod
+    def getByIndex(idx):
+        """Get provider by index"""
+        assert(GeolocationProviders.isIndexValid(idx))
+        return GeolocationProviders.providers[idx]
+
+    @staticmethod
+    def getIndexByName(name):
+        """Get the index of a provider by its name"""
+        assert(GeolocationProviders.isNameValid(name))
+        return [p.name().lower() for p in GeolocationProviders.providers].index(name.lower())
 
 
 class Database:
@@ -88,7 +170,8 @@ class Database:
         """Query database with given where statement. Return a list od IDs."""
         stmt = "SELECT DISTINCT dvrtable.id FROM dvrtable, registrations, purposes"
         stmt += " WHERE dvrtable.id=registrations.dvrid AND dvrtable.id=purposes.dvrid"
-        stmt += " AND " + wherestmt
+        if len(wherestmt.strip()) > 0:
+            stmt += " AND " + wherestmt
 
         c = self.conn.cursor()
         c.execute(stmt)
@@ -108,6 +191,16 @@ class Database:
         c.execute("SELECT * FROM registrations WHERE dvrid=?", (dvrid,))
         return c.fetchone()
 
+    def get_address(self, dvrid):
+        """Return the address of given DVR-ID, if any."""
+        c = self.conn.cursor()
+        c.execute("SELECT address FROM registrations WHERE dvrid=?", (dvrid,))
+        res = c.fetchone()
+
+        if res is None:
+            return None
+        return res[0]
+
     def add_registration(self, dvrid, name, address):
         """Add a registration for the given DVR-ID."""
         c = self.conn.cursor()
@@ -137,12 +230,21 @@ class Database:
         c.execute("SELECT * FROM geolocations WHERE dvrid=?", (dvrid,))
         return c.fetchall()
 
-    def add_geolocation(self, dvrid, lat, lon, provider):
+    def get_geolocation(self, dvrid, provider):
+        """Return geolocation of given DVR-ID and provider."""
+        assert(GeolocationProviders.isIndexValid(provider))
+
+        c = self.conn.cursor()
+        c.execute("SELECT * FROM geolocations WHERE dvrid=? AND provider=?", \
+                  (dvrid, provider))
+        return c.fetchone()
+
+    def add_geolocation(self, dvrid, provider, lat, lon):
         """Add a geolocatoin for a given DVR-ID."""
-        assert(0 <= provider and provider < len(geocodeProviders))
+        assert(GeolocationProviders.isIndexValid(provider))
 
         c = self.conn.cursor()
-        c.execute("INSERT INTO geolocation VALUES (?, ?, ?, ?)", \
+        c.execute("INSERT INTO geolocations VALUES (?, ?, ?, ?)", \
                   (dvrid, provider, lat, lon))
         c.close()
         self.conn.commit()
@@ -174,7 +276,7 @@ def printDataset(db, id):
 
         geolocs = db.get_geolocations(id)
         for loc in geolocs:
-            p = geocodeProviders[loc[1]]
+            p = GeolocationProviders.getByIndex(loc[1]).name()
             print("  Coordinates: lat %f, lon %f (%s)" % (loc[2], loc[3], p))
 
         purposes = db.get_purposes(id)
@@ -190,6 +292,43 @@ def printDataset(db, id):
     return True
 
 
+def processGeolocation(db, query, provider):
+    """Fetch and add geolocations for IDs selected by query."""
+
+    ids = db.query(query)
+    for id in ids:
+        print("Fetching location for ID %d" % id)
+
+        loc = db.get_geolocation(id, provider)
+        if loc is not None:
+            print("  Location already known.")
+            continue
+
+        address = db.get_address(id)
+        if address is None:
+            print("  No address given.")
+            continue
+        print("  Address:", address)
+
+        p = GeolocationProviders.getByIndex(provider)
+
+        for i in range(1, 3):
+            loc = p.getLocation(address)
+
+            if loc is None:
+                print("  Could not get location. Retry...")
+                time.sleep(1)
+                continue
+            break
+
+        if loc is None:
+            print("  Giving up. :(")
+            continue
+
+        print("  Got location:", loc)
+        db.add_geolocation(id, provider, loc[0], loc[1])
+
+
 def processQuery(db, query):
     """Process query for given ID."""
 
@@ -292,6 +431,7 @@ USAGE:
   {0} [OPTIONS] COMMAND [ARGS...]
   {0} -d FILE -a PATH [PATH...]
   {0} -d FILE -q COND -l
+  {0} -d FILE -q COND -g PROVIDER [PROVIDER...]
   {0} -h
 
 COMMAND:
@@ -301,6 +441,8 @@ COMMAND:
             a directory, read all files ending with ".html" or ".html.gz".
   -h        Print this help text.
   -l        List the selected datasets.
+  -g        Fetch and store geolocation of selected datasets from given
+            provider, if not existent. Provider is either 'google' or 'bing'.
 
 OPTIONS:
   -d FILE   Use given sqlite3 database.
@@ -315,10 +457,10 @@ if __name__ == "__main__":
     cmd = None
 
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ad:hq:l")
+        opts, args = getopt.getopt(sys.argv[1:], "ad:ghq:l")
 
         for opt, arg in opts:
-            if opt in ["-a", "-l"]:
+            if opt in ["-a", "-l", "-g"]:
                 cmd = opt
             elif opt == "-d":
                 dbfn = arg
@@ -350,10 +492,22 @@ if __name__ == "__main__":
         for arg in args:
             processAdd(db, arg)
 
-    if cmd == "-l":
+    if cmd in ["-l", "-g"]:
         if query is None:
             print("No query option given.", file=sys.stderr)
             sys.exit(os.EX_USAGE)
-        processQuery(db, query)
+
+        if cmd == "-l":
+            processQuery(db, query)
+
+        if cmd == "-g":
+            providers = GeolocationProviders
+            for arg in args:
+
+                if not providers.isNameValid(arg):
+                    print("Unknown provider '%s'." % arg, file=sys.stderr)
+                    sys.exit(os.EX_USAGE)
+
+                processGeolocation(db, query, providers.getIndexByName(arg))
 
     sys.exit(os.EX_OK)