Opencellid download service

[loctrkd.git] / gps303 / ocid_dload.py
diff --git a/gps303/ocid_dload.py b/gps303/ocid_dload.py

new file mode 100644 (file)

index 0000000..aa70030
--- /dev/null
+++ b/gps303/ocid_dload.py
@@ -0,0 +1,116 @@
+import csv
+from logging import getLogger
+import requests
+from sqlite3 import connect
+from zlib import decompressobj, MAX_WBITS
+
+from . import common
+
+log = getLogger("gps303/ocid_dload")
+
+# Download URL template; main() fills in the access token, the download
+# type and the MCC that selects the file to fetch.
+RURL = (
+    "https://opencellid.org/ocid/downloads"
+    "?token={token}&type={type}&file={mcc}.csv.gz"
+)
+
+# Definition of the "cells" table, executed by main() before loading data.
+# The column order matters: main() inserts each CSV row positionally
+# as 14 values.
+# NOTE(review): "lon" and "lat" are declared int; presumably the CSV carries
+# fractional degrees in these columns - confirm the intended column type.
+SCHEMA = """create table if not exists cells (
+  "radio" text,
+  "mcc" int,
+  "net" int,
+  "area" int,
+  "cell" int,
+  "unit" int,
+  "lon" int,
+  "lat" int,
+  "range" int,
+  "samples" int,
+  "changeable" int,
+  "created" int,
+  "updated" int,
+  "averageSignal" int
+)"""
+# Index over (area, cell); main() creates it after the rows are inserted.
+DBINDEX = "create index if not exists cell_idx on cells (area, cell)"
+
+
+class unzipped:
+    """
+    File-like object that unzips http response body.
+    read(size) method returns chunks of binary data as bytes;
+    read() without a size returns all remaining data.
+    When used as iterator, splits data to lines
+    and yields them as strings, without the line terminator.
+    A final line that is not terminated by a newline is yielded too.
+    """
+
+    def __init__(self, zstream):
+        """
+        zstream is an object with a read(size) method that returns
+        gzip-compressed bytes, and an empty bytes object at end of data.
+        """
+        self.zstream = zstream
+        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer
+        self.decoder = decompressobj(16 + MAX_WBITS)
+        self.outdata = b""  # decompressed but not yet returned by read()
+        self.line = b""  # data read by the iterator but not yet yielded
+
+    def read(self, n=None):
+        """
+        Return up to n bytes of decompressed data. If n is None or
+        negative, return everything up to the end of the stream.
+        Return b"" when the stream is exhausted.
+        """
+        read_all = n is None or n < 0
+        chunk = 65536 if read_all else n
+        # self.decoder is set to None once the input has ended
+        while self.decoder is not None and (
+            read_all or len(self.outdata) < n
+        ):
+            raw_data = self.zstream.read(chunk)
+            if raw_data:
+                self.outdata += self.decoder.decompress(raw_data)
+            else:
+                # End of input: collect whatever the decoder still holds
+                self.outdata += self.decoder.flush()
+                self.decoder = None
+        if read_all:
+            data, self.outdata = self.outdata, b""
+        else:
+            data, self.outdata = self.outdata[:n], self.outdata[n:]
+        return data
+
+    def __next__(self):
+        """
+        Return the next line decoded as utf-8, without the newline.
+        Raise StopIteration when there is no more data.
+        """
+        while True:
+            line, newline, rest = self.line.partition(b"\n")
+            if newline:
+                self.line = rest
+                return line.decode("utf-8")
+            moredata = self.read(256)
+            if not moredata:
+                if not self.line:
+                    raise StopIteration
+                # Data ended without a newline: do not lose the last line
+                line, self.line = self.line, b""
+                return line.decode("utf-8")
+            self.line += moredata
+
+    def __iter__(self):
+        return self
+
+
+def main(conf):
+    """
+    Download the cell list for the configured MCC from opencellid
+    and replace the contents of the "cells" table with it.
+
+    conf is the configuration object; conf.get(section, option) is used
+    to read these options of the "opencellid" section:
+      downloadtoken - name of the file that contains the access token
+      downloadmcc   - MCC that selects the file to download
+      dbfn          - file name of the sqlite database
+
+    Returns None. If the token file does not exist, or the server does
+    not answer with status 200, the problem is logged and the database
+    is left untouched.
+    """
+    try:
+        with open(
+            conf.get("opencellid", "downloadtoken"), encoding="ascii"
+        ) as fl:
+            token = fl.read().strip()
+    except FileNotFoundError:
+        log.warning("Opencellid access token not configured, cannot download")
+        return
+
+    mcc = conf.get("opencellid", "downloadmcc")
+    url = RURL.format(token=token, type="mcc", mcc=mcc)
+    # The real url contains the secret token, keep it out of the logs
+    logurl = RURL.format(token="<redacted>", type="mcc", mcc=mcc)
+    # url = "http://localhost:8000/262.csv.gz"  # TESTING
+    dbfn = conf.get("opencellid", "dbfn")
+    # The timeout (in seconds) applies to connecting and to each read from
+    # the socket, so that a stalled server cannot hang the service forever
+    with requests.get(url, stream=True, timeout=60) as resp:
+        log.debug("Requested %s, result %s", logurl, resp)
+        if resp.status_code != 200:
+            log.error("Error getting %s: %s", logurl, resp)
+            return
+        db = connect(dbfn)
+        try:
+            db.execute("pragma journal_mode = wal")
+            db.execute(SCHEMA)
+            # "with db" commits on success and rolls back on an exception,
+            # so a failed download does not wipe the existing records
+            with db:
+                db.execute("delete from cells")
+                rows = csv.reader(unzipped(resp.raw))
+                # csv.reader yields [] for blank lines, skip them
+                cur = db.executemany(
+                    """insert into cells
+                       values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                    (row for row in rows if row),
+                )
+                # For executemany() rowcount is the total of inserted rows
+                count = cur.rowcount
+                db.execute(DBINDEX)
+        finally:
+            # The connection context manager does not close the connection
+            db.close()
+    log.info("repopulated %s with %d records for MCC %s", dbfn, count, mcc)
+
+
+# Script entry point: initialize the configuration, then run the download.
+if __name__.endswith("__main__"):
+    conf = common.init(log)
+    main(conf)