diff --git a/engine.py b/engine.py index 55ff8fe..08d1476 100755 --- a/engine.py +++ b/engine.py @@ -1,23 +1,40 @@ #!/usr/bin/env python +import json +import os import overpy import numpy as np import pandas as pd -from tqdm import tqdm +from pathlib import Path # brandname : overpass query filters BRANDS: dict[str, str] = { "greggs": "[\"brand:wikidata\"=\"Q3403981\"]", + "tesco": "[\"brand:wikidata\"~\"^(Q487494|Q98456772|Q25172225|Q65954217)$\"]", # Includes Tesco Express, Tesco Extra, and One Stop } -DATA_FOLDER = "" +CACHE_FOLDER = Path(".cache") + +LOCS_COUNT = 3 +DISTS_COUNT = 100 + +FORMAT_FACTOR = 1e6 # μm EncodedLocation = list[tuple[float, list[float]]] -def fetch_data(brand: str) -> list[tuple[float, float]]: +def fetch_data(brand: str, cache: bool = True) -> list[tuple[float, float]]: """Fetch a list of locations from OSM.""" + cache_loc = (CACHE_FOLDER / f"{brand}.json") + + # Try load from cache + if cache and cache_loc.exists(): + with open(cache_loc, "r") as f: + data = json.load(f) + + return data + api = overpy.Overpass() filters = BRANDS[brand] @@ -35,6 +52,16 @@ def fetch_data(brand: str) -> list[tuple[float, float]]: if (lat is None) or (lon is None): raise ValueError("Item missing coords!") + # Save to cache + if cache: + if not CACHE_FOLDER.exists(): + os.makedirs(CACHE_FOLDER) + + with open(cache_loc, "w") as f: + json.dump(result, f) + + print(f"Got {len(result)} {brand}s") + return result @@ -59,28 +86,23 @@ def encode(location: tuple[float, float]) -> EncodedLocation: repeat_cols = np.transpose(repeat_rows, (1, 0, 2)) dist_matrix = spherical_dist(repeat_rows, repeat_cols) - #find closest greggs - # distances = pd.Series(np.zeros(len(greggs))) - # for i in range(len(greggs)): - # current = greggs[i] - # #distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2 - # distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000 - repeated = np.tile(location, (len(greggs), 1)) distances = spherical_dist(repeated, greggs) distances = pd.Series(distances) - - print(distances) distances = distances.sort_values() - top3 = distances.head(3) - print(top3) + closest = distances.head(LOCS_COUNT) + closest_dist = list(closest.values) + closest_ind = list(closest.index) + + result: EncodedLocation = [] + for v, i in zip(closest.values, closest.index): + greggs_distances = np.sort(dist_matrix[i])[1:DISTS_COUNT+1] + + result.append((v, list(map(float, greggs_distances)))) # Stub - return [ - (5., [1., 2., 3.]), - (6., [4., 5., 6.]), - ] + return result def decode(location: EncodedLocation) -> tuple[float, float]: @@ -90,9 +112,17 @@ def decode(location: EncodedLocation) -> tuple[float, float]: return (0.091659, 52.210796) +def format_dist(dist: float) -> str: + return f"{int(round(dist * FORMAT_FACTOR))}" + + +def parse_dist(dist: str) -> float: + return float(dist) / FORMAT_FACTOR + + def format_location(location: EncodedLocation) -> str: """Format an encoded location as a string.""" - return ";".join([f"{a}:{','.join(map(str, b))}" for (a, b) in location]) + return ";\n".join([f"{format_dist(a)}:{','.join(map(format_dist, b))}" for (a, b) in location]) def parse_location(location: str) -> EncodedLocation: @@ -107,11 +137,11 @@ def parse_location(location: str) -> EncodedLocation: def main(): """Testing.""" - print("Running query...") + #print("Running query...") #greggs = fetch_data("greggs") #print(f"Query done - got {len(greggs)} Greggs!") - print(format_location(encode((52.210796, 0.091659)))) + # print(format_location(encode((52.210796, 0.091659)))) if __name__ == "__main__":