From 78228cb74743d875f0db63f0e46caee32b16e14f Mon Sep 17 00:00:00 2001 From: Oliver Gaskell Date: Sat, 1 Nov 2025 16:17:54 +0000 Subject: [PATCH] engine: vectorise distance matrix calc --- engine.py | 55 +++++++++++++++++++++++++++--------------------- requirements.txt | 1 - 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/engine.py b/engine.py index fb5374c..55ff8fe 100755 --- a/engine.py +++ b/engine.py @@ -3,7 +3,6 @@ import overpy import numpy as np import pandas as pd -import geopy.distance as distance from tqdm import tqdm @@ -12,10 +11,12 @@ BRANDS: dict[str, str] = { "greggs": "[\"brand:wikidata\"=\"Q3403981\"]", } +DATA_FOLDER = "" + EncodedLocation = list[tuple[float, list[float]]] -def fetch_data(brand: str) -> list[tuple[float | None, float | None]]: +def fetch_data(brand: str) -> list[tuple[float, float]]: """Fetch a list of locations from OSM.""" api = overpy.Overpass() @@ -25,10 +26,10 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]: result = [] for way in query.ways: - result.append((way.center_lat, way.center_lon)) + result.append((float(way.center_lat), float(way.center_lon))) for node in query.nodes: - result.append((node.lat, node.lon)) + result.append((float(node.lat), float(node.lon))) for (lat, lon) in result: if (lat is None) or (lon is None): @@ -37,31 +38,37 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]: return result +def spherical_dist(pos1, pos2, r=6378137): + """Calculate spherical distances between two arrays of coordinates.""" + pos1 = pos1 * np.pi / 180 + pos2 = pos2 * np.pi / 180 + + cos_lat1 = np.cos(pos1[..., 0]) + cos_lat2 = np.cos(pos2[..., 0]) + cos_lat_d = np.cos(pos1[..., 0] - pos2[..., 0]) + cos_lon_d = np.cos(pos1[..., 1] - pos2[..., 1]) + return r * np.arccos(cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d)) + + def encode(location: tuple[float, float]) -> EncodedLocation: """Encode a location.""" - print("fetching") - greggs = fetch_data("greggs") - 
print("fetched") - #greggs = [(55.85,-4.02),(52.443,-1.833),(51.28,-1.0)] + greggs = np.array(fetch_data("greggs")) - #relative distance matrix - dist_matrix = np.zeros((len(greggs),len(greggs))) - for i in tqdm(range(len(greggs))): - first=greggs[i] - for j in range(len(greggs)): - second=greggs[j] - #calculate the distance between i and j coordinates - #dist_matrix[i,j] = np.sqrt((first[0]-second[0])**2 + (first[1]-second[1])**2 ) - dist_matrix[i,j] = distance.distance(distance.lonlat(*first), distance.lonlat(*second)).km*1000 - print(dist_matrix) + repeat_rows = np.tile(greggs, (len(greggs), 1, 1)) + repeat_cols = np.transpose(repeat_rows, (1, 0, 2)) + dist_matrix = spherical_dist(repeat_rows, repeat_cols) #find closest greggs - distances = pd.Series(np.zeros(len(greggs))) - for i in range(len(greggs)): - current = greggs[i] - #distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2 - distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000 + # distances = pd.Series(np.zeros(len(greggs))) + # for i in range(len(greggs)): + # current = greggs[i] + # #distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2 + # distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000 + + repeated = np.tile(location, (len(greggs), 1)) + distances = spherical_dist(repeated, greggs) + distances = pd.Series(distances) print(distances) distances = distances.sort_values() @@ -104,7 +111,7 @@ def main(): #greggs = fetch_data("greggs") #print(f"Query done - got {len(greggs)} Greggs!") - print(format_location(encode((0.091659, 52.210796)))) + print(format_location(encode((52.210796, 0.091659)))) if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 821f95a..6a824c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,3 @@ overpy django numpy pandas -geopy