engine: vectorise distance matrix calc

This commit is contained in:
Oliver Gaskell 2025-11-01 16:17:54 +00:00
parent e8cd8795f1
commit 78228cb747
No known key found for this signature in database
GPG key ID: F971A08925FCC0AD
2 changed files with 31 additions and 25 deletions

View file

@ -3,7 +3,6 @@
import overpy import overpy
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import geopy.distance as distance
from tqdm import tqdm from tqdm import tqdm
@ -12,10 +11,12 @@ BRANDS: dict[str, str] = {
"greggs": "[\"brand:wikidata\"=\"Q3403981\"]", "greggs": "[\"brand:wikidata\"=\"Q3403981\"]",
} }
DATA_FOLDER = ""
EncodedLocation = list[tuple[float, list[float]]] EncodedLocation = list[tuple[float, list[float]]]
def fetch_data(brand: str) -> list[tuple[float | None, float | None]]: def fetch_data(brand: str) -> list[tuple[float, float]]:
"""Fetch a list of locations from OSM.""" """Fetch a list of locations from OSM."""
api = overpy.Overpass() api = overpy.Overpass()
@ -25,10 +26,10 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
result = [] result = []
for way in query.ways: for way in query.ways:
result.append((way.center_lat, way.center_lon)) result.append((float(way.center_lat), float(way.center_lon)))
for node in query.nodes: for node in query.nodes:
result.append((node.lat, node.lon)) result.append((float(node.lat), float(node.lon)))
for (lat, lon) in result: for (lat, lon) in result:
if (lat is None) or (lon is None): if (lat is None) or (lon is None):
@ -37,31 +38,37 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
return result return result
def spherical_dist(pos1, pos2, r=6378137):
    """Calculate spherical distances between two arrays of coordinates.

    Uses the spherical law of cosines, rewritten in terms of the latitude
    difference so that only cosines are required.

    Args:
        pos1: Array-like whose last axis holds ``(latitude, longitude)`` in
            degrees.
        pos2: Array-like with the same layout as ``pos1``; its leading axes
            must broadcast against those of ``pos1``.
        r: Radius of the sphere; the result is expressed in the same unit.
            The default, 6378137, is the WGS-84 equatorial radius in metres.

    Returns:
        The great-circle distance for every broadcast pair of coordinates.
    """
    # Accept lists/tuples as well as ndarrays, then convert degrees to radians.
    pos1 = np.asarray(pos1, dtype=float) * np.pi / 180
    pos2 = np.asarray(pos2, dtype=float) * np.pi / 180

    lat1, lon1 = pos1[..., 0], pos1[..., 1]
    lat2, lon2 = pos2[..., 0], pos2[..., 1]

    cos_lat1 = np.cos(lat1)
    cos_lat2 = np.cos(lat2)
    cos_lat_d = np.cos(lat1 - lat2)
    cos_lon_d = np.cos(lon1 - lon2)

    # cos(central angle); algebraically equal to
    # sin(lat1)sin(lat2) + cos(lat1)cos(lat2)cos(lon1 - lon2).
    cos_angle = cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d)

    # Rounding can nudge the value just outside [-1, 1] (e.g. near-antipodal
    # points), which would make arccos return NaN.
    return r * np.arccos(np.clip(cos_angle, -1.0, 1.0))
def encode(location: tuple[float, float]) -> EncodedLocation: def encode(location: tuple[float, float]) -> EncodedLocation:
"""Encode a location.""" """Encode a location."""
print("fetching") greggs = np.array(fetch_data("greggs"))
greggs = fetch_data("greggs")
print("fetched")
#greggs = [(55.85,-4.02),(52.443,-1.833),(51.28,-1.0)]
#relative distance matrix repeat_rows = np.tile(greggs, (len(greggs), 1, 1))
dist_matrix = np.zeros((len(greggs),len(greggs))) repeat_cols = np.transpose(repeat_rows, (1, 0, 2))
for i in tqdm(range(len(greggs))): dist_matrix = spherical_dist(repeat_rows, repeat_cols)
first=greggs[i]
for j in range(len(greggs)):
second=greggs[j]
#calculate the distance between i and j coordinates
#dist_matrix[i,j] = np.sqrt((first[0]-second[0])**2 + (first[1]-second[1])**2 )
dist_matrix[i,j] = distance.distance(distance.lonlat(*first), distance.lonlat(*second)).km*1000
print(dist_matrix)
#find closest greggs #find closest greggs
distances = pd.Series(np.zeros(len(greggs))) # distances = pd.Series(np.zeros(len(greggs)))
for i in range(len(greggs)): # for i in range(len(greggs)):
current = greggs[i] # current = greggs[i]
#distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2 # #distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2
distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000 # distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000
repeated = np.tile(location, (len(greggs), 1))
distances = spherical_dist(repeated, greggs)
distances = pd.Series(distances)
print(distances) print(distances)
distances = distances.sort_values() distances = distances.sort_values()
@ -104,7 +111,7 @@ def main():
#greggs = fetch_data("greggs") #greggs = fetch_data("greggs")
#print(f"Query done - got {len(greggs)} Greggs!") #print(f"Query done - got {len(greggs)} Greggs!")
print(format_location(encode((0.091659, 52.210796)))) print(format_location(encode((52.210796, 0.091659))))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -2,4 +2,3 @@ overpy
django django
numpy numpy
pandas pandas
geopy