engine: vectorise distance matrix calc

This commit is contained in:
Oliver Gaskell 2025-11-01 16:17:54 +00:00
parent e8cd8795f1
commit 78228cb747
No known key found for this signature in database
GPG key ID: F971A08925FCC0AD
2 changed files with 31 additions and 25 deletions

View file

@ -3,7 +3,6 @@
import overpy
import numpy as np
import pandas as pd
import geopy.distance as distance
from tqdm import tqdm
@ -12,10 +11,12 @@ BRANDS: dict[str, str] = {
"greggs": "[\"brand:wikidata\"=\"Q3403981\"]",
}
DATA_FOLDER = ""
EncodedLocation = list[tuple[float, list[float]]]
def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
def fetch_data(brand: str) -> list[tuple[float, float]]:
"""Fetch a list of locations from OSM."""
api = overpy.Overpass()
@ -25,10 +26,10 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
result = []
for way in query.ways:
result.append((way.center_lat, way.center_lon))
result.append((float(way.center_lat), float(way.center_lon)))
for node in query.nodes:
result.append((node.lat, node.lon))
result.append((float(node.lat), float(node.lon)))
for (lat, lon) in result:
if (lat is None) or (lon is None):
@ -37,31 +38,37 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
return result
def spherical_dist(pos1, pos2, r=6378137):
    """Calculate spherical distances between two arrays of coordinates.

    Uses the spherical law of cosines, rewritten so that only cosines are
    needed::

        cos(d / r) = cos(lat1 - lat2) - cos(lat1) * cos(lat2) * (1 - cos(dlon))

    Args:
        pos1: Array-like whose last axis holds ``(latitude, longitude)`` in
            degrees. Any leading shape is allowed as long as it broadcasts
            against ``pos2``.
        pos2: Array-like with the same layout as ``pos1``.
        r: Radius of the sphere; the result is expressed in the same unit.
            The default is presumably the Earth's equatorial radius in
            metres -- TODO confirm.

    Returns:
        The great-circle distance for each broadcast pair of positions, as a
        NumPy array (or NumPy scalar for single positions).
    """
    # Accept plain lists/tuples as well as arrays; convert degrees to radians.
    pos1 = np.radians(np.asarray(pos1, dtype=float))
    pos2 = np.radians(np.asarray(pos2, dtype=float))

    lat1, lon1 = pos1[..., 0], pos1[..., 1]
    lat2, lon2 = pos2[..., 0], pos2[..., 1]

    cos_angle = (
        np.cos(lat1 - lat2)
        - np.cos(lat1) * np.cos(lat2) * (1 - np.cos(lon1 - lon2))
    )
    # Rounding can push the value marginally outside [-1, 1] (e.g. for
    # antipodal points), which would make arccos return NaN.
    return r * np.arccos(np.clip(cos_angle, -1.0, 1.0))
def encode(location: tuple[float, float]) -> EncodedLocation:
"""Encode a location."""
print("fetching")
greggs = fetch_data("greggs")
print("fetched")
#greggs = [(55.85,-4.02),(52.443,-1.833),(51.28,-1.0)]
greggs = np.array(fetch_data("greggs"))
#relative distance matrix
dist_matrix = np.zeros((len(greggs),len(greggs)))
for i in tqdm(range(len(greggs))):
first=greggs[i]
for j in range(len(greggs)):
second=greggs[j]
#calculate the distance between i and j coordinates
#dist_matrix[i,j] = np.sqrt((first[0]-second[0])**2 + (first[1]-second[1])**2 )
dist_matrix[i,j] = distance.distance(distance.lonlat(*first), distance.lonlat(*second)).km*1000
print(dist_matrix)
repeat_rows = np.tile(greggs, (len(greggs), 1, 1))
repeat_cols = np.transpose(repeat_rows, (1, 0, 2))
dist_matrix = spherical_dist(repeat_rows, repeat_cols)
#find closest greggs
distances = pd.Series(np.zeros(len(greggs)))
for i in range(len(greggs)):
current = greggs[i]
#distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2
distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000
# distances = pd.Series(np.zeros(len(greggs)))
# for i in range(len(greggs)):
# current = greggs[i]
# #distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2
# distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000
repeated = np.tile(location, (len(greggs), 1))
distances = spherical_dist(repeated, greggs)
distances = pd.Series(distances)
print(distances)
distances = distances.sort_values()
@ -104,7 +111,7 @@ def main():
#greggs = fetch_data("greggs")
#print(f"Query done - got {len(greggs)} Greggs!")
print(format_location(encode((0.091659, 52.210796))))
print(format_location(encode((52.210796, 0.091659))))
if __name__ == "__main__":

View file

@ -2,4 +2,3 @@ overpy
django
numpy
pandas
geopy