engine: vectorise distance matrix calc
This commit is contained in:
parent
e8cd8795f1
commit
78228cb747
2 changed files with 31 additions and 25 deletions
55
engine.py
55
engine.py
|
|
@ -3,7 +3,6 @@
|
||||||
import overpy
|
import overpy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import geopy.distance as distance
|
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
@ -12,10 +11,12 @@ BRANDS: dict[str, str] = {
|
||||||
"greggs": "[\"brand:wikidata\"=\"Q3403981\"]",
|
"greggs": "[\"brand:wikidata\"=\"Q3403981\"]",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DATA_FOLDER = ""
|
||||||
|
|
||||||
EncodedLocation = list[tuple[float, list[float]]]
|
EncodedLocation = list[tuple[float, list[float]]]
|
||||||
|
|
||||||
|
|
||||||
def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
|
def fetch_data(brand: str) -> list[tuple[float, float]]:
|
||||||
"""Fetch a list of locations from OSM."""
|
"""Fetch a list of locations from OSM."""
|
||||||
api = overpy.Overpass()
|
api = overpy.Overpass()
|
||||||
|
|
||||||
|
|
@ -25,10 +26,10 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
|
||||||
result = []
|
result = []
|
||||||
|
|
||||||
for way in query.ways:
|
for way in query.ways:
|
||||||
result.append((way.center_lat, way.center_lon))
|
result.append((float(way.center_lat), float(way.center_lon)))
|
||||||
|
|
||||||
for node in query.nodes:
|
for node in query.nodes:
|
||||||
result.append((node.lat, node.lon))
|
result.append((float(node.lat), float(node.lon)))
|
||||||
|
|
||||||
for (lat, lon) in result:
|
for (lat, lon) in result:
|
||||||
if (lat is None) or (lon is None):
|
if (lat is None) or (lon is None):
|
||||||
|
|
@ -37,31 +38,37 @@ def fetch_data(brand: str) -> list[tuple[float | None, float | None]]:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def spherical_dist(pos1, pos2, r=6378137):
    """Calculate spherical distances between two arrays of coordinates.

    Both inputs hold ``(latitude, longitude)`` pairs in degrees along their
    last axis; any leading axes are broadcast against each other, so the
    function works for a single pair, for two equally shaped lists of
    points, or for the tiled row/column arrays used to build a full
    distance matrix.

    Args:
        pos1: Array-like of shape ``(..., 2)`` with ``(lat, lon)`` in degrees.
        pos2: Array-like of shape ``(..., 2)`` with ``(lat, lon)`` in degrees.
        r: Sphere radius. The result is expressed in the same unit. The
            default, 6378137, corresponds to the WGS-84 equatorial radius
            in metres.

    Returns:
        Great-circle distance(s) with the broadcast leading shape of the
        inputs (a NumPy scalar when both inputs are single points).
    """
    # Accept plain lists/tuples as well as arrays, and convert to radians.
    pos1 = np.radians(np.asarray(pos1, dtype=float))
    pos2 = np.radians(np.asarray(pos2, dtype=float))

    lat1, lon1 = pos1[..., 0], pos1[..., 1]
    lat2, lon2 = pos2[..., 0], pos2[..., 1]

    # Haversine form of the great-circle distance. It is algebraically the
    # same as r * arccos(cos(dlat) - cos(lat1) * cos(lat2) * (1 - cos(dlon)))
    # but stays accurate for nearby points, where arccos loses precision
    # because its argument rounds to 1.
    sin_half_dlat = np.sin((lat1 - lat2) / 2)
    sin_half_dlon = np.sin((lon1 - lon2) / 2)
    hav = sin_half_dlat ** 2 + np.cos(lat1) * np.cos(lat2) * sin_half_dlon ** 2

    # Rounding can push the value marginally outside [0, 1] (e.g. antipodal
    # points); clip so that sqrt/arcsin never return NaN.
    hav = np.clip(hav, 0.0, 1.0)

    return 2 * r * np.arcsin(np.sqrt(hav))
|
||||||
|
|
||||||
|
|
||||||
def encode(location: tuple[float, float]) -> EncodedLocation:
|
def encode(location: tuple[float, float]) -> EncodedLocation:
|
||||||
"""Encode a location."""
|
"""Encode a location."""
|
||||||
|
|
||||||
print("fetching")
|
greggs = np.array(fetch_data("greggs"))
|
||||||
greggs = fetch_data("greggs")
|
|
||||||
print("fetched")
|
|
||||||
#greggs = [(55.85,-4.02),(52.443,-1.833),(51.28,-1.0)]
|
|
||||||
|
|
||||||
#relative distance matrix
|
repeat_rows = np.tile(greggs, (len(greggs), 1, 1))
|
||||||
dist_matrix = np.zeros((len(greggs),len(greggs)))
|
repeat_cols = np.transpose(repeat_rows, (1, 0, 2))
|
||||||
for i in tqdm(range(len(greggs))):
|
dist_matrix = spherical_dist(repeat_rows, repeat_cols)
|
||||||
first=greggs[i]
|
|
||||||
for j in range(len(greggs)):
|
|
||||||
second=greggs[j]
|
|
||||||
#calculate the distance between i and j coordinates
|
|
||||||
#dist_matrix[i,j] = np.sqrt((first[0]-second[0])**2 + (first[1]-second[1])**2 )
|
|
||||||
dist_matrix[i,j] = distance.distance(distance.lonlat(*first), distance.lonlat(*second)).km*1000
|
|
||||||
print(dist_matrix)
|
|
||||||
|
|
||||||
#find closest greggs
|
#find closest greggs
|
||||||
distances = pd.Series(np.zeros(len(greggs)))
|
# distances = pd.Series(np.zeros(len(greggs)))
|
||||||
for i in range(len(greggs)):
|
# for i in range(len(greggs)):
|
||||||
current = greggs[i]
|
# current = greggs[i]
|
||||||
#distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2
|
# #distances[i] = np.sqrt((current[0]-location[0])**2 + (current[1]-location[1])**2
|
||||||
distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000
|
# distances[i] = distance.distance(distance.lonlat(*current), distance.lonlat(*location)).km*1000
|
||||||
|
|
||||||
|
repeated = np.tile(location, (len(greggs), 1))
|
||||||
|
distances = spherical_dist(repeated, greggs)
|
||||||
|
distances = pd.Series(distances)
|
||||||
|
|
||||||
print(distances)
|
print(distances)
|
||||||
distances = distances.sort_values()
|
distances = distances.sort_values()
|
||||||
|
|
@ -104,7 +111,7 @@ def main():
|
||||||
#greggs = fetch_data("greggs")
|
#greggs = fetch_data("greggs")
|
||||||
#print(f"Query done - got {len(greggs)} Greggs!")
|
#print(f"Query done - got {len(greggs)} Greggs!")
|
||||||
|
|
||||||
print(format_location(encode((0.091659, 52.210796))))
|
print(format_location(encode((52.210796, 0.091659))))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -2,4 +2,3 @@ overpy
|
||||||
django
|
django
|
||||||
numpy
|
numpy
|
||||||
pandas
|
pandas
|
||||||
geopy
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue