summaryrefslogtreecommitdiff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r--utils.py151
1 files changed, 132 insertions, 19 deletions
diff --git a/utils.py b/utils.py
index e0cc295..2898b3d 100644
--- a/utils.py
+++ b/utils.py
@@ -1,11 +1,12 @@
import folium
+import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans
# Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes
-def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff):
+def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2):
# Cluster the coordinates
kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids)
@@ -32,11 +33,16 @@ def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff):
trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end)
trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end)
- # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference
- if abs(trip_hrs_1 - trip_hrs_2) > time_diff:
- route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(),
- route_2['gps'].values.tolist(),
- centroid_1, centroid_2, end, time_diff)
+ if minimize:
+ # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference
+ if abs(trip_hrs_1 - trip_hrs_2) > time_diff:
+ route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(),
+ route_2['gps'].values.tolist(),
+ centroid_1, centroid_2, end, time_diff,
+ n=n)
+ else:
+ route_1_coordinates = route_1['gps'].values.tolist()
+ route_2_coordinates = route_2['gps'].values.tolist()
else:
route_1_coordinates = route_1['gps'].values.tolist()
route_2_coordinates = route_2['gps'].values.tolist()
@@ -49,7 +55,7 @@ def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff):
def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end,
- time_diff):
+ time_diff, n):
"""
Takes two routes and a time difference and returns both routes, rebalanced by moving stops from the route with the longer trip time to the other one until their trip times differ by no more than the time difference
"""
@@ -63,34 +69,141 @@ def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_s
# If the difference in time is greater than the time difference, move the closest coordinate from the longer route to the shorter route
if route_time_diff > time_diff:
# Find which route is longer
- if len(route_1_coordinates) > len(route_2_coordinates):
+ if route_1_time > route_2_time:
longer_route = route_1_coordinates
shorter_route = route_2_coordinates
- # Move the closest coordinate from the longer route to the shorter route
- closest_coordinate = move_coordinate(longer_route, shorter_route)
- longer_route.remove(closest_coordinate)
- shorter_route.append(closest_coordinate)
+ for i in range(n):
+ # Move the closest coordinate from the longer route to the shorter route
+ closest_coordinate = move_coordinate(longer_route, shorter_route)
+ longer_route.remove(closest_coordinate)
+ shorter_route.append(closest_coordinate)
# Recursively call the function
- return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end, time_diff)
+ return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end,
+ time_diff, n)
else:
longer_route = route_2_coordinates
shorter_route = route_1_coordinates
- # Move the closest coordinate from the longer route to the shorter route
- closest_coordinate = move_coordinate(longer_route, shorter_route)
- longer_route.remove(closest_coordinate)
- shorter_route.append(closest_coordinate)
+ for i in range(n):
+ # Move the closest coordinate from the longer route to the shorter route
+ closest_coordinate = move_coordinate(longer_route, shorter_route)
+ longer_route.remove(closest_coordinate)
+ shorter_route.append(closest_coordinate)
# Recursively call the function
- return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end, time_diff)
+ return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end,
+ time_diff, n)
# If the difference in time is less than the time difference, return the routes
return route_1_coordinates, route_2_coordinates
+# Cluster the coordinates into three routes, optionally balance their trip times, and return the routes
def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2):
    """
    Cluster the stops in df into three routes and optionally balance their trip times.

    :param df: dataframe with a 'normalized_gps' column (used for clustering) and a 'gps'
        column (used to build the routes); a 'cluster' column is written to it in place
    :param centroids: sequence whose first three entries are the route start points
    :param norm_centroids: initial cluster centres handed to KMeans; the rest of this
        function only handles cluster labels 0, 1 and 2
    :param end: end point forwarded to get_trip_time
    :param time_diff: largest tolerated gap between the longest trip time and the average
        trip time before stops are moved between routes
    :param minimize: when False the k-means clusters are returned untouched
    :param n: number of stops moved per balancing step (forwarded to minimize_route_time_diff_3)
    :return: (df, route_1_coordinates, route_2_coordinates, route_3_coordinates)
    """
    # Cluster the coordinates, seeding k-means with the supplied centroids
    kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids)
    kmeans.fit(df['normalized_gps'].values.tolist())

    # Add the cluster labels to the dataframe
    df['cluster'] = kmeans.labels_

    # One start string and one coordinate list per route, in cluster-label order
    starts = [list_to_string([centroids[label]]) for label in range(3)]
    route_coordinates = [df[df['cluster'] == label]['gps'].values.tolist() for label in range(3)]

    if minimize:
        # Trip times are only needed to decide whether to rebalance, so the routing
        # requests are skipped entirely when minimize is False
        times = [get_trip_time(list_to_string(coordinates), len(coordinates), start, end)
                 for coordinates, start in zip(route_coordinates, starts)]
        average_time = sum(times) / 3

        # Rebalance only if the longest route exceeds the average by more than time_diff
        if max(times) - average_time > time_diff:
            route_coordinates = list(minimize_route_time_diff_3(
                route_coordinates[0], route_coordinates[1], route_coordinates[2],
                starts[0], starts[1], starts[2], end, time_diff,
                n=n))

    # Edit the dataframe to reflect the new coordinate clusters; coordinates are
    # compared through their string form, exactly as they are stored in 'gps'
    gps_as_text = df['gps'].astype(str)
    for label, coordinates in enumerate(route_coordinates):
        df.loc[gps_as_text.isin([str(coordinate) for coordinate in coordinates]), 'cluster'] = label

    return df, route_coordinates[0], route_coordinates[1], route_coordinates[2]
+
+
def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3_coordinates,
                               route_1_start, route_2_start, route_3_start, end, time_diff, n,
                               max_rounds=1000):
    """
    Takes three routes and a time difference and returns the three routes, rebalanced until the
    longest trip time exceeds the average trip time by no more than the time difference

    Each round moves up to n stops from the route with the longest trip time to the route with
    the shortest trip time, then re-measures all three routes. The coordinate lists passed in
    are modified in place and are the same objects that are returned.

    :param route_1_coordinates: list of stops on route 1 (likewise for routes 2 and 3)
    :param route_1_start: start point of route 1 as passed to get_trip_time (likewise 2 and 3)
    :param end: end point passed to get_trip_time
    :param time_diff: tolerated gap between the longest trip time and the average trip time
    :param n: number of stops moved per round
    :param max_rounds: upper bound on balancing rounds; the default mirrors CPython's default
        recursion limit, which is what bounded the earlier recursive form of this function
    :return: (route_1_coordinates, route_2_coordinates, route_3_coordinates)
    """
    import warnings

    routes = [route_1_coordinates, route_2_coordinates, route_3_coordinates]
    starts = [route_1_start, route_2_start, route_3_start]
    rounds_left = max_rounds

    # Iterate instead of recursing: moving stops can flip which route is longest, so the
    # balance may oscillate and never settle, which used to end in a RecursionError
    while True:
        # Find the trip time for each route
        times = [get_trip_time(list_to_string(route), len(route), start, end)
                 for route, start in zip(routes, starts)]
        average_time = sum(times) / 3

        # Sort the routes by time: first index is the shortest trip, last is the longest
        sorted_indices = np.argsort(times)
        shortest_route = routes[sorted_indices[0]]
        longest_route = routes[sorted_indices[2]]

        # Balanced enough: return the routes
        if times[sorted_indices[2]] - average_time <= time_diff:
            return routes[0], routes[1], routes[2]

        if rounds_left <= 0:
            reason = "the round limit was reached"
            break
        rounds_left -= 1

        # Move the closest coordinate(s) from the longest route to the shortest route
        moved = 0
        for _ in range(n):
            # Never strip a route of its last stop
            # NOTE(review): assumes get_trip_time / move_coordinate are not meant to be
            # called with an empty route - confirm against their definitions
            if len(longest_route) <= 1:
                break
            closest_coordinate = move_coordinate(longest_route, shortest_route)
            longest_route.remove(closest_coordinate)
            shortest_route.append(closest_coordinate)
            moved += 1

        # Nothing could be moved, so another round would measure the same routes again
        if not moved:
            reason = "no further stops can be moved"
            break

    warnings.warn(
        "minimize_route_time_diff_3 stopped before the routes were balanced: " + reason,
        RuntimeWarning,
    )
    return routes[0], routes[1], routes[2]
+
+
def list_to_string(list_of_lists):
"""
Takes a list of lists of coordinates and returns a string of the coordinates
@@ -131,7 +244,7 @@ def get_trip_time(coordinate_string, num_waypoints, start, end):
coordinates = coordinates.json()
travel_time_seconds = int(coordinates['trips'][0]['duration'])
- waypoint_time_seconds = num_waypoints * 60
+ waypoint_time_seconds = num_waypoints * 90
total_time_hours = (travel_time_seconds + waypoint_time_seconds) / 3600