summaryrefslogtreecommitdiff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r--utils.py224
1 files changed, 106 insertions, 118 deletions
diff --git a/utils.py b/utils.py
index 2898b3d..a4b789d 100644
--- a/utils.py
+++ b/utils.py
@@ -5,47 +5,26 @@ import requests
from sklearn.cluster import KMeans
-# Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes
-def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2):
+def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2):
# Cluster the coordinates
kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids)
-
- # Fit the coordinates to the clusters
kmeans.fit(df['normalized_gps'].values.tolist())
- # Add the cluster labels to the dataframe
df['cluster'] = kmeans.labels_
-
- # Create centroid strings
centroid_1 = list_to_string([centroids[0]])
centroid_2 = list_to_string([centroids[1]])
# Return the list of locations in each cluster
route_1 = df[df['cluster'] == 0]
- route_1_stops = len(route_1['gps'].values.tolist())
- route_1_str = list_to_string(route_1['gps'].values.tolist())
-
route_2 = df[df['cluster'] == 1]
- route_2_stops = len(route_2['gps'].values.tolist())
- route_2_str = list_to_string(route_2['gps'].values.tolist())
-
- # Get the trip time for each route
- trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end)
- trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end)
-
- if minimize:
- # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference
- if abs(trip_hrs_1 - trip_hrs_2) > time_diff:
- route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(),
- route_2['gps'].values.tolist(),
- centroid_1, centroid_2, end, time_diff,
- n=n)
- else:
- route_1_coordinates = route_1['gps'].values.tolist()
- route_2_coordinates = route_2['gps'].values.tolist()
- else:
- route_1_coordinates = route_1['gps'].values.tolist()
- route_2_coordinates = route_2['gps'].values.tolist()
+
+ route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(),
+ route_2['gps'].values.tolist(),
+ centroid_1, centroid_2, end, time_diff, n)
+
+ # Remove waypoints from the longest route until the trip time is less than the max time
+ route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time)
+ route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time)
# Edit the dataframe to reflect the new coordinate clusters
df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0
@@ -57,52 +36,40 @@ def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, minimi
def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end,
time_diff, n):
"""
- Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a time difference that is less than the time difference
+ Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a
+ time difference that is less than the time difference
"""
- # Find the difference in time between the two routes
- route_1_time = get_trip_time(list_to_string(route_1_coordinates),
- len(route_1_coordinates), route_1_start, end)
- route_2_time = get_trip_time(list_to_string(route_2_coordinates),
- len(route_2_coordinates), route_2_start, end)
- route_time_diff = abs(route_1_time - route_2_time)
-
- # If the difference in time is greater than the time difference, move the closest coordinate from the longer route to the shorter route
- if route_time_diff > time_diff:
- # Find which route is longer
- if route_1_time > route_2_time:
- longer_route = route_1_coordinates
- shorter_route = route_2_coordinates
-
- for i in range(n):
- # Move the closest coordinate from the longer route to the shorter route
- closest_coordinate = move_coordinate(longer_route, shorter_route)
- longer_route.remove(closest_coordinate)
- shorter_route.append(closest_coordinate)
-
- # Recursively call the function
- return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end,
- time_diff, n)
-
- else:
- longer_route = route_2_coordinates
- shorter_route = route_1_coordinates
-
- for i in range(n):
- # Move the closest coordinate from the longer route to the shorter route
- closest_coordinate = move_coordinate(longer_route, shorter_route)
- longer_route.remove(closest_coordinate)
- shorter_route.append(closest_coordinate)
-
- # Recursively call the function
- return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end,
- time_diff, n)
-
- # If the difference in time is less than the time difference, return the routes
- return route_1_coordinates, route_2_coordinates
+ # Find the trip time for each route
+ route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end)
+ route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end)
+
+ # Find the average trip time
+ average_time = (route_1_time + route_2_time) / 2
+
+ # Define a list of all times and route coordinates
+ times = [route_1_time, route_2_time]
+ routes = [route_1_coordinates, route_2_coordinates]
+
+ # Sort the routes by time
+ sorted_indices = np.argsort(times)
+
+ # If the difference of the longest trip time from average is greater than the time difference
+ if times[sorted_indices[1]] - average_time > time_diff:
+ # Move the closest coordinate(s) from the longest route to the shortest route
+ for i in range(n):
+ closest_coordinate = __move_coordinate__(routes[sorted_indices[1]], routes[sorted_indices[0]])
+ routes[sorted_indices[1]].remove(closest_coordinate)
+ routes[sorted_indices[0]].append(closest_coordinate)
+
+ # Recursively call the function
+ return minimize_route_time_diff(routes[0], routes[1], route_1_start, route_2_start, end, time_diff, n)
+
+ # If the difference of the longest trip time from average is less than the time difference, return the routes
+ return routes[0], routes[1]
# Create a function to minimize the time difference between three routes
-def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2):
+def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2):
# Cluster the coordinates
kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids)
@@ -119,46 +86,18 @@ def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, minimi
# Return the list of locations in each cluster
route_1 = df[df['cluster'] == 0]
- route_1_stops = len(route_1['gps'].values.tolist())
- route_1_str = list_to_string(route_1['gps'].values.tolist())
-
route_2 = df[df['cluster'] == 1]
- route_2_stops = len(route_2['gps'].values.tolist())
- route_2_str = list_to_string(route_2['gps'].values.tolist())
-
route_3 = df[df['cluster'] == 2]
- route_3_stops = len(route_3['gps'].values.tolist())
- route_3_str = list_to_string(route_3['gps'].values.tolist())
-
- # Get the trip time for each route
- trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end)
- trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end)
- trip_hrs_3 = get_trip_time(route_3_str, route_3_stops, centroid_3, end)
- average_time = (trip_hrs_1 + trip_hrs_2 + trip_hrs_3) / 3
+ # Minimize the time difference between the routes
+ route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3(
+ route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), route_3['gps'].values.tolist(),
+ centroid_1, centroid_2, centroid_3, end, time_diff, n)
- times = [trip_hrs_1, trip_hrs_2, trip_hrs_3]
- routes = [route_1_str, route_2_str, route_3_str]
-
- sorted_indices = np.argsort(times)
-
- if minimize:
- # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference
- if times[sorted_indices[2]] - average_time > time_diff:
- route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3(
- route_1['gps'].values.tolist(),
- route_2['gps'].values.tolist(),
- route_3['gps'].values.tolist(),
- centroid_1, centroid_2, centroid_3, end, time_diff,
- n=n)
- else:
- route_1_coordinates = route_1['gps'].values.tolist()
- route_2_coordinates = route_2['gps'].values.tolist()
- route_3_coordinates = route_3['gps'].values.tolist()
- else:
- route_1_coordinates = route_1['gps'].values.tolist()
- route_2_coordinates = route_2['gps'].values.tolist()
- route_3_coordinates = route_3['gps'].values.tolist()
+ # Remove waypoints from the longest route until the trip time is less than the max time
+ route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time)
+ route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time)
+ route_3_coordinates = __remove_longest_waypoints__(route_3_coordinates, centroid_3, end, max_time)
# Edit the dataframe to reflect the new coordinate clusters
df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0
@@ -192,7 +131,7 @@ def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3
if times[sorted_indices[2]] - average_time > time_diff:
# Move the closest coordinate(s) from the longest route to the shortest route
for i in range(n):
- closest_coordinate = move_coordinate(routes[sorted_indices[2]], routes[sorted_indices[0]])
+ closest_coordinate = __move_coordinate__(routes[sorted_indices[2]], routes[sorted_indices[0]])
routes[sorted_indices[2]].remove(closest_coordinate)
routes[sorted_indices[0]].append(closest_coordinate)
@@ -204,6 +143,25 @@ def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3
return routes[0], routes[1], routes[2]
+def __remove_longest_waypoints__(route_coordinates, start, end, max_time):
+ """
+ Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the
+ original list but has a trip time less than the max time
+ """
+ # Find the trip time for the route
+ route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end)
+
+ # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean
+ if route_time > max_time:
+ route_mean = __mean_center__(route_coordinates)
+ furthest_coordinate = __find_farthest_coordinate__(route_coordinates, route_mean)
+ route_coordinates.remove(furthest_coordinate)
+
+ return __remove_longest_waypoints__(route_coordinates, start, end, max_time)
+
+ return route_coordinates
+
+
def list_to_string(list_of_lists):
"""
Takes a list of lists of coordinates and returns a string of the coordinates
@@ -288,23 +246,53 @@ def __min_max_normalize__(value, min_value, max_value):
# Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid
-def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates):
+def __move_coordinate__(larger_centroid_coordinates, smaller_centroid_coordinates):
# Calculate the centroid of the smaller cluster
- smaller_centroid = [sum([i[0] for i in smaller_centroid_coordinates]) / len(smaller_centroid_coordinates),
- sum([i[1] for i in smaller_centroid_coordinates]) / len(smaller_centroid_coordinates)]
+ smaller_centroid = __mean_center__(smaller_centroid_coordinates)
+
+ # Find the coordinate in the larger cluster that is closest to the centroid of the smaller cluster
+ closest_coordinate = __find_closest_coordinate__(larger_centroid_coordinates, smaller_centroid)
+
+ return closest_coordinate
+
- # Find the coordinate in larger_centroid_coordinates that is closest to smaller_centroid
- closest_coordinate = larger_centroid_coordinates[0]
- closest_coordinate_distance = __distance__(closest_coordinate, smaller_centroid)
+def __find_closest_coordinate__(coordinates, centroid):
+ """
+ Takes a list of coordinates and a centroid and returns the coordinate in the list that is closest to the centroid
+ """
+ closest_coordinate = coordinates[0]
+ closest_coordinate_distance = __distance__(closest_coordinate, centroid)
- for coordinate in larger_centroid_coordinates:
- if __distance__(coordinate, smaller_centroid) < closest_coordinate_distance:
+ for coordinate in coordinates:
+ if __distance__(coordinate, centroid) < closest_coordinate_distance:
closest_coordinate = coordinate
- closest_coordinate_distance = __distance__(coordinate, smaller_centroid)
+ closest_coordinate_distance = __distance__(coordinate, centroid)
return closest_coordinate
+def __find_farthest_coordinate__(coordinates, centroid):
+ """
+ Takes a list of coordinates and a centroid and returns the coordinate in the list that is furthest from the centroid
+ """
+ farthest_coordinate = coordinates[0]
+ farthest_coordinate_distance = __distance__(farthest_coordinate, centroid)
+
+ for coordinate in coordinates:
+ if __distance__(coordinate, centroid) > farthest_coordinate_distance:
+ farthest_coordinate = coordinate
+ farthest_coordinate_distance = __distance__(coordinate, centroid)
+
+ return farthest_coordinate
+
+
+def __mean_center__(coordinates):
+ """
+ Takes a list of coordinates and returns the mean center of the coordinates
+ """
+ return [sum([i[0] for i in coordinates]) / len(coordinates), sum([i[1] for i in coordinates]) / len(coordinates)]
+
+
def __distance__(coordinate1, coordinate2):
"""
Takes two coordinates and returns the distance between them