From 0025654ae13215ed87830d542ae88924e084ba1c Mon Sep 17 00:00:00 2001 From: itsGarrin Date: Tue, 7 Nov 2023 13:17:44 -0500 Subject: Added a max time parameter --- utils.py | 224 ++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 106 insertions(+), 118 deletions(-) (limited to 'utils.py') diff --git a/utils.py b/utils.py index 2898b3d..a4b789d 100644 --- a/utils.py +++ b/utils.py @@ -5,47 +5,26 @@ import requests from sklearn.cluster import KMeans -# Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes -def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2): +def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) - - # Fit the coordinates to the clusters kmeans.fit(df['normalized_gps'].values.tolist()) - # Add the cluster labels to the dataframe df['cluster'] = kmeans.labels_ - - # Create centroid strings centroid_1 = list_to_string([centroids[0]]) centroid_2 = list_to_string([centroids[1]]) # Return the list of locations in each cluster route_1 = df[df['cluster'] == 0] - route_1_stops = len(route_1['gps'].values.tolist()) - route_1_str = list_to_string(route_1['gps'].values.tolist()) - route_2 = df[df['cluster'] == 1] - route_2_stops = len(route_2['gps'].values.tolist()) - route_2_str = list_to_string(route_2['gps'].values.tolist()) - - # Get the trip time for each route - trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end) - trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end) - - if minimize: - # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference - if abs(trip_hrs_1 - trip_hrs_2) > time_diff: - route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), - route_2['gps'].values.tolist(), - centroid_1, centroid_2, end, time_diff, - n=n) - else: - route_1_coordinates = route_1['gps'].values.tolist() - route_2_coordinates = route_2['gps'].values.tolist() - else: - route_1_coordinates = route_1['gps'].values.tolist() - route_2_coordinates = route_2['gps'].values.tolist() + + route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), + route_2['gps'].values.tolist(), + centroid_1, centroid_2, end, time_diff, n) + + # Remove waypoints from the longest route until the trip time is less than the max time + route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time) + route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time) # Edit the dataframe to reflect the new coordinate clusters df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 @@ -57,52 +36,40 @@ def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, minimi def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end, time_diff, n): """ - Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a time difference that is less than the time difference + Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a + time difference that is less than the time difference """ - # Find the difference in time between the two routes - route_1_time = get_trip_time(list_to_string(route_1_coordinates), - len(route_1_coordinates), route_1_start, end) - route_2_time = get_trip_time(list_to_string(route_2_coordinates), - len(route_2_coordinates), route_2_start, end) - route_time_diff = abs(route_1_time - route_2_time) - - # If the difference in time is greater than the time difference, move the closest coordinate from the longer route to the shorter route - if route_time_diff > time_diff: - # Find which route is longer - if route_1_time > route_2_time: - longer_route = route_1_coordinates - shorter_route = route_2_coordinates - - for i in range(n): - # Move the closest coordinate from the longer route to the shorter route - closest_coordinate = move_coordinate(longer_route, shorter_route) - longer_route.remove(closest_coordinate) - shorter_route.append(closest_coordinate) - - # Recursively call the function - return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end, - time_diff, n) - - else: - longer_route = route_2_coordinates - shorter_route = route_1_coordinates - - for i in range(n): - # Move the closest coordinate from the longer route to the shorter route - closest_coordinate = move_coordinate(longer_route, shorter_route) - longer_route.remove(closest_coordinate) - shorter_route.append(closest_coordinate) - - # Recursively call the function - return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end, - time_diff, n) - - # If the difference in time is less than the time difference, return the routes - return route_1_coordinates, route_2_coordinates + # Find the trip time for each route + route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end) + route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end) + + # Find the average trip time + average_time = (route_1_time + route_2_time) / 2 + + # Define a list of all times and route coordinates + times = [route_1_time, route_2_time] + routes = [route_1_coordinates, route_2_coordinates] + + # Sort the routes by time + sorted_indices = np.argsort(times) + + # If the difference of the longest trip time from average is greater than the time difference + if times[sorted_indices[1]] - average_time > time_diff: + # Move the closest coordinate(s) from the longest route to the shortest route + for i in range(n): + closest_coordinate = __move_coordinate__(routes[sorted_indices[1]], routes[sorted_indices[0]]) + routes[sorted_indices[1]].remove(closest_coordinate) + routes[sorted_indices[0]].append(closest_coordinate) + + # Recursively call the function + return minimize_route_time_diff(routes[0], routes[1], route_1_start, route_2_start, end, time_diff, n) + + # If the difference of the longest trip time from average is less than the time difference, return the routes + return routes[0], routes[1] # Create a function to minimize the time difference between three routes -def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2): +def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) @@ -119,46 +86,18 @@ def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, minimi # Return the list of locations in each cluster route_1 = df[df['cluster'] == 0] - route_1_stops = len(route_1['gps'].values.tolist()) - route_1_str = list_to_string(route_1['gps'].values.tolist()) - route_2 = df[df['cluster'] == 1] - route_2_stops = len(route_2['gps'].values.tolist()) - route_2_str = list_to_string(route_2['gps'].values.tolist()) - route_3 = df[df['cluster'] == 2] - route_3_stops = len(route_3['gps'].values.tolist()) - route_3_str = list_to_string(route_3['gps'].values.tolist()) - - # Get the trip time for each route - trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end) - trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end) - trip_hrs_3 = get_trip_time(route_3_str, route_3_stops, centroid_3, end) - average_time = (trip_hrs_1 + trip_hrs_2 + trip_hrs_3) / 3 + # Minimize the time difference between the routes + route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3( + route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), route_3['gps'].values.tolist(), + centroid_1, centroid_2, centroid_3, end, time_diff, n) - times = [trip_hrs_1, trip_hrs_2, trip_hrs_3] - routes = [route_1_str, route_2_str, route_3_str] - - sorted_indices = np.argsort(times) - - if minimize: - # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference - if times[sorted_indices[2]] - average_time > time_diff: - route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3( - route_1['gps'].values.tolist(), - route_2['gps'].values.tolist(), - route_3['gps'].values.tolist(), - centroid_1, centroid_2, centroid_3, end, time_diff, - n=n) - else: - route_1_coordinates = route_1['gps'].values.tolist() - route_2_coordinates = route_2['gps'].values.tolist() - route_3_coordinates = route_3['gps'].values.tolist() - else: - route_1_coordinates = route_1['gps'].values.tolist() - route_2_coordinates = route_2['gps'].values.tolist() - route_3_coordinates = route_3['gps'].values.tolist() + # Remove waypoints from the longest route until the trip time is less than the max time + route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time) + route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time) + route_3_coordinates = __remove_longest_waypoints__(route_3_coordinates, centroid_3, end, max_time) # Edit the dataframe to reflect the new coordinate clusters df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 @@ -192,7 +131,7 @@ def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3 if times[sorted_indices[2]] - average_time > time_diff: # Move the closest coordinate(s) from the longest route to the shortest route for i in range(n): - closest_coordinate = move_coordinate(routes[sorted_indices[2]], routes[sorted_indices[0]]) + closest_coordinate = __move_coordinate__(routes[sorted_indices[2]], routes[sorted_indices[0]]) routes[sorted_indices[2]].remove(closest_coordinate) routes[sorted_indices[0]].append(closest_coordinate) @@ -204,6 +143,25 @@ def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3 return routes[0], routes[1], routes[2] +def __remove_longest_waypoints__(route_coordinates, start, end, max_time): + """ + Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the + original list but has a trip time less than the max time + """ + # Find the trip time for the route + route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end) + + # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean + if route_time > max_time: + route_mean = __mean_center__(route_coordinates) + furthest_coordinate = __find_farthest_coordinate__(route_coordinates, route_mean) + route_coordinates.remove(furthest_coordinate) + + return __remove_longest_waypoints__(route_coordinates, start, end, max_time) + + return route_coordinates + + def list_to_string(list_of_lists): """ Takes a list of lists of coordinates and returns a string of the coordinates @@ -288,23 +246,53 @@ def __min_max_normalize__(value, min_value, max_value): # Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid -def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates): +def __move_coordinate__(larger_centroid_coordinates, smaller_centroid_coordinates): # Calculate the centroid of the smaller cluster - smaller_centroid = [sum([i[0] for i in smaller_centroid_coordinates]) / len(smaller_centroid_coordinates), - sum([i[1] for i in smaller_centroid_coordinates]) / len(smaller_centroid_coordinates)] + smaller_centroid = __mean_center__(smaller_centroid_coordinates) + + # Find the coordinate in the larger cluster that is closest to the centroid of the smaller cluster + closest_coordinate = __find_closest_coordinate__(larger_centroid_coordinates, smaller_centroid) + + return closest_coordinate + - # Find the coordinate in larger_centroid_coordinates that is closest to smaller_centroid - closest_coordinate = larger_centroid_coordinates[0] - closest_coordinate_distance = __distance__(closest_coordinate, smaller_centroid) +def __find_closest_coordinate__(coordinates, centroid): + """ + Takes a list of coordinates and a centroid and returns the coordinate in the list that is closest to the centroid + """ + closest_coordinate = coordinates[0] + closest_coordinate_distance = __distance__(closest_coordinate, centroid) - for coordinate in larger_centroid_coordinates: - if __distance__(coordinate, smaller_centroid) < closest_coordinate_distance: + for coordinate in coordinates: + if __distance__(coordinate, centroid) < closest_coordinate_distance: closest_coordinate = coordinate - closest_coordinate_distance = __distance__(coordinate, smaller_centroid) + closest_coordinate_distance = __distance__(coordinate, centroid) return closest_coordinate +def __find_farthest_coordinate__(coordinates, centroid): + """ + Takes a list of coordinates and a centroid and returns the coordinate in the list that is furthest from the centroid + """ + farthest_coordinate = coordinates[0] + farthest_coordinate_distance = __distance__(farthest_coordinate, centroid) + + for coordinate in coordinates: + if __distance__(coordinate, centroid) > farthest_coordinate_distance: + farthest_coordinate = coordinate + farthest_coordinate_distance = __distance__(coordinate, centroid) + + return farthest_coordinate + + +def __mean_center__(coordinates): + """ + Takes a list of coordinates and returns the mean center of the coordinates + """ + return [sum([i[0] for i in coordinates]) / len(coordinates), sum([i[1] for i in coordinates]) / len(coordinates)] + + def __distance__(coordinate1, coordinate2): """ Takes two coordinates and returns the distance between them -- cgit v1.2.3