diff options
| author | itsGarrin <garrin.shieh@gmail.com> | 2023-11-07 18:15:55 -0500 |
|---|---|---|
| committer | itsGarrin <garrin.shieh@gmail.com> | 2023-11-07 18:15:55 -0500 |
| commit | 679044dadde06e7bfee642d6c1aac517dd4eee03 (patch) | |
| tree | f2914578612de86c5583875b859d030d5fa272f9 /utils.py | |
| parent | 0025654ae13215ed87830d542ae88924e084ba1c (diff) | |
Made the algorithm work for n bike teams
Diffstat (limited to 'utils.py')
| -rw-r--r-- | utils.py | 253 |
1 files changed, 82 insertions, 171 deletions
@@ -5,161 +5,30 @@ import requests from sklearn.cluster import KMeans -def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2): - # Cluster the coordinates - kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) - kmeans.fit(df['normalized_gps'].values.tolist()) - - df['cluster'] = kmeans.labels_ - centroid_1 = list_to_string([centroids[0]]) - centroid_2 = list_to_string([centroids[1]]) - - # Return the list of locations in each cluster - route_1 = df[df['cluster'] == 0] - route_2 = df[df['cluster'] == 1] - - route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), - route_2['gps'].values.tolist(), - centroid_1, centroid_2, end, time_diff, n) - - # Remove waypoints from the longest route until the trip time is less than the max time - route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time) - route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time) - - # Edit the dataframe to reflect the new coordinate clusters - df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 - df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1 - - return df, route_1_coordinates, route_2_coordinates - +def cluster_and_optimize(df, centroids, end, time_diff=0.25, max_time=24, n=2): + # Create a new column with normalized gps coordinates and centroids + df['normalized_gps'], norm_centroids = __normalize_gps(df['gps'].values.tolist(), centroids) -def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end, - time_diff, n): - """ - Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a - time difference that is less than the time difference - """ - # Find the trip time for each route - route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end) - route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end) - - # Find the average trip time - average_time = (route_1_time + route_2_time) / 2 - - # Define a list of all times and route coordinates - times = [route_1_time, route_2_time] - routes = [route_1_coordinates, route_2_coordinates] - - # Sort the routes by time - sorted_indices = np.argsort(times) - - # If the difference of the longest trip time from average is greater than the time difference - if times[sorted_indices[1]] - average_time > time_diff: - # Move the closest coordinate(s) from the longest route to the shortest route - for i in range(n): - closest_coordinate = __move_coordinate__(routes[sorted_indices[1]], routes[sorted_indices[0]]) - routes[sorted_indices[1]].remove(closest_coordinate) - routes[sorted_indices[0]].append(closest_coordinate) - - # Recursively call the function - return minimize_route_time_diff(routes[0], routes[1], route_1_start, route_2_start, end, time_diff, n) - - # If the difference of the longest trip time from average is less than the time difference, return the routes - return routes[0], routes[1] - - -# Create a function to minimize the time difference between three routes -def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) - - # Fit the coordinates to the clusters kmeans.fit(df['normalized_gps'].values.tolist()) - # Add the cluster labels to the dataframe df['cluster'] = kmeans.labels_ - # Create centroid strings - centroid_1 = list_to_string([centroids[0]]) - centroid_2 = list_to_string([centroids[1]]) - centroid_3 = list_to_string([centroids[2]]) + routes = [] + starts = [] + for i in range(len(centroids)): + routes.append(df[df['cluster'] == i]['gps'].values.tolist()) + starts.append(list_to_string([centroids[i]])) - # Return the list of locations in each cluster - route_1 = df[df['cluster'] == 0] - route_2 = df[df['cluster'] == 1] - route_3 = df[df['cluster'] == 2] - - # Minimize the time difference between the routes - route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3( - route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), route_3['gps'].values.tolist(), - centroid_1, centroid_2, centroid_3, end, time_diff, n) + routes = __minimize_route_time_diff(routes, starts, end, time_diff, n) # Remove waypoints from the longest route until the trip time is less than the max time - route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time) - route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time) - route_3_coordinates = __remove_longest_waypoints__(route_3_coordinates, centroid_3, end, max_time) - - # Edit the dataframe to reflect the new coordinate clusters - df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 - df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1 - df.loc[df['gps'].astype(str).isin(map(str, route_3_coordinates)), 'cluster'] = 2 - - return df, route_1_coordinates, route_2_coordinates, route_3_coordinates - - -def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3_coordinates, - route_1_start, route_2_start, route_3_start, end, time_diff, n): - """ - Takes three routes and a time difference and returns routes that have time differences less than the time difference - """ - # Find the trip time for each route - route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end) - route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end) - route_3_time = get_trip_time(list_to_string(route_3_coordinates), len(route_3_coordinates), route_3_start, end) - - # Find the average trip time - average_time = (route_1_time + route_2_time + route_3_time) / 3 - - # Define a list of all times and route coordinates - times = [route_1_time, route_2_time, route_3_time] - routes = [route_1_coordinates, route_2_coordinates, route_3_coordinates] - - # Sort the routes by time - sorted_indices = np.argsort(times) - - # If the difference of the longest trip time from average is greater than the time difference - if times[sorted_indices[2]] - average_time > time_diff: - # Move the closest coordinate(s) from the longest route to the shortest route - for i in range(n): - closest_coordinate = __move_coordinate__(routes[sorted_indices[2]], routes[sorted_indices[0]]) - routes[sorted_indices[2]].remove(closest_coordinate) - routes[sorted_indices[0]].append(closest_coordinate) - - # Recursively call the function - return minimize_route_time_diff_3(routes[0], routes[1], routes[2], route_1_start, route_2_start, route_3_start, - end, time_diff, n) - - # If the difference of the longest trip time from average is less than the time difference, return the routes - return routes[0], routes[1], routes[2] - - -def __remove_longest_waypoints__(route_coordinates, start, end, max_time): - """ - Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the - original list but has a trip time less than the max time - """ - # Find the trip time for the route - route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end) - - # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean - if route_time > max_time: - route_mean = __mean_center__(route_coordinates) - furthest_coordinate = __find_farthest_coordinate__(route_coordinates, route_mean) - route_coordinates.remove(furthest_coordinate) + for i in range(len(routes)): + routes[i] = __remove_longest_waypoints(routes[i], starts[i], end, max_time) + df.loc[df['gps'].astype(str).isin(map(str, routes[i])), 'cluster'] = i - return __remove_longest_waypoints__(route_coordinates, start, end, max_time) - - return route_coordinates + return df, routes def list_to_string(list_of_lists): @@ -193,7 +62,7 @@ def create_json_df(coordinate_string, start, end): return df -def get_trip_time(coordinate_string, num_waypoints, start, end): +def get_trip_time(coordinate_string, num_waypoints, start, end, time_per_waypoint=90): """ Takes a list of lists of coordinates and returns the time of the trip in hours """ @@ -202,14 +71,67 @@ def get_trip_time(coordinate_string, num_waypoints, start, end): coordinates = coordinates.json() travel_time_seconds = int(coordinates['trips'][0]['duration']) - waypoint_time_seconds = num_waypoints * 90 + waypoint_time_seconds = num_waypoints * time_per_waypoint total_time_hours = (travel_time_seconds + waypoint_time_seconds) / 3600 return total_time_hours -def normalize_gps(coordinates, centroids): +def __minimize_route_time_diff(routes, starts, end, + time_diff, n): + """ + Takes two routes and a time difference and returns routes that have time differences less than the time difference + """ + times = [] + + for i, route in enumerate(routes): + times.append(get_trip_time(list_to_string(route), len(route), starts[i], end)) + + # Find the average trip time + average_time = np.mean(times) + + # Sort the trip times + sorted_indices = np.argsort(times) + + # Find the difference between the longest trip time and the average trip time + time_difference = times[sorted_indices[-1]] - average_time + + # If the difference is greater than the time difference, move a coordinate from the longest route to the shortest route + if time_difference > time_diff: + # Move a coordinate from the longest route to the shortest route + closest_coordinate = __find_closest_coordinate(routes[sorted_indices[-1]], + __mean_center(routes[sorted_indices[0]])) + routes[sorted_indices[0]].append(closest_coordinate) + routes[sorted_indices[-1]].remove(closest_coordinate) + + # Recursively minimize the time difference between the routes + return __minimize_route_time_diff(routes, starts, end, time_diff, n) + + # If the difference of the longest trip time from average is less than the time difference, return the routes + return routes + + +def __remove_longest_waypoints(route_coordinates, start, end, max_time): + """ + Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the + original list but has a trip time less than the max time + """ + # Find the trip time for the route + route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end) + + # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean + if route_time > max_time: + route_mean = __mean_center(route_coordinates) + furthest_coordinate = __find_farthest_coordinate(route_coordinates, route_mean) + route_coordinates.remove(furthest_coordinate) + + return __remove_longest_waypoints(route_coordinates, start, end, max_time) + + return route_coordinates + + +def __normalize_gps(coordinates, centroids): """ Takes a list of lists of coordinates and centroids and returns a list of lists of normalized coordinates and centroids """ @@ -230,70 +152,59 @@ def normalize_gps(coordinates, centroids): for i in coordinates: normalized_coordinates.append( - [__min_max_normalize__(i[0], min_lat, max_lat), __min_max_normalize__(i[1], min_lon, max_lon)]) + [__min_max_normalize(i[0], min_lat, max_lat), __min_max_normalize(i[1], min_lon, max_lon)]) for i in centroids: normalized_centroids.append( - [__min_max_normalize__(i[0], min_lat, max_lat), __min_max_normalize__(i[1], min_lon, max_lon)]) + [__min_max_normalize(i[0], min_lat, max_lat), __min_max_normalize(i[1], min_lon, max_lon)]) return normalized_coordinates, normalized_centroids -def __min_max_normalize__(value, min_value, max_value): +def __min_max_normalize(value, min_value, max_value): """ Takes a value, min value, and max value and returns the normalized value """ return (value - min_value) / (max_value - min_value) -# Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid -def __move_coordinate__(larger_centroid_coordinates, smaller_centroid_coordinates): - # Calculate the centroid of the smaller cluster - smaller_centroid = __mean_center__(smaller_centroid_coordinates) - - # Find the coordinate in the larger cluster that is closest to the centroid of the smaller cluster - closest_coordinate = __find_closest_coordinate__(larger_centroid_coordinates, smaller_centroid) - - return closest_coordinate - - -def __find_closest_coordinate__(coordinates, centroid): +def __find_closest_coordinate(coordinates, centroid): """ Takes a list of coordinates and a centroid and returns the coordinate in the list that is closest to the centroid """ closest_coordinate = coordinates[0] - closest_coordinate_distance = __distance__(closest_coordinate, centroid) + closest_coordinate_distance = __distance(closest_coordinate, centroid) for coordinate in coordinates: - if __distance__(coordinate, centroid) < closest_coordinate_distance: + if __distance(coordinate, centroid) < closest_coordinate_distance: closest_coordinate = coordinate - closest_coordinate_distance = __distance__(coordinate, centroid) + closest_coordinate_distance = __distance(coordinate, centroid) return closest_coordinate -def __find_farthest_coordinate__(coordinates, centroid): +def __find_farthest_coordinate(coordinates, centroid): """ Takes a list of coordinates and a centroid and returns the coordinate in the list that is furthest from the centroid """ farthest_coordinate = coordinates[0] - farthest_coordinate_distance = __distance__(farthest_coordinate, centroid) + farthest_coordinate_distance = __distance(farthest_coordinate, centroid) for coordinate in coordinates: - if __distance__(coordinate, centroid) > farthest_coordinate_distance: + if __distance(coordinate, centroid) > farthest_coordinate_distance: farthest_coordinate = coordinate - farthest_coordinate_distance = __distance__(coordinate, centroid) + farthest_coordinate_distance = __distance(coordinate, centroid) return farthest_coordinate -def __mean_center__(coordinates): +def __mean_center(coordinates): """ Takes a list of coordinates and returns the mean center of the coordinates """ return [sum([i[0] for i in coordinates]) / len(coordinates), sum([i[1] for i in coordinates]) / len(coordinates)] -def __distance__(coordinate1, coordinate2): +def __distance(coordinate1, coordinate2): """ Takes two coordinates and returns the distance between them """ |
