# Reconstructed from a whitespace-collapsed rendering of the patch
#   commit 8a9c5cc9cd62425d7f6d4571429f1c7a0e85cf51
#   From: itsGarrin, Mon, 6 Nov 2023 13:52:30 -0500
#   Subject: Added route minimization for 2 routes.
#   utils.py | 158 insertions(+), 3 deletions(-)
#
# Only definitions that the patch shows in full are reproduced below.
# `create_json_df` lies outside the visible hunks (only its trailing
# `return df` appears) and is intentionally not reconstructed here; the
# module-level `import folium` / `import pandas as pd` of the original file
# belong to that unseen code.

import math

# Endpoint of the routing server's "trip" service (bike profile).
_TRIP_SERVICE_URL = 'http://acetyl.net:5000/trip/v1/bike/'

# Fixed time allowance added per stop, in seconds.
_SECONDS_PER_WAYPOINT = 60


# Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes
def cluster_and_minimize(df, centroids, norm_centroids, time_diff):
    """
    Clusters the rows of df around the given centroids, rebalances the two resulting routes so that
    their trip times differ by at most time_diff hours (where possible), and returns
    (df, route_1_coordinates, route_2_coordinates).

    df must have a 'gps' column and a 'normalized_gps' column; a 'cluster' column is written in place.
    Only clusters 0 and 1 are turned into routes, so exactly two centroids are expected.
    """
    # Imported here so the pure helpers in this module can be used without scikit-learn installed.
    from sklearn.cluster import KMeans

    # Explicit initial centres make repeated initialisations pointless, hence n_init=1.
    kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids, n_init=1)
    kmeans.fit(df['normalized_gps'].values.tolist())
    df['cluster'] = kmeans.labels_

    # Each route starts at its centroid; the trailing ';' joins it to the stop list.
    centroid_1 = list_to_string([centroids[0]]) + ';'
    centroid_2 = list_to_string([centroids[1]]) + ';'

    route_1_stops = df[df['cluster'] == 0]['gps'].values.tolist()
    route_2_stops = df[df['cluster'] == 1]['gps'].values.tolist()

    # minimize_route_time_diff measures both routes itself and returns them untouched when they are
    # already within time_diff, so there is no need to query the trip times here as well.
    route_1_coordinates, route_2_coordinates = minimize_route_time_diff(
        route_1_stops, route_2_stops, centroid_1, centroid_2, time_diff)

    # Edit the dataframe to reflect the new coordinate clusters
    gps_as_text = df['gps'].astype(str)
    df.loc[gps_as_text.isin([str(stop) for stop in route_1_coordinates]), 'cluster'] = 0
    df.loc[gps_as_text.isin([str(stop) for stop in route_2_coordinates]), 'cluster'] = 1

    return df, route_1_coordinates, route_2_coordinates


def list_to_string(list_of_lists):
    """
    Takes a list of lists of coordinates and returns a string of the coordinates

    Each pair is written as "<item 1>,<item 0>" and pairs are separated by ';' with no trailing
    separator. An empty list gives an empty string.
    """
    return ';'.join(str(pair[1]) + ',' + str(pair[0]) for pair in list_of_lists)


def get_trip_time(coordinate_string, num_waypoints, timeout=30):
    """
    Takes a coordinate string (as produced by list_to_string) and the number of stops and returns
    the time of the trip in hours.

    The travel duration reported by the trip service is truncated to whole seconds and a fixed
    allowance of one minute per stop is added. timeout is the number of seconds to wait for the
    server before requests raises a timeout error instead of blocking indefinitely.
    """
    # Imported here so the pure helpers in this module can be used without requests installed.
    import requests

    response = requests.get(_TRIP_SERVICE_URL + coordinate_string, timeout=timeout)
    payload = response.json()

    travel_time_seconds = int(payload['trips'][0]['duration'])
    waypoint_time_seconds = num_waypoints * _SECONDS_PER_WAYPOINT

    return (travel_time_seconds + waypoint_time_seconds) / 3600


def normalize_gps(coordinates, centroids):
    """
    Takes a list of lists of coordinates and centroids and returns a list of lists of normalized coordinates and centroids

    Both outputs are scaled with the minimum and maximum of `coordinates` only, so a centroid lying
    outside the bounding box of the coordinates maps outside [0, 1]. Raises ValueError (from min/max)
    when `coordinates` is empty.
    """
    latitudes = [point[0] for point in coordinates]
    longitudes = [point[1] for point in coordinates]

    min_lat, max_lat = min(latitudes), max(latitudes)
    min_lon, max_lon = min(longitudes), max(longitudes)

    def scale(point):
        return [__min_max_normalize__(point[0], min_lat, max_lat),
                __min_max_normalize__(point[1], min_lon, max_lon)]

    normalized_coordinates = [scale(point) for point in coordinates]
    normalized_centroids = [scale(point) for point in centroids]

    return normalized_coordinates, normalized_centroids


def __min_max_normalize__(value, min_value, max_value):
    """
    Takes a value, min value, and max value and returns the normalized value

    When min_value equals max_value there is no spread to scale by; 0.0 is returned instead of
    dividing by zero.
    """
    spread = max_value - min_value
    if spread == 0:
        return 0.0
    return (value - min_value) / spread


def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start,
                             time_diff):
    """
    Takes two routes, their start strings, and a time difference (hours) and moves stops from the
    slower route to the faster one until the trip times differ by at most time_diff.

    Both lists are modified in place and returned as (route_1_coordinates, route_2_coordinates).
    Balancing stops early, leaving the best split found, when the slower route is down to a single
    stop or when moving another stop would not narrow the gap. This guarantees termination even
    when time_diff cannot be reached.
    """
    routes = (route_1_coordinates, route_2_coordinates)
    starts = (route_1_start, route_2_start)

    def measure(index):
        return get_trip_time(starts[index] + list_to_string(routes[index]), len(routes[index]))

    times = [measure(0), measure(1)]
    gap = abs(times[0] - times[1])

    while gap > time_diff:
        # The donor is the route that currently takes longer, not the one with more stops:
        # a route with few but far-apart stops can be the slow one.
        slow = 0 if times[0] > times[1] else 1
        fast = 1 - slow

        if len(routes[slow]) <= 1:
            break

        candidate = move_coordinate(routes[slow], routes[fast])
        position = routes[slow].index(candidate)
        routes[slow].pop(position)
        routes[fast].append(candidate)

        new_times = [measure(0), measure(1)]
        new_gap = abs(new_times[0] - new_times[1])

        if new_gap >= gap:
            # The move did not help: undo it and keep the previous split.
            routes[fast].pop()
            routes[slow].insert(position, candidate)
            break

        times, gap = new_times, new_gap

    return route_1_coordinates, route_2_coordinates


# Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid
def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates):
    """
    Returns the coordinate of larger_centroid_coordinates that lies closest to the mean point of
    smaller_centroid_coordinates. Neither list is modified; the caller performs the actual move.

    Ties go to the earliest coordinate. If smaller_centroid_coordinates is empty there is no mean
    point to measure against, so the first coordinate is returned. Raises IndexError when
    larger_centroid_coordinates is empty.
    """
    if not larger_centroid_coordinates:
        raise IndexError('larger_centroid_coordinates is empty')
    if not smaller_centroid_coordinates:
        return larger_centroid_coordinates[0]

    count = len(smaller_centroid_coordinates)
    smaller_centroid = [sum(point[0] for point in smaller_centroid_coordinates) / count,
                        sum(point[1] for point in smaller_centroid_coordinates) / count]

    # min() keeps the first of several equally close coordinates.
    return min(larger_centroid_coordinates,
               key=lambda point: __distance__(point, smaller_centroid))


def __distance__(coordinate1, coordinate2):
    """
    Takes two coordinates and returns the distance between them

    This is the straight-line (Euclidean) distance on the raw coordinate values.
    """
    return math.hypot(coordinate1[0] - coordinate2[0], coordinate1[1] - coordinate2[1])