From 65021e67cd575df51e31857ff7559fcaad9f588e Mon Sep 17 00:00:00 2001 From: itsGarrin Date: Mon, 6 Nov 2023 20:28:53 -0500 Subject: Finished 3 route algorithm --- utils.py | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 132 insertions(+), 19 deletions(-) (limited to 'utils.py') diff --git a/utils.py b/utils.py index e0cc295..2898b3d 100644 --- a/utils.py +++ b/utils.py @@ -1,11 +1,12 @@ import folium +import numpy as np import pandas as pd import requests from sklearn.cluster import KMeans # Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes -def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff): +def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) @@ -32,11 +33,16 @@ def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff): trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end) trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end) - # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference - if abs(trip_hrs_1 - trip_hrs_2) > time_diff: - route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), - route_2['gps'].values.tolist(), - centroid_1, centroid_2, end, time_diff) + if minimize: + # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference + if abs(trip_hrs_1 - trip_hrs_2) > time_diff: + route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), + route_2['gps'].values.tolist(), + centroid_1, centroid_2, end, time_diff, + n=n) + else: + route_1_coordinates = route_1['gps'].values.tolist() + route_2_coordinates = 
route_2['gps'].values.tolist() else: route_1_coordinates = route_1['gps'].values.tolist() route_2_coordinates = route_2['gps'].values.tolist() @@ -49,7 +55,7 @@ def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff): def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end, - time_diff): + time_diff, n): """ Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a time difference that is less than the time difference """ @@ -63,34 +69,141 @@ def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_s # If the difference in time is greater than the time difference, move the closest coordinate from the longer route to the shorter route if route_time_diff > time_diff: # Find which route is longer - if len(route_1_coordinates) > len(route_2_coordinates): + if route_1_time > route_2_time: longer_route = route_1_coordinates shorter_route = route_2_coordinates - # Move the closest coordinate from the longer route to the shorter route - closest_coordinate = move_coordinate(longer_route, shorter_route) - longer_route.remove(closest_coordinate) - shorter_route.append(closest_coordinate) + for i in range(n): + # Move the closest coordinate from the longer route to the shorter route + closest_coordinate = move_coordinate(longer_route, shorter_route) + longer_route.remove(closest_coordinate) + shorter_route.append(closest_coordinate) # Recursively call the function - return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end, time_diff) + return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end, + time_diff, n) else: longer_route = route_2_coordinates shorter_route = route_1_coordinates - # Move the closest coordinate from the longer route to the shorter route - closest_coordinate = move_coordinate(longer_route, shorter_route) - 
# ---------------------------------------------------------------------------
# Patch text kept from the original (tail of the hunk that ends
# minimize_route_time_diff). It is diff context, not part of the code below.
#
#                 longer_route.remove(closest_coordinate)
# -               shorter_route.append(closest_coordinate)
# +           for i in range(n):
# +               # Move the closest coordinate from the longer route to the shorter route
# +               closest_coordinate = move_coordinate(longer_route, shorter_route)
# +               longer_route.remove(closest_coordinate)
# +               shorter_route.append(closest_coordinate)
#             # Recursively call the function
# -           return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end, time_diff)
# +           return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end,
# +                                           time_diff, n)
#     # If the difference in time is less than the time difference, return the routes
#     return route_1_coordinates, route_2_coordinates
# ---------------------------------------------------------------------------


# Create a function to minimize the time difference between three routes
def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, minimize=True, n=2):
    """
    Cluster the stops of a dataframe into three routes and optionally balance their trip times.

    KMeans is seeded with norm_centroids and fitted on df['normalized_gps']. The labels are
    written to df['cluster'], so df is modified in place. When minimize is true the three
    clusters are handed to minimize_route_time_diff_3, which returns them untouched if the
    slowest route is already within time_diff of the average trip time.

    :param df: dataframe with a 'gps' and a 'normalized_gps' column
    :param centroids: start coordinate of each route in cluster order (indices 0, 1 and 2 are used)
    :param norm_centroids: initial cluster centres handed to KMeans, one per route
    :param end: passed through to get_trip_time for every route
    :param time_diff: largest allowed excess of the slowest route over the average trip time
                      (presumably hours, going by the trip_hrs_* naming of the original - confirm
                      against get_trip_time)
    :param minimize: when False the raw k-means clusters are returned and no trip time is requested
    :param n: number of stops moved per balancing round
    :return: (df, route_1_coordinates, route_2_coordinates, route_3_coordinates)
    """
    # The centres are given explicitly, so one initialisation is all k-means can use;
    # saying so avoids the warning scikit-learn emits when n_init is left larger than 1.
    kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids, n_init=1)
    kmeans.fit(df['normalized_gps'].values.tolist())

    # Add the cluster labels to the dataframe
    df['cluster'] = kmeans.labels_

    # Stops of each cluster, in cluster order
    route_coordinates = [df[df['cluster'] == label]['gps'].values.tolist() for label in range(3)]

    if minimize:
        # The balancer performs the "slowest route vs. average" check itself, so it is called
        # directly instead of requesting the same three trip times here first.
        starts = [list_to_string([centroids[label]]) for label in range(3)]
        route_coordinates = list(
            minimize_route_time_diff_3(*route_coordinates, *starts, end, time_diff, n=n))

    # Edit the dataframe to reflect the new coordinate clusters
    gps_as_text = df['gps'].astype(str)
    for label, coordinates in enumerate(route_coordinates):
        df.loc[gps_as_text.isin([str(coordinate) for coordinate in coordinates]), 'cluster'] = label

    return (df, *route_coordinates)


def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3_coordinates,
                               route_1_start, route_2_start, route_3_start, end, time_diff, n,
                               max_rounds=100):
    """
    Takes three routes and a time difference and moves stops from the slowest route to the
    fastest route until the slowest route exceeds the average trip time by at most time_diff.

    The three coordinate lists are modified in place and the same list objects are returned.
    Every round requests one trip time per route through get_trip_time.

    Balancing stops as soon as one of these holds:
      * the slowest route is within time_diff of the average,
      * no stop could be moved (n is not positive, or the slowest route has no stops left),
      * max_rounds rounds have been performed (the split may then still be unbalanced).

    :param route_1_coordinates: stops of route 1 (likewise route_2_coordinates, route_3_coordinates)
    :param route_1_start: start of route 1 as passed to get_trip_time (likewise route_2_start, route_3_start)
    :param end: passed through to get_trip_time for every route
    :param time_diff: largest allowed excess of the slowest route over the average trip time
    :param n: number of stops moved per round
    :param max_rounds: upper bound on balancing rounds; replaces the unbounded recursion, which
                       ended in RecursionError when the routes kept swapping stops back and forth
    :return: (route_1_coordinates, route_2_coordinates, route_3_coordinates)
    """
    routes = [route_1_coordinates, route_2_coordinates, route_3_coordinates]
    starts = [route_1_start, route_2_start, route_3_start]

    for _ in range(max_rounds):
        # Find the trip time for each route
        times = [get_trip_time(list_to_string(route), len(route), start, end)
                 for route, start in zip(routes, starts)]
        average_time = sum(times) / len(times)

        slowest = max(range(len(times)), key=times.__getitem__)
        fastest = min(range(len(times)), key=times.__getitem__)

        # Balanced enough, or all routes take equally long: nothing to move
        if times[slowest] - average_time <= time_diff or slowest == fastest:
            break

        # Move the stop(s) picked by move_coordinate from the slowest route to the fastest route
        moved = 0
        for _ in range(n):
            if not routes[slowest]:
                # list.remove on an empty route would raise; there is nothing left to hand over
                break
            closest_coordinate = move_coordinate(routes[slowest], routes[fastest])
            routes[slowest].remove(closest_coordinate)
            routes[fastest].append(closest_coordinate)
            moved += 1

        if not moved:
            # Another round would see exactly the same routes, so stop instead of spinning
            break

    return routes[0], routes[1], routes[2]


# ---------------------------------------------------------------------------
# Patch text kept from the original (remaining hunks of the same commit).
# It is diff context, not part of the code above.
#
#  def list_to_string(list_of_lists):
#      """
#      Takes a list of lists of coordinates and returns a string of the coordinates
# @@ -131,7 +244,7 @@ def get_trip_time(coordinate_string, num_waypoints, start, end):
#      coordinates = coordinates.json()
#      travel_time_seconds = int(coordinates['trips'][0]['duration'])
# -    waypoint_time_seconds = num_waypoints * 60
# +    waypoint_time_seconds = num_waypoints * 90
#      total_time_hours = (travel_time_seconds + waypoint_time_seconds) / 3600
# -- cgit v1.2.3
# ---------------------------------------------------------------------------