import folium import numpy as np import pandas as pd import requests from sklearn.cluster import KMeans def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) kmeans.fit(df['normalized_gps'].values.tolist()) df['cluster'] = kmeans.labels_ centroid_1 = list_to_string([centroids[0]]) centroid_2 = list_to_string([centroids[1]]) # Return the list of locations in each cluster route_1 = df[df['cluster'] == 0] route_2 = df[df['cluster'] == 1] route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), centroid_1, centroid_2, end, time_diff, n) # Remove waypoints from the longest route until the trip time is less than the max time route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time) route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time) # Edit the dataframe to reflect the new coordinate clusters df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1 return df, route_1_coordinates, route_2_coordinates def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end, time_diff, n): """ Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a time difference that is less than the time difference """ # Find the trip time for each route route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end) route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end) # Find the average trip time average_time = (route_1_time + route_2_time) / 2 # Define a list of all times and route coordinates times = [route_1_time, route_2_time] routes = [route_1_coordinates, route_2_coordinates] # Sort the routes by time sorted_indices = np.argsort(times) # If the difference of the longest trip time from average is greater than the time difference if times[sorted_indices[1]] - average_time > time_diff: # Move the closest coordinate(s) from the longest route to the shortest route for i in range(n): closest_coordinate = __move_coordinate__(routes[sorted_indices[1]], routes[sorted_indices[0]]) routes[sorted_indices[1]].remove(closest_coordinate) routes[sorted_indices[0]].append(closest_coordinate) # Recursively call the function return minimize_route_time_diff(routes[0], routes[1], route_1_start, route_2_start, end, time_diff, n) # If the difference of the longest trip time from average is less than the time difference, return the routes return routes[0], routes[1] # Create a function to minimize the time difference between three routes def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) # Fit the coordinates to the clusters kmeans.fit(df['normalized_gps'].values.tolist()) # Add the cluster labels to the dataframe df['cluster'] = kmeans.labels_ # Create centroid strings centroid_1 = list_to_string([centroids[0]]) centroid_2 = list_to_string([centroids[1]]) centroid_3 = list_to_string([centroids[2]]) # Return the list of locations in each cluster route_1 = df[df['cluster'] == 0] route_2 = df[df['cluster'] == 1] route_3 = df[df['cluster'] == 2] # Minimize the time difference between the routes route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3( route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), route_3['gps'].values.tolist(), centroid_1, centroid_2, centroid_3, end, time_diff, n) # Remove waypoints from the longest route until the trip time is less than the max time route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time) route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time) route_3_coordinates = __remove_longest_waypoints__(route_3_coordinates, centroid_3, end, max_time) # Edit the dataframe to reflect the new coordinate clusters df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1 df.loc[df['gps'].astype(str).isin(map(str, route_3_coordinates)), 'cluster'] = 2 return df, route_1_coordinates, route_2_coordinates, route_3_coordinates def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3_coordinates, route_1_start, route_2_start, route_3_start, end, time_diff, n): """ Takes three routes and a time difference and returns routes that have time differences less than the time difference """ # Find the trip time for each route route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end) route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end) route_3_time = get_trip_time(list_to_string(route_3_coordinates), len(route_3_coordinates), route_3_start, end) # Find the average trip time average_time = (route_1_time + route_2_time + route_3_time) / 3 # Define a list of all times and route coordinates times = [route_1_time, route_2_time, route_3_time] routes = [route_1_coordinates, route_2_coordinates, route_3_coordinates] # Sort the routes by time sorted_indices = np.argsort(times) # If the difference of the longest trip time from average is greater than the time difference if times[sorted_indices[2]] - average_time > time_diff: # Move the closest coordinate(s) from the longest route to the shortest route for i in range(n): closest_coordinate = __move_coordinate__(routes[sorted_indices[2]], routes[sorted_indices[0]]) routes[sorted_indices[2]].remove(closest_coordinate) routes[sorted_indices[0]].append(closest_coordinate) # Recursively call the function return minimize_route_time_diff_3(routes[0], routes[1], routes[2], route_1_start, route_2_start, route_3_start, end, time_diff, n) # If the difference of the longest trip time from average is less than the time difference, return the routes return routes[0], routes[1], routes[2] def __remove_longest_waypoints__(route_coordinates, start, end, max_time): """ Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the original list but has a trip time less than the max time """ # Find the trip time for the route route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end) # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean if route_time > max_time: route_mean = __mean_center__(route_coordinates) furthest_coordinate = __find_farthest_coordinate__(route_coordinates, route_mean) route_coordinates.remove(furthest_coordinate) return __remove_longest_waypoints__(route_coordinates, start, end, max_time) return route_coordinates def list_to_string(list_of_lists): """ Takes a list of lists of coordinates and returns a string of the coordinates """ string = '' for i in list_of_lists: string += str(i[1]) + ',' + str(i[0]) + ';' return string def create_json_df(coordinate_string, start, end): coordinates = requests.get( 'http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end + '?roundtrip=false&source=first&destination=last') coordinates = coordinates.json() # Create a dataframe from the JSON df = pd.DataFrame(coordinates['waypoints']) # Separate the location column into lon and lat columns df['lat'] = df['location'].apply(lambda x: x[0]) df['lon'] = df['location'].apply(lambda x: x[1]) df['waypoint_index'] = df['waypoint_index'].astype(int) # Map out the waypoints in order of the waypoint index df = df.sort_values(by=['waypoint_index']) return df def get_trip_time(coordinate_string, num_waypoints, start, end): """ Takes a list of lists of coordinates and returns the time of the trip in hours """ coordinates = requests.get( 'http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end + '?roundtrip=false&source=first&destination=last') coordinates = coordinates.json() travel_time_seconds = int(coordinates['trips'][0]['duration']) waypoint_time_seconds = num_waypoints * 90 total_time_hours = (travel_time_seconds + waypoint_time_seconds) / 3600 return total_time_hours def normalize_gps(coordinates, centroids): """ Takes a list of lists of coordinates and centroids and returns a list of lists of normalized coordinates and centroids """ # Create a list of latitudes and longitudes latitudes = [i[0] for i in coordinates] longitudes = [i[1] for i in coordinates] # Find the minimum and maximum latitudes and longitudes min_lat = min(latitudes) max_lat = max(latitudes) min_lon = min(longitudes) max_lon = max(longitudes) # Normalize the coordinates and centroids using min-max normalization normalized_coordinates = [] normalized_centroids = [] for i in coordinates: normalized_coordinates.append( [__min_max_normalize__(i[0], min_lat, max_lat), __min_max_normalize__(i[1], min_lon, max_lon)]) for i in centroids: normalized_centroids.append( [__min_max_normalize__(i[0], min_lat, max_lat), __min_max_normalize__(i[1], min_lon, max_lon)]) return normalized_coordinates, normalized_centroids def __min_max_normalize__(value, min_value, max_value): """ Takes a value, min value, and max value and returns the normalized value """ return (value - min_value) / (max_value - min_value) # Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid def __move_coordinate__(larger_centroid_coordinates, smaller_centroid_coordinates): # Calculate the centroid of the smaller cluster smaller_centroid = __mean_center__(smaller_centroid_coordinates) # Find the coordinate in the larger cluster that is closest to the centroid of the smaller cluster closest_coordinate = __find_closest_coordinate__(larger_centroid_coordinates, smaller_centroid) return closest_coordinate def __find_closest_coordinate__(coordinates, centroid): """ Takes a list of coordinates and a centroid and returns the coordinate in the list that is closest to the centroid """ closest_coordinate = coordinates[0] closest_coordinate_distance = __distance__(closest_coordinate, centroid) for coordinate in coordinates: if __distance__(coordinate, centroid) < closest_coordinate_distance: closest_coordinate = coordinate closest_coordinate_distance = __distance__(coordinate, centroid) return closest_coordinate def __find_farthest_coordinate__(coordinates, centroid): """ Takes a list of coordinates and a centroid and returns the coordinate in the list that is furthest from the centroid """ farthest_coordinate = coordinates[0] farthest_coordinate_distance = __distance__(farthest_coordinate, centroid) for coordinate in coordinates: if __distance__(coordinate, centroid) > farthest_coordinate_distance: farthest_coordinate = coordinate farthest_coordinate_distance = __distance__(coordinate, centroid) return farthest_coordinate def __mean_center__(coordinates): """ Takes a list of coordinates and returns the mean center of the coordinates """ return [sum([i[0] for i in coordinates]) / len(coordinates), sum([i[1] for i in coordinates]) / len(coordinates)] def __distance__(coordinate1, coordinate2): """ Takes two coordinates and returns the distance between them """ return ((coordinate1[0] - coordinate2[0]) ** 2 + (coordinate1[1] - coordinate2[1]) ** 2) ** 0.5