summary | refs | log | tree | commit | diff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r-- utils.py 161
1 files changed, 158 insertions, 3 deletions
diff --git a/utils.py b/utils.py
index 880dd2a..7f6a408 100644
--- a/utils.py
+++ b/utils.py
@@ -1,9 +1,53 @@
import folium
import pandas as pd
import requests
+from sklearn.cluster import KMeans
+
+
def cluster_and_minimize(df, centroids, norm_centroids, time_diff):
    """
    Takes a dataframe of coordinates and two centroids, clusters the coordinates into two routes,
    balances the trip times of the routes, and returns the dataframe and the two routes

    The rows are clustered with k-means on df['normalized_gps'], seeded with norm_centroids.
    Cluster 0 becomes route 1 and cluster 1 becomes route 2. Each route's trip time is requested
    with centroids[0] / centroids[1] as the first coordinate of the request. If the two times
    differ by more than time_diff (hours), minimize_route_time_diff reassigns stops.

    Only clusters 0 and 1 are turned into routes, so two centroids are expected.

    df is modified in place: its 'cluster' column is written (0 for route 1, 1 for route 2).
    Returns (df, route_1_coordinates, route_2_coordinates), where the routes are lists of
    entries taken from df['gps'].
    """
    # n_init=1: the starting centres are supplied explicitly, so re-running the
    # initialisation is pointless (scikit-learn warns when an explicit init array
    # is combined with n_init > 1).
    kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids, n_init=1)
    kmeans.fit(df['normalized_gps'].values.tolist())
    df['cluster'] = kmeans.labels_

    # Each centroid is the first coordinate of its route's request string
    centroid_1 = list_to_string([centroids[0]]) + ';'
    centroid_2 = list_to_string([centroids[1]]) + ';'

    route_1_coordinates = df.loc[df['cluster'] == 0, 'gps'].values.tolist()
    route_2_coordinates = df.loc[df['cluster'] == 1, 'gps'].values.tolist()

    trip_hrs_1 = get_trip_time(centroid_1 + list_to_string(route_1_coordinates),
                               len(route_1_coordinates))
    trip_hrs_2 = get_trip_time(centroid_2 + list_to_string(route_2_coordinates),
                               len(route_2_coordinates))

    # Only rebalance when the routes differ by more than the allowed time difference
    if abs(trip_hrs_1 - trip_hrs_2) > time_diff:
        route_1_coordinates, route_2_coordinates = minimize_route_time_diff(
            route_1_coordinates, route_2_coordinates, centroid_1, centroid_2, time_diff)

    # Re-label the rows to reflect the final routes. The coordinates are compared
    # through their string form because the 'gps' entries are list-like.
    gps_as_str = df['gps'].astype(str)
    df.loc[gps_as_str.isin([str(stop) for stop in route_1_coordinates]), 'cluster'] = 0
    df.loc[gps_as_str.isin([str(stop) for stop in route_2_coordinates]), 'cluster'] = 1

    return df, route_1_coordinates, route_2_coordinates
-# make a function that turns a list of lists of coordinates into a string
def list_to_string(list_of_lists):
    """
    Takes a list of lists of coordinates and returns a string of the coordinates

    Each pair is written with its two values swapped (second element, then first)
    and joined by ','. Pairs are separated by ';' with no trailing separator.
    An empty list gives an empty string.
    """
    return ';'.join(str(pair[1]) + ',' + str(pair[0]) for pair in list_of_lists)
@@ -33,11 +79,120 @@ def create_json_df(coordinate_string):
return df
def get_trip_time(coordinate_string, num_waypoints=0):
    """
    Takes a string of coordinates and a number of waypoints and returns the time of the trip in hours

    coordinate_string is appended unchanged to the trip service URL. The result is the
    duration reported for the first trip (truncated to whole seconds) plus 60 seconds
    per waypoint, converted to hours. num_waypoints defaults to 0, which gives the
    travel time alone.

    Raises requests.HTTPError if the service answers with an error status and
    requests.Timeout if it does not answer within 30 seconds.
    """
    # Without a timeout a stalled routing server would block this call forever
    response = requests.get('http://acetyl.net:5000/trip/v1/bike/' + coordinate_string, timeout=30)
    # Fail here on an HTTP error rather than with a KeyError on the payload below
    response.raise_for_status()
    trip_data = response.json()

    travel_time_seconds = int(trip_data['trips'][0]['duration'])
    waypoint_time_seconds = num_waypoints * 60

    return (travel_time_seconds + waypoint_time_seconds) / 3600
+
+
def normalize_gps(coordinates, centroids):
    """
    Takes a list of lists of coordinates and centroids and returns a list of lists of normalized coordinates and centroids

    Both lists are min-max normalized per axis (index 0 and index 1 separately) using
    the minimum and maximum of coordinates only. A centroid that lies outside the
    range of the coordinates therefore maps outside [0, 1].

    Raises ValueError if coordinates is empty.
    """
    if len(coordinates) == 0:
        raise ValueError('coordinates must contain at least one point')

    latitudes = [point[0] for point in coordinates]
    longitudes = [point[1] for point in coordinates]
    min_lat, max_lat = min(latitudes), max(latitudes)
    min_lon, max_lon = min(longitudes), max(longitudes)

    def normalize(points):
        # Scale every point with the bounds computed from coordinates above
        return [[__min_max_normalize__(point[0], min_lat, max_lat),
                 __min_max_normalize__(point[1], min_lon, max_lon)] for point in points]

    return normalize(coordinates), normalize(centroids)


def __min_max_normalize__(value, min_value, max_value):
    """
    Takes a value, min value, and max value and returns the normalized value

    When min value and max value are equal there is no range to scale by, so 0.0 is
    returned instead of dividing by zero.
    """
    span = max_value - min_value
    if span == 0:
        return 0.0
    return (value - min_value) / span
+
+
def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start,
                             time_diff):
    """
    Takes two routes, their start strings, and a time difference and returns the two routes after
    moving stops from the slower route to the faster route until their trip times differ by no
    more than the time difference

    route_1_start / route_2_start are coordinate strings that are prepended to the route's
    own coordinate string for every trip time request (they are expected to end in ';').

    One stop is moved per step: the stop of the slower route that is closest to the centre of
    the faster route. The balancing stops early when
      - the slower route has only one stop left, or the faster route has none, or
      - a move does not reduce the time difference (the move is undone first).
    Because every kept move strictly reduces the difference, the loop always terminates.

    Both lists are modified in place and returned as (route_1_coordinates, route_2_coordinates).
    """
    def route_times():
        # One trip time request per route, using the current contents of the lists
        return (get_trip_time(route_1_start + list_to_string(route_1_coordinates),
                              len(route_1_coordinates)),
                get_trip_time(route_2_start + list_to_string(route_2_coordinates),
                              len(route_2_coordinates)))

    route_1_time, route_2_time = route_times()
    route_time_diff = abs(route_1_time - route_2_time)

    while route_time_diff > time_diff:
        # The donor is the route that takes longer, not the one with more stops:
        # taking stops from a route that is already faster would widen the gap.
        if route_1_time > route_2_time:
            donor, receiver = route_1_coordinates, route_2_coordinates
        else:
            donor, receiver = route_2_coordinates, route_1_coordinates

        # Never empty a route, and move_coordinate needs a non-empty receiver
        if len(donor) <= 1 or not receiver:
            break

        closest_coordinate = move_coordinate(donor, receiver)
        donor_index = donor.index(closest_coordinate)
        donor.pop(donor_index)
        receiver.append(closest_coordinate)

        route_1_time, route_2_time = route_times()
        new_time_diff = abs(route_1_time - route_2_time)

        if new_time_diff >= route_time_diff:
            # The move did not help: restore both lists exactly and stop
            receiver.pop()
            donor.insert(donor_index, closest_coordinate)
            break

        route_time_diff = new_time_diff

    return route_1_coordinates, route_2_coordinates
+
+
def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates):
    """
    Takes the coordinates of two clusters and returns the coordinate of the larger cluster that is
    closest to the centre of the smaller cluster

    The centre is the mean of the smaller cluster's coordinates. Neither list is modified;
    the caller is responsible for actually moving the returned coordinate. If several
    coordinates are equally close, the first one in the list is returned.

    Raises ValueError if either list is empty.
    """
    if len(larger_centroid_coordinates) == 0 or len(smaller_centroid_coordinates) == 0:
        raise ValueError('both clusters must contain at least one coordinate')

    count = len(smaller_centroid_coordinates)
    smaller_centroid = [sum(point[0] for point in smaller_centroid_coordinates) / count,
                        sum(point[1] for point in smaller_centroid_coordinates) / count]

    # min() keeps the first of equally close candidates
    return min(larger_centroid_coordinates,
               key=lambda coordinate: __distance__(coordinate, smaller_centroid))


def __distance__(coordinate1, coordinate2):
    """
    Takes two coordinates and returns the distance between them

    This is the straight-line (Euclidean) distance on the raw coordinate values.
    """
    return ((coordinate1[0] - coordinate2[0]) ** 2 + (coordinate1[1] - coordinate2[1]) ** 2) ** 0.5