summaryrefslogtreecommitdiff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r--utils.py253
1 files changed, 82 insertions, 171 deletions
diff --git a/utils.py b/utils.py
index a4b789d..aeaf7ab 100644
--- a/utils.py
+++ b/utils.py
@@ -5,161 +5,30 @@ import requests
from sklearn.cluster import KMeans
-def cluster_and_minimize_2(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2):
- # Cluster the coordinates
- kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids)
- kmeans.fit(df['normalized_gps'].values.tolist())
-
- df['cluster'] = kmeans.labels_
- centroid_1 = list_to_string([centroids[0]])
- centroid_2 = list_to_string([centroids[1]])
-
- # Return the list of locations in each cluster
- route_1 = df[df['cluster'] == 0]
- route_2 = df[df['cluster'] == 1]
-
- route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(),
- route_2['gps'].values.tolist(),
- centroid_1, centroid_2, end, time_diff, n)
-
- # Remove waypoints from the longest route until the trip time is less than the max time
- route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time)
- route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time)
-
- # Edit the dataframe to reflect the new coordinate clusters
- df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0
- df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1
-
- return df, route_1_coordinates, route_2_coordinates
-
+def cluster_and_optimize(df, centroids, end, time_diff=0.25, max_time=24, n=2):
+ # Create a new column with normalized gps coordinates and centroids
+ df['normalized_gps'], norm_centroids = __normalize_gps(df['gps'].values.tolist(), centroids)
-def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end,
- time_diff, n):
- """
- Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a
- time difference that is less than the time difference
- """
- # Find the trip time for each route
- route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end)
- route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end)
-
- # Find the average trip time
- average_time = (route_1_time + route_2_time) / 2
-
- # Define a list of all times and route coordinates
- times = [route_1_time, route_2_time]
- routes = [route_1_coordinates, route_2_coordinates]
-
- # Sort the routes by time
- sorted_indices = np.argsort(times)
-
- # If the difference of the longest trip time from average is greater than the time difference
- if times[sorted_indices[1]] - average_time > time_diff:
- # Move the closest coordinate(s) from the longest route to the shortest route
- for i in range(n):
- closest_coordinate = __move_coordinate__(routes[sorted_indices[1]], routes[sorted_indices[0]])
- routes[sorted_indices[1]].remove(closest_coordinate)
- routes[sorted_indices[0]].append(closest_coordinate)
-
- # Recursively call the function
- return minimize_route_time_diff(routes[0], routes[1], route_1_start, route_2_start, end, time_diff, n)
-
- # If the difference of the longest trip time from average is less than the time difference, return the routes
- return routes[0], routes[1]
-
-
-# Create a function to minimize the time difference between three routes
-def cluster_and_minimize_3(df, centroids, norm_centroids, end, time_diff, max_time=24, n=2):
# Cluster the coordinates
kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids)
-
- # Fit the coordinates to the clusters
kmeans.fit(df['normalized_gps'].values.tolist())
- # Add the cluster labels to the dataframe
df['cluster'] = kmeans.labels_
- # Create centroid strings
- centroid_1 = list_to_string([centroids[0]])
- centroid_2 = list_to_string([centroids[1]])
- centroid_3 = list_to_string([centroids[2]])
+ routes = []
+ starts = []
+ for i in range(len(centroids)):
+ routes.append(df[df['cluster'] == i]['gps'].values.tolist())
+ starts.append(list_to_string([centroids[i]]))
- # Return the list of locations in each cluster
- route_1 = df[df['cluster'] == 0]
- route_2 = df[df['cluster'] == 1]
- route_3 = df[df['cluster'] == 2]
-
- # Minimize the time difference between the routes
- route_1_coordinates, route_2_coordinates, route_3_coordinates = minimize_route_time_diff_3(
- route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), route_3['gps'].values.tolist(),
- centroid_1, centroid_2, centroid_3, end, time_diff, n)
+ routes = __minimize_route_time_diff(routes, starts, end, time_diff, n)
# Remove waypoints from the longest route until the trip time is less than the max time
- route_1_coordinates = __remove_longest_waypoints__(route_1_coordinates, centroid_1, end, max_time)
- route_2_coordinates = __remove_longest_waypoints__(route_2_coordinates, centroid_2, end, max_time)
- route_3_coordinates = __remove_longest_waypoints__(route_3_coordinates, centroid_3, end, max_time)
-
- # Edit the dataframe to reflect the new coordinate clusters
- df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0
- df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1
- df.loc[df['gps'].astype(str).isin(map(str, route_3_coordinates)), 'cluster'] = 2
-
- return df, route_1_coordinates, route_2_coordinates, route_3_coordinates
-
-
-def minimize_route_time_diff_3(route_1_coordinates, route_2_coordinates, route_3_coordinates,
- route_1_start, route_2_start, route_3_start, end, time_diff, n):
- """
- Takes three routes and a time difference and returns routes that have time differences less than the time difference
- """
- # Find the trip time for each route
- route_1_time = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end)
- route_2_time = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end)
- route_3_time = get_trip_time(list_to_string(route_3_coordinates), len(route_3_coordinates), route_3_start, end)
-
- # Find the average trip time
- average_time = (route_1_time + route_2_time + route_3_time) / 3
-
- # Define a list of all times and route coordinates
- times = [route_1_time, route_2_time, route_3_time]
- routes = [route_1_coordinates, route_2_coordinates, route_3_coordinates]
-
- # Sort the routes by time
- sorted_indices = np.argsort(times)
-
- # If the difference of the longest trip time from average is greater than the time difference
- if times[sorted_indices[2]] - average_time > time_diff:
- # Move the closest coordinate(s) from the longest route to the shortest route
- for i in range(n):
- closest_coordinate = __move_coordinate__(routes[sorted_indices[2]], routes[sorted_indices[0]])
- routes[sorted_indices[2]].remove(closest_coordinate)
- routes[sorted_indices[0]].append(closest_coordinate)
-
- # Recursively call the function
- return minimize_route_time_diff_3(routes[0], routes[1], routes[2], route_1_start, route_2_start, route_3_start,
- end, time_diff, n)
-
- # If the difference of the longest trip time from average is less than the time difference, return the routes
- return routes[0], routes[1], routes[2]
-
-
-def __remove_longest_waypoints__(route_coordinates, start, end, max_time):
- """
- Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the
- original list but has a trip time less than the max time
- """
- # Find the trip time for the route
- route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end)
-
- # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean
- if route_time > max_time:
- route_mean = __mean_center__(route_coordinates)
- furthest_coordinate = __find_farthest_coordinate__(route_coordinates, route_mean)
- route_coordinates.remove(furthest_coordinate)
+ for i in range(len(routes)):
+ routes[i] = __remove_longest_waypoints(routes[i], starts[i], end, max_time)
+ df.loc[df['gps'].astype(str).isin(map(str, routes[i])), 'cluster'] = i
- return __remove_longest_waypoints__(route_coordinates, start, end, max_time)
-
- return route_coordinates
+ return df, routes
def list_to_string(list_of_lists):
@@ -193,7 +62,7 @@ def create_json_df(coordinate_string, start, end):
return df
-def get_trip_time(coordinate_string, num_waypoints, start, end):
+def get_trip_time(coordinate_string, num_waypoints, start, end, time_per_waypoint=90):
"""
Takes a list of lists of coordinates and returns the time of the trip in hours
"""
@@ -202,14 +71,67 @@ def get_trip_time(coordinate_string, num_waypoints, start, end):
coordinates = coordinates.json()
travel_time_seconds = int(coordinates['trips'][0]['duration'])
- waypoint_time_seconds = num_waypoints * 90
+ waypoint_time_seconds = num_waypoints * time_per_waypoint
total_time_hours = (travel_time_seconds + waypoint_time_seconds) / 3600
return total_time_hours
-def normalize_gps(coordinates, centroids):
+def __minimize_route_time_diff(routes, starts, end,
+ time_diff, n):
+ """
+ Takes two routes and a time difference and returns routes that have time differences less than the time difference
+ """
+ times = []
+
+ for i, route in enumerate(routes):
+ times.append(get_trip_time(list_to_string(route), len(route), starts[i], end))
+
+ # Find the average trip time
+ average_time = np.mean(times)
+
+ # Sort the trip times
+ sorted_indices = np.argsort(times)
+
+ # Find the difference between the longest trip time and the average trip time
+ time_difference = times[sorted_indices[-1]] - average_time
+
+ # If the difference is greater than the time difference, move a coordinate from the longest route to the shortest route
+ if time_difference > time_diff:
+ # Move a coordinate from the longest route to the shortest route
+ closest_coordinate = __find_closest_coordinate(routes[sorted_indices[-1]],
+ __mean_center(routes[sorted_indices[0]]))
+ routes[sorted_indices[0]].append(closest_coordinate)
+ routes[sorted_indices[-1]].remove(closest_coordinate)
+
+ # Recursively minimize the time difference between the routes
+ return __minimize_route_time_diff(routes, starts, end, time_diff, n)
+
+ # If the difference of the longest trip time from average is less than the time difference, return the routes
+ return routes
+
+
+def __remove_longest_waypoints(route_coordinates, start, end, max_time):
+ """
+ Takes a list of lists of coordinates and returns a list of lists of coordinates that is the same length as the
+ original list but has a trip time less than the max time
+ """
+ # Find the trip time for the route
+ route_time = get_trip_time(list_to_string(route_coordinates), len(route_coordinates), start, end)
+
+ # If the trip time is greater than the max time, remove the waypoint with the longest distance from the mean
+ if route_time > max_time:
+ route_mean = __mean_center(route_coordinates)
+ furthest_coordinate = __find_farthest_coordinate(route_coordinates, route_mean)
+ route_coordinates.remove(furthest_coordinate)
+
+ return __remove_longest_waypoints(route_coordinates, start, end, max_time)
+
+ return route_coordinates
+
+
+def __normalize_gps(coordinates, centroids):
"""
Takes a list of lists of coordinates and centroids and returns a list of lists of normalized coordinates and centroids
"""
@@ -230,70 +152,59 @@ def normalize_gps(coordinates, centroids):
for i in coordinates:
normalized_coordinates.append(
- [__min_max_normalize__(i[0], min_lat, max_lat), __min_max_normalize__(i[1], min_lon, max_lon)])
+ [__min_max_normalize(i[0], min_lat, max_lat), __min_max_normalize(i[1], min_lon, max_lon)])
for i in centroids:
normalized_centroids.append(
- [__min_max_normalize__(i[0], min_lat, max_lat), __min_max_normalize__(i[1], min_lon, max_lon)])
+ [__min_max_normalize(i[0], min_lat, max_lat), __min_max_normalize(i[1], min_lon, max_lon)])
return normalized_coordinates, normalized_centroids
-def __min_max_normalize__(value, min_value, max_value):
+def __min_max_normalize(value, min_value, max_value):
"""
Takes a value, min value, and max value and returns the normalized value
"""
return (value - min_value) / (max_value - min_value)
-# Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid
-def __move_coordinate__(larger_centroid_coordinates, smaller_centroid_coordinates):
- # Calculate the centroid of the smaller cluster
- smaller_centroid = __mean_center__(smaller_centroid_coordinates)
-
- # Find the coordinate in the larger cluster that is closest to the centroid of the smaller cluster
- closest_coordinate = __find_closest_coordinate__(larger_centroid_coordinates, smaller_centroid)
-
- return closest_coordinate
-
-
-def __find_closest_coordinate__(coordinates, centroid):
+def __find_closest_coordinate(coordinates, centroid):
"""
Takes a list of coordinates and a centroid and returns the coordinate in the list that is closest to the centroid
"""
closest_coordinate = coordinates[0]
- closest_coordinate_distance = __distance__(closest_coordinate, centroid)
+ closest_coordinate_distance = __distance(closest_coordinate, centroid)
for coordinate in coordinates:
- if __distance__(coordinate, centroid) < closest_coordinate_distance:
+ if __distance(coordinate, centroid) < closest_coordinate_distance:
closest_coordinate = coordinate
- closest_coordinate_distance = __distance__(coordinate, centroid)
+ closest_coordinate_distance = __distance(coordinate, centroid)
return closest_coordinate
-def __find_farthest_coordinate__(coordinates, centroid):
+def __find_farthest_coordinate(coordinates, centroid):
"""
Takes a list of coordinates and a centroid and returns the coordinate in the list that is furthest from the centroid
"""
farthest_coordinate = coordinates[0]
- farthest_coordinate_distance = __distance__(farthest_coordinate, centroid)
+ farthest_coordinate_distance = __distance(farthest_coordinate, centroid)
for coordinate in coordinates:
- if __distance__(coordinate, centroid) > farthest_coordinate_distance:
+ if __distance(coordinate, centroid) > farthest_coordinate_distance:
farthest_coordinate = coordinate
- farthest_coordinate_distance = __distance__(coordinate, centroid)
+ farthest_coordinate_distance = __distance(coordinate, centroid)
return farthest_coordinate
-def __mean_center__(coordinates):
+def __mean_center(coordinates):
"""
Takes a list of coordinates and returns the mean center of the coordinates
"""
return [sum([i[0] for i in coordinates]) / len(coordinates), sum([i[1] for i in coordinates]) / len(coordinates)]
-def __distance__(coordinate1, coordinate2):
+def __distance(coordinate1, coordinate2):
"""
Takes two coordinates and returns the distance between them
"""