1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
|
import folium
import pandas as pd
import requests
from sklearn.cluster import KMeans
def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff):
    """Cluster the stops in *df* into two routes and balance their trip times.

    K-means is seeded with ``norm_centroids`` and fitted on the
    ``normalized_gps`` column. The label of every row is written to a
    ``cluster`` column of *df* (the frame is modified in place). Rows
    labelled 0 form route 1 and rows labelled 1 form route 2; if more than
    two centroids are supplied, the additional clusters are not turned into
    routes. When the two trip times differ by more than ``time_diff``, stops
    are moved between the routes with ``minimize_route_time_diff`` and the
    ``cluster`` column is updated to match the final split.

    Args:
        df: DataFrame with a ``gps`` and a ``normalized_gps`` column, one
            coordinate pair per row.
        centroids: Route start points in raw coordinates; only the first two
            entries are used.
        norm_centroids: Initial k-means centres, expressed in the same space
            as ``normalized_gps``.
        end: Final part of the routing request, passed through unchanged to
            ``get_trip_time``.
        time_diff: Largest tolerated difference between the two trip times,
            in hours.

    Returns:
        ``(df, route_1_coordinates, route_2_coordinates)`` where the two
        route values are lists taken from the ``gps`` column.
    """
    # Local import: numpy is not imported at module level in this file.
    import numpy as np

    # Explicit start centres are handed over as an ndarray with a single
    # initialisation: some scikit-learn releases only accept an array (not a
    # plain list) for ``init``, and others warn when explicit centres are
    # combined with more than one initialisation.
    kmeans = KMeans(
        n_clusters=len(norm_centroids),
        init=np.asarray(norm_centroids, dtype=float),
        n_init=1,
    )
    kmeans.fit(df['normalized_gps'].values.tolist())
    df['cluster'] = kmeans.labels_

    # Each route starts at its own centroid, formatted for the request URL.
    centroid_1 = list_to_string([centroids[0]])
    centroid_2 = list_to_string([centroids[1]])

    route_1_coordinates = df.loc[df['cluster'] == 0, 'gps'].values.tolist()
    route_2_coordinates = df.loc[df['cluster'] == 1, 'gps'].values.tolist()

    trip_hrs_1 = get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), centroid_1, end)
    trip_hrs_2 = get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), centroid_2, end)

    # Only rebalance when the routes are further apart than the tolerance.
    if abs(trip_hrs_1 - trip_hrs_2) > time_diff:
        route_1_coordinates, route_2_coordinates = minimize_route_time_diff(
            route_1_coordinates, route_2_coordinates, centroid_1, centroid_2, end, time_diff)

    # Rows are matched to routes through the string form of their coordinate,
    # because the ``gps`` cells are list-like and cannot be hashed by ``isin``.
    gps_as_text = df['gps'].astype(str)
    df.loc[gps_as_text.isin([str(c) for c in route_1_coordinates]), 'cluster'] = 0
    df.loc[gps_as_text.isin([str(c) for c in route_2_coordinates]), 'cluster'] = 1
    return df, route_1_coordinates, route_2_coordinates
def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end,
                             time_diff):
    """Shift stops between two routes until their trip times are close enough.

    While the trip times differ by more than ``time_diff``, one stop is taken
    from the route that currently takes longer and appended to the other
    route. The stop is chosen by ``move_coordinate`` (the donor stop nearest
    to the mean position of the receiving route).

    The search stops as soon as one of these holds:

    * the difference is within ``time_diff``;
    * the slower route has a single stop left, or the other route is empty;
    * the last move did not reduce the difference - that move is undone, so
      the best split found is returned instead of bouncing a stop back and
      forth between the routes.

    Both input lists are modified in place and are also the returned objects.

    Args:
        route_1_coordinates: Stops of route 1 (list of coordinate pairs).
        route_2_coordinates: Stops of route 2 (list of coordinate pairs).
        route_1_start: Start of route 1, already formatted for the request.
        route_2_start: Start of route 2, already formatted for the request.
        end: Final part of the routing request, passed to ``get_trip_time``.
        time_diff: Largest tolerated difference between trip times, in hours.

    Returns:
        ``(route_1_coordinates, route_2_coordinates)``.
    """

    def _trip_times():
        # One routing request per route, for the current split of stops.
        return (
            get_trip_time(list_to_string(route_1_coordinates), len(route_1_coordinates), route_1_start, end),
            get_trip_time(list_to_string(route_2_coordinates), len(route_2_coordinates), route_2_start, end),
        )

    route_1_time, route_2_time = _trip_times()
    gap = abs(route_1_time - route_2_time)

    while gap > time_diff:
        # The donor is the route that takes longer, not the one with more
        # stops: a route with few but distant stops can still be the slow one.
        if route_1_time > route_2_time:
            donor, receiver = route_1_coordinates, route_2_coordinates
        else:
            donor, receiver = route_2_coordinates, route_1_coordinates

        # Never empty a route, and never ask for the centre of an empty one.
        if len(donor) <= 1 or len(receiver) == 0:
            break

        moved = move_coordinate(donor, receiver)
        position = donor.index(moved)
        del donor[position]
        receiver.append(moved)

        route_1_time, route_2_time = _trip_times()
        new_gap = abs(route_1_time - route_2_time)
        if new_gap >= gap:
            # The move did not help; restore the previous split and stop.
            receiver.pop()
            donor.insert(position, moved)
            break
        gap = new_gap

    return route_1_coordinates, route_2_coordinates
def list_to_string(list_of_lists):
    """Format coordinate pairs as a ``;``-terminated string for a request URL.

    Every pair is written as ``<pair[1]>,<pair[0]>;`` - the two components
    are swapped relative to their order in the input (presumably turning
    ``[lat, lon]`` pairs into the ``lon,lat`` order the routing service
    expects - confirm against the caller's data).

    Args:
        list_of_lists: Iterable of indexable pairs.

    Returns:
        The concatenated string; ``''`` for empty input. Each pair, including
        the last one, is followed by ``;``.
    """
    return ''.join(f'{pair[1]},{pair[0]};' for pair in list_of_lists)
def create_json_df(coordinate_string, start, end, timeout=30):
    """Request a one-way bike trip and return its waypoints as a DataFrame.

    The request path is ``start + coordinate_string + end`` appended to the
    trip endpoint, with ``roundtrip=false&source=first&destination=last``.

    Args:
        coordinate_string: Intermediate stops, already formatted for the URL
            (for example by ``list_to_string``).
        start: First coordinate of the trip, already formatted for the URL.
        end: Last coordinate of the trip, already formatted for the URL.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` can block indefinitely on an unresponsive host.

    Returns:
        DataFrame built from the ``waypoints`` list of the response, with
        added ``lat`` and ``lon`` columns and ``waypoint_index`` cast to int,
        sorted by ``waypoint_index``.
    """
    response = requests.get(
        'http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end
        + '?roundtrip=false&source=first&destination=last',
        timeout=timeout,
    )
    payload = response.json()
    # One row per waypoint of the response.
    df = pd.DataFrame(payload['waypoints'])
    # NOTE(review): the OSRM API documents a waypoint ``location`` as
    # [longitude, latitude]. If this server follows that convention, the two
    # column names below are swapped. Left unchanged because callers outside
    # this view may already compensate - confirm before fixing.
    df['lat'] = df['location'].apply(lambda x: x[0])
    df['lon'] = df['location'].apply(lambda x: x[1])
    df['waypoint_index'] = df['waypoint_index'].astype(int)
    # Put the waypoints in visiting order.
    return df.sort_values(by=['waypoint_index'])
def get_trip_time(coordinate_string, num_waypoints, start, end, seconds_per_stop=60, timeout=30):
    """Return the estimated duration of a one-way bike trip, in hours.

    The travel time is the ``duration`` of the first trip in the routing
    response (truncated to whole seconds); a fixed allowance per stop is
    added on top.

    Args:
        coordinate_string: Intermediate stops, already formatted for the URL
            (for example by ``list_to_string``).
        num_waypoints: Number of stops to add the per-stop allowance for.
        start: First coordinate of the trip, already formatted for the URL.
        end: Last coordinate of the trip, already formatted for the URL.
        seconds_per_stop: Time allowance added for every stop, in seconds.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` can block indefinitely on an unresponsive host.

    Returns:
        Travel time plus stop allowance, in hours.
    """
    response = requests.get(
        'http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end
        + '?roundtrip=false&source=first&destination=last',
        timeout=timeout,
    )
    payload = response.json()
    travel_time_seconds = int(payload['trips'][0]['duration'])
    waypoint_time_seconds = num_waypoints * seconds_per_stop
    return (travel_time_seconds + waypoint_time_seconds) / 3600
def normalize_gps(coordinates, centroids):
    """Min-max scale coordinates and centroids with the coordinates' bounds.

    The minimum and maximum of each component are taken from ``coordinates``
    only. The centroids are scaled with those same bounds, so a centroid
    lying outside the range of the coordinates maps outside ``[0, 1]``.

    Args:
        coordinates: List of pairs; component 0 and component 1 are scaled
            independently.
        centroids: List of pairs scaled with the bounds of ``coordinates``.

    Returns:
        ``(normalized_coordinates, normalized_centroids)``, two lists of
        two-element lists.
    """
    first_components = [point[0] for point in coordinates]
    second_components = [point[1] for point in coordinates]

    # Bounds come from the coordinates alone.
    lat_low = min(first_components)
    lat_high = max(first_components)
    lon_low = min(second_components)
    lon_high = max(second_components)

    def _rescale(points):
        # Scale every pair with the shared bounds computed above.
        return [
            [__min_max_normalize__(point[0], lat_low, lat_high),
             __min_max_normalize__(point[1], lon_low, lon_high)]
            for point in points
        ]

    return _rescale(coordinates), _rescale(centroids)
def __min_max_normalize__(value, min_value, max_value):
    """Scale *value* linearly so that ``min_value`` -> 0 and ``max_value`` -> 1.

    Values outside ``[min_value, max_value]`` map outside ``[0, 1]``.

    Args:
        value: Number to scale.
        min_value: Lower bound of the reference range.
        max_value: Upper bound of the reference range.

    Returns:
        ``(value - min_value) / (max_value - min_value)``, or ``0.0`` when the
        range has zero width.
    """
    span = max_value - min_value
    if span == 0:
        # Every reference value is identical (e.g. all stops share one
        # latitude); there is no spread to scale by, so avoid dividing by 0.
        return 0.0
    return (value - min_value) / span
def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates):
    """Pick the stop of the larger cluster that lies nearest the smaller one.

    The reference point is the mean of the smaller cluster's coordinates.
    Despite its name, this function does not move anything: neither list is
    modified, and the caller is expected to transfer the returned stop.

    Args:
        larger_centroid_coordinates: Candidate stops (sequence of pairs).
        smaller_centroid_coordinates: Stops of the receiving cluster
            (sequence of pairs).

    Returns:
        The element of ``larger_centroid_coordinates`` with the smallest
        ``__distance__`` to the mean of ``smaller_centroid_coordinates``;
        the first such element when several are equally near.

    Raises:
        ValueError: If either sequence is empty.
    """
    if len(larger_centroid_coordinates) == 0:
        raise ValueError('larger_centroid_coordinates must not be empty')
    stop_count = len(smaller_centroid_coordinates)
    if stop_count == 0:
        raise ValueError('smaller_centroid_coordinates must not be empty')

    # Mean position of the receiving cluster.
    smaller_centroid = [
        sum(point[0] for point in smaller_centroid_coordinates) / stop_count,
        sum(point[1] for point in smaller_centroid_coordinates) / stop_count,
    ]
    # min() keeps the first of several equally near stops and evaluates each
    # distance only once.
    return min(larger_centroid_coordinates,
               key=lambda point: __distance__(point, smaller_centroid))
def __distance__(coordinate1, coordinate2):
    """Return the straight-line (Euclidean) distance between two pairs.

    Only the first two components of each argument are used.
    """
    delta_first = coordinate1[0] - coordinate2[0]
    delta_second = coordinate1[1] - coordinate2[1]
    squared_length = delta_first ** 2 + delta_second ** 2
    return squared_length ** 0.5
|