summaryrefslogtreecommitdiff
path: root/utils.py
blob: e0cc295404940a0a238c92b289096a0a3063cc33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import folium
import pandas as pd
import requests
from sklearn.cluster import KMeans


def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff):
    """
    Split the stops in df into two routes with K-Means, then rebalance the routes by trip time.

    df must have a 'normalized_gps' column (the points K-Means is fitted on) and a
    'gps' column (the coordinates used for routing). A 'cluster' column is written
    to df in place: 0 for stops on route 1, 1 for stops on route 2.

    :param df: dataframe of stops; modified in place and also returned
    :param centroids: start points of the routes; entries 0 and 1 are used as the
        start of route 1 and route 2 respectively
    :param norm_centroids: initial K-Means centres, one per cluster; only the
        clusters labelled 0 and 1 are turned into routes
    :param end: destination shared by both routes, passed through unchanged to the
        trip-time helpers
    :param time_diff: largest accepted difference between the two trip times, in hours
    :return: (df, route_1_coordinates, route_2_coordinates)
    """
    # With explicit starting centres a single initialisation is all K-Means can do,
    # so request exactly one run instead of relying on the library default.
    kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids, n_init=1)

    # Fit the coordinates to the clusters and record the label of every stop
    kmeans.fit(df['normalized_gps'].values.tolist())
    df['cluster'] = kmeans.labels_

    # The start of each route, formatted the same way as the stops
    centroid_1 = list_to_string([centroids[0]])
    centroid_2 = list_to_string([centroids[1]])

    # The stops K-Means assigned to each route
    route_1_coordinates = df[df['cluster'] == 0]['gps'].values.tolist()
    route_2_coordinates = df[df['cluster'] == 1]['gps'].values.tolist()

    # minimize_route_time_diff() measures both trips itself and hands the lists back
    # untouched when they are already within time_diff, so the trip times are not
    # fetched here as well - that would only repeat the same two routing requests.
    route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1_coordinates,
                                                                        route_2_coordinates,
                                                                        centroid_1, centroid_2, end, time_diff)

    # Edit the dataframe to reflect the new coordinate clusters. The comparison is
    # done on the string form of each coordinate because the 'gps' cells are lists.
    df.loc[df['gps'].astype(str).isin([str(stop) for stop in route_1_coordinates]), 'cluster'] = 0
    df.loc[df['gps'].astype(str).isin([str(stop) for stop in route_2_coordinates]), 'cluster'] = 1

    return df, route_1_coordinates, route_2_coordinates


def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end,
                             time_diff):
    """
    Takes two routes and moves stops between them until their trip times differ by at most time_diff hours.

    While the gap is too large, one stop is taken from the slower route (the one
    with the larger trip time) and appended to the faster route. The stop is the
    one move_coordinate() selects. Both lists are modified in place and the same
    list objects are returned.

    Balancing is best effort. It stops, returning the best split reached, when
    either route has no stops left to work with or when a move does not narrow the
    gap (that last move is undone first). The result may therefore still differ
    by more than time_diff.

    :param route_1_coordinates: list of stops on route 1 (modified in place)
    :param route_2_coordinates: list of stops on route 2 (modified in place)
    :param route_1_start: start of route 1, already formatted for get_trip_time()
    :param route_2_start: start of route 2, already formatted for get_trip_time()
    :param end: destination shared by both routes, already formatted for get_trip_time()
    :param time_diff: largest accepted difference between the trip times, in hours
    :return: (route_1_coordinates, route_2_coordinates)
    """

    def trip_hours(stops, start):
        # Trip time of one route, in hours
        return get_trip_time(list_to_string(stops), len(stops), start, end)

    route_1_time = trip_hours(route_1_coordinates, route_1_start)
    route_2_time = trip_hours(route_2_coordinates, route_2_start)
    gap = abs(route_1_time - route_2_time)

    while gap > time_diff:
        # Take from the route that takes longer to ride, not the one with more stops:
        # a route with few stops can still be the slow one.
        if route_1_time > route_2_time:
            slower, faster = route_1_coordinates, route_2_coordinates
        else:
            slower, faster = route_2_coordinates, route_1_coordinates

        # move_coordinate() needs a stop to pick and stops to measure against
        if not slower or not faster:
            break

        stop = move_coordinate(slower, faster)
        position = slower.index(stop)
        del slower[position]
        faster.append(stop)

        new_route_1_time = trip_hours(route_1_coordinates, route_1_start)
        new_route_2_time = trip_hours(route_2_coordinates, route_2_start)
        new_gap = abs(new_route_1_time - new_route_2_time)

        # Only keep a move that narrows the gap. Because the gap shrinks on every
        # pass, the loop cannot bounce the same stop back and forth forever.
        if new_gap >= gap:
            faster.pop()
            slower.insert(position, stop)
            break

        route_1_time, route_2_time, gap = new_route_1_time, new_route_2_time, new_gap

    return route_1_coordinates, route_2_coordinates


def list_to_string(list_of_lists):
    """
    Takes a list of coordinate pairs and returns them joined into one string

    Every pair contributes its element at index 1, a comma, its element at index 0
    and a closing semicolon, so the two values come out in swapped order and the
    result ends with ';' (an empty list gives an empty string).
    Presumably this turns [lat, lon] pairs into the 'lon,lat;' form used in the
    routing URLs - confirm against the callers.
    """
    return ''.join(str(pair[1]) + ',' + str(pair[0]) + ';' for pair in list_of_lists)


def create_json_df(coordinate_string, start, end):
    """
    Requests a trip through the given coordinates and returns its waypoints as a dataframe

    start, coordinate_string and end are concatenated, in that order, straight into
    the request URL, so they must already be formatted (and separated) the way the
    routing server expects. No timeout is passed to the request.

    The returned dataframe is built from the 'waypoints' entry of the response,
    gains 'lat' and 'lon' columns and is sorted by 'waypoint_index'.

    NOTE(review): 'lat' is filled from location[0] and 'lon' from location[1]. The
    OSRM API documents 'location' as [longitude, latitude]; if this server follows
    that, the two columns hold each other's values - confirm against the callers.
    """
    url = ('http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end
           + '?roundtrip=false&source=first&destination=last')
    payload = requests.get(url).json()

    # One row per waypoint reported by the server
    waypoints = pd.DataFrame(payload['waypoints'])

    # Split the location pair into two separate columns
    waypoints['lat'] = waypoints['location'].apply(lambda location: location[0])
    waypoints['lon'] = waypoints['location'].apply(lambda location: location[1])

    # Put the rows in the order given by the waypoint index
    waypoints['waypoint_index'] = waypoints['waypoint_index'].astype(int)
    return waypoints.sort_values(by=['waypoint_index'])


def get_trip_time(coordinate_string, num_waypoints, start, end):
    """
    Requests a trip through the given coordinates and returns its total time in hours

    start, coordinate_string and end are concatenated, in that order, straight into
    the request URL, so they must already be formatted (and separated) the way the
    routing server expects. No timeout is passed to the request.

    The total is the 'duration' of the first trip in the response, truncated to
    whole seconds, plus 60 seconds for each of the num_waypoints stops.
    """
    url = ('http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end
           + '?roundtrip=false&source=first&destination=last')
    payload = requests.get(url).json()

    # Time spent travelling, as reported by the server
    riding_seconds = int(payload['trips'][0]['duration'])

    # Fixed allowance of one minute per waypoint
    stopping_seconds = num_waypoints * 60

    return (riding_seconds + stopping_seconds) / 3600


def normalize_gps(coordinates, centroids):
    """
    Takes a list of coordinates and a list of centroids and returns both lists min-max normalized

    The minimum and maximum of each axis are taken from coordinates only (index 0
    is treated as latitude, index 1 as longitude). The centroids are scaled with
    those same bounds, so a centroid lying outside the coordinates' bounding box
    ends up outside the 0-1 range.

    Returns (normalized_coordinates, normalized_centroids), each a list of
    two-element lists in the same order as the input.
    """
    lat_values = [point[0] for point in coordinates]
    lon_values = [point[1] for point in coordinates]

    # Bounds of the coordinates along each axis
    lat_low, lat_high = min(lat_values), max(lat_values)
    lon_low, lon_high = min(lon_values), max(lon_values)

    def scale(point):
        # Min-max normalize one point against the bounds found above
        return [__min_max_normalize__(point[0], lat_low, lat_high),
                __min_max_normalize__(point[1], lon_low, lon_high)]

    scaled_coordinates = [scale(point) for point in coordinates]
    scaled_centroids = [scale(point) for point in centroids]

    return scaled_coordinates, scaled_centroids


def __min_max_normalize__(value, min_value, max_value):
    """
    Takes a value, min value, and max value and returns the normalized value

    min_value maps to 0 and max_value maps to 1; a value outside that range maps
    outside 0-1. When min_value equals max_value there is no range to scale by
    (for example when every point shares the same latitude), so 0.0 is returned
    instead of dividing by zero.
    """
    value_range = max_value - min_value
    if value_range == 0:
        return 0.0

    return (value - min_value) / value_range


def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates):
    """
    Takes the coordinates of two clusters and returns the coordinate of the larger cluster that should move to the smaller one

    The coordinate returned is the one in larger_centroid_coordinates closest to
    the centroid (the mean of index 0 and of index 1) of
    smaller_centroid_coordinates; on a tie the earliest one wins. Nothing is
    actually moved here - neither list is changed, the caller does the transfer.
    """
    # Centroid of the smaller cluster
    count = len(smaller_centroid_coordinates)
    target = [sum(point[0] for point in smaller_centroid_coordinates) / count,
              sum(point[1] for point in smaller_centroid_coordinates) / count]

    # Distance from every coordinate of the larger cluster to that centroid
    distances = [__distance__(candidate, target) for candidate in larger_centroid_coordinates]

    # Index of the smallest distance, keeping the first one when several are equal
    nearest = 0
    for position, distance in enumerate(distances):
        if distance < distances[nearest]:
            nearest = position

    return larger_centroid_coordinates[nearest]


def __distance__(coordinate1, coordinate2):
    """
    Takes two coordinates and returns the straight-line (Euclidean) distance between them

    Only the elements at index 0 and index 1 of each coordinate are used.
    """
    delta_0 = coordinate1[0] - coordinate2[0]
    delta_1 = coordinate1[1] - coordinate2[1]

    return (delta_0 ** 2 + delta_1 ** 2) ** 0.5