In [16]:
import pandas as pd
import folium
import utils

In [17]:
# Read the four input lists (A-D) from CSV files; paths are relative to the
# working directory.
csv_paths = ('List A.csv', 'List B.csv', 'List C.csv', 'List D.csv')
ListA, ListB, ListC, ListD = (pd.read_csv(csv_path) for csv_path in csv_paths)

In [18]:
# Two starting centroids, one in the North End and one in the Financial District.
# The values look like [latitude, longitude] pairs (about 42 N, 71 W).
north_end_centroid = [42.364506, -71.054733]
financial_district_centroid = [42.358894, -71.056742]
centroids = [north_end_centroid, financial_district_centroid]

# Northeastern as one comma-separated string. NOTE(review): the order here appears
# to be "longitude,latitude", i.e. the reverse of the centroid pairs -- confirm
# this is what the utils helpers expect.
northeastern_coordinate = "-71.09033,42.33976"

In [19]:
# Tag every row with the letter of the list it came from. This adds a 'list'
# column to each source frame in place.
for source_frame, list_label in ((ListA, 'A'), (ListB, 'B'), (ListC, 'C'), (ListD, 'D')):
    source_frame['list'] = list_label

# Stack lists A, B and C into a single frame.
# NOTE(review): ListD is labelled above but is not part of this concat --
# confirm whether leaving it out is intentional.
frames_to_combine = [ListA, ListB, ListC]
TotalList = pd.concat(frames_to_combine)

In [20]:
# Keep only the name, the gps value and the source-list label; every other
# column is dropped.
kept_columns = ['name', 'gps', 'list']
TotalList = TotalList[kept_columns]

In [21]:
def _parse_gps(value):
    """Return one gps entry as a list of floats.

    A string entry has its surrounding square brackets stripped and is split
    on commas before conversion. An entry that is already a sequence is
    converted element by element, so re-running this cell on an
    already-parsed column no longer raises AttributeError.
    """
    if isinstance(value, str):
        value = value.strip('[]').split(',')
    return [float(part) for part in value]


# Convert the gps column to a list of floats per row for k-means
TotalList['gps'] = TotalList['gps'].apply(_parse_gps)

In [22]:
# Pass the gps points and the centroids through utils.normalize_gps. The first
# returned value becomes the new 'normalized_gps' column; the second is kept as
# norm_centroids for the clustering step.
gps_points = TotalList['gps'].values.tolist()
normalized_points, norm_centroids = utils.normalize_gps(gps_points, centroids)
TotalList['normalized_gps'] = normalized_points
display(TotalList)

Unnamed: 0,name,gps,list,normalized_gps
0,521 Commercial Street #525,"[42.3688272, -71.0553792]",A,"[0.7251058917247415, 0.8141430878559053]"
1,Acorn St,"[42.3576234, -71.0688746]",A,"[0.6747391031099019, 0.778052752104061]"
2,Arlington's Great Meadows,"[42.4299758, -71.2038948]",A,"[1.0, 0.41697235794883575]"
3,Arthur Fiedler Statue,"[42.3565057, -71.0754527]",A,"[0.6697144722136962, 0.7604611403245493]"
4,BU Beach,"[42.3511927, -71.1060828]",A,"[0.6458298305822171, 0.6785480000609988]"
...,...,...,...,...
28,The Clam Box,"[42.2763168, -71.0092883]",C,"[0.30922451563130937, 0.9374025730216268]"
29,The Partisans,"[42.3478375, -71.0404428]",C,"[0.6307464973238023, 0.8540870458656248]"
30,Union Oyster House,"[42.361288, -71.056908]",C,"[0.6912133469876947, 0.8100546647415456]"
31,Victoria's Diner,"[42.3270498, -71.0667744]",C,"[0.5372951958288665, 0.7836692527743693]"


# Cluster and Minimize

In [23]:
# Run utils.cluster_and_minimize on the combined list, giving it both the raw
# and the normalized centroids plus the Northeastern coordinate string.
# The three returned values are bound to df and to the coordinate lists of
# route 1 and route 2.
# NOTE(review): the meaning of the trailing 0.5 argument is not visible in this
# notebook -- confirm against utils.
clustering_output = utils.cluster_and_minimize(
    TotalList, centroids, norm_centroids, northeastern_coordinate, 0.5
)
df, route_1_coordinates, route_2_coordinates = clustering_output

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


In [24]:
# Convert each route's coordinate list with utils.list_to_string; the resulting
# strings are what the later utils calls receive as the route argument.
route_1, route_2 = (
    utils.list_to_string(route_coordinates)
    for route_coordinates in (route_1_coordinates, route_2_coordinates)
)

In [25]:
# Build one dataframe per route with utils.create_json_df. Each call receives
# the route string, that route's centroid converted to a string, and the
# Northeastern coordinate.
centroid_1_string = utils.list_to_string([centroids[0]])
df1 = utils.create_json_df(route_1, centroid_1_string, northeastern_coordinate)

centroid_2_string = utils.list_to_string([centroids[1]])
df2 = utils.create_json_df(route_2, centroid_2_string, northeastern_coordinate)

In [26]:
# Label each route's rows with its route number (written into df1 / df2 in place)
for route_number, route_frame in enumerate((df1, df2), start=1):
    route_frame['route'] = route_number

# Stack both routes into one table with a fresh 0..n-1 index
df = pd.concat([df1, df2], ignore_index=True)

In [27]:
# Show the route 2 table for inspection
display(df2)

Unnamed: 0,waypoint_index,trips_index,hint,distance,name,location,lat,lon,route
0,0,0,dMQAgDTDAIAuAAAAEgAAAAAAAAAAAAAAiaamQKk960AAAA...,1.113855,State Street,"[-71.056741, 42.358884]",-71.056741,42.358884,2
13,1,0,oLwsgCS9LIBHAAAA2AAAAAAAAABgAQAAkQwAQdo1v0EAAA...,2.532529,,"[-71.056995, 42.36049]",-71.056995,42.360490,2
68,2,0,CL0sgBS9LIAhAAAAagAAAAAAAAAAAAAAfoF0QPCwOkEAAA...,7.608103,,"[-71.056994, 42.361263]",-71.056994,42.361263,2
40,3,0,YbwsgEO9LIBbAAAAEgAAAAAAAAAPAAAA5ua1QcswjkAAAA...,0.468602,Creek Square,"[-71.056819, 42.361534]",-71.056819,42.361534,2
22,4,0,UkAEgFxABIB8AAAAAAAAAAAAAAAYAgAAVjBdQQAAAAAAAA...,6.397300,,"[-71.059255, 42.359295]",-71.059255,42.359295,2
...,...,...,...,...,...,...,...,...,...
14,67,0,-mUsgHZmLIATAAAAYgEAAL0AAADpAAAALf8HQHZ8HUK-9a...,55.355565,,"[-71.049204, 42.325624]",-71.049204,42.325624,2
11,68,0,43YhgPN2IYA1AAAAJAAAAAAAAAA5AAAAEha0QWgpbEEAAA...,18.896385,Lucy Street,"[-71.06221, 42.324934]",-71.062210,42.324934,2
69,69,0,CdQhgB0OA4AYAAAAHgAAADkAAAAAAAAALdMlQdSMQ0Fd0r...,10.970598,,"[-71.066844, 42.327134]",-71.066844,42.327134,2
19,70,0,XAAigHIAIoBKAAAASwAAAFUAAABDAQAARGUEQURlBEG2ZR...,11.054154,,"[-71.071196, 42.34085]",-71.071196,42.340850,2


In [28]:
# Create a map centred on the mean of all waypoints.
# NOTE(review): in the route table displayed earlier, the 'lon' column holds
# values near 42 and the 'lat' column values near -71, so 'lon' is listed first
# here to give folium its [latitude, longitude] order -- confirm the column
# naming in utils.
map_centre = [df['lon'].mean(), df['lat'].mean()]
m = folium.Map(location=map_centre, zoom_start=11)

# One colour per route: route 1 is drawn in red, route 2 in blue
colors = ['red', 'blue']

for route in df['route'].unique():
    df_route = df[df['route'] == route]
    route_color = colors[route - 1]  # route numbers start at 1
    route_points = df_route[['lon', 'lat']].values.tolist()

    # Connect the waypoints in table order, then mark each one individually
    folium.PolyLine(route_points, color=route_color).add_to(m)
    for point in route_points:
        folium.CircleMarker(point, radius=3, color=route_color).add_to(m)

# Display the map
m

In [29]:
# Count the waypoints on each route and report both totals
route_1_waypoints, route_2_waypoints = len(route_1_coordinates), len(route_2_coordinates)
for message, waypoint_count in (
    ("Route 1 has {} waypoints", route_1_waypoints),
    ("Route 2 has {} waypoints", route_2_waypoints),
):
    print(message.format(waypoint_count))

Route 1 has 61 waypoints
Route 2 has 70 waypoints


In [30]:
# Ask utils.get_trip_time for each route's duration and print it as hours.
# Each call receives the route string, its waypoint count, that route's
# centroid as a string, and the Northeastern coordinate.
# Route 1
centroid_1_string = utils.list_to_string([centroids[0]])
trip_hrs_1 = utils.get_trip_time(route_1, route_1_waypoints, centroid_1_string, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_1))

# Route 2
centroid_2_string = utils.list_to_string([centroids[1]])
trip_hrs_2 = utils.get_trip_time(route_2, route_2_waypoints, centroid_2_string, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_2))

The trip will take 10.36111111111111 hours
The trip will take 10.586666666666666 hours
