In [1]:
import pandas as pd
import folium
import utils

In [2]:
# Read the four source lists from their CSV files (paths are relative to the notebook)
ListA, ListB, ListC, ListD = [
    pd.read_csv(csv_path)
    for csv_path in ('List A.csv', 'List B.csv', 'List C.csv', 'List D.csv')
]

In [3]:
# Three seed centroids, one per neighbourhood. Each pair is written with the
# ~42 value first and the ~-71 value second (latitude first, judging by the values).
centroids = [
    [42.364506, -71.054733],  # North End
    [42.358894, -71.056742],  # Financial District
    [42.3505, -71.0760],      # Back Bay
]

# Northeastern's coordinate as a single comma-separated string.
# Note the order is the opposite of the centroid pairs: the ~-71 value comes first.
northeastern_coordinate = "-71.09033,42.33976"

In [4]:
# Tag every row with the letter of the list it came from, then stack the four lists
for source_frame, list_label in ((ListA, 'A'), (ListB, 'B'), (ListC, 'C'), (ListD, 'D')):
    source_frame['list'] = list_label

# concat keeps each frame's own row labels (no ignore_index), so index values repeat
TotalList = pd.concat([ListA, ListB, ListC, ListD])

In [5]:
# Keep only the place name, its gps value and the source-list label
columns_to_keep = ['name', 'gps', 'list']
TotalList = TotalList[columns_to_keep]

In [6]:
# Convert the gps column to a list of floats per row for k-means.
def _parse_gps(value):
    """Return ``value`` as a list of floats.

    Accepts the raw text form read from the CSV (e.g. "[42.36, -71.05]") as
    well as an already-parsed sequence of numbers, so re-running this cell
    does not fail on values that were converted by a previous run.
    """
    if isinstance(value, str):
        value = value.strip('[]').split(',')
    return [float(part) for part in value]


# assign() returns a new frame rather than writing into the column subset
# created in the previous cell; the 'gps' column keeps its position.
TotalList = TotalList.assign(gps=TotalList['gps'].apply(_parse_gps))

In [7]:
# Normalize the gps points and the centroids together with utils.normalize_gps,
# then store the normalized points as a new column.
gps_points = TotalList['gps'].values.tolist()
normalized_points, norm_centroids = utils.normalize_gps(gps_points, centroids)
TotalList['normalized_gps'] = normalized_points
display(TotalList)

Unnamed: 0,name,gps,list,normalized_gps
0,521 Commercial Street #525,"[42.3688272, -71.0553792]",A,"[0.7251058917247415, 0.7797482353989729]"
1,Acorn St,"[42.3576234, -71.0688746]",A,"[0.6747391031099019, 0.7451825969538083]"
2,Arlington's Great Meadows,"[42.4299758, -71.2038948]",A,"[1.0, 0.3993566550776867]"
3,Arthur Fiedler Statue,"[42.3565057, -71.0754527]",A,"[0.6697144722136962, 0.7283341725828262]"
4,BU Beach,"[42.3511927, -71.1060828]",A,"[0.6458298305822171, 0.6498815915448888]"
...,...,...,...,...
33,The Quiet Few,"[42.3670906, -71.0359889]",D,"[0.717298990038831, 0.8294124246148072]"
34,The Tall Ship Boston,"[42.3649544, -71.0414523]",D,"[0.7076956827824702, 0.8154190706511427]"
35,Toasted Flats,"[42.3711266, -71.0371343]",D,"[0.7354428661210094, 0.8264787225922622]"
36,Vega Market,"[42.3891835, -71.033703]",D,"[0.8166178304491644, 0.8352672783369615]"


# 2 Routes

## Cluster and Minimize

In [8]:
# Run the two-route clustering using only the first two normalized centroids.
# NOTE(review): the full three-element `centroids` list is still passed alongside
# the two normalized centroids — confirm utils.cluster_and_minimize_2 expects that.
# NOTE(review): the meaning of the positional 0.5 is defined in utils — TODO document.
norm_centroids_2 = norm_centroids[:2]
two_route_result = utils.cluster_and_minimize_2(
    TotalList, centroids, norm_centroids_2, northeastern_coordinate, 0.5, minimize=False
)
# The first returned element is not used in this notebook
_, route_1_coordinates, route_2_coordinates = two_route_result

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


## Create JSON

In [9]:
# Convert each route's coordinate list with utils.list_to_string; the resulting
# values are what the later utils.create_json_df / utils.get_trip_time calls receive.
route_1, route_2 = [
    utils.list_to_string(route_coordinates)
    for route_coordinates in (route_1_coordinates, route_2_coordinates)
]

In [10]:
# Build one dataframe per route with utils.create_json_df. Each call receives the
# route string, that route's centroid converted with utils.list_to_string, and the
# Northeastern coordinate (presumably start and end points — confirm in utils).
start_1 = utils.list_to_string([centroids[0]])
df1 = utils.create_json_df(route_1, start_1, northeastern_coordinate)

start_2 = utils.list_to_string([centroids[1]])
df2 = utils.create_json_df(route_2, start_2, northeastern_coordinate)

In [11]:
# Label every row with its route number (1-based, in the order df1, df2)
for route_number, route_df in enumerate((df1, df2), start=1):
    route_df['route'] = route_number

# Stack both routes into one frame with a fresh 0..n-1 index
df = pd.concat([df1, df2], ignore_index=True)

In [12]:
# Show the combined waypoint table for both routes.
# NOTE(review): in the rendered output the 'lat' column holds values near -71 and
# the 'lon' column holds values near 42, so the two column names look swapped —
# confirm in utils.create_json_df.
display(df)

Unnamed: 0,waypoint_index,trips_index,hint,distance,name,location,lat,lon,route
0,0,0,t4YsgAGHLIAAAAAAVQEAAAAAAAAwAAAAAAAAAHV0F0IAAA...,19.432511,,"[-71.054865, 42.364361]",-71.054865,42.364361,1
1,1,0,IzYEgGw1BIASAAAArwAAADMAAACUAwAAynkIQGUkmkEXlL...,6.024489,,"[-71.055569, 42.364032]",-71.055569,42.364032,1
2,2,0,5IgsgAqJLID7AAAAHgAAAAwAAAAIAAAAz5ffQcMBVEDFYK...,5.871835,,"[-71.055582, 42.365251]",-71.055582,42.365251,1
3,3,0,G4gsgDiILICSAwAA5gAAAOkAAAAAAAAAQljLQnyXy0Fhy8...,2.602121,,"[-71.056164, 42.366918]",-71.056164,42.366918,1
4,4,0,gIosgLaKLIDOAAAArgAAAFwBAAAAAAAAp3O3QafxmUEQiR...,15.458439,,"[-71.055561, 42.368861]",-71.055561,42.368861,1
...,...,...,...,...,...,...,...,...,...
168,84,0,7hAigPYQIoA2AgAAYwEAAAAAAAAAAAAAnsd7Qq9XHUIAAA...,7.478611,,"[-71.096959, 42.344689]",-71.096959,42.344689,2
169,85,0,bwwigH0MIoAFAAAAEAAAAFUAAAArAAAAag0xP3921D-BFx...,8.340476,,"[-71.095003, 42.342001]",-71.095003,42.342001,2
170,86,0,MQwigFwMIoAoAAAANQAAABwAAAB-AAAAoidqQSAYl0GvUh...,11.504463,,"[-71.094327, 42.341231]",-71.094327,42.341231,2
171,87,0,k4chgBiIIYAKAAAAFwAAAPQDAAB_AgAAHn2aP-biHUBi6e...,36.240351,,"[-71.093834, 42.339096]",-71.093834,42.339096,2


## Map

In [13]:
# Create a map centred on the mean of all waypoints.
# NOTE(review): in the dataframe displayed above, the column named 'lon' holds
# values near 42 and 'lat' holds values near -71, so the [lon, lat] ordering used
# throughout this cell is in practice (latitude, longitude) — confirm in
# utils.create_json_df before renaming anything.
map_center = [df['lon'].mean(), df['lat'].mean()]
m = folium.Map(location=map_center, zoom_start=11)

# One colour per route: route 1 -> red, route 2 -> blue
colors = ['red', 'blue']

for route in df['route'].unique():
    df_route = df[df['route'] == route]
    route_color = colors[route - 1]
    # Extract the coordinate pairs once and reuse them for the line and the markers
    route_points = df_route[['lon', 'lat']].values.tolist()
    folium.PolyLine(route_points, color=route_color).add_to(m)
    for point in route_points:
        folium.CircleMarker(point, radius=3, color=route_color).add_to(m)

# Display the map
m

## Results

In [14]:
# Count the coordinates in each route and report them
route_1_waypoints, route_2_waypoints = len(route_1_coordinates), len(route_2_coordinates)
for message_template, waypoint_count in (
    ("Route 1 has {} waypoints", route_1_waypoints),
    ("Route 2 has {} waypoints", route_2_waypoints),
):
    print(message_template.format(waypoint_count))

Route 1 has 82 waypoints
Route 2 has 87 waypoints


In [15]:
# Get the trip time for each route (route 1 first, then route 2).
# Each call passes the route string, its waypoint count, the route's centroid
# converted with utils.list_to_string, and the Northeastern coordinate.
start_1 = utils.list_to_string([centroids[0]])
trip_hrs_1 = utils.get_trip_time(route_1, route_1_waypoints, start_1, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_1))

start_2 = utils.list_to_string([centroids[1]])
trip_hrs_2 = utils.get_trip_time(route_2, route_2_waypoints, start_2, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_2))

The trip will take 11.154166666666667 hours
The trip will take 14.8425 hours


# 3 Routes

In [16]:
# Run the three-route clustering with all three centroids and their normalized form.
# NOTE(review): the meaning of the positional 0.2 is defined in utils — TODO document.
three_route_result = utils.cluster_and_minimize_3(
    TotalList, centroids, norm_centroids, northeastern_coordinate, 0.2, minimize=False
)
# The first returned element is not used in this notebook
_, route_1_coordinates, route_2_coordinates, route_3_coordinates = three_route_result

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


## Create JSON

In [17]:
# Convert each route's coordinate list with utils.list_to_string; the resulting
# values are what the later utils.create_json_df / utils.get_trip_time calls receive.
route_1, route_2, route_3 = [
    utils.list_to_string(route_coordinates)
    for route_coordinates in (route_1_coordinates, route_2_coordinates, route_3_coordinates)
]

In [18]:
# Build one dataframe per route with utils.create_json_df. Each call receives the
# route string, that route's centroid converted with utils.list_to_string, and the
# Northeastern coordinate (presumably start and end points — confirm in utils).
start_1 = utils.list_to_string([centroids[0]])
df1 = utils.create_json_df(route_1, start_1, northeastern_coordinate)

start_2 = utils.list_to_string([centroids[1]])
df2 = utils.create_json_df(route_2, start_2, northeastern_coordinate)

start_3 = utils.list_to_string([centroids[2]])
df3 = utils.create_json_df(route_3, start_3, northeastern_coordinate)

In [19]:
# Label every row with its route number (1-based, in the order df1, df2, df3)
for route_number, route_df in enumerate((df1, df2, df3), start=1):
    route_df['route'] = route_number

# Stack the three routes into one frame with a fresh 0..n-1 index
df = pd.concat([df1, df2, df3], ignore_index=True)

In [20]:
# Show the combined waypoint table for the three routes.
# NOTE(review): in the rendered output the 'lat' column holds values near -71 and
# the 'lon' column holds values near 42, so the two column names look swapped —
# confirm in utils.create_json_df.
display(df)

Unnamed: 0,waypoint_index,trips_index,hint,distance,name,location,lat,lon,route
0,0,0,t4YsgAGHLIAAAAAAVQEAAAAAAAAwAAAAAAAAAHV0F0IAAA...,19.432511,,"[-71.054865, 42.364361]",-71.054865,42.364361,1
1,1,0,e1kugJlZLoBmAAAA6QAAAAAAAAAAAAAAZ6M2QSewzkEAAA...,4.756158,,"[-71.060933, 42.376178]",-71.060933,42.376178,1
2,2,0,tFkugHVaLoAOAAAAAAAAABgAAAAAAAAAwMG2QAAAAAB6ii...,4.525535,,"[-71.060753, 42.376391]",-71.060753,42.376391,1
3,3,0,sJAugLOQLoBuAQAAlAEAAAAAAAAAAAAAHFcjQvEZM0IAAA...,7.844897,,"[-71.060948, 42.380436]",-71.060948,42.380436,1
4,4,0,VREtgNlJBIBCAAAAYAAAAAAAAAARAAAAOOzeQU7vHkIAAA...,22.681980,Factory Street,"[-71.061206, 42.398809]",-71.061206,42.398809,1
...,...,...,...,...,...,...,...,...,...
170,39,0,cX8hgJF_IYA1AAAAMAAAAGcAAABOAAAATyWxQQ77nUEHMC...,22.776295,Alleghany Street,"[-71.099348, 42.33047]",-71.099348,42.330470,3
171,40,0,s9QhgLbUIYAwAAAAkAAAAAAAAAAAAAAA2XmpQNgrgEEAAA...,4.111715,,"[-71.09454, 42.325354]",-71.094540,42.325354,3
172,41,0,5tYhgJHXIYAIAAAArQAAADwAAABCAQAAaRlbQD16mUGpAc...,17.374491,Dudley Street,"[-71.090904, 42.329829]",-71.090904,42.329829,3
173,42,0,k4chgBiIIYAKAAAAFwAAAPQDAAB_AgAAHn2aP-biHUBi6e...,36.240351,,"[-71.093834, 42.339096]",-71.093834,42.339096,3


## Map

In [21]:
# Create a map centred on the mean of all waypoints.
# NOTE(review): in the dataframe displayed above, the column named 'lon' holds
# values near 42 and 'lat' holds values near -71, so the [lon, lat] ordering used
# throughout this cell is in practice (latitude, longitude) — confirm in
# utils.create_json_df before renaming anything.
map_center = [df['lon'].mean(), df['lat'].mean()]
m = folium.Map(location=map_center, zoom_start=11)

# One colour per route: route 1 -> red, route 2 -> blue, route 3 -> green
colors = ['red', 'blue', 'green']

for route in df['route'].unique():
    df_route = df[df['route'] == route]
    route_color = colors[route - 1]
    # Extract the coordinate pairs once and reuse them for the line and the markers
    route_points = df_route[['lon', 'lat']].values.tolist()
    folium.PolyLine(route_points, color=route_color).add_to(m)
    for point in route_points:
        folium.CircleMarker(point, radius=3, color=route_color).add_to(m)

# Display the map
m

## Results

In [22]:
# Count the coordinates in each route and report them
route_1_waypoints = len(route_1_coordinates)
route_2_waypoints = len(route_2_coordinates)
route_3_waypoints = len(route_3_coordinates)
for message_template, waypoint_count in (
    ("Route 1 has {} waypoints", route_1_waypoints),
    ("Route 2 has {} waypoints", route_2_waypoints),
    ("Route 3 has {} waypoints", route_3_waypoints),
):
    print(message_template.format(waypoint_count))

Route 1 has 55 waypoints
Route 2 has 72 waypoints
Route 3 has 42 waypoints


In [23]:
# Get the trip time for each route (routes 1, 2, 3 in that order).
# Each call passes the route string, its waypoint count, the route's centroid
# converted with utils.list_to_string, and the Northeastern coordinate.
start_1 = utils.list_to_string([centroids[0]])
trip_hrs_1 = utils.get_trip_time(route_1, route_1_waypoints, start_1, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_1))

start_2 = utils.list_to_string([centroids[1]])
trip_hrs_2 = utils.get_trip_time(route_2, route_2_waypoints, start_2, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_2))

start_3 = utils.list_to_string([centroids[2]])
trip_hrs_3 = utils.get_trip_time(route_3, route_3_waypoints, start_3, northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_3))

The trip will take 8.883333333333333 hours
The trip will take 8.458055555555555 hours
The trip will take 10.230555555555556 hours
