In [1]:
import pandas as pd
import folium
import utils

In [2]:
# Load the data
ListA = pd.read_csv('List A.csv')
ListB = pd.read_csv('List B.csv')
ListC = pd.read_csv('List C.csv')
ListD = pd.read_csv('List D.csv')

In [3]:
# Create two centroids, one in the North End and one in the Seaport District
centroids = [[42.365, -71.054], [42.351, -71.045]]

northeastern_coordinate = "-71.09033,42.33976"

In [4]:
# Combine the two lists and add a column to indicate the list
ListA['list'] = 'A'
ListB['list'] = 'B'
ListC['list'] = 'C'
ListD['list'] = 'D'

TotalList = pd.concat([ListA, ListB, ListC, ListD])

In [5]:
# Remove all columns but name and gps
TotalList = TotalList[['name', 'gps', 'list']]

In [6]:
# Convert the gps column to a list of lists for k-means
TotalList['gps'] = TotalList['gps'].apply(lambda x: x.strip('[]').split(','))
TotalList['gps'] = TotalList['gps'].apply(lambda x: [float(i) for i in x])

In [7]:
display(TotalList)

Unnamed: 0,name,gps,list
0,521 Commercial Street #525,"[42.3688272, -71.0553792]",A
1,Acorn St,"[42.3576234, -71.0688746]",A
2,Arlington's Great Meadows,"[42.4299758, -71.2038948]",A
3,Arthur Fiedler Statue,"[42.3565057, -71.0754527]",A
4,BU Beach,"[42.3511927, -71.1060828]",A
...,...,...,...
33,The Quiet Few,"[42.3670906, -71.0359889]",D
34,The Tall Ship Boston,"[42.3649544, -71.0414523]",D
35,Toasted Flats,"[42.3711266, -71.0371343]",D
36,Vega Market,"[42.3891835, -71.033703]",D


# 2 Routes

## Cluster and Minimize

In [8]:
# Cluster and minimize the data
_, routes = utils.cluster_and_optimize(TotalList, centroids, northeastern_coordinate,
                                       time_diff=0.25, max_time=24)

route_1_coordinates = routes[0]
route_2_coordinates = routes[1]

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


## Create JSON

In [9]:
# Create a JSON request for the API
# This is the data we want to get from the API
route_1 = utils.list_to_string(route_1_coordinates)
route_2 = utils.list_to_string(route_2_coordinates)

In [10]:
# Create a dataframe from the JSON
df1 = utils.create_json_df(route_1, utils.list_to_string([centroids[0]]), northeastern_coordinate)
df2 = utils.create_json_df(route_2, utils.list_to_string([centroids[1]]), northeastern_coordinate)

In [11]:
# Add columns for the route number
df1['route'] = 1
df2['route'] = 2

# Concatenate the two dataframes
df = pd.concat([df1, df2], ignore_index=True)

In [12]:
display(df)

Unnamed: 0,waypoint_index,trips_index,hint,distance,name,location,lat,lon,route
0,0,0,1IwsgDuNLIBFAAAAWgEAAA8AAAAAAAAAFQP1QGa9GUI7qN...,8.262982,,"[-71.053931, 42.365054]",-71.053931,42.365054,1
1,1,0,G4gsgDiILICSAwAA5gAAAOkAAAAAAAAAQljLQnyXy0Fhy8...,2.602121,,"[-71.056164, 42.366918]",-71.056164,42.366918,1
2,2,0,gIosgLaKLIDOAAAArgAAAFwBAAAAAAAAp3O3QafxmUEQiR...,15.458439,,"[-71.055561, 42.368861]",-71.055561,42.368861,1
3,3,0,HpwsgCKcLIAAAAAAEgAAAAAAAAAAAAAAAAAAACg870AAAA...,39.201677,,"[-71.062507, 42.365968]",-71.062507,42.365968,1
4,4,0,qn8sgKt_LIAfAAAAAAAAAAAAAAAAAAAA2ElcQAAAAAAAAA...,39.331841,,"[-71.064277, 42.358851]",-71.064277,42.358851,1
...,...,...,...,...,...,...,...,...,...
168,61,0,7hAigPYQIoA2AgAAYwEAAAAAAAAAAAAAnsd7Qq9XHUIAAA...,7.478611,,"[-71.096959, 42.344689]",-71.096959,42.344689,2
169,62,0,bwwigH0MIoAFAAAAEAAAAFUAAAArAAAAag0xP3921D-BFx...,8.340476,,"[-71.095003, 42.342001]",-71.095003,42.342001,2
170,63,0,MQwigFwMIoAoAAAANQAAABwAAAB-AAAAoidqQSAYl0GvUh...,11.504463,,"[-71.094327, 42.341231]",-71.094327,42.341231,2
171,64,0,k4chgBiIIYAKAAAAFwAAAPQDAAB_AgAAHn2aP-biHUBi6e...,36.240351,,"[-71.093834, 42.339096]",-71.093834,42.339096,2


## Map

In [13]:
# Create a map
m = folium.Map(location=[df['lon'].mean(), df['lat'].mean()], zoom_start=11)

# Add the points and lines for the two routes with different colors
colors = ['red', 'blue']

for route in df['route'].unique():
    df_route = df[df['route'] == route]
    folium.PolyLine(df_route[['lon', 'lat']].values.tolist(), color=colors[route - 1]).add_to(m)
    for i in range(len(df_route)):
        folium.CircleMarker(df_route[['lon', 'lat']].iloc[i].values.tolist(), radius=3, color=colors[route - 1]).add_to(
            m)

# Display the map
m

## Results

In [14]:
# Get the number of waypoints for each route
route_1_waypoints = len(route_1_coordinates)
route_2_waypoints = len(route_2_coordinates)
print("Route 1 has {} waypoints".format(route_1_waypoints))
print("Route 2 has {} waypoints".format(route_2_waypoints))

Route 1 has 105 waypoints
Route 2 has 64 waypoints


In [15]:
trip_hrs_1 = utils.get_trip_time(route_1, route_1_waypoints, utils.list_to_string([centroids[0]]),
                                 northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_1))
trip_hrs_2 = utils.get_trip_time(route_2, route_2_waypoints, utils.list_to_string([centroids[1]]),
                                 northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_2))

The trip will take 13.1925 hours
The trip will take 13.017777777777777 hours


# 3 Routes

In [16]:
# Cluster and minimize the data
# Add a third centroid in the Financial District
centroids.append([42.356, -71.055])
_, routes = utils.cluster_and_optimize(TotalList, centroids, northeastern_coordinate, time_diff=0.3, max_time=24)

route_1_coordinates = routes[0]
route_2_coordinates = routes[1]
route_3_coordinates = routes[2]

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


## Create JSON

In [17]:
# Create a JSON request for the API
# This is the data we want to get from the API
route_1 = utils.list_to_string(route_1_coordinates)
route_2 = utils.list_to_string(route_2_coordinates)
route_3 = utils.list_to_string(route_3_coordinates)

In [18]:
# Create a dataframe from the JSON
df1 = utils.create_json_df(route_1, utils.list_to_string([centroids[0]]), northeastern_coordinate)
df2 = utils.create_json_df(route_2, utils.list_to_string([centroids[1]]), northeastern_coordinate)
df3 = utils.create_json_df(route_3, utils.list_to_string([centroids[2]]), northeastern_coordinate)

In [19]:
# Add columns for the route number
df1['route'] = 1
df2['route'] = 2
df3['route'] = 3

# Concatenate the three dataframes
df = pd.concat([df1, df2, df3], ignore_index=True)

In [20]:
display(df)

Unnamed: 0,waypoint_index,trips_index,hint,distance,name,location,lat,lon,route
0,0,0,1IwsgDuNLIBFAAAAWgEAAA8AAAAAAAAAFQP1QGa9GUI7qN...,8.262982,,"[-71.053931, 42.365054]",-71.053931,42.365054,1
1,1,0,G4gsgDiILICSAwAA5gAAAOkAAAAAAAAAQljLQnyXy0Fhy8...,2.602121,,"[-71.056164, 42.366918]",-71.056164,42.366918,1
2,2,0,gIosgLaKLIDOAAAArgAAAFwBAAAAAAAAp3O3QafxmUEQiR...,15.458439,,"[-71.055561, 42.368861]",-71.055561,42.368861,1
3,3,0,HpwsgCKcLIAAAAAAEgAAAAAAAAAAAAAAAAAAACg870AAAA...,39.201677,,"[-71.062507, 42.365968]",-71.062507,42.365968,1
4,4,0,LRUugHAVLoA1AAAA7wEAAKAAAADqAAAAYZa9QBEBXEIOWo...,1.865658,,"[-71.061735, 42.369195]",-71.061735,42.369195,1
...,...,...,...,...,...,...,...,...,...
170,49,0,7hAigPYQIoA2AgAAYwEAAAAAAAAAAAAAnsd7Qq9XHUIAAA...,7.478611,,"[-71.096959, 42.344689]",-71.096959,42.344689,3
171,50,0,bwwigH0MIoAFAAAAEAAAAFUAAAArAAAAag0xP3921D-BFx...,8.340476,,"[-71.095003, 42.342001]",-71.095003,42.342001,3
172,51,0,MQwigFwMIoAoAAAANQAAABwAAAB-AAAAoidqQSAYl0GvUh...,11.504463,,"[-71.094327, 42.341231]",-71.094327,42.341231,3
173,52,0,k4chgBiIIYAKAAAAFwAAAPQDAAB_AgAAHn2aP-biHUBi6e...,36.240351,,"[-71.093834, 42.339096]",-71.093834,42.339096,3


## Map

In [21]:
# Create a map
m = folium.Map(location=[df['lon'].mean(), df['lat'].mean()], zoom_start=11)

# Add the points and lines for the three routes with different colors
colors = ['red', 'blue', 'green']

for route in df['route'].unique():
    df_route = df[df['route'] == route]
    folium.PolyLine(df_route[['lon', 'lat']].values.tolist(), color=colors[route - 1]).add_to(m)
    for i in range(len(df_route)):
        folium.CircleMarker(df_route[['lon', 'lat']].iloc[i].values.tolist(), radius=3, color=colors[route - 1]).add_to(
            m)

# Display the map
m

## Results

In [22]:
# Get the number of waypoints for each route
route_1_waypoints = len(route_1_coordinates)
route_2_waypoints = len(route_2_coordinates)
route_3_waypoints = len(route_3_coordinates)
print("Route 1 has {} waypoints".format(route_1_waypoints))
print("Route 2 has {} waypoints".format(route_2_waypoints))
print("Route 3 has {} waypoints".format(route_3_waypoints))

Route 1 has 61 waypoints
Route 2 has 56 waypoints
Route 3 has 52 waypoints


In [23]:
# Get the trip time for each route
trip_hrs_1 = utils.get_trip_time(route_1, route_1_waypoints, utils.list_to_string([centroids[0]]),
                                 northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_1))
trip_hrs_2 = utils.get_trip_time(route_2, route_2_waypoints, utils.list_to_string([centroids[1]]),
                                 northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_2))
trip_hrs_3 = utils.get_trip_time(route_3, route_3_waypoints, utils.list_to_string([centroids[2]]),
                                 northeastern_coordinate)
print("The trip will take {} hours".format(trip_hrs_3))

The trip will take 9.394444444444444 hours
The trip will take 8.852777777777778 hours
The trip will take 9.325555555555555 hours


# 10 ROUTES (because I can)

In [24]:
# Cluster and minimize the data
# Add seven more centroids around Boston with different latitudes and longitudes
for i in range(7):
    centroids.append([42.365 + i * 0.01, -71.054 + i * 0.01])

_, routes = utils.cluster_and_optimize(TotalList, centroids, northeastern_coordinate, time_diff=0.5, max_time=24)

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


## Create JSON

In [25]:
# Create a JSON request for the API
# This is the data we want to get from the API
route_strings = []
for route in routes:
    route_strings.append(utils.list_to_string(route))

In [26]:
# Create a dataframe from the JSON
dfs = []
for i in range(len(routes)):
    dfs.append(utils.create_json_df(route_strings[i], utils.list_to_string([centroids[i]]), northeastern_coordinate))
    
# Concatenate the dataframes
df = pd.concat(dfs, ignore_index=True)

In [30]:
# Add columns for the route number
for i in range(len(routes)):
    df['route'].iloc[i * len(routes[i]):(i + 1) * len(routes[i])] = i + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['route'].iloc[i * len(routes[i]):(i + 1) * len(routes[i])] = i + 1


In [31]:
# Display the dataframe
display(df)

Unnamed: 0,waypoint_index,trips_index,hint,distance,name,location,lat,lon,route
0,0,0,1IwsgDuNLIBFAAAAWgEAAA8AAAAAAAAAFQP1QGa9GUI7qN...,8.262982,,"[-71.053931, 42.365054]",-71.053931,42.365054,1
1,1,0,LRUugHAVLoA1AAAA7wEAAKAAAADqAAAAYZa9QBEBXEIOWo...,1.865658,,"[-71.061735, 42.369195]",-71.061735,42.369195,1
2,2,0,lM4AgM3LAIAEAAAAHAAAAJEAAAC_AgAAyLv6PxJ7NEGyPn...,2.242639,Miller's River Littoral Way,"[-71.065634, 42.371832]",-71.065634,42.371832,1
3,3,0,ZQ0fgPINH4AgAAAAEQAAAFEAAAAqAAAArYRYQRHu20BfWQ...,48.627645,,"[-71.06828, 42.369868]",-71.068280,42.369868,4
4,4,0,HR8ugIJiBICVAQAARwAAAAAAAACLAAAAQ1M0Qu3l-EAAAA...,0.645763,,"[-71.094764, 42.377355]",-71.094764,42.377355,1
...,...,...,...,...,...,...,...,...,...
184,11,0,-2EugABiLoCcAQAAigAAAAAAAAAAAAAAMQI3QqZ0dUEAAA...,7.363621,,"[-71.102659, 42.382131]",-71.102659,42.382131,10
185,12,0,VSIfgAYjH4AUAAAAAAAAACUBAADDAAAAaIcPQAAAAADYBw...,18.888832,,"[-71.110851, 42.374259]",-71.110851,42.374259,6
186,13,0,0OEhgPvhIYADAAAABgAAAA8AAAA0AAAA2lq-PipQFD-Y-N...,2.009578,,"[-71.085166, 42.349997]",-71.085166,42.349997,6
187,14,0,C-AhgGbgIYBZAAAAMQAAAAAAAABqAAAAj5QfQS1zq0AAAA...,4.887502,,"[-71.091358, 42.348977]",-71.091358,42.348977,6


## Map

In [37]:
# Create a map
m = folium.Map(location=[df['lon'].mean(), df['lat'].mean()], zoom_start=11)

# Add the points and lines for the three routes with different colors
colors = ['red', 'blue', 'green', 'orange', 'purple', 'pink', 'black', 'gray', 'brown', 'yellow']

for route in df['route'].unique():
    df_route = df[df['route'] == route]
    folium.PolyLine(df_route[['lon', 'lat']].values.tolist(), color=colors[route - 1]).add_to(m)
    for i in range(len(df_route)):
        folium.CircleMarker(df_route[['lon', 'lat']].iloc[i].values.tolist(), radius=3, color=colors[route - 1]).add_to(
            m)
        
# Display the map
m

## Results

In [36]:
# Get the number of waypoints for each route
route_waypoints = []
for route in routes:
    route_waypoints.append(len(route))
for i in range(len(route_waypoints)):
    print("Route {} has {} waypoints".format(i + 1, route_waypoints[i]))

Route 1 has 20 waypoints
Route 2 has 10 waypoints
Route 3 has 28 waypoints
Route 4 has 1 waypoints
Route 5 has 15 waypoints
Route 6 has 37 waypoints
Route 7 has 18 waypoints
Route 8 has 16 waypoints
Route 9 has 10 waypoints
Route 10 has 14 waypoints


In [34]:
# Get the trip time for each route
trip_hrs = []
for i in range(len(routes)):
    trip_hrs.append(utils.get_trip_time(route_strings[i], route_waypoints[i], utils.list_to_string([centroids[i]]),
                                        northeastern_coordinate))
for i in range(len(trip_hrs)):
    print("The trip will take {} hours".format(trip_hrs[i]))

The trip will take 3.1816666666666666 hours
The trip will take 3.8113888888888887 hours
The trip will take 3.9852777777777777 hours
The trip will take 3.8975 hours
The trip will take 4.088611111111111 hours
The trip will take 4.039444444444444 hours
The trip will take 3.17 hours
The trip will take 3.209722222222222 hours
The trip will take 4.1275 hours
The trip will take 3.2069444444444444 hours
