From 921a49433ccb34f2481f5f88de59f596976193cb Mon Sep 17 00:00:00 2001 From: itsGarrin Date: Mon, 6 Nov 2023 19:08:49 -0500 Subject: Refer to ZestySalesman.ipynb for the latest route --- Clustering2.0.ipynb | 58 +++++----- ZestySalesman.ipynb | 296 ++++++++++++++++++++++++++++++++++++++-------------- utils.py | 109 +++++++++---------- 3 files changed, 302 insertions(+), 161 deletions(-) diff --git a/Clustering2.0.ipynb b/Clustering2.0.ipynb index 5ff2d63..e312a2b 100644 --- a/Clustering2.0.ipynb +++ b/Clustering2.0.ipynb @@ -7,15 +7,14 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.475082Z", - "start_time": "2023-11-06T18:51:21.667023Z" + "end_time": "2023-11-06T23:41:01.990393Z", + "start_time": "2023-11-06T23:41:01.040992Z" } }, "outputs": [], "source": [ "import folium\n", "import pandas as pd\n", - "from sklearn.cluster import KMeans\n", "import utils" ] }, @@ -33,8 +32,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.495242Z", - "start_time": "2023-11-06T18:51:22.473334Z" + "end_time": "2023-11-06T23:41:02.000278Z", + "start_time": "2023-11-06T23:41:01.991024Z" } }, "id": "bb6f57eef695cf76" @@ -47,13 +46,13 @@ "# Create two centroids, one in the North End and one in the Financial District\n", "centroids = [[42.364506, -71.054733], [42.358894, -71.056742]]\n", "\n", - "northeastern_coordinate = \"-71.09033,42.33976;\"" + "northeastern_coordinate = \"-71.09033,42.33976\"" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.495492Z", - "start_time": "2023-11-06T18:51:22.483246Z" + "end_time": "2023-11-06T23:41:02.003984Z", + "start_time": "2023-11-06T23:41:02.000633Z" } }, "id": "fe8a5b9bc06cf2e0" @@ -84,8 +83,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.496051Z", - "start_time": "2023-11-06T18:51:22.487941Z" + "end_time": "2023-11-06T23:41:02.012929Z", + "start_time": "2023-11-06T23:41:02.005863Z" } }, "id": "dc434958d5e4a3a8" @@ -101,8 +100,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.504898Z", - "start_time": "2023-11-06T18:51:22.496235Z" + "end_time": "2023-11-06T23:41:02.016943Z", + "start_time": "2023-11-06T23:41:02.013583Z" } }, "id": "2873c16423fe3119" @@ -119,8 +118,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.522522Z", - "start_time": "2023-11-06T18:51:22.498651Z" + "end_time": "2023-11-06T23:41:02.021308Z", + "start_time": "2023-11-06T23:41:02.016226Z" } }, "id": "29f9155ef8d75fda" @@ -136,8 +135,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.548654Z", - "start_time": "2023-11-06T18:51:22.503769Z" + "end_time": "2023-11-06T23:41:02.021459Z", + "start_time": "2023-11-06T23:41:02.018449Z" } }, "id": "5b985f1a6df84a6c" @@ -161,8 +160,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:22.609058Z", - "start_time": "2023-11-06T18:51:22.509542Z" + "end_time": "2023-11-06T23:41:02.028731Z", + "start_time": "2023-11-06T23:41:02.023494Z" } }, "id": "a03a7c5dacebddd0" @@ -194,13 +193,14 @@ ], "source": [ "# Cluster and minimize the data\n", - "df, route_1_coordinates, route_2_coordinates = utils.cluster_and_minimize(TotalList, centroids, norm_centroids, 0.5)" + "df, route_1_coordinates, route_2_coordinates = utils.cluster_and_minimize(TotalList, centroids, norm_centroids,\n", + " northeastern_coordinate, 0.5)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:45.784650Z", - "start_time": "2023-11-06T18:51:22.513160Z" + "end_time": "2023-11-06T23:41:28.583623Z", + "start_time": "2023-11-06T23:41:02.026243Z" } }, "id": "a1a3e446594e8c20" @@ -217,14 +217,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "outputs": [ { "data": { - "text/plain": "", - "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -268,8 +268,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:45.869346Z", - "start_time": "2023-11-06T18:51:45.791672Z" + "end_time": "2023-11-06T23:41:34.488607Z", + "start_time": "2023-11-06T23:41:34.368207Z" } }, "id": "de9c2f7b892b1bee" @@ -282,8 +282,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T18:51:45.869482Z", - "start_time": "2023-11-06T18:51:45.865159Z" + "end_time": "2023-11-06T23:41:28.671570Z", + "start_time": "2023-11-06T23:41:28.667750Z" } }, "id": "b50ee3d4d6e09be9" diff --git a/ZestySalesman.ipynb b/ZestySalesman.ipynb index 7e74f46..f39f5bc 100644 --- a/ZestySalesman.ipynb +++ b/ZestySalesman.ipynb @@ -2,65 +2,224 @@ "cells": [ { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2023-11-06T01:23:16.767323Z", - "start_time": "2023-11-06T01:23:16.761053Z" + "end_time": "2023-11-07T00:07:54.400654Z", + "start_time": "2023-11-07T00:07:54.375821Z" } }, "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np\n", - "import requests\n", "import folium\n", "import utils" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, + "outputs": [], + "source": [ + "# Load the data\n", + "ListA = pd.read_csv('List A.csv')\n", + "ListB = pd.read_csv('List B.csv')\n", + "ListC = pd.read_csv('List C.csv')\n", + "ListD = pd.read_csv('List D.csv')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:07:54.430515Z", + "start_time": "2023-11-07T00:07:54.381537Z" + } + }, + "id": "73b780e762c9de37" + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "# Create two centroids, one in the North End and one in the Financial District\n", + "centroids = [[42.364506, -71.054733], [42.358894, -71.056742]]\n", + "\n", + "northeastern_coordinate = \"-71.09033,42.33976\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:07:54.431407Z", + "start_time": "2023-11-07T00:07:54.392677Z" + } + }, + "id": "65e208650eb43b4" + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [], + "source": [ + "# Combine the two lists and add a column to indicate the list\n", + "ListA['list'] = 'A'\n", + "ListB['list'] = 'B'\n", + "ListC['list'] = 'C'\n", + "ListD['list'] = 'D'\n", + "\n", + "TotalList = pd.concat([ListA, ListB, ListC])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:07:54.431829Z", + "start_time": "2023-11-07T00:07:54.397279Z" + } + }, + "id": "ffe4025e97a6c6b9" + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "# Remove all columns but name and gps\n", + "TotalList = TotalList[['name', 'gps', 'list']]" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:07:54.432180Z", + "start_time": "2023-11-07T00:07:54.401907Z" + } + }, + "id": "72657779b4484aae" + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "# Convert the gps column to a list of lists for k-means\n", + "TotalList['gps'] = TotalList['gps'].apply(lambda x: x.strip('[]').split(','))\n", + "TotalList['gps'] = TotalList['gps'].apply(lambda x: [float(i) for i in x])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:07:54.432238Z", + "start_time": "2023-11-07T00:07:54.405216Z" + } + }, + "id": "a157ffaec020a29a" + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [ + { + "data": { + "text/plain": " name gps list \\\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A \n1 Acorn St [42.3576234, -71.0688746] A \n2 Arlington's Great Meadows [42.4299758, -71.2038948] A \n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A \n4 BU Beach [42.3511927, -71.1060828] A \n.. ... ... ... \n28 The Clam Box [42.2763168, -71.0092883] C \n29 The Partisans [42.3478375, -71.0404428] C \n30 Union Oyster House [42.361288, -71.056908] C \n31 Victoria's Diner [42.3270498, -71.0667744] C \n32 Wollaston Beach [42.2806539, -71.0119933] C \n\n normalized_gps \n0 [0.7251058917247415, 0.8141430878559053] \n1 [0.6747391031099019, 0.778052752104061] \n2 [1.0, 0.41697235794883575] \n3 [0.6697144722136962, 0.7604611403245493] \n4 [0.6458298305822171, 0.6785480000609988] \n.. ... \n28 [0.30922451563130937, 0.9374025730216268] \n29 [0.6307464973238023, 0.8540870458656248] \n30 [0.6912133469876947, 0.8100546647415456] \n31 [0.5372951958288665, 0.7836692527743693] \n32 [0.32872198960456106, 0.9301686741961767] \n\n[131 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistnormalized_gps
0521 Commercial Street #525[42.3688272, -71.0553792]A[0.7251058917247415, 0.8141430878559053]
1Acorn St[42.3576234, -71.0688746]A[0.6747391031099019, 0.778052752104061]
2Arlington's Great Meadows[42.4299758, -71.2038948]A[1.0, 0.41697235794883575]
3Arthur Fiedler Statue[42.3565057, -71.0754527]A[0.6697144722136962, 0.7604611403245493]
4BU Beach[42.3511927, -71.1060828]A[0.6458298305822171, 0.6785480000609988]
...............
28The Clam Box[42.2763168, -71.0092883]C[0.30922451563130937, 0.9374025730216268]
29The Partisans[42.3478375, -71.0404428]C[0.6307464973238023, 0.8540870458656248]
30Union Oyster House[42.361288, -71.056908]C[0.6912133469876947, 0.8100546647415456]
31Victoria's Diner[42.3270498, -71.0667744]C[0.5372951958288665, 0.7836692527743693]
32Wollaston Beach[42.2806539, -71.0119933]C[0.32872198960456106, 0.9301686741961767]
\n

131 rows × 4 columns

\n
" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create a new column with normalized gps coordinates and centroids\n", + "TotalList['normalized_gps'], norm_centroids = utils.normalize_gps(TotalList['gps'].values.tolist(), centroids)\n", + "display(TotalList)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:07:54.432731Z", + "start_time": "2023-11-07T00:07:54.412279Z" + } + }, + "id": "a03ebde91b87fa3b" + }, + { + "cell_type": "markdown", + "source": [ + "# Cluster and Minimize" + ], + "metadata": { + "collapsed": false + }, + "id": "4bd41be9aca5094b" + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/garrinshieh/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", + " super()._check_params_vs_input(X, default_n_init=10)\n", + "/Users/garrinshieh/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: RuntimeWarning: Explicit initial center position passed: performing only one init in KMeans instead of n_init=10.\n", + " super()._check_params_vs_input(X, default_n_init=10)\n" + ] + } + ], + "source": [ + "# Cluster and minimize the data\n", + "df, route_1_coordinates, route_2_coordinates = utils.cluster_and_minimize(TotalList, centroids, norm_centroids,\n", + " northeastern_coordinate, 0.5)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-07T00:08:20.577006Z", + "start_time": "2023-11-07T00:07:54.416349Z" + } + }, + "id": "ee9b3c1ecb360976" + }, + { + "cell_type": "code", + "execution_count": 24, "outputs": [], "source": [ "# Create a JSON request for the API\n", "# This is the data we want to get from the API\n", - "northeastern_coordinate = \"-71.09033,42.33976;\"\n", - "route_1 = '-71.2038948,42.4299758;-71.1060828,42.3511927;-71.0969274,42.3446263;-71.130887,42.35304;-71.1459593,42.3501823;-71.1460435,42.3495825;-71.1217152,42.3426377;-71.1258765,42.331864;-71.1095021,42.3364675;-71.133103,42.3890049;-71.1123834,42.3360385;-71.2273649,42.3145041;-71.0992038,42.3306454;-71.0990577,42.3381442;-71.0949218,42.3419564;-71.0942861,42.3413301;-71.0913583,42.3490205;-71.1000217,42.3323776;-71.1241295,42.3518397;-71.1618052,42.3245965;-71.1625829,42.340795;-71.167854,42.4107892;-71.155555,42.3317473;-71.1227278,42.3965778;-71.1126695,42.3836229;-71.119149,42.3884;-71.1427371,42.3433772;-71.1438455,42.3569102;-71.1313443,42.3525708;-71.1284677,42.3631904;-71.119301,42.388547;-71.097883,42.381008;-71.1107166,42.3741209;-71.1194344,42.3754427;-71.1013044,42.3627462;-71.1108423,42.3838224;-71.1026937,42.3820702;-71.1189467,42.373465;-71.1208817,42.3732344;-71.0968274,42.3799095;-71.094048,42.339381;-71.1854722,42.3621177;-71.1146697,42.3782386;-71.0935443,42.3817274;-71.0906355,42.3616095;-71.1161887,42.3766442;-71.0962734,42.3627993;-71.1155576,42.3784629;-71.0949101,42.3797674;-71.1087411,42.3640287;-71.09476,42.37736;-71.1014951,42.3614115;-71.1024769,42.3822934;-71.1011111,42.3636597;-71.0898829,42.3463992;-71.0983169,42.3319001'\n", - "route_2 = '-71.0553792,42.3688272;-71.0688746,42.3576234;-71.0754527,42.3565057;-71.0620802,42.3579151;-71.0586014,42.357357;-71.0572023,42.3587627;-71.0556268,42.36521;-71.0720926,42.3489004;-71.067859,42.3500079;-71.0632036,42.3556154;-71.0620134,42.3248471;-71.0851891,42.3500031;-71.066414,42.354296;-71.0834061,42.341987;-71.0569649,42.3604952;-71.0498714,42.3256817;-71.0908104,42.329969;-71.0616035,42.3537983;-71.0359433,42.3485465;-71.0638101,42.3587772;-71.0555003,42.3640137;-71.0712561,42.3407613;-71.0561781,42.3668968;-71.0664019,42.3554589;-71.059228,42.359349;-71.0668408,42.3524116;-71.0872846,42.2961434;-71.062146,42.366198;-71.0651214,42.3553972;-71.0596124,42.3509517;-71.0359354,42.3478381;-71.061757,42.3691906;-71.0609962,42.3803747;-71.0516339,42.3609921;-71.0809932,42.3675275;-71.0545357,42.3597994;-71.0342146,42.316274;-71.0756902,42.3695046;-71.0678704,42.3701829;-71.0656594,42.3718401;-71.0611749,42.3551807;-71.0554239,42.3739796;-71.0631664,42.3741694;-71.056823,42.361531;-71.0632852,42.2857047;-71.0637877,42.2845163;-71.0496839,42.3519736;-71.0454645,42.3162356;-71.0336324,42.3441918;-71.0487437,42.3508756;-71.0512911,42.3521821;-71.0013637,42.2075316;-71.0607764,42.3763541;-71.0374911,42.316031;-71.0125206,42.3378699;-71.0672898,42.3523158;-71.02832,42.2576602;-71.0502126,42.3516479;-71.0331956,42.3639107;-71.0432778,42.3528151;-71.0035279,42.2392354;-71.0470633,42.3537343;-71.0352443,42.3291218;-71.0240951,42.2743442;-71.0234949,42.3358743;-70.985881,42.420226;-71.0005483,42.2454086;-71.0096371,42.3367603;-71.0447796,42.3509709;-71.0092883,42.2763168;-71.0404428,42.3478375;-71.056908,42.361288;-71.0667744,42.3270498;-71.0119933,42.2806539'" + "route_1 = utils.list_to_string(route_1_coordinates)\n", + "route_2 = utils.list_to_string(route_2_coordinates)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:16.774149Z", - "start_time": "2023-11-06T01:23:16.765417Z" + "end_time": "2023-11-07T00:08:20.591584Z", + "start_time": "2023-11-07T00:08:20.577492Z" } }, "id": "aa618161182b5b07" }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 25, "outputs": [], "source": [ "# Create a dataframe from the JSON\n", - "df1 = utils.create_json_df(northeastern_coordinate + route_1)\n", - "df2 = utils.create_json_df(northeastern_coordinate + route_2)" + "df1 = utils.create_json_df(route_1, utils.list_to_string([centroids[0]]), northeastern_coordinate)\n", + "df2 = utils.create_json_df(route_2, utils.list_to_string([centroids[1]]), northeastern_coordinate)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:18.694403Z", - "start_time": "2023-11-06T01:23:16.768656Z" + "end_time": "2023-11-07T00:08:22.409355Z", + "start_time": "2023-11-07T00:08:20.579890Z" } }, "id": "32c485788eedd94" }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 26, "outputs": [], "source": [ "# Add columns for the route number\n", @@ -73,20 +232,20 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:18.698699Z", - "start_time": "2023-11-06T01:23:18.696008Z" + "end_time": "2023-11-07T00:08:22.425179Z", + "start_time": "2023-11-07T00:08:22.412707Z" } }, "id": "49dba1f17ca8337e" }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 27, "outputs": [ { "data": { - "text/plain": " waypoint_index trips_index \\\n0 0 0 \n17 1 0 \n22 2 0 \n73 3 0 \n11 4 0 \n.. ... ... \n3 70 0 \n12 71 0 \n9 72 0 \n8 73 0 \n14 74 0 \n\n hint distance \\\n0 DoUhgBeFIYCcAAAAJgAAAAAAAAARAAAAm0CKQdkZiEAAAA... 0.236958 \n17 5tYhgJHXIYAIAAAArQAAADwAAABCAQAAaRlbQD16mUGpAc... 17.374491 \n22 XAAigHIAIoBKAAAASwAAAFUAAABDAQAARGUEQURlBEG2ZR... 11.054154 \n73 CdQhgB0OA4AYAAAAHgAAADkAAAAAAAAALdMlQdSMQ0Fd0r... 10.970598 \n11 43YhgPN2IYA1AAAAJAAAAAAAAAA5AAAAEha0QWgpbEEAAA... 18.896385 \n.. ... ... \n3 jt4hgJLeIYA7AAAALQAAAAAAAAAAAAAA4gPGQasVlUEAAA... 4.709088 \n12 0OEhgPvhIYADAAAABgAAAA8AAAA0AAAA2lq-PipQFD-Y-N... 2.009578 \n9 m8shgJ7LIYAOAAAAXgEAAAAAAAAAAAAAOFW-QDE5G0IAAA... 1.716409 \n8 YQ0DgBTPIYDvAAAAdAAAAAAAAAAAAAAAsgLVQbMxTUEAAA... 4.830022 \n14 lhgDgIkYA4BkAAAAIgEAAFoBAAAaAAAAJyAzQWNrAEI8Ax... 7.134933 \n\n name location lat lon \\\n0 Northeastern (Inbound) [-71.090331, 42.339762] -71.090331 42.339762 \n17 Dudley Street [-71.090904, 42.329829] -71.090904 42.329829 \n22 [-71.071196, 42.34085] -71.071196 42.340850 \n73 [-71.066844, 42.327134] -71.066844 42.327134 \n11 Lucy Street [-71.06221, 42.324934] -71.062210 42.324934 \n.. ... ... ... ... \n3 [-71.075414, 42.356537] -71.075414 42.356537 \n12 [-71.085166, 42.349997] -71.085166 42.349997 \n9 Piedmont Street [-71.067854, 42.349993] -71.067854 42.349993 \n8 [-71.072038, 42.348915] -71.072038 42.348915 \n14 [-71.083465, 42.34194] -71.083465 42.341940 \n\n route \n0 2 \n17 2 \n22 2 \n73 2 \n11 2 \n.. ... \n3 2 \n12 2 \n9 2 \n8 2 \n14 2 \n\n[75 rows x 9 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
waypoint_indextrips_indexhintdistancenamelocationlatlonroute
000DoUhgBeFIYCcAAAAJgAAAAAAAAARAAAAm0CKQdkZiEAAAA...0.236958Northeastern (Inbound)[-71.090331, 42.339762]-71.09033142.3397622
17105tYhgJHXIYAIAAAArQAAADwAAABCAQAAaRlbQD16mUGpAc...17.374491Dudley Street[-71.090904, 42.329829]-71.09090442.3298292
2220XAAigHIAIoBKAAAASwAAAFUAAABDAQAARGUEQURlBEG2ZR...11.054154[-71.071196, 42.34085]-71.07119642.3408502
7330CdQhgB0OA4AYAAAAHgAAADkAAAAAAAAALdMlQdSMQ0Fd0r...10.970598[-71.066844, 42.327134]-71.06684442.3271342
114043YhgPN2IYA1AAAAJAAAAAAAAAA5AAAAEha0QWgpbEEAAA...18.896385Lucy Street[-71.06221, 42.324934]-71.06221042.3249342
..............................
3700jt4hgJLeIYA7AAAALQAAAAAAAAAAAAAA4gPGQasVlUEAAA...4.709088[-71.075414, 42.356537]-71.07541442.3565372
127100OEhgPvhIYADAAAABgAAAA8AAAA0AAAA2lq-PipQFD-Y-N...2.009578[-71.085166, 42.349997]-71.08516642.3499972
9720m8shgJ7LIYAOAAAAXgEAAAAAAAAAAAAAOFW-QDE5G0IAAA...1.716409Piedmont Street[-71.067854, 42.349993]-71.06785442.3499932
8730YQ0DgBTPIYDvAAAAdAAAAAAAAAAAAAAAsgLVQbMxTUEAAA...4.830022[-71.072038, 42.348915]-71.07203842.3489152
14740lhgDgIkYA4BkAAAAIgEAAFoBAAAaAAAAJyAzQWNrAEI8Ax...7.134933[-71.083465, 42.34194]-71.08346542.3419402
\n

75 rows × 9 columns

\n
" + "text/plain": " waypoint_index trips_index \\\n0 0 0 \n13 1 0 \n68 2 0 \n40 3 0 \n22 4 0 \n.. ... ... \n14 67 0 \n11 68 0 \n69 69 0 \n19 70 0 \n71 71 0 \n\n hint distance \\\n0 dMQAgDTDAIAuAAAAEgAAAAAAAAAAAAAAiaamQKk960AAAA... 1.113855 \n13 oLwsgCS9LIBHAAAA2AAAAAAAAABgAQAAkQwAQdo1v0EAAA... 2.532529 \n68 CL0sgBS9LIAhAAAAagAAAAAAAAAAAAAAfoF0QPCwOkEAAA... 7.608103 \n40 YbwsgEO9LIBbAAAAEgAAAAAAAAAPAAAA5ua1QcswjkAAAA... 0.468602 \n22 UkAEgFxABIB8AAAAAAAAAAAAAAAYAgAAVjBdQQAAAAAAAA... 6.397300 \n.. ... ... \n14 -mUsgHZmLIATAAAAYgEAAL0AAADpAAAALf8HQHZ8HUK-9a... 55.355565 \n11 43YhgPN2IYA1AAAAJAAAAAAAAAA5AAAAEha0QWgpbEEAAA... 18.896385 \n69 CdQhgB0OA4AYAAAAHgAAADkAAAAAAAAALdMlQdSMQ0Fd0r... 10.970598 \n19 XAAigHIAIoBKAAAASwAAAFUAAABDAQAARGUEQURlBEG2ZR... 11.054154 \n71 DoUhgBeFIYCcAAAAJgAAAAAAAAARAAAAm0CKQdkZiEAAAA... 0.236958 \n\n name location lat lon \\\n0 State Street [-71.056741, 42.358884] -71.056741 42.358884 \n13 [-71.056995, 42.36049] -71.056995 42.360490 \n68 [-71.056994, 42.361263] -71.056994 42.361263 \n40 Creek Square [-71.056819, 42.361534] -71.056819 42.361534 \n22 [-71.059255, 42.359295] -71.059255 42.359295 \n.. ... ... ... ... \n14 [-71.049204, 42.325624] -71.049204 42.325624 \n11 Lucy Street [-71.06221, 42.324934] -71.062210 42.324934 \n69 [-71.066844, 42.327134] -71.066844 42.327134 \n19 [-71.071196, 42.34085] -71.071196 42.340850 \n71 Northeastern (Inbound) [-71.090331, 42.339762] -71.090331 42.339762 \n\n route \n0 2 \n13 2 \n68 2 \n40 2 \n22 2 \n.. ... \n14 2 \n11 2 \n69 2 \n19 2 \n71 2 \n\n[72 rows x 9 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
waypoint_indextrips_indexhintdistancenamelocationlatlonroute
000dMQAgDTDAIAuAAAAEgAAAAAAAAAAAAAAiaamQKk960AAAA...1.113855State Street[-71.056741, 42.358884]-71.05674142.3588842
1310oLwsgCS9LIBHAAAA2AAAAAAAAABgAQAAkQwAQdo1v0EAAA...2.532529[-71.056995, 42.36049]-71.05699542.3604902
6820CL0sgBS9LIAhAAAAagAAAAAAAAAAAAAAfoF0QPCwOkEAAA...7.608103[-71.056994, 42.361263]-71.05699442.3612632
4030YbwsgEO9LIBbAAAAEgAAAAAAAAAPAAAA5ua1QcswjkAAAA...0.468602Creek Square[-71.056819, 42.361534]-71.05681942.3615342
2240UkAEgFxABIB8AAAAAAAAAAAAAAAYAgAAVjBdQQAAAAAAAA...6.397300[-71.059255, 42.359295]-71.05925542.3592952
..............................
14670-mUsgHZmLIATAAAAYgEAAL0AAADpAAAALf8HQHZ8HUK-9a...55.355565[-71.049204, 42.325624]-71.04920442.3256242
1168043YhgPN2IYA1AAAAJAAAAAAAAAA5AAAAEha0QWgpbEEAAA...18.896385Lucy Street[-71.06221, 42.324934]-71.06221042.3249342
69690CdQhgB0OA4AYAAAAHgAAADkAAAAAAAAALdMlQdSMQ0Fd0r...10.970598[-71.066844, 42.327134]-71.06684442.3271342
19700XAAigHIAIoBKAAAASwAAAFUAAABDAQAARGUEQURlBEG2ZR...11.054154[-71.071196, 42.34085]-71.07119642.3408502
71710DoUhgBeFIYCcAAAAJgAAAAAAAAARAAAAm0CKQdkZiEAAAA...0.236958Northeastern (Inbound)[-71.090331, 42.339762]-71.09033142.3397622
\n

72 rows × 9 columns

\n
" }, "metadata": {}, "output_type": "display_data" @@ -98,22 +257,22 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:18.708601Z", - "start_time": "2023-11-06T01:23:18.705324Z" + "end_time": "2023-11-07T00:08:22.440853Z", + "start_time": "2023-11-07T00:08:22.424158Z" } }, "id": "f231d9a35358988c" }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 28, "outputs": [ { "data": { - "text/plain": "", - "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" }, - "execution_count": 15, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -127,9 +286,10 @@ "\n", "for route in df['route'].unique():\n", " df_route = df[df['route'] == route]\n", - " folium.PolyLine(df_route[['lon', 'lat']].values.tolist(), color=colors[route-1]).add_to(m)\n", + " folium.PolyLine(df_route[['lon', 'lat']].values.tolist(), color=colors[route - 1]).add_to(m)\n", " for i in range(len(df_route)):\n", - " folium.CircleMarker(df_route[['lon', 'lat']].iloc[i].values.tolist(), radius=3, color=colors[route-1]).add_to(m)\n", + " folium.CircleMarker(df_route[['lon', 'lat']].iloc[i].values.tolist(), radius=3, color=colors[route - 1]).add_to(\n", + " m)\n", "\n", "# Display the map\n", "m" @@ -137,101 +297,81 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:18.781393Z", - "start_time": "2023-11-06T01:23:18.709803Z" + "end_time": "2023-11-07T00:08:22.513542Z", + "start_time": "2023-11-07T00:08:22.430363Z" } }, "id": "80fd847da2833913" }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The trip will take 6.789722222222222 hours\n", - "The trip will take 9.955833333333333 hours\n" + "Route 1 has 61 waypoints\n", + "Route 2 has 70 waypoints\n" ] } ], "source": [ - "trip_hrs_1 = utils.get_trip_time(route_1)\n", - "print(\"The trip will take {} hours\".format(trip_hrs_1))\n", - "trip_hrs_2 = utils.get_trip_time(route_2)\n", - "print(\"The trip will take {} hours\".format(trip_hrs_2))" + "# Get the number of waypoints for each route\n", + "route_1_waypoints = len(route_1_coordinates)\n", + "route_2_waypoints = len(route_2_coordinates)\n", + "print(\"Route 1 has {} waypoints\".format(route_1_waypoints))\n", + "print(\"Route 2 has {} waypoints\".format(route_2_waypoints))" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:20.448487Z", - "start_time": "2023-11-06T01:23:18.761079Z" + "end_time": "2023-11-07T00:08:22.513689Z", + "start_time": "2023-11-07T00:08:22.488854Z" } }, - "id": "a3ec09dfb5cbb5b3" + "id": "f53c97acec1c2fc4" }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 30, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " waypoint_index trips_index \\\n", - "20 20 0 \n", - "\n", - " hint distance name \\\n", - "20 2M4pgNrOKYCCAQAADAAAAAAAAAAAAAAALKILQ27Ah0AAAA... 0.0 Echo Bridge \n", - "\n", - " location lat lon route \n", - "20 [-71.227365, 42.314504] -71.227365 42.314504 1 \n" + "The trip will take 10.36111111111111 hours\n", + "The trip will take 10.586666666666666 hours\n" ] } ], "source": [ - "# Find the westmost point in Route 1\n", - "df1 = df[df['route'] == 1]\n", - "west = df1[df1['lon'] == df1['lon'].min()]\n", - "print(west)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-11-06T01:23:20.455714Z", - "start_time": "2023-11-06T01:23:20.453647Z" - } - }, - "id": "be94c3708a1bd250" - }, - { - "cell_type": "code", - "execution_count": 18, - "outputs": [], - "source": [ - "# Remove the westmost point from Route 1\n", - "df = df.drop(west.index)" + "trip_hrs_1 = utils.get_trip_time(route_1, route_1_waypoints, utils.list_to_string([centroids[0]]),\n", + " northeastern_coordinate)\n", + "print(\"The trip will take {} hours\".format(trip_hrs_1))\n", + "trip_hrs_2 = utils.get_trip_time(route_2, route_2_waypoints, utils.list_to_string([centroids[1]]),\n", + " northeastern_coordinate)\n", + "print(\"The trip will take {} hours\".format(trip_hrs_2))" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:20.460791Z", - "start_time": "2023-11-06T01:23:20.456599Z" + "end_time": "2023-11-07T00:08:24.460727Z", + "start_time": "2023-11-07T00:08:22.491469Z" } }, - "id": "21fef07e5b2a03a0" + "id": "a3ec09dfb5cbb5b3" }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 30, "outputs": [], "source": [], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:23:20.460900Z", - "start_time": "2023-11-06T01:23:20.458522Z" + "end_time": "2023-11-07T00:08:24.471189Z", + "start_time": "2023-11-07T00:08:24.460431Z" } }, "id": "eafe5678c44e94fd" diff --git a/utils.py b/utils.py index 7f6a408..e0cc295 100644 --- a/utils.py +++ b/utils.py @@ -5,7 +5,7 @@ from sklearn.cluster import KMeans # Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes -def cluster_and_minimize(df, centroids, norm_centroids, time_diff): +def cluster_and_minimize(df, centroids, norm_centroids, end, time_diff): # Cluster the coordinates kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) @@ -16,8 +16,8 @@ def cluster_and_minimize(df, centroids, norm_centroids, time_diff): df['cluster'] = kmeans.labels_ # Create centroid strings - centroid_1 = list_to_string([centroids[0]]) + ';' - centroid_2 = list_to_string([centroids[1]]) + ';' + centroid_1 = list_to_string([centroids[0]]) + centroid_2 = list_to_string([centroids[1]]) # Return the list of locations in each cluster route_1 = df[df['cluster'] == 0] @@ -29,14 +29,14 @@ def cluster_and_minimize(df, centroids, norm_centroids, time_diff): route_2_str = list_to_string(route_2['gps'].values.tolist()) # Get the trip time for each route - trip_hrs_1 = get_trip_time(centroid_1 + route_1_str, route_1_stops) - trip_hrs_2 = get_trip_time(centroid_2 + route_2_str, route_2_stops) + trip_hrs_1 = get_trip_time(route_1_str, route_1_stops, centroid_1, end) + trip_hrs_2 = get_trip_time(route_2_str, route_2_stops, centroid_2, end) # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference if abs(trip_hrs_1 - trip_hrs_2) > time_diff: route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), route_2['gps'].values.tolist(), - centroid_1, centroid_2, time_diff) + centroid_1, centroid_2, end, time_diff) else: route_1_coordinates = route_1['gps'].values.tolist() route_2_coordinates = route_2['gps'].values.tolist() @@ -48,6 +48,49 @@ def cluster_and_minimize(df, centroids, norm_centroids, time_diff): return df, route_1_coordinates, route_2_coordinates +def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, end, + time_diff): + """ + Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a time difference that is less than the time difference + """ + # Find the difference in time between the two routes + route_1_time = get_trip_time(list_to_string(route_1_coordinates), + len(route_1_coordinates), route_1_start, end) + route_2_time = get_trip_time(list_to_string(route_2_coordinates), + len(route_2_coordinates), route_2_start, end) + route_time_diff = abs(route_1_time - route_2_time) + + # If the difference in time is greater than the time difference, move the closest coordinate from the longer route to the shorter route + if route_time_diff > time_diff: + # Find which route is longer + if len(route_1_coordinates) > len(route_2_coordinates): + longer_route = route_1_coordinates + shorter_route = route_2_coordinates + + # Move the closest coordinate from the longer route to the shorter route + closest_coordinate = move_coordinate(longer_route, shorter_route) + longer_route.remove(closest_coordinate) + shorter_route.append(closest_coordinate) + + # Recursively call the function + return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, end, time_diff) + + else: + longer_route = route_2_coordinates + shorter_route = route_1_coordinates + + # Move the closest coordinate from the longer route to the shorter route + closest_coordinate = move_coordinate(longer_route, shorter_route) + longer_route.remove(closest_coordinate) + shorter_route.append(closest_coordinate) + + # Recursively call the function + return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, end, time_diff) + + # If the difference in time is less than the time difference, return the routes + return route_1_coordinates, route_2_coordinates + + def list_to_string(list_of_lists): """ Takes a list of lists of coordinates and returns a string of the coordinates @@ -56,12 +99,12 @@ def list_to_string(list_of_lists): for i in list_of_lists: string += str(i[1]) + ',' + str(i[0]) + ';' - string = string[:-1] return string -def create_json_df(coordinate_string): - coordinates = requests.get('http://acetyl.net:5000/trip/v1/bike/' + coordinate_string) +def create_json_df(coordinate_string, start, end): + coordinates = requests.get( + 'http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end + '?roundtrip=false&source=first&destination=last') coordinates = coordinates.json() # Create a dataframe from the JSON @@ -79,11 +122,12 @@ def create_json_df(coordinate_string): return df -def get_trip_time(coordinate_string, num_waypoints): +def get_trip_time(coordinate_string, num_waypoints, start, end): """ Takes a list of lists of coordinates and returns the time of the trip in hours """ - coordinates = requests.get('http://acetyl.net:5000/trip/v1/bike/' + coordinate_string) + coordinates = requests.get( + 'http://acetyl.net:5000/trip/v1/bike/' + start + coordinate_string + end + '?roundtrip=false&source=first&destination=last') coordinates = coordinates.json() travel_time_seconds = int(coordinates['trips'][0]['duration']) @@ -130,49 +174,6 @@ def __min_max_normalize__(value, min_value, max_value): return (value - min_value) / (max_value - min_value) -def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start, - time_diff): - """ - Takes two routes and a time difference and returns a route that is the same length as the shorter route but has a time difference that is less than the time difference - """ - # Find the difference in time between the two routes - route_1_time = get_trip_time(route_1_start + list_to_string(route_1_coordinates), - len(route_1_coordinates)) - route_2_time = get_trip_time(route_2_start + list_to_string(route_2_coordinates), - len(route_2_coordinates)) - route_time_diff = abs(route_1_time - route_2_time) - - # If the difference in time is greater than the time difference, move the closest coordinate from the longer route to the shorter route - if route_time_diff > time_diff: - # Find which route is longer - if len(route_1_coordinates) > len(route_2_coordinates): - longer_route = route_1_coordinates - shorter_route = route_2_coordinates - - # Move the closest coordinate from the longer route to the shorter route - closest_coordinate = move_coordinate(longer_route, shorter_route) - longer_route.remove(closest_coordinate) - shorter_route.append(closest_coordinate) - - # Recursively call the function - return minimize_route_time_diff(longer_route, shorter_route, route_1_start, route_2_start, time_diff) - - else: - longer_route = route_2_coordinates - shorter_route = route_1_coordinates - - # Move the closest coordinate from the longer route to the shorter route - closest_coordinate = move_coordinate(longer_route, shorter_route) - longer_route.remove(closest_coordinate) - shorter_route.append(closest_coordinate) - - # Recursively call the function - return minimize_route_time_diff(shorter_route, longer_route, route_1_start, route_2_start, time_diff) - - # If the difference in time is less than the time difference, return the routes - return route_1_coordinates, route_2_coordinates - - # Given two clusters and their respective lists of coordinates, move one coordinate from the larger centroid to the smaller centroid def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates): # Calculate the centroid of the smaller cluster -- cgit v1.2.3