From 8a9c5cc9cd62425d7f6d4571429f1c7a0e85cf51 Mon Sep 17 00:00:00 2001 From: itsGarrin Date: Mon, 6 Nov 2023 13:52:30 -0500 Subject: Added route minimization for 2 routes. --- Clustering.ipynb | 658 +++++++++++++++++++++++++++++++--------------------- Clustering2.0.ipynb | 313 +++++++++++++++++++++++++ utils.py | 161 ++++++++++++- 3 files changed, 864 insertions(+), 268 deletions(-) create mode 100644 Clustering2.0.ipynb diff --git a/Clustering.ipynb b/Clustering.ipynb index e658dcf..a84f8e7 100644 --- a/Clustering.ipynb +++ b/Clustering.ipynb @@ -2,31 +2,26 @@ "cells": [ { "cell_type": "code", - "execution_count": 78, + "execution_count": 1, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2023-11-06T01:14:10.536728Z", - "start_time": "2023-11-06T01:14:10.525881Z" + "end_time": "2023-11-06T17:13:47.429577Z", + "start_time": "2023-11-06T17:13:46.508767Z" } }, "outputs": [], "source": [ "import folium\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", "import pandas as pd\n", - "from scipy.cluster.hierarchy import dendrogram, linkage\n", - "from scipy.cluster.hierarchy import fcluster\n", - "from sklearn.metrics import silhouette_score\n", "from sklearn.cluster import KMeans\n", "import utils" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 2, "outputs": [], "source": [ "# Load the data\n", @@ -38,15 +33,34 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:10.821794Z", - "start_time": "2023-11-06T01:14:10.808507Z" + "end_time": "2023-11-06T17:13:47.436966Z", + "start_time": "2023-11-06T17:13:47.428637Z" } }, "id": "bb6f57eef695cf76" }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 3, + "outputs": [], + "source": [ + "# Create two centroids, one in the North End and one in the Financial District\n", + "centroids = [[42.364506, -71.054733], [42.358894, -71.056742]]\n", + "\n", + "northeastern_coordinate = \"-71.09033,42.33976;\"" + 
], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:13:47.446315Z", + "start_time": "2023-11-06T17:13:47.437257Z" + } + }, + "id": "fe8a5b9bc06cf2e0" + }, + { + "cell_type": "code", + "execution_count": 4, "outputs": [ { "data": { @@ -70,15 +84,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:11.326041Z", - "start_time": "2023-11-06T01:14:11.322857Z" + "end_time": "2023-11-06T17:13:47.449096Z", + "start_time": "2023-11-06T17:13:47.439983Z" } }, "id": "dc434958d5e4a3a8" }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 5, "outputs": [], "source": [ "# Remove all columns but name and gps\n", @@ -87,15 +101,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:12.014736Z", - "start_time": "2023-11-06T01:14:12.007694Z" + "end_time": "2023-11-06T17:13:47.455551Z", + "start_time": "2023-11-06T17:13:47.449946Z" } }, "id": "2873c16423fe3119" }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 6, "outputs": [], "source": [ "# Convert the gps column to a list of lists for k-means\n", @@ -105,117 +119,53 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:12.457221Z", - "start_time": "2023-11-06T01:14:12.448967Z" + "end_time": "2023-11-06T17:13:47.455655Z", + "start_time": "2023-11-06T17:13:47.452798Z" } }, "id": "29f9155ef8d75fda" }, { "cell_type": "code", - "execution_count": 83, - "outputs": [ - { - "data": { - "text/plain": " name gps list\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A\n1 Acorn St [42.3576234, -71.0688746] A\n2 Arlington's Great Meadows [42.4299758, -71.2038948] A\n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A\n4 BU Beach [42.3511927, -71.1060828] A\n.. ... ... 
...\n28 The Clam Box [42.2763168, -71.0092883] C\n29 The Partisans [42.3478375, -71.0404428] C\n30 Union Oyster House [42.361288, -71.056908] C\n31 Victoria's Diner [42.3270498, -71.0667744] C\n32 Wollaston Beach [42.2806539, -71.0119933] C\n\n[131 rows x 3 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslist
0521 Commercial Street #525[42.3688272, -71.0553792]A
1Acorn St[42.3576234, -71.0688746]A
2Arlington's Great Meadows[42.4299758, -71.2038948]A
3Arthur Fiedler Statue[42.3565057, -71.0754527]A
4BU Beach[42.3511927, -71.1060828]A
............
28The Clam Box[42.2763168, -71.0092883]C
29The Partisans[42.3478375, -71.0404428]C
30Union Oyster House[42.361288, -71.056908]C
31Victoria's Diner[42.3270498, -71.0667744]C
32Wollaston Beach[42.2806539, -71.0119933]C
\n

131 rows × 3 columns

\n
" - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "execution_count": 7, + "outputs": [], "source": [ - "display(TotalList)" + "# Create a new column with normalized gps coordinates and centroids\n", + "TotalList['normalized_gps'], norm_centroids = utils.normalize_gps(TotalList['gps'].values.tolist(), centroids)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:13.043659Z", - "start_time": "2023-11-06T01:14:13.030154Z" + "end_time": "2023-11-06T17:13:47.472084Z", + "start_time": "2023-11-06T17:13:47.454865Z" } }, - "id": "a03a7c5dacebddd0" - }, - { - "cell_type": "markdown", - "source": [ - "# Dendrogram" - ], - "metadata": { - "collapsed": false - }, - "id": "72e85d219be8c635" + "id": "5b985f1a6df84a6c" }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 8, "outputs": [ { "data": { - "text/plain": "
", - "image/png": "" + "text/plain": " name gps list \\\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A \n1 Acorn St [42.3576234, -71.0688746] A \n2 Arlington's Great Meadows [42.4299758, -71.2038948] A \n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A \n4 BU Beach [42.3511927, -71.1060828] A \n.. ... ... ... \n28 The Clam Box [42.2763168, -71.0092883] C \n29 The Partisans [42.3478375, -71.0404428] C \n30 Union Oyster House [42.361288, -71.056908] C \n31 Victoria's Diner [42.3270498, -71.0667744] C \n32 Wollaston Beach [42.2806539, -71.0119933] C \n\n normalized_gps \n0 [0.7251058917247415, 0.8141430878559053] \n1 [0.6747391031099019, 0.778052752104061] \n2 [1.0, 0.41697235794883575] \n3 [0.6697144722136962, 0.7604611403245493] \n4 [0.6458298305822171, 0.6785480000609988] \n.. ... \n28 [0.30922451563130937, 0.9374025730216268] \n29 [0.6307464973238023, 0.8540870458656248] \n30 [0.6912133469876947, 0.8100546647415456] \n31 [0.5372951958288665, 0.7836692527743693] \n32 [0.32872198960456106, 0.9301686741961767] \n\n[131 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistnormalized_gps
0521 Commercial Street #525[42.3688272, -71.0553792]A[0.7251058917247415, 0.8141430878559053]
1Acorn St[42.3576234, -71.0688746]A[0.6747391031099019, 0.778052752104061]
2Arlington's Great Meadows[42.4299758, -71.2038948]A[1.0, 0.41697235794883575]
3Arthur Fiedler Statue[42.3565057, -71.0754527]A[0.6697144722136962, 0.7604611403245493]
4BU Beach[42.3511927, -71.1060828]A[0.6458298305822171, 0.6785480000609988]
...............
28The Clam Box[42.2763168, -71.0092883]C[0.30922451563130937, 0.9374025730216268]
29The Partisans[42.3478375, -71.0404428]C[0.6307464973238023, 0.8540870458656248]
30Union Oyster House[42.361288, -71.056908]C[0.6912133469876947, 0.8100546647415456]
31Victoria's Diner[42.3270498, -71.0667744]C[0.5372951958288665, 0.7836692527743693]
32Wollaston Beach[42.2806539, -71.0119933]C[0.32872198960456106, 0.9301686741961767]
\n

131 rows × 4 columns

\n
" }, "metadata": {}, "output_type": "display_data" } ], "source": [ - "# Create the linkage matrix\n", - "linkage_matrix = linkage(TotalList['gps'].values.tolist(), 'ward')\n", - "\n", - "# Plot the dendrogram\n", - "plt.figure(figsize=(25, 10))\n", - "plt.title('Hierarchical Clustering Dendrogram')\n", - "plt.xlabel('sample index')\n", - "plt.ylabel('distance')\n", - "dendrogram(linkage_matrix, leaf_rotation=90., leaf_font_size=8.)\n", - "plt.show()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-11-06T01:14:14.540031Z", - "start_time": "2023-11-06T01:14:14.088884Z" - } - }, - "id": "9e215df3a350e3cf" - }, - { - "cell_type": "code", - "execution_count": 85, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of clusters: 7\n", - "Silhouette score: 0.42876627286716495\n" - ] - } - ], - "source": [ - "# Set the threshold distance\n", - "threshold_distance = 0.15\n", - "\n", - "# Cut the dendrogram to get cluster labels\n", - "cluster_labels_hc = fcluster(linkage_matrix, t=threshold_distance, criterion='distance')\n", - "\n", - "# Now, you have the number of clusters determined by the dendrogram\n", - "num_clusters = len(np.unique(cluster_labels_hc))\n", - "print(\"Number of clusters:\", num_clusters)\n", - "\n", - "# Calculate the silhouette score to evaluate the clustering\n", - "silhouette_avg = silhouette_score(TotalList['gps'].values.tolist(), cluster_labels_hc)\n", - "print(\"Silhouette score:\", silhouette_avg)" + "display(TotalList)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:14.556841Z", - "start_time": "2023-11-06T01:14:14.545269Z" + "end_time": "2023-11-06T17:13:47.531619Z", + "start_time": "2023-11-06T17:13:47.459977Z" } }, - "id": "2f52d83746e670d" + "id": "a03a7c5dacebddd0" }, { "cell_type": "markdown", @@ -229,64 +179,7 @@ }, { "cell_type": "code", - "execution_count": 86, - "outputs": [], - "source": [ - "# Cluster the data using Gaussian 
Mixture Models\n", - "# Create two centroids, one in the North End and one in the Financial District\n", - "centroids = [[42.364506, -71.054733], [42.358894, -71.056742]]" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-11-06T01:14:15.329931Z", - "start_time": "2023-11-06T01:14:15.325838Z" - } - }, - "id": "45b59d81ae2de84e" - }, - { - "cell_type": "code", - "execution_count": 87, - "outputs": [ - { - "data": { - "text/plain": " name gps list weights\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A 0.018132\n1 Acorn St [42.3576234, -71.0688746] A 0.008032\n2 Arlington's Great Meadows [42.4299758, -71.2038948] A 0.000676\n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A 0.005410\n4 BU Beach [42.3511927, -71.1060828] A 0.002145\n.. ... ... ... ...\n28 The Clam Box [42.2763168, -71.0092883] C 0.001136\n29 The Partisans [42.3478375, -71.0404428] C 0.005315\n30 Union Oyster House [42.361288, -71.056908] C 0.037200\n31 Victoria's Diner [42.3270498, -71.0667744] C 0.003055\n32 Wollaston Beach [42.2806539, -71.0119933] C 0.001198\n\n[131 rows x 4 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistweights
0521 Commercial Street #525[42.3688272, -71.0553792]A0.018132
1Acorn St[42.3576234, -71.0688746]A0.008032
2Arlington's Great Meadows[42.4299758, -71.2038948]A0.000676
3Arthur Fiedler Statue[42.3565057, -71.0754527]A0.005410
4BU Beach[42.3511927, -71.1060828]A0.002145
...............
28The Clam Box[42.2763168, -71.0092883]C0.001136
29The Partisans[42.3478375, -71.0404428]C0.005315
30Union Oyster House[42.361288, -71.056908]C0.037200
31Victoria's Diner[42.3270498, -71.0667744]C0.003055
32Wollaston Beach[42.2806539, -71.0119933]C0.001198
\n

131 rows × 4 columns

\n
" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Create a weights column that increases as the location gets closer to the centroids\n", - "\n", - "# Compute the distance from each point to each centroid\n", - "TotalList['weights'] = TotalList['gps'].apply(lambda x: [np.linalg.norm(np.array(x) - np.array(centroids[0])), np.linalg.norm(np.array(x) - np.array(centroids[1]))])\n", - "\n", - "# Invert the weights so that the locations closest to the centroids have the highest weights\n", - "TotalList['weights'] = TotalList['weights'].apply(lambda x: [1/i for i in x])\n", - "\n", - "# Sum the weights\n", - "TotalList['weights'] = TotalList['weights'].apply(lambda x: sum(x))\n", - "\n", - "# Normalize the weights\n", - "TotalList['weights'] = TotalList['weights'].apply(lambda x: x/sum(TotalList['weights']))\n", - "\n", - "display(TotalList)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-11-06T01:14:15.942150Z", - "start_time": "2023-11-06T01:14:15.938980Z" - } - }, - "id": "2f2975484d00129c" - }, - { - "cell_type": "code", - "execution_count": 88, + "execution_count": 9, "outputs": [ { "name": "stderr", @@ -300,20 +193,20 @@ } ], "source": [ - "kmeans = KMeans(n_clusters=2, init=centroids).fit(TotalList['gps'].values.tolist())" + "kmeans = KMeans(n_clusters=2, init=norm_centroids).fit(TotalList['normalized_gps'].values.tolist())" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:16.878902Z", - "start_time": "2023-11-06T01:14:16.865126Z" + "end_time": "2023-11-06T17:13:47.552787Z", + "start_time": "2023-11-06T17:13:47.462389Z" } }, "id": "db1ef4b14a1da5f5" }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 10, "outputs": [], "source": [ "# Add the cluster labels to the dataframe\n", @@ -322,20 +215,20 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:17.887765Z", - "start_time": "2023-11-06T01:14:17.880353Z" + 
"end_time": "2023-11-06T17:13:47.654801Z", + "start_time": "2023-11-06T17:13:47.534432Z" } }, "id": "99891fae96a2fff7" }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 11, "outputs": [ { "data": { - "text/plain": " name gps list weights \\\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A 0.018132 \n1 Acorn St [42.3576234, -71.0688746] A 0.008032 \n2 Arlington's Great Meadows [42.4299758, -71.2038948] A 0.000676 \n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A 0.005410 \n4 BU Beach [42.3511927, -71.1060828] A 0.002145 \n.. ... ... ... ... \n28 The Clam Box [42.2763168, -71.0092883] C 0.001136 \n29 The Partisans [42.3478375, -71.0404428] C 0.005315 \n30 Union Oyster House [42.361288, -71.056908] C 0.037200 \n31 Victoria's Diner [42.3270498, -71.0667744] C 0.003055 \n32 Wollaston Beach [42.2806539, -71.0119933] C 0.001198 \n\n cluster \n0 1 \n1 1 \n2 0 \n3 1 \n4 0 \n.. ... \n28 1 \n29 1 \n30 1 \n31 1 \n32 1 \n\n[131 rows x 5 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistweightscluster
0521 Commercial Street #525[42.3688272, -71.0553792]A0.0181321
1Acorn St[42.3576234, -71.0688746]A0.0080321
2Arlington's Great Meadows[42.4299758, -71.2038948]A0.0006760
3Arthur Fiedler Statue[42.3565057, -71.0754527]A0.0054101
4BU Beach[42.3511927, -71.1060828]A0.0021450
..................
28The Clam Box[42.2763168, -71.0092883]C0.0011361
29The Partisans[42.3478375, -71.0404428]C0.0053151
30Union Oyster House[42.361288, -71.056908]C0.0372001
31Victoria's Diner[42.3270498, -71.0667744]C0.0030551
32Wollaston Beach[42.2806539, -71.0119933]C0.0011981
\n

131 rows × 5 columns

\n
" + "text/plain": " name gps list \\\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A \n1 Acorn St [42.3576234, -71.0688746] A \n2 Arlington's Great Meadows [42.4299758, -71.2038948] A \n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A \n4 BU Beach [42.3511927, -71.1060828] A \n.. ... ... ... \n28 The Clam Box [42.2763168, -71.0092883] C \n29 The Partisans [42.3478375, -71.0404428] C \n30 Union Oyster House [42.361288, -71.056908] C \n31 Victoria's Diner [42.3270498, -71.0667744] C \n32 Wollaston Beach [42.2806539, -71.0119933] C \n\n normalized_gps cluster \n0 [0.7251058917247415, 0.8141430878559053] 1 \n1 [0.6747391031099019, 0.778052752104061] 1 \n2 [1.0, 0.41697235794883575] 0 \n3 [0.6697144722136962, 0.7604611403245493] 1 \n4 [0.6458298305822171, 0.6785480000609988] 0 \n.. ... ... \n28 [0.30922451563130937, 0.9374025730216268] 1 \n29 [0.6307464973238023, 0.8540870458656248] 1 \n30 [0.6912133469876947, 0.8100546647415456] 1 \n31 [0.5372951958288665, 0.7836692527743693] 1 \n32 [0.32872198960456106, 0.9301686741961767] 1 \n\n[131 rows x 5 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistnormalized_gpscluster
0521 Commercial Street #525[42.3688272, -71.0553792]A[0.7251058917247415, 0.8141430878559053]1
1Acorn St[42.3576234, -71.0688746]A[0.6747391031099019, 0.778052752104061]1
2Arlington's Great Meadows[42.4299758, -71.2038948]A[1.0, 0.41697235794883575]0
3Arthur Fiedler Statue[42.3565057, -71.0754527]A[0.6697144722136962, 0.7604611403245493]1
4BU Beach[42.3511927, -71.1060828]A[0.6458298305822171, 0.6785480000609988]0
..................
28The Clam Box[42.2763168, -71.0092883]C[0.30922451563130937, 0.9374025730216268]1
29The Partisans[42.3478375, -71.0404428]C[0.6307464973238023, 0.8540870458656248]1
30Union Oyster House[42.361288, -71.056908]C[0.6912133469876947, 0.8100546647415456]1
31Victoria's Diner[42.3270498, -71.0667744]C[0.5372951958288665, 0.7836692527743693]1
32Wollaston Beach[42.2806539, -71.0119933]C[0.32872198960456106, 0.9301686741961767]1
\n

131 rows × 5 columns

\n
" }, "metadata": {}, "output_type": "display_data" @@ -348,8 +241,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:19.060647Z", - "start_time": "2023-11-06T01:14:19.051699Z" + "end_time": "2023-11-06T17:13:47.690379Z", + "start_time": "2023-11-06T17:13:47.562147Z" } }, "id": "49fc751352022ad1" @@ -366,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 12, "outputs": [], "source": [ "# Create a map in Boston\n", @@ -375,21 +268,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:20.184965Z", - "start_time": "2023-11-06T01:14:20.177057Z" + "end_time": "2023-11-06T17:13:47.690503Z", + "start_time": "2023-11-06T17:13:47.606732Z" } }, "id": "48d76bd40c44cc61" }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 13, "outputs": [], "source": [ "# Plot the centroids on the map\n", "for i in range(len(centroids)):\n", " folium.Marker(centroids[i], popup='Centroid ' + str(i), icon=folium.Icon(color='black')).add_to(m)\n", - " \n", + "\n", "# Add the points to the map with different colors for each cluster\n", "for i, row in TotalList.iterrows():\n", " if row['cluster'] == 0:\n", @@ -418,22 +311,22 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:20.573947Z", - "start_time": "2023-11-06T01:14:20.558985Z" + "end_time": "2023-11-06T17:13:47.690797Z", + "start_time": "2023-11-06T17:13:47.629116Z" } }, "id": "3c8a7d2b34d4f22d" }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 14, "outputs": [ { "data": { - "text/plain": "", - "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" }, - "execution_count": 93, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -445,21 +338,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:23.580878Z", - "start_time": "2023-11-06T01:14:23.507152Z" + "end_time": "2023-11-06T17:13:47.812439Z", + "start_time": "2023-11-06T17:13:47.668506Z" } }, "id": "d6941d1f0a203ee7" }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 15, "outputs": [ { "data": { - "text/plain": "1 74\n0 57\nName: cluster, dtype: int64" + "text/plain": "1 83\n0 48\nName: cluster, dtype: int64" }, - "execution_count": 94, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -471,191 +364,426 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:28.465028Z", - "start_time": "2023-11-06T01:14:28.461813Z" + "end_time": "2023-11-06T17:13:47.814584Z", + "start_time": "2023-11-06T17:13:47.761699Z" } }, "id": "479ba8f36cdafbf8" }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 16, "outputs": [], "source": [ - "# create a method to move n number of locations from the largest cluster to the smallest cluster, taking distance into account\n", - "def equalize_clusters(df, n):\n", - " # Get the number of locations in each cluster\n", - " cluster_counts = df['cluster'].value_counts()\n", - " \n", - " # Get the largest and smallest clusters\n", - " largest_cluster = cluster_counts.index[0]\n", - " smallest_cluster = cluster_counts.index[-1]\n", - " \n", - " # Get the locations in the largest cluster\n", - " largest_cluster_locations = df[df['cluster'] == largest_cluster]\n", - " \n", - " # Get the locations in the smallest cluster\n", - " smallest_cluster_locations = df[df['cluster'] == smallest_cluster]\n", - " \n", - " # Create a list of distances from each location in the largest cluster to each location in the smallest cluster\n", - " distances = []\n", - " for i, row in 
largest_cluster_locations.iterrows():\n", - " for j, row2 in smallest_cluster_locations.iterrows():\n", - " distances.append([i, j, np.linalg.norm(np.array(row['gps']) - np.array(row2['gps']))])\n", - " \n", - " # Sort the distances by distance\n", - " distances.sort(key=lambda x: x[2])\n", - " \n", - " # Move the n closest locations from the largest cluster to the smallest cluster\n", - " for i in range(n):\n", - " df.loc[distances[i][0], 'cluster'] = smallest_cluster\n", - " df.loc[distances[i][1], 'cluster'] = largest_cluster\n", - " \n", - " return df" + "# Return the list of locations in each cluster\n", + "route_1 = TotalList[TotalList['cluster'] == 0]\n", + "route_1_stops = len(route_1['gps'].values.tolist())\n", + "route_1_str = utils.list_to_string(route_1['gps'].values.tolist())" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:13:47.814649Z", + "start_time": "2023-11-06T17:13:47.767185Z" + } + }, + "id": "89297f77828e8ed8" + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "route_2 = TotalList[TotalList['cluster'] == 1]\n", + "route_2_stops = len(route_2['gps'].values.tolist())\n", + "route_2_str = utils.list_to_string(route_2['gps'].values.tolist())" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:08:43.493687Z", - "start_time": "2023-11-06T01:08:43.480182Z" + "end_time": "2023-11-06T17:13:47.815014Z", + "start_time": "2023-11-06T17:13:47.770253Z" } }, - "id": "4b79215a12bf36e2" + "id": "6ff82e29a0366d9e" }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 18, "outputs": [ { - "data": { - "text/plain": "0 97\n1 72\nName: cluster, dtype: int64" - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "The trip will take 9.129166666666666 hours\n", + "The trip will take 11.833055555555555 hours\n" + ] } ], "source": [ - "# Equalize the 
clusters\n", - "TotalList = equalize_clusters(TotalList, 20)\n", + "# Get the time for each route\n", + "trip_hrs_1 = utils.get_trip_time(northeastern_coordinate + route_1_str, route_1_stops)\n", + "print(\"The trip will take {} hours\".format(trip_hrs_1))\n", + "trip_hrs_2 = utils.get_trip_time(northeastern_coordinate + route_2_str, route_2_stops)\n", + "print(\"The trip will take {} hours\".format(trip_hrs_2))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:13:49.614158Z", + "start_time": "2023-11-06T17:13:47.772345Z" + } + }, + "id": "7949bddd34b6731" + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [], + "source": [ + "# Move a coordinate from one cluster to the other and see how the trip time changes\n", + "# Find the closest coordinate between the two clusters\n", "\n", - "# Display the number of locations in each cluster\n", - "TotalList['cluster'].value_counts()" + "closest_coordinate = utils.move_coordinate(route_2['gps'].values.tolist(), route_1['gps'].values.tolist())" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:13:49.620559Z", + "start_time": "2023-11-06T17:13:49.614687Z" + } + }, + "id": "47ee7033f93c4d2b" + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The closest coordinate is [42.3446263, -71.0969274]\n" + ] + } + ], + "source": [ + "print(\"The closest coordinate is {}\".format(closest_coordinate))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:13:49.620803Z", + "start_time": "2023-11-06T17:13:49.617173Z" + } + }, + "id": "f77340f4382a886f" + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "# Change the cluster of the closest coordinate array\n", + "TotalList.loc[TotalList['gps'].astype(str) == str(closest_coordinate), 'cluster'] = 0" ], "metadata": { "collapsed": 
false, "ExecuteTime": { - "end_time": "2023-11-06T01:08:43.649954Z", - "start_time": "2023-11-06T01:08:43.542655Z" + "end_time": "2023-11-06T17:13:49.623360Z", + "start_time": "2023-11-06T17:13:49.621229Z" } }, - "id": "176d5f92130c67b8" + "id": "7d9f2216c1c0e80f" }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 22, "outputs": [ { "data": { - "text/plain": "'-71.0553792,42.3688272;-71.0688746,42.3576234;-71.2038948,42.4299758;-71.0754527,42.3565057;-71.1060828,42.3511927;-71.0969274,42.3446263;-71.130887,42.35304;-71.0620802,42.3579151;-71.1459593,42.3501823;-71.0586014,42.357357;-71.0572023,42.3587627;-71.0556268,42.36521;-71.1460435,42.3495825;-71.1217152,42.3426377;-71.0720926,42.3489004;-71.067859,42.3500079;-71.0632036,42.3556154;-71.1258765,42.331864;-71.1095021,42.3364675;-71.133103,42.3890049;-71.0620134,42.3248471;-71.0851891,42.3500031;-71.1123834,42.3360385;-71.066414,42.354296;-71.2273649,42.3145041;-71.0834061,42.341987;-71.0992038,42.3306454;-71.0990577,42.3381442;-71.0569649,42.3604952;-71.0949218,42.3419564;-71.0942861,42.3413301;-71.0498714,42.3256817;-71.0908104,42.329969;-71.0616035,42.3537983;-71.0359433,42.3485465;-71.0913583,42.3490205;-71.1000217,42.3323776;-71.1241295,42.3518397;-71.1618052,42.3245965;-71.0638101,42.3587772;-71.1625829,42.340795;-71.167854,42.4107892;-71.155555,42.3317473;-71.1227278,42.3965778;-71.3598149,42.3140229;-71.1126695,42.3836229;-71.0555003,42.3640137;-71.119149,42.3884;-71.0712561,42.3407613;-71.0561781,42.3668968;-71.0664019,42.3554589;-71.059228,42.359349;-71.0668408,42.3524116;-71.0872846,42.2961434;-71.062146,42.366198;-71.1427371,42.3433772;-71.1438455,42.3569102;-71.0651214,42.3553972;-71.0596124,42.3509517;-71.0359354,42.3478381;-71.1313443,42.3525708;-71.1284677,42.3631904;-71.061757,42.3691906;-71.119301,42.388547;-71.097883,42.381008;-71.1107166,42.3741209;-71.0609962,42.3803747;-71.0516339,42.3609921;-71.1194344,42.3754427;-71.0809932,42.3675275;-71.0545357,42.3597994;-71.1013044,
42.3627462;-71.1108423,42.3838224;-71.1026937,42.3820702;-71.1189467,42.373465;-71.1208817,42.3732344;-71.0342146,42.316274;-71.0756902,42.3695046;-71.0678704,42.3701829;-71.0968274,42.3799095;-71.0656594,42.3718401;-71.094048,42.339381;-71.1854722,42.3621177;-71.1146697,42.3782386;-71.0935443,42.3817274;-71.0611749,42.3551807;-71.0906355,42.3616095;-71.1161887,42.3766442;-71.0962734,42.3627993;-71.1155576,42.3784629;-71.0949101,42.3797674;-71.1087411,42.3640287;-71.0554239,42.3739796;-71.09476,42.37736;-71.1014951,42.3614115;-71.1024769,42.3822934;-71.1011111,42.3636597;-71.0631664,42.3741694;-71.056823,42.361531;-71.0632852,42.2857047;-71.0637877,42.2845163;-71.0496839,42.3519736;-71.0454645,42.3162356;-71.0336324,42.3441918;-71.0487437,42.3508756;-71.0512911,42.3521821;-71.0013637,42.2075316;-71.0607764,42.3763541;-71.0374911,42.316031;-71.0125206,42.3378699;-71.0672898,42.3523158;-71.02832,42.2576602;-71.0502126,42.3516479;-71.0331956,42.3639107;-71.0432778,42.3528151;-71.0035279,42.2392354;-71.0470633,42.3537343;-71.0352443,42.3291218;-71.0898829,42.3463992;-71.0240951,42.2743442;-71.0234949,42.3358743;-70.985881,42.420226;-71.0005483,42.2454086;-71.0096371,42.3367603;-71.0447796,42.3509709;-71.0983169,42.3319001;-71.0092883,42.2763168;-71.0404428,42.3478375;-71.056908,42.361288;-71.0667744,42.3270498;-71.0119933,42.2806539;-71.0618764,42.4074484;-71.0612182,42.3986053;-71.0392667,42.3855456;-71.0515875,42.4025721;-70.9903023,42.3917606;-71.055873,42.4206339;-71.0433886,42.4222989;-71.06088,42.3761612;-71.0412802,42.3936888;-71.0714924,42.3968978;-71.0282154,42.3778389;-71.0350852,42.3809511;-71.0331398,42.3734483;-70.9693867,42.3895122;-71.0945712,42.3253252;-71.0280157,42.398422;-71.0155516,42.4114215;-70.993656,42.4110462;-71.0355621,42.3976519;-71.0056995,42.390191;-71.0589219,42.403759;-71.037937,42.3698284;-71.0386285,42.3903823;-71.0316196,42.4122481;-71.0328839,42.3861321;-71.0270609,42.4213082;-71.0366491,42.391236;-71.0361399,42.3649623;-71.0116946,42
.3827415;-70.9973058,42.4183123;-71.1122037,42.4008442;-70.997123,42.390501;-71.0506461,42.41826;-71.0359889,42.3670906;-71.0414523,42.3649544;-71.0371343,42.3711266;-71.033703,42.3891835;-70.9799864,42.3803348;'" + "text/plain": "1 82\n0 49\nName: cluster, dtype: int64" }, - "execution_count": 75, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "utils.list_to_string(TotalList['gps'].values.tolist())" + "# Display the number of locations in each cluster\n", + "TotalList['cluster'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:08:43.650401Z", - "start_time": "2023-11-06T01:08:43.622162Z" + "end_time": "2023-11-06T17:13:49.632625Z", + "start_time": "2023-11-06T17:13:49.624757Z" } }, - "id": "2d83e5db093608d2" + "id": "175937590bf5d19" }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 23, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "57\n" + "The trip will take 9.216666666666667 hours\n", + "The trip will take 11.710277777777778 hours\n" ] - }, + } + ], + "source": [ + "# Calculate the new trip time\n", + "new_route_1 = TotalList[TotalList['cluster'] == 0]\n", + "new_route_2 = TotalList[TotalList['cluster'] == 1]\n", + "new_route_1_stops = len(new_route_1['gps'].values.tolist())\n", + "new_route_1_str = utils.list_to_string(new_route_1['gps'].values.tolist())\n", + "new_route_2_stops = len(new_route_2['gps'].values.tolist())\n", + "new_route_2_str = utils.list_to_string(new_route_2['gps'].values.tolist())\n", + "\n", + "new_trip_hrs_1 = utils.get_trip_time(northeastern_coordinate + new_route_1_str, new_route_1_stops)\n", + "print(\"The trip will take {} hours\".format(new_trip_hrs_1))\n", + "new_trip_hrs_2 = utils.get_trip_time(northeastern_coordinate + new_route_2_str, new_route_2_stops)\n", + "print(\"The trip will take {} hours\".format(new_trip_hrs_2))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + 
"end_time": "2023-11-06T17:13:51.356749Z", + "start_time": "2023-11-06T17:13:49.629464Z" + } + }, + "id": "1eddc12b846d259" + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ { "data": { - "text/plain": "'-71.2038948,42.4299758;-71.1060828,42.3511927;-71.0969274,42.3446263;-71.130887,42.35304;-71.1459593,42.3501823;-71.1460435,42.3495825;-71.1217152,42.3426377;-71.1258765,42.331864;-71.1095021,42.3364675;-71.133103,42.3890049;-71.1123834,42.3360385;-71.2273649,42.3145041;-71.0992038,42.3306454;-71.0990577,42.3381442;-71.0949218,42.3419564;-71.0942861,42.3413301;-71.0913583,42.3490205;-71.1000217,42.3323776;-71.1241295,42.3518397;-71.1618052,42.3245965;-71.1625829,42.340795;-71.167854,42.4107892;-71.155555,42.3317473;-71.1227278,42.3965778;-71.3598149,42.3140229;-71.1126695,42.3836229;-71.119149,42.3884;-71.1427371,42.3433772;-71.1438455,42.3569102;-71.1313443,42.3525708;-71.1284677,42.3631904;-71.119301,42.388547;-71.097883,42.381008;-71.1107166,42.3741209;-71.1194344,42.3754427;-71.1013044,42.3627462;-71.1108423,42.3838224;-71.1026937,42.3820702;-71.1189467,42.373465;-71.1208817,42.3732344;-71.0968274,42.3799095;-71.094048,42.339381;-71.1854722,42.3621177;-71.1146697,42.3782386;-71.0935443,42.3817274;-71.0906355,42.3616095;-71.1161887,42.3766442;-71.0962734,42.3627993;-71.1155576,42.3784629;-71.0949101,42.3797674;-71.1087411,42.3640287;-71.09476,42.37736;-71.1014951,42.3614115;-71.1024769,42.3822934;-71.1011111,42.3636597;-71.0898829,42.3463992;-71.0983169,42.3319001;'" + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" }, - "execution_count": 95, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Return the list of locations in each cluster\n", - "print(len(TotalList[TotalList['cluster'] == 0]['gps'].values.tolist()))\n", - "utils.list_to_string(TotalList[TotalList['cluster'] == 0]['gps'].values.tolist())" + "# Create a new map with the new coordinates\n", + "m = folium.Map(location=[42.3601, -71.0589], zoom_start=12)\n", + "\n", + "# Plot the centroids on the map\n", + "for i in range(len(centroids)):\n", + " folium.Marker(centroids[i], popup='Centroid ' + str(i), icon=folium.Icon(color='black')).add_to(m)\n", + "\n", + "# Add the points to the map with different colors for each cluster\n", + "for i, row in TotalList.iterrows():\n", + " if row['cluster'] == 0:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='red')).add_to(m)\n", + " elif row['cluster'] == 1:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='blue')).add_to(m)\n", + " elif row['cluster'] == 2:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='green')).add_to(m)\n", + " elif row['cluster'] == 3:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='purple')).add_to(m)\n", + " elif row['cluster'] == 4:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='orange')).add_to(m)\n", + " elif row['cluster'] == 5:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkred')).add_to(m)\n", + " elif row['cluster'] == 6:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='lightred')).add_to(m)\n", + " elif row['cluster'] == 7:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='beige')).add_to(m)\n", + " elif 
row['cluster'] == 8:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkblue')).add_to(m)\n", + " elif row['cluster'] == 9:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='lightblue')).add_to(m)\n", + " elif row['cluster'] == 10:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='cadet')).add_to(m)\n", + "\n", + "# Display the map\n", + "m" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:35.829990Z", - "start_time": "2023-11-06T01:14:35.821619Z" + "end_time": "2023-11-06T17:13:51.474500Z", + "start_time": "2023-11-06T17:13:51.364744Z" } }, - "id": "89297f77828e8ed8" + "id": "e02dfb4cc414066a" + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [], + "source": [ + "# Attempt to minimize the trip time by moving a coordinate from one cluster to the other\n", + "new_route_2_coordinates, new_route_1_coordinates = utils.minimize_route_time_diff(route_2['gps'].values.tolist(), route_1['gps'].values.tolist(), northeastern_coordinate, 0.5)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:14:15.930771Z", + "start_time": "2023-11-06T17:13:51.471954Z" + } + }, + "id": "fa09560bd996ad9c" }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 26, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "74\n" + "The trip will take 10.150555555555556 hours\n", + "The trip will take 10.488888888888889 hours\n" ] - }, + } + ], + "source": [ + "# Calculate the new trip time\n", + "new_route_1_stops = len(new_route_1_coordinates)\n", + "new_route_1_str = utils.list_to_string(new_route_1_coordinates)\n", + "new_route_2_stops = len(new_route_2_coordinates)\n", + "new_route_2_str = utils.list_to_string(new_route_2_coordinates)\n", + "\n", + "new_trip_hrs_1 = utils.get_trip_time(northeastern_coordinate + 
new_route_1_str, new_route_1_stops)\n", + "print(\"The trip will take {} hours\".format(new_trip_hrs_1))\n", + "new_trip_hrs_2 = utils.get_trip_time(northeastern_coordinate + new_route_2_str, new_route_2_stops)\n", + "print(\"The trip will take {} hours\".format(new_trip_hrs_2))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:14:17.697174Z", + "start_time": "2023-11-06T17:14:15.937708Z" + } + }, + "id": "cb4c9f02d769c5b2" + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [], + "source": [ + "# Edit the dataframe to reflect the new coordinate clusters\n", + "TotalList.loc[TotalList['gps'].astype(str).isin(map(str, new_route_1_coordinates)), 'cluster'] = 0\n", + "TotalList.loc[TotalList['gps'].astype(str).isin(map(str, new_route_2_coordinates)), 'cluster'] = 1" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:14:17.713355Z", + "start_time": "2023-11-06T17:14:17.702484Z" + } + }, + "id": "ccda123bae5a7fe2" + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ { "data": { - "text/plain": 
"'-71.0553792,42.3688272;-71.0688746,42.3576234;-71.0754527,42.3565057;-71.0620802,42.3579151;-71.0586014,42.357357;-71.0572023,42.3587627;-71.0556268,42.36521;-71.0720926,42.3489004;-71.067859,42.3500079;-71.0632036,42.3556154;-71.0620134,42.3248471;-71.0851891,42.3500031;-71.066414,42.354296;-71.0834061,42.341987;-71.0569649,42.3604952;-71.0498714,42.3256817;-71.0908104,42.329969;-71.0616035,42.3537983;-71.0359433,42.3485465;-71.0638101,42.3587772;-71.0555003,42.3640137;-71.0712561,42.3407613;-71.0561781,42.3668968;-71.0664019,42.3554589;-71.059228,42.359349;-71.0668408,42.3524116;-71.0872846,42.2961434;-71.062146,42.366198;-71.0651214,42.3553972;-71.0596124,42.3509517;-71.0359354,42.3478381;-71.061757,42.3691906;-71.0609962,42.3803747;-71.0516339,42.3609921;-71.0809932,42.3675275;-71.0545357,42.3597994;-71.0342146,42.316274;-71.0756902,42.3695046;-71.0678704,42.3701829;-71.0656594,42.3718401;-71.0611749,42.3551807;-71.0554239,42.3739796;-71.0631664,42.3741694;-71.056823,42.361531;-71.0632852,42.2857047;-71.0637877,42.2845163;-71.0496839,42.3519736;-71.0454645,42.3162356;-71.0336324,42.3441918;-71.0487437,42.3508756;-71.0512911,42.3521821;-71.0013637,42.2075316;-71.0607764,42.3763541;-71.0374911,42.316031;-71.0125206,42.3378699;-71.0672898,42.3523158;-71.02832,42.2576602;-71.0502126,42.3516479;-71.0331956,42.3639107;-71.0432778,42.3528151;-71.0035279,42.2392354;-71.0470633,42.3537343;-71.0352443,42.3291218;-71.0240951,42.2743442;-71.0234949,42.3358743;-70.985881,42.420226;-71.0005483,42.2454086;-71.0096371,42.3367603;-71.0447796,42.3509709;-71.0092883,42.2763168;-71.0404428,42.3478375;-71.056908,42.361288;-71.0667744,42.3270498;-71.0119933,42.2806539;'" + "text/plain": "1 70\n0 61\nName: cluster, dtype: int64" }, - "execution_count": 96, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "print(len(TotalList[TotalList['cluster'] == 1]['gps'].values.tolist()))\n", - "utils.list_to_string(TotalList[TotalList['cluster'] == 
1]['gps'].values.tolist())" + "# Display the number of locations in each cluster\n", + "TotalList['cluster'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:14:36.909798Z", - "start_time": "2023-11-06T01:14:36.904157Z" + "end_time": "2023-11-06T17:14:17.725481Z", + "start_time": "2023-11-06T17:14:17.710476Z" } }, - "id": "6ff82e29a0366d9e" + "id": "c871a41d003d72ee" + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [ + { + "data": { + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a new map with the new coordinates\n", + "m = folium.Map(location=[42.3601, -71.0589], zoom_start=12)\n", + "\n", + "# Plot the centroids on the map\n", + "for i in range(len(centroids)):\n", + " folium.Marker(centroids[i], popup='Centroid ' + str(i), icon=folium.Icon(color='black')).add_to(m)\n", + "\n", + "# Add the points to the map with different colors for each cluster\n", + "for i, row in TotalList.iterrows():\n", + " if row['cluster'] == 0:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='red')).add_to(m)\n", + " elif row['cluster'] == 1:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='blue')).add_to(m)\n", + " elif row['cluster'] == 2:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='green')).add_to(m)\n", + " elif row['cluster'] == 3:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='purple')).add_to(m)\n", + " elif row['cluster'] == 4:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='orange')).add_to(m)\n", + " elif row['cluster'] == 5:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkred')).add_to(m)\n", + " elif row['cluster'] == 6:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='lightred')).add_to(m)\n", + " elif row['cluster'] == 7:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='beige')).add_to(m)\n", + " elif row['cluster'] == 8:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkblue')).add_to(m)\n", + " elif row['cluster'] == 9:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], 
popup=row['name'], icon=folium.Icon(color='lightblue')).add_to(m)\n", + " elif row['cluster'] == 10:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='cadet')).add_to(m)\n", + "\n", + "# Display the map\n", + "m" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T17:14:17.803413Z", + "start_time": "2023-11-06T17:14:17.723348Z" + } + }, + "id": "76538bc325ff80b0" }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 29, "outputs": [], "source": [], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-11-06T01:08:43.651470Z", - "start_time": "2023-11-06T01:08:43.640872Z" + "end_time": "2023-11-06T17:14:17.803513Z", + "start_time": "2023-11-06T17:14:17.800565Z" } }, - "id": "7949bddd34b6731" + "id": "438c323e29e25031" } ], "metadata": { diff --git a/Clustering2.0.ipynb b/Clustering2.0.ipynb new file mode 100644 index 0000000..5ff2d63 --- /dev/null +++ b/Clustering2.0.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.475082Z", + "start_time": "2023-11-06T18:51:21.667023Z" + } + }, + "outputs": [], + "source": [ + "import folium\n", + "import pandas as pd\n", + "from sklearn.cluster import KMeans\n", + "import utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "# Load the data\n", + "ListA = pd.read_csv('List A.csv')\n", + "ListB = pd.read_csv('List B.csv')\n", + "ListC = pd.read_csv('List C.csv')\n", + "ListD = pd.read_csv('List D.csv')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.495242Z", + "start_time": "2023-11-06T18:51:22.473334Z" + } + }, + "id": "bb6f57eef695cf76" + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "# Create two centroids, one in the North 
End and one in the Financial District\n", + "centroids = [[42.364506, -71.054733], [42.358894, -71.056742]]\n", + "\n", + "northeastern_coordinate = \"-71.09033,42.33976;\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.495492Z", + "start_time": "2023-11-06T18:51:22.483246Z" + } + }, + "id": "fe8a5b9bc06cf2e0" + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "data": { + "text/plain": " name gps \\\n0 521 Commercial Street #525 42.3688272,-71.0553792 \n1 Acorn St 42.3576234,-71.0688746 \n2 Arlington's Great Meadows 42.4299758,-71.2038948 \n3 Arthur Fiedler Statue 42.3565057,-71.0754527 \n4 BU Beach 42.3511927,-71.1060828 \n.. ... ... \n28 The Clam Box 42.2763168,-71.0092883 \n29 The Partisans 42.3478375,-71.0404428 \n30 Union Oyster House 42.361288,-71.056908 \n31 Victoria's Diner 42.3270498,-71.0667744 \n32 Wollaston Beach 42.2806539,-71.0119933 \n\n googleUrl \\\n0 https://maps.google.com/maps?q=+%4042.3688272,... \n1 https://maps.google.com/maps?q=+%4042.3576234,... \n2 https://maps.google.com/maps?q=+%4042.4299758,... \n3 https://maps.google.com/maps?q=+%4042.3565057,... \n4 https://maps.google.com/maps?q=+%4042.3511927,... \n.. ... \n28 https://maps.google.com/maps?q=+%4042.2763168,... \n29 https://maps.google.com/maps?q=+%4042.3478375,... \n30 https://maps.google.com/maps?q=+%4042.361288,-... \n31 https://maps.google.com/maps?q=+%4042.3270498,... \n32 https://maps.google.com/maps?q=+%4042.2806539,... \n\n originalUrl info types \\\n0 https://www.google.com/maps/place/521+Commerci... NaN NaN \n1 https://www.google.com/maps/place/Acorn+St/dat... NaN NaN \n2 https://www.google.com/maps/place/Arlington's+... NaN NaN \n3 https://www.google.com/maps/place/Arthur+Fiedl... NaN NaN \n4 https://www.google.com/maps/place/BU+Beach/dat... NaN NaN \n.. ... ... ... \n28 https://www.google.com/maps/place/The+Clam+Box... NaN NaN \n29 https://www.google.com/maps/place/The+Partisan... 
NaN NaN \n30 https://www.google.com/maps/place/Union+Oyster... NaN NaN \n31 https://www.google.com/maps/place/Victoria's+D... NaN NaN \n32 https://www.google.com/maps/place/Wollaston+Be... NaN NaN \n\n address \\\n0 NaN \n1 NaN \n2 Minuteman Commuter Bikeway, Lexington, MA 0242... \n3 Charles River Esplanades, Boston, MA 02114, Un... \n4 270 Bay State Rd, Boston, MA 02215, United States \n.. ... \n28 789 Quincy Shore Dr, Quincy, MA 02170, United ... \n29 Boston, MA 02210, United States \n30 41 Union St, Boston, MA 02108, United States \n31 1024 Massachusetts Ave, Boston, MA 02118, Unit... \n32 Quincy, MA, United States \n\n description type \\\n0 NaN NaN \n1 NaN NaN \n2 183-acres of wet meadows & uplands with trails... Nature preserve \n3 NaN Sculpture \n4 A sloping, grassy plaza on the university grou... Park \n.. ... ... \n28 Classic beachfront joint with a rustic vibe di... Seafood restaurant \n29 NaN Sculpture \n30 Historic eatery serving chowder & other New En... Seafood restaurant \n31 Long-standing classic diner for breakfast & sa... Diner \n32 Historic 2.3-mi.-long beach with a paved prome... Beach \n\n phone website \\\n0 NaN NaN \n1 NaN NaN \n2 +1 781-863-5385 http://www.foagm.org/ \n3 +1 617-332-2433 http://helmicksculpture.com/portfolio/arthur-f... \n4 NaN https://www.bu.edu/today/2009/icons-among-us-t... \n.. ... ... \n28 +1 617-302-3474 http://www.clamboxquincy.com/ \n29 NaN https://www.bostonseaport.xyz/venue/the-partis... \n30 +1 617-227-2750 http://www.unionoysterhouse.com/?y_source=1_Mj... \n31 +1 617-442-5965 http://www.victoriasdiner.com/ \n32 NaN NaN \n\n ratingsAverage ratingsTotal plusCode list \n0 NaN NaN NaN A \n1 NaN NaN NaN A \n2 4.6 171.0 CQHW+XC Lexington, Massachusetts, USA A \n3 4.6 14.0 9W4F+JR Boston, Massachusetts, USA A \n4 4.5 133.0 9V2V+FH Boston, Massachusetts, USA A \n.. ... ... ... ... 
\n28 4.3 2145.0 7XGR+G7 Quincy, Massachusetts, USA C \n29 4.8 6.0 8XX5+4R Boston, Massachusetts, USA C \n30 4.3 8497.0 9W6V+G6 Boston, Massachusetts, USA C \n31 4.1 1797.0 8WGM+R7 Boston, Massachusetts, USA C \n32 4.4 171.0 NaN C \n\n[131 rows x 15 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpsgoogleUrloriginalUrlinfotypesaddressdescriptiontypephonewebsiteratingsAverageratingsTotalplusCodelist
0521 Commercial Street #52542.3688272,-71.0553792https://maps.google.com/maps?q=+%4042.3688272,...https://www.google.com/maps/place/521+Commerci...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNA
1Acorn St42.3576234,-71.0688746https://maps.google.com/maps?q=+%4042.3576234,...https://www.google.com/maps/place/Acorn+St/dat...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNA
2Arlington's Great Meadows42.4299758,-71.2038948https://maps.google.com/maps?q=+%4042.4299758,...https://www.google.com/maps/place/Arlington's+...NaNNaNMinuteman Commuter Bikeway, Lexington, MA 0242...183-acres of wet meadows & uplands with trails...Nature preserve+1 781-863-5385http://www.foagm.org/4.6171.0CQHW+XC Lexington, Massachusetts, USAA
3Arthur Fiedler Statue42.3565057,-71.0754527https://maps.google.com/maps?q=+%4042.3565057,...https://www.google.com/maps/place/Arthur+Fiedl...NaNNaNCharles River Esplanades, Boston, MA 02114, Un...NaNSculpture+1 617-332-2433http://helmicksculpture.com/portfolio/arthur-f...4.614.09W4F+JR Boston, Massachusetts, USAA
4BU Beach42.3511927,-71.1060828https://maps.google.com/maps?q=+%4042.3511927,...https://www.google.com/maps/place/BU+Beach/dat...NaNNaN270 Bay State Rd, Boston, MA 02215, United StatesA sloping, grassy plaza on the university grou...ParkNaNhttps://www.bu.edu/today/2009/icons-among-us-t...4.5133.09V2V+FH Boston, Massachusetts, USAA
................................................
28The Clam Box42.2763168,-71.0092883https://maps.google.com/maps?q=+%4042.2763168,...https://www.google.com/maps/place/The+Clam+Box...NaNNaN789 Quincy Shore Dr, Quincy, MA 02170, United ...Classic beachfront joint with a rustic vibe di...Seafood restaurant+1 617-302-3474http://www.clamboxquincy.com/4.32145.07XGR+G7 Quincy, Massachusetts, USAC
29The Partisans42.3478375,-71.0404428https://maps.google.com/maps?q=+%4042.3478375,...https://www.google.com/maps/place/The+Partisan...NaNNaNBoston, MA 02210, United StatesNaNSculptureNaNhttps://www.bostonseaport.xyz/venue/the-partis...4.86.08XX5+4R Boston, Massachusetts, USAC
30Union Oyster House42.361288,-71.056908https://maps.google.com/maps?q=+%4042.361288,-...https://www.google.com/maps/place/Union+Oyster...NaNNaN41 Union St, Boston, MA 02108, United StatesHistoric eatery serving chowder & other New En...Seafood restaurant+1 617-227-2750http://www.unionoysterhouse.com/?y_source=1_Mj...4.38497.09W6V+G6 Boston, Massachusetts, USAC
31Victoria's Diner42.3270498,-71.0667744https://maps.google.com/maps?q=+%4042.3270498,...https://www.google.com/maps/place/Victoria's+D...NaNNaN1024 Massachusetts Ave, Boston, MA 02118, Unit...Long-standing classic diner for breakfast & sa...Diner+1 617-442-5965http://www.victoriasdiner.com/4.11797.08WGM+R7 Boston, Massachusetts, USAC
32Wollaston Beach42.2806539,-71.0119933https://maps.google.com/maps?q=+%4042.2806539,...https://www.google.com/maps/place/Wollaston+Be...NaNNaNQuincy, MA, United StatesHistoric 2.3-mi.-long beach with a paved prome...BeachNaNNaN4.4171.0NaNC
\n

131 rows × 15 columns

\n
" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Combine the two lists and add a column to indicate the list\n", + "ListA['list'] = 'A'\n", + "ListB['list'] = 'B'\n", + "ListC['list'] = 'C'\n", + "ListD['list'] = 'D'\n", + "\n", + "TotalList = pd.concat([ListA, ListB, ListC])\n", + "display(TotalList)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.496051Z", + "start_time": "2023-11-06T18:51:22.487941Z" + } + }, + "id": "dc434958d5e4a3a8" + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "# Remove all columns but name and gps\n", + "TotalList = TotalList[['name', 'gps', 'list']]" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.504898Z", + "start_time": "2023-11-06T18:51:22.496235Z" + } + }, + "id": "2873c16423fe3119" + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "# Convert the gps column to a list of lists for k-means\n", + "TotalList['gps'] = TotalList['gps'].apply(lambda x: x.strip('[]').split(','))\n", + "TotalList['gps'] = TotalList['gps'].apply(lambda x: [float(i) for i in x])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.522522Z", + "start_time": "2023-11-06T18:51:22.498651Z" + } + }, + "id": "29f9155ef8d75fda" + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "# Create a new column with normalized gps coordinates and centroids\n", + "TotalList['normalized_gps'], norm_centroids = utils.normalize_gps(TotalList['gps'].values.tolist(), centroids)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.548654Z", + "start_time": "2023-11-06T18:51:22.503769Z" + } + }, + "id": "5b985f1a6df84a6c" + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": " name gps list \\\n0 521 
Commercial Street #525 [42.3688272, -71.0553792] A \n1 Acorn St [42.3576234, -71.0688746] A \n2 Arlington's Great Meadows [42.4299758, -71.2038948] A \n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A \n4 BU Beach [42.3511927, -71.1060828] A \n.. ... ... ... \n28 The Clam Box [42.2763168, -71.0092883] C \n29 The Partisans [42.3478375, -71.0404428] C \n30 Union Oyster House [42.361288, -71.056908] C \n31 Victoria's Diner [42.3270498, -71.0667744] C \n32 Wollaston Beach [42.2806539, -71.0119933] C \n\n normalized_gps \n0 [0.7251058917247415, 0.8141430878559053] \n1 [0.6747391031099019, 0.778052752104061] \n2 [1.0, 0.41697235794883575] \n3 [0.6697144722136962, 0.7604611403245493] \n4 [0.6458298305822171, 0.6785480000609988] \n.. ... \n28 [0.30922451563130937, 0.9374025730216268] \n29 [0.6307464973238023, 0.8540870458656248] \n30 [0.6912133469876947, 0.8100546647415456] \n31 [0.5372951958288665, 0.7836692527743693] \n32 [0.32872198960456106, 0.9301686741961767] \n\n[131 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistnormalized_gps
0521 Commercial Street #525[42.3688272, -71.0553792]A[0.7251058917247415, 0.8141430878559053]
1Acorn St[42.3576234, -71.0688746]A[0.6747391031099019, 0.778052752104061]
2Arlington's Great Meadows[42.4299758, -71.2038948]A[1.0, 0.41697235794883575]
3Arthur Fiedler Statue[42.3565057, -71.0754527]A[0.6697144722136962, 0.7604611403245493]
4BU Beach[42.3511927, -71.1060828]A[0.6458298305822171, 0.6785480000609988]
...............
28The Clam Box[42.2763168, -71.0092883]C[0.30922451563130937, 0.9374025730216268]
29The Partisans[42.3478375, -71.0404428]C[0.6307464973238023, 0.8540870458656248]
30Union Oyster House[42.361288, -71.056908]C[0.6912133469876947, 0.8100546647415456]
31Victoria's Diner[42.3270498, -71.0667744]C[0.5372951958288665, 0.7836692527743693]
32Wollaston Beach[42.2806539, -71.0119933]C[0.32872198960456106, 0.9301686741961767]
\n

131 rows × 4 columns

\n
" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(TotalList)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:22.609058Z", + "start_time": "2023-11-06T18:51:22.509542Z" + } + }, + "id": "a03a7c5dacebddd0" + }, + { + "cell_type": "markdown", + "source": [ + "# Cluster and Minimize" + ], + "metadata": { + "collapsed": false + }, + "id": "ee3ab1c81ea71b0" + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/garrinshieh/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", + " super()._check_params_vs_input(X, default_n_init=10)\n", + "/Users/garrinshieh/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: RuntimeWarning: Explicit initial center position passed: performing only one init in KMeans instead of n_init=10.\n", + " super()._check_params_vs_input(X, default_n_init=10)\n" + ] + } + ], + "source": [ + "# Cluster and minimize the data\n", + "df, route_1_coordinates, route_2_coordinates = utils.cluster_and_minimize(TotalList, centroids, norm_centroids, 0.5)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:45.784650Z", + "start_time": "2023-11-06T18:51:22.513160Z" + } + }, + "id": "a1a3e446594e8c20" + }, + { + "cell_type": "markdown", + "source": [ + "# Map" + ], + "metadata": { + "collapsed": false + }, + "id": "dc35d41885a19079" + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": "", + "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a new map with the new coordinates\n", + "m = folium.Map(location=[42.3601, -71.0589], zoom_start=12)\n", + "\n", + "# Plot the centroids on the map\n", + "for i in range(len(centroids)):\n", + " folium.Marker(centroids[i], popup='Centroid ' + str(i), icon=folium.Icon(color='black')).add_to(m)\n", + "\n", + "# Add the points to the map with different colors for each cluster\n", + "for i, row in df.iterrows():\n", + " if row['cluster'] == 0:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='red')).add_to(m)\n", + " elif row['cluster'] == 1:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='blue')).add_to(m)\n", + " elif row['cluster'] == 2:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='green')).add_to(m)\n", + " elif row['cluster'] == 3:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='purple')).add_to(m)\n", + " elif row['cluster'] == 4:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='orange')).add_to(m)\n", + " elif row['cluster'] == 5:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkred')).add_to(m)\n", + " elif row['cluster'] == 6:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='lightred')).add_to(m)\n", + " elif row['cluster'] == 7:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='beige')).add_to(m)\n", + " elif row['cluster'] == 8:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkblue')).add_to(m)\n", + " elif row['cluster'] == 9:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], 
icon=folium.Icon(color='lightblue')).add_to(m)\n", + " elif row['cluster'] == 10:\n", + " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='cadet')).add_to(m)\n", + "\n", + "# Display the map\n", + "m" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:45.869346Z", + "start_time": "2023-11-06T18:51:45.791672Z" + } + }, + "id": "de9c2f7b892b1bee" + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-11-06T18:51:45.869482Z", + "start_time": "2023-11-06T18:51:45.865159Z" + } + }, + "id": "b50ee3d4d6e09be9" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/utils.py b/utils.py index 880dd2a..7f6a408 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,53 @@ import folium import pandas as pd import requests +from sklearn.cluster import KMeans + + +# Given a dataframe of coordinates and centroids, cluster the coordinates, minimize the time difference, and return the routes +def cluster_and_minimize(df, centroids, norm_centroids, time_diff): + # Cluster the coordinates + kmeans = KMeans(n_clusters=len(norm_centroids), init=norm_centroids) + + # Fit the coordinates to the clusters + kmeans.fit(df['normalized_gps'].values.tolist()) + + # Add the cluster labels to the dataframe + df['cluster'] = kmeans.labels_ + + # Create centroid strings + centroid_1 = list_to_string([centroids[0]]) + ';' + centroid_2 = list_to_string([centroids[1]]) + ';' + + # Return the list of locations in each cluster + route_1 = 
df[df['cluster'] == 0] + route_1_stops = len(route_1['gps'].values.tolist()) + route_1_str = list_to_string(route_1['gps'].values.tolist()) + + route_2 = df[df['cluster'] == 1] + route_2_stops = len(route_2['gps'].values.tolist()) + route_2_str = list_to_string(route_2['gps'].values.tolist()) + + # Get the trip time for each route + trip_hrs_1 = get_trip_time(centroid_1 + route_1_str, route_1_stops) + trip_hrs_2 = get_trip_time(centroid_2 + route_2_str, route_2_stops) + + # if the absolute value of the difference in trip times is greater than the time difference, minimize the time difference + if abs(trip_hrs_1 - trip_hrs_2) > time_diff: + route_1_coordinates, route_2_coordinates = minimize_route_time_diff(route_1['gps'].values.tolist(), + route_2['gps'].values.tolist(), + centroid_1, centroid_2, time_diff) + else: + route_1_coordinates = route_1['gps'].values.tolist() + route_2_coordinates = route_2['gps'].values.tolist() + + # Edit the dataframe to reflect the new coordinate clusters + df.loc[df['gps'].astype(str).isin(map(str, route_1_coordinates)), 'cluster'] = 0 + df.loc[df['gps'].astype(str).isin(map(str, route_2_coordinates)), 'cluster'] = 1 + + return df, route_1_coordinates, route_2_coordinates -# make a function that turns a list of lists of coordinates into a string def list_to_string(list_of_lists): """ Takes a list of lists of coordinates and returns a string of the coordinates @@ -11,6 +55,8 @@ def list_to_string(list_of_lists): string = '' for i in list_of_lists: string += str(i[1]) + ',' + str(i[0]) + ';' + + string = string[:-1] return string @@ -33,11 +79,120 @@ def create_json_df(coordinate_string): return df -def get_trip_time(coordinate_string): +def get_trip_time(coordinate_string, num_waypoints): """ Takes a list of lists of coordinates and returns the time of the trip in hours """ coordinates = requests.get('http://acetyl.net:5000/trip/v1/bike/' + coordinate_string) coordinates = coordinates.json() - return 
def normalize_gps(coordinates, centroids):
    """
    Min-max normalize coordinates and centroids onto a common scale.

    The scaling bounds are taken from ``coordinates`` only; ``centroids`` are
    scaled with those same bounds, so a centroid lying outside the bounding
    box of the coordinates maps outside the [0, 1] interval.

    :param coordinates: list of ``[lat, lon]`` pairs; must not be empty
        (``min``/``max`` raise ``ValueError`` on an empty list).
    :param centroids: list of ``[lat, lon]`` pairs to scale with the same bounds.
    :return: tuple ``(normalized_coordinates, normalized_centroids)``, each a
        list of ``[lat, lon]`` lists.
    """
    latitudes = [point[0] for point in coordinates]
    longitudes = [point[1] for point in coordinates]

    min_lat, max_lat = min(latitudes), max(latitudes)
    min_lon, max_lon = min(longitudes), max(longitudes)

    def scale(point):
        return [__min_max_normalize__(point[0], min_lat, max_lat),
                __min_max_normalize__(point[1], min_lon, max_lon)]

    return [scale(point) for point in coordinates], [scale(point) for point in centroids]


def __min_max_normalize__(value, min_value, max_value):
    """
    Takes a value, min value, and max value and returns the normalized value.

    Returns ``0.0`` when ``min_value == max_value``: the range is degenerate
    (e.g. every point shares the same latitude), and dividing by it would
    raise ``ZeroDivisionError``.
    """
    value_range = max_value - min_value
    if value_range == 0:
        return 0.0
    return (value - min_value) / value_range


def minimize_route_time_diff(route_1_coordinates, route_2_coordinates, route_1_start, route_2_start,
                             time_diff):
    """
    Rebalance two routes until their trip times differ by at most ``time_diff``.

    While the gap is too large, the stop of the slower route that lies closest
    to the centroid of the faster route is moved over to the faster route.
    Balancing stops as soon as one of the following holds:

    * the gap is within ``time_diff``;
    * the slower route has a single stop left (moving it would empty the route);
    * the last move did not shrink the gap (the move is undone first), which
      guarantees termination instead of shuffling one stop back and forth.

    Both input lists are modified in place and are also returned.

    :param route_1_coordinates: list of ``[lat, lon]`` stops of the first route.
    :param route_2_coordinates: list of ``[lat, lon]`` stops of the second route.
    :param route_1_start: start-coordinate string prepended to the first route's
        stop string before it is passed to ``get_trip_time``.
    :param route_2_start: same, for the second route.
    :param time_diff: largest acceptable gap, in the unit returned by
        ``get_trip_time`` (hours).
    :return: tuple ``(route_1_coordinates, route_2_coordinates)``.
    """

    def trip_time(start, stops):
        return get_trip_time(start + list_to_string(stops), len(stops))

    route_1_time = trip_time(route_1_start, route_1_coordinates)
    route_2_time = trip_time(route_2_start, route_2_coordinates)
    gap = abs(route_1_time - route_2_time)

    while gap > time_diff:
        # Take stops from the route that actually takes longer; the number of
        # stops alone does not say which route is slower.
        if route_1_time > route_2_time:
            slower, faster = route_1_coordinates, route_2_coordinates
        else:
            slower, faster = route_2_coordinates, route_1_coordinates

        # Never strip a route of its last stop.
        if len(slower) <= 1:
            break

        moved = move_coordinate(slower, faster)
        position = slower.index(moved)
        slower.pop(position)
        faster.append(moved)

        new_route_1_time = trip_time(route_1_start, route_1_coordinates)
        new_route_2_time = trip_time(route_2_start, route_2_coordinates)
        new_gap = abs(new_route_1_time - new_route_2_time)

        if new_gap >= gap:
            # The move did not help: restore the previous assignment and stop.
            faster.pop()
            slower.insert(position, moved)
            break

        route_1_time, route_2_time, gap = new_route_1_time, new_route_2_time, new_gap

    return route_1_coordinates, route_2_coordinates


def move_coordinate(larger_centroid_coordinates, smaller_centroid_coordinates):
    """
    Pick the coordinate of the larger cluster that should move to the smaller one.

    The chosen coordinate is the one closest to the centroid (mean point) of
    ``smaller_centroid_coordinates``; on ties the earliest one wins. Neither
    list is modified - the caller performs the actual move.

    :param larger_centroid_coordinates: list of ``[lat, lon]`` candidates.
    :param smaller_centroid_coordinates: list of ``[lat, lon]`` points of the
        receiving cluster. If it is empty there is no centroid to measure
        against, so the first candidate is returned.
    :return: the selected element of ``larger_centroid_coordinates``.
    :raises IndexError: if ``larger_centroid_coordinates`` is empty.
    """
    if not larger_centroid_coordinates:
        raise IndexError("larger_centroid_coordinates is empty; there is no coordinate to move")

    if not smaller_centroid_coordinates:
        return larger_centroid_coordinates[0]

    count = len(smaller_centroid_coordinates)
    smaller_centroid = [sum(point[0] for point in smaller_centroid_coordinates) / count,
                        sum(point[1] for point in smaller_centroid_coordinates) / count]

    # min() returns the first minimal element, matching a strict "<" scan.
    return min(larger_centroid_coordinates,
               key=lambda point: __distance__(point, smaller_centroid))


def __distance__(coordinate1, coordinate2):
    """
    Takes two coordinates and returns the straight-line (Euclidean) distance
    between them, computed directly on the raw coordinate values.
    """
    return ((coordinate1[0] - coordinate2[0]) ** 2 + (coordinate1[1] - coordinate2[1]) ** 2) ** 0.5