{ "cells": [ { "cell_type": "code", "execution_count": 248, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2023-11-06T00:18:48.861074Z", "start_time": "2023-11-06T00:18:48.853531Z" } }, "outputs": [], "source": [ "import folium\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from scipy.cluster.hierarchy import dendrogram, linkage\n", "from scipy.cluster.hierarchy import fcluster\n", "from sklearn.metrics import silhouette_score\n", "from sklearn.cluster import KMeans\n", "import utils" ] }, { "cell_type": "code", "execution_count": 249, "outputs": [], "source": [ "# Load the data\n", "ListA = pd.read_csv('List A.csv')\n", "ListB = pd.read_csv('List B.csv')\n", "ListC = pd.read_csv('List C.csv')\n", "ListD = pd.read_csv('List D.csv')" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:49.113478Z", "start_time": "2023-11-06T00:18:49.101708Z" } }, "id": "bb6f57eef695cf76" }, { "cell_type": "code", "execution_count": 250, "outputs": [ { "data": { "text/plain": " name gps \\\n0 521 Commercial Street #525 42.3688272,-71.0553792 \n1 Acorn St 42.3576234,-71.0688746 \n2 Arlington's Great Meadows 42.4299758,-71.2038948 \n3 Arthur Fiedler Statue 42.3565057,-71.0754527 \n4 BU Beach 42.3511927,-71.1060828 \n.. ... ... \n33 The Quiet Few 42.3670906,-71.0359889 \n34 The Tall Ship Boston 42.3649544,-71.0414523 \n35 Toasted Flats 42.3711266,-71.0371343 \n36 Vega Market 42.3891835,-71.033703 \n37 Winthrop High School 42.3803348,-70.9799864 \n\n googleUrl \\\n0 https://maps.google.com/maps?q=+%4042.3688272,... \n1 https://maps.google.com/maps?q=+%4042.3576234,... \n2 https://maps.google.com/maps?q=+%4042.4299758,... \n3 https://maps.google.com/maps?q=+%4042.3565057,... \n4 https://maps.google.com/maps?q=+%4042.3511927,... \n.. ... \n33 https://maps.google.com/maps?q=+%4042.3670906,... \n34 https://maps.google.com/maps?q=+%4042.3649544,... \n35 https://maps.google.com/maps?q=+%4042.3711266,... \n36 https://maps.google.com/maps?q=+%4042.3891835,... \n37 https://maps.google.com/maps?q=+%4042.3803348,... \n\n originalUrl info types \\\n0 https://www.google.com/maps/place/521+Commerci... NaN NaN \n1 https://www.google.com/maps/place/Acorn+St/dat... NaN NaN \n2 https://www.google.com/maps/place/Arlington's+... NaN NaN \n3 https://www.google.com/maps/place/Arthur+Fiedl... NaN NaN \n4 https://www.google.com/maps/place/BU+Beach/dat... NaN NaN \n.. ... ... ... \n33 https://www.google.com/maps/place/The+Quiet+Fe... NaN NaN \n34 https://www.google.com/maps/place/The+Tall+Shi... NaN NaN \n35 https://www.google.com/maps/place/Toasted+Flat... NaN NaN \n36 https://www.google.com/maps/place/Vega+Market/... NaN NaN \n37 https://www.google.com/maps/place/Winthrop+Hig... NaN NaN \n\n address \\\n0 NaN \n1 NaN \n2 Minuteman Commuter Bikeway, Lexington, MA 0242... \n3 Charles River Esplanades, Boston, MA 02114, Un... \n4 270 Bay State Rd, Boston, MA 02215, United States \n.. ... \n33 331 Sumner St, East Boston, MA 02128, United S... \n34 1 E Pier Dr, Boston, MA 02128, United States \n35 53 Chelsea St, Boston, MA 02128, United States \n36 29 Maverick St, Chelsea, MA 02150, United States \n37 400 Main St, Winthrop, MA 02152, United States \n\n description type \\\n0 NaN NaN \n1 NaN NaN \n2 183-acres of wet meadows & uplands with trails... Nature preserve \n3 NaN Sculpture \n4 A sloping, grassy plaza on the university grou... Park \n.. ... ... \n33 NaN Bar \n34 Laid-back waterside tall ship with tables doli... Lounge \n35 Snug eatery whipping up health-conscious flatb... Sandwich shop \n36 NaN Grocery store \n37 NaN High school \n\n phone website \\\n0 NaN NaN \n1 NaN NaN \n2 +1 781-863-5385 http://www.foagm.org/ \n3 +1 617-332-2433 http://helmicksculpture.com/portfolio/arthur-f... \n4 NaN https://www.bu.edu/today/2009/icons-among-us-t... \n.. ... ... \n33 +1 617-561-1061 https://www.thequietfew.com/ \n34 +1 617-307-7714 https://www.tallshipboston.com/ \n35 +1 857-264-8531 https://toastedflats.com/ \n36 NaN NaN \n37 +1 617-846-5505 https://www.winthrop.k12.ma.us/Domain/99 \n\n ratingsAverage ratingsTotal plusCode list \n0 NaN NaN NaN A \n1 NaN NaN NaN A \n2 4.6 171.0 CQHW+XC Lexington, Massachusetts, USA A \n3 4.6 14.0 9W4F+JR Boston, Massachusetts, USA A \n4 4.5 133.0 9V2V+FH Boston, Massachusetts, USA A \n.. ... ... ... ... \n33 4.7 257.0 9X87+RJ East Boston, Boston, MA, USA D \n34 4.2 549.0 9X75+XC Boston, Massachusetts, USA D \n35 4.7 372.0 9XC7+F4 Boston, Massachusetts, USA D \n36 4.8 5.0 9XQ8+MG Chelsea, Massachusetts, USA D \n37 NaN NaN 92JC+42 Winthrop, Massachusetts, USA D \n\n[169 rows x 15 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpsgoogleUrloriginalUrlinfotypesaddressdescriptiontypephonewebsiteratingsAverageratingsTotalplusCodelist
0521 Commercial Street #52542.3688272,-71.0553792https://maps.google.com/maps?q=+%4042.3688272,...https://www.google.com/maps/place/521+Commerci...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNA
1Acorn St42.3576234,-71.0688746https://maps.google.com/maps?q=+%4042.3576234,...https://www.google.com/maps/place/Acorn+St/dat...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNA
2Arlington's Great Meadows42.4299758,-71.2038948https://maps.google.com/maps?q=+%4042.4299758,...https://www.google.com/maps/place/Arlington's+...NaNNaNMinuteman Commuter Bikeway, Lexington, MA 0242...183-acres of wet meadows & uplands with trails...Nature preserve+1 781-863-5385http://www.foagm.org/4.6171.0CQHW+XC Lexington, Massachusetts, USAA
3Arthur Fiedler Statue42.3565057,-71.0754527https://maps.google.com/maps?q=+%4042.3565057,...https://www.google.com/maps/place/Arthur+Fiedl...NaNNaNCharles River Esplanades, Boston, MA 02114, Un...NaNSculpture+1 617-332-2433http://helmicksculpture.com/portfolio/arthur-f...4.614.09W4F+JR Boston, Massachusetts, USAA
4BU Beach42.3511927,-71.1060828https://maps.google.com/maps?q=+%4042.3511927,...https://www.google.com/maps/place/BU+Beach/dat...NaNNaN270 Bay State Rd, Boston, MA 02215, United StatesA sloping, grassy plaza on the university grou...ParkNaNhttps://www.bu.edu/today/2009/icons-among-us-t...4.5133.09V2V+FH Boston, Massachusetts, USAA
................................................
33The Quiet Few42.3670906,-71.0359889https://maps.google.com/maps?q=+%4042.3670906,...https://www.google.com/maps/place/The+Quiet+Fe...NaNNaN331 Sumner St, East Boston, MA 02128, United S...NaNBar+1 617-561-1061https://www.thequietfew.com/4.7257.09X87+RJ East Boston, Boston, MA, USAD
34The Tall Ship Boston42.3649544,-71.0414523https://maps.google.com/maps?q=+%4042.3649544,...https://www.google.com/maps/place/The+Tall+Shi...NaNNaN1 E Pier Dr, Boston, MA 02128, United StatesLaid-back waterside tall ship with tables doli...Lounge+1 617-307-7714https://www.tallshipboston.com/4.2549.09X75+XC Boston, Massachusetts, USAD
35Toasted Flats42.3711266,-71.0371343https://maps.google.com/maps?q=+%4042.3711266,...https://www.google.com/maps/place/Toasted+Flat...NaNNaN53 Chelsea St, Boston, MA 02128, United StatesSnug eatery whipping up health-conscious flatb...Sandwich shop+1 857-264-8531https://toastedflats.com/4.7372.09XC7+F4 Boston, Massachusetts, USAD
36Vega Market42.3891835,-71.033703https://maps.google.com/maps?q=+%4042.3891835,...https://www.google.com/maps/place/Vega+Market/...NaNNaN29 Maverick St, Chelsea, MA 02150, United StatesNaNGrocery storeNaNNaN4.85.09XQ8+MG Chelsea, Massachusetts, USAD
37Winthrop High School42.3803348,-70.9799864https://maps.google.com/maps?q=+%4042.3803348,...https://www.google.com/maps/place/Winthrop+Hig...NaNNaN400 Main St, Winthrop, MA 02152, United StatesNaNHigh school+1 617-846-5505https://www.winthrop.k12.ma.us/Domain/99NaNNaN92JC+42 Winthrop, Massachusetts, USAD
\n

169 rows × 15 columns

\n
" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Combine the two lists and add a column to indicate the list\n", "ListA['list'] = 'A'\n", "ListB['list'] = 'B'\n", "ListC['list'] = 'C'\n", "ListD['list'] = 'D'\n", "\n", "TotalList = pd.concat([ListA, ListB, ListC, ListD])\n", "display(TotalList)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:49.364284Z", "start_time": "2023-11-06T00:18:49.351342Z" } }, "id": "dc434958d5e4a3a8" }, { "cell_type": "code", "execution_count": 251, "outputs": [], "source": [ "# Remove all columns but name and gps\n", "TotalList = TotalList[['name', 'gps', 'list']]" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:49.785140Z", "start_time": "2023-11-06T00:18:49.779600Z" } }, "id": "2873c16423fe3119" }, { "cell_type": "code", "execution_count": 252, "outputs": [], "source": [ "# Convert the gps column to a list of lists for k-means\n", "TotalList['gps'] = TotalList['gps'].apply(lambda x: x.strip('[]').split(','))\n", "TotalList['gps'] = TotalList['gps'].apply(lambda x: [float(i) for i in x])" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:50.022174Z", "start_time": "2023-11-06T00:18:50.008422Z" } }, "id": "29f9155ef8d75fda" }, { "cell_type": "code", "execution_count": 253, "outputs": [ { "data": { "text/plain": " name gps list\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A\n1 Acorn St [42.3576234, -71.0688746] A\n2 Arlington's Great Meadows [42.4299758, -71.2038948] A\n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A\n4 BU Beach [42.3511927, -71.1060828] A\n.. ... ... ...\n33 The Quiet Few [42.3670906, -71.0359889] D\n34 The Tall Ship Boston [42.3649544, -71.0414523] D\n35 Toasted Flats [42.3711266, -71.0371343] D\n36 Vega Market [42.3891835, -71.033703] D\n37 Winthrop High School [42.3803348, -70.9799864] D\n\n[169 rows x 3 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslist
0521 Commercial Street #525[42.3688272, -71.0553792]A
1Acorn St[42.3576234, -71.0688746]A
2Arlington's Great Meadows[42.4299758, -71.2038948]A
3Arthur Fiedler Statue[42.3565057, -71.0754527]A
4BU Beach[42.3511927, -71.1060828]A
............
33The Quiet Few[42.3670906, -71.0359889]D
34The Tall Ship Boston[42.3649544, -71.0414523]D
35Toasted Flats[42.3711266, -71.0371343]D
36Vega Market[42.3891835, -71.033703]D
37Winthrop High School[42.3803348, -70.9799864]D
\n

169 rows × 3 columns

\n
" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(TotalList)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:50.242086Z", "start_time": "2023-11-06T00:18:50.237892Z" } }, "id": "a03a7c5dacebddd0" }, { "cell_type": "markdown", "source": [ "# Dendrogram" ], "metadata": { "collapsed": false }, "id": "72e85d219be8c635" }, { "cell_type": "code", "execution_count": 254, "outputs": [ { "data": { "text/plain": "
", "image/png": "" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create the linkage matrix\n", "linkage_matrix = linkage(TotalList['gps'].values.tolist(), 'ward')\n", "\n", "# Plot the dendrogram\n", "plt.figure(figsize=(25, 10))\n", "plt.title('Hierarchical Clustering Dendrogram')\n", "plt.xlabel('sample index')\n", "plt.ylabel('distance')\n", "dendrogram(linkage_matrix, leaf_rotation=90., leaf_font_size=8.)\n", "plt.show()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:51.214742Z", "start_time": "2023-11-06T00:18:50.736378Z" } }, "id": "9e215df3a350e3cf" }, { "cell_type": "code", "execution_count": 255, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of clusters: 10\n", "Silhouette score: 0.4218177237729999\n" ] } ], "source": [ "# Set the threshold distance\n", "threshold_distance = 0.15\n", "\n", "# Cut the dendrogram to get cluster labels\n", "cluster_labels_hc = fcluster(linkage_matrix, t=threshold_distance, criterion='distance')\n", "\n", "# Now, you have the number of clusters determined by the dendrogram\n", "num_clusters = len(np.unique(cluster_labels_hc))\n", "print(\"Number of clusters:\", num_clusters)\n", "\n", "# Calculate the silhouette score to evaluate the clustering\n", "silhouette_avg = silhouette_score(TotalList['gps'].values.tolist(), cluster_labels_hc)\n", "print(\"Silhouette score:\", silhouette_avg)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:51.241741Z", "start_time": "2023-11-06T00:18:51.217906Z" } }, "id": "2f52d83746e670d" }, { "cell_type": "markdown", "source": [ "# K-means" ], "metadata": { "collapsed": false }, "id": "bc97c258908ac38a" }, { "cell_type": "code", "execution_count": 256, "outputs": [], "source": [ "# Cluster the data using Gaussian Mixture Models\n", "# Create two centroids, one in the North End and one in the Financial District\n", "centroids = [[42.364506, -71.054733], [42.358894, -71.056742]]" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:54.543675Z", "start_time": "2023-11-06T00:18:54.540819Z" } }, "id": "45b59d81ae2de84e" }, { "cell_type": "code", "execution_count": 257, "outputs": [ { "data": { "text/plain": " name gps list weights\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A 0.016268\n1 Acorn St [42.3576234, -71.0688746] A 0.007206\n2 Arlington's Great Meadows [42.4299758, -71.2038948] A 0.000607\n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A 0.004854\n4 BU Beach [42.3511927, -71.1060828] A 0.001925\n.. ... ... ... ...\n33 The Quiet Few [42.3670906, -71.0359889] D 0.004835\n34 The Tall Ship Boston [42.3649544, -71.0414523] D 0.006736\n35 Toasted Flats [42.3711266, -71.0371343] D 0.004775\n36 Vega Market [42.3891835, -71.033703] D 0.002828\n37 Winthrop High School [42.3803348, -70.9799864] D 0.001269\n\n[169 rows x 4 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistweights
0521 Commercial Street #525[42.3688272, -71.0553792]A0.016268
1Acorn St[42.3576234, -71.0688746]A0.007206
2Arlington's Great Meadows[42.4299758, -71.2038948]A0.000607
3Arthur Fiedler Statue[42.3565057, -71.0754527]A0.004854
4BU Beach[42.3511927, -71.1060828]A0.001925
...............
33The Quiet Few[42.3670906, -71.0359889]D0.004835
34The Tall Ship Boston[42.3649544, -71.0414523]D0.006736
35Toasted Flats[42.3711266, -71.0371343]D0.004775
36Vega Market[42.3891835, -71.033703]D0.002828
37Winthrop High School[42.3803348, -70.9799864]D0.001269
\n

169 rows × 4 columns

\n
" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create a weights column that increases as the location gets closer to the centroids\n", "\n", "# Compute the distance from each point to each centroid\n", "TotalList['weights'] = TotalList['gps'].apply(lambda x: [np.linalg.norm(np.array(x) - np.array(centroids[0])), np.linalg.norm(np.array(x) - np.array(centroids[1]))])\n", "\n", "# Invert the weights so that the locations closest to the centroids have the highest weights\n", "TotalList['weights'] = TotalList['weights'].apply(lambda x: [1/i for i in x])\n", "\n", "# Sum the weights\n", "TotalList['weights'] = TotalList['weights'].apply(lambda x: sum(x))\n", "\n", "# Normalize the weights\n", "TotalList['weights'] = TotalList['weights'].apply(lambda x: x/sum(TotalList['weights']))\n", "\n", "display(TotalList)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:54.970160Z", "start_time": "2023-11-06T00:18:54.956075Z" } }, "id": "2f2975484d00129c" }, { "cell_type": "code", "execution_count": 258, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/garrinshieh/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", " super()._check_params_vs_input(X, default_n_init=10)\n", "/Users/garrinshieh/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: RuntimeWarning: Explicit initial center position passed: performing only one init in KMeans instead of n_init=10.\n", " super()._check_params_vs_input(X, default_n_init=10)\n" ] } ], "source": [ "kmeans = KMeans(n_clusters=2, init=centroids).fit(TotalList['gps'].values.tolist())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:56.312805Z", "start_time": "2023-11-06T00:18:56.299246Z" } }, "id": "db1ef4b14a1da5f5" }, { "cell_type": "code", "execution_count": 259, "outputs": [], "source": [ "# Add the cluster labels to the dataframe\n", "TotalList['cluster'] = kmeans.labels_" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:58.475575Z", "start_time": "2023-11-06T00:18:58.468594Z" } }, "id": "99891fae96a2fff7" }, { "cell_type": "code", "execution_count": 260, "outputs": [ { "data": { "text/plain": " name gps list weights \\\n0 521 Commercial Street #525 [42.3688272, -71.0553792] A 0.016268 \n1 Acorn St [42.3576234, -71.0688746] A 0.007206 \n2 Arlington's Great Meadows [42.4299758, -71.2038948] A 0.000607 \n3 Arthur Fiedler Statue [42.3565057, -71.0754527] A 0.004854 \n4 BU Beach [42.3511927, -71.1060828] A 0.001925 \n.. ... ... ... ... \n33 The Quiet Few [42.3670906, -71.0359889] D 0.004835 \n34 The Tall Ship Boston [42.3649544, -71.0414523] D 0.006736 \n35 Toasted Flats [42.3711266, -71.0371343] D 0.004775 \n36 Vega Market [42.3891835, -71.033703] D 0.002828 \n37 Winthrop High School [42.3803348, -70.9799864] D 0.001269 \n\n cluster \n0 0 \n1 0 \n2 1 \n3 0 \n4 1 \n.. ... \n33 0 \n34 0 \n35 0 \n36 0 \n37 0 \n\n[169 rows x 5 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namegpslistweightscluster
0521 Commercial Street #525[42.3688272, -71.0553792]A0.0162680
1Acorn St[42.3576234, -71.0688746]A0.0072060
2Arlington's Great Meadows[42.4299758, -71.2038948]A0.0006071
3Arthur Fiedler Statue[42.3565057, -71.0754527]A0.0048540
4BU Beach[42.3511927, -71.1060828]A0.0019251
..................
33The Quiet Few[42.3670906, -71.0359889]D0.0048350
34The Tall Ship Boston[42.3649544, -71.0414523]D0.0067360
35Toasted Flats[42.3711266, -71.0371343]D0.0047750
36Vega Market[42.3891835, -71.033703]D0.0028280
37Winthrop High School[42.3803348, -70.9799864]D0.0012690
\n

169 rows × 5 columns

\n
" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Display the dataframe\n", "display(TotalList)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:18:58.847605Z", "start_time": "2023-11-06T00:18:58.843691Z" } }, "id": "49fc751352022ad1" }, { "cell_type": "markdown", "source": [ "# Map" ], "metadata": { "collapsed": false }, "id": "8c46f8ae30caa721" }, { "cell_type": "code", "execution_count": 261, "outputs": [], "source": [ "# Create a map in Boston\n", "m = folium.Map(location=[42.3601, -71.0589], zoom_start=12)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:19:00.620249Z", "start_time": "2023-11-06T00:19:00.612951Z" } }, "id": "48d76bd40c44cc61" }, { "cell_type": "code", "execution_count": 262, "outputs": [], "source": [ "# Plot the centroids on the map\n", "for i in range(len(centroids)):\n", " folium.Marker(centroids[i], popup='Centroid ' + str(i), icon=folium.Icon(color='black')).add_to(m)\n", " \n", "# Add the points to the map with different colors for each cluster\n", "for i, row in TotalList.iterrows():\n", " if row['cluster'] == 0:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='red')).add_to(m)\n", " elif row['cluster'] == 1:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='blue')).add_to(m)\n", " elif row['cluster'] == 2:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='green')).add_to(m)\n", " elif row['cluster'] == 3:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='purple')).add_to(m)\n", " elif row['cluster'] == 4:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='orange')).add_to(m)\n", " elif row['cluster'] == 5:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkred')).add_to(m)\n", " elif row['cluster'] == 6:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='lightred')).add_to(m)\n", " elif row['cluster'] == 7:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='beige')).add_to(m)\n", " elif row['cluster'] == 8:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='darkblue')).add_to(m)\n", " elif row['cluster'] == 9:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='lightblue')).add_to(m)\n", " elif row['cluster'] == 10:\n", " folium.Marker([row['gps'][0], row['gps'][1]], popup=row['name'], icon=folium.Icon(color='cadet')).add_to(m)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:19:01.125434Z", "start_time": "2023-11-06T00:19:01.109369Z" } }, "id": "3c8a7d2b34d4f22d" }, { "cell_type": "code", "execution_count": 263, "outputs": [ { "data": { "text/plain": "", "text/html": "
Make this Notebook Trusted to load map: File -> Trust Notebook
" }, "execution_count": 263, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the map\n", "m" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:19:02.155307Z", "start_time": "2023-11-06T00:19:02.145235Z" } }, "id": "d6941d1f0a203ee7" }, { "cell_type": "code", "execution_count": 264, "outputs": [ { "data": { "text/plain": "0 106\n1 63\nName: cluster, dtype: int64" }, "execution_count": 264, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the number of locations in each cluster\n", "TotalList['cluster'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:19:05.516139Z", "start_time": "2023-11-06T00:19:05.510902Z" } }, "id": "479ba8f36cdafbf8" }, { "cell_type": "code", "execution_count": 244, "outputs": [], "source": [ "# create a method to move n number of locations from the largest cluster to the smallest cluster, taking distance into account\n", "def equalize_clusters(df, n):\n", " # Get the number of locations in each cluster\n", " cluster_counts = df['cluster'].value_counts()\n", " \n", " # Get the largest and smallest clusters\n", " largest_cluster = cluster_counts.index[0]\n", " smallest_cluster = cluster_counts.index[-1]\n", " \n", " # Get the locations in the largest cluster\n", " largest_cluster_locations = df[df['cluster'] == largest_cluster]\n", " \n", " # Get the locations in the smallest cluster\n", " smallest_cluster_locations = df[df['cluster'] == smallest_cluster]\n", " \n", " # Create a list of distances from each location in the largest cluster to each location in the smallest cluster\n", " distances = []\n", " for i, row in largest_cluster_locations.iterrows():\n", " for j, row2 in smallest_cluster_locations.iterrows():\n", " distances.append([i, j, np.linalg.norm(np.array(row['gps']) - np.array(row2['gps']))])\n", " \n", " # Sort the distances by distance\n", " distances.sort(key=lambda x: x[2])\n", " \n", " # Move the n closest locations from the largest cluster to the smallest cluster\n", " for i in range(n):\n", " df.loc[distances[i][0], 'cluster'] = smallest_cluster\n", " df.loc[distances[i][1], 'cluster'] = largest_cluster\n", " \n", " return df" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-05T23:51:40.761645Z", "start_time": "2023-11-05T23:51:40.752184Z" } }, "id": "4b79215a12bf36e2" }, { "cell_type": "code", "execution_count": 245, "outputs": [ { "data": { "text/plain": "0 97\n1 72\nName: cluster, dtype: int64" }, "execution_count": 245, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Equalize the clusters\n", "TotalList = equalize_clusters(TotalList, 20)\n", "\n", "# Display the number of locations in each cluster\n", "TotalList['cluster'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-05T23:51:41.510095Z", "start_time": "2023-11-05T23:51:41.434643Z" } }, "id": "176d5f92130c67b8" }, { "cell_type": "code", "execution_count": 267, "outputs": [ { "data": { "text/plain": "0 106\n1 63\nName: cluster, dtype: int64" }, "execution_count": 267, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TotalList['cluster'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:20:43.543111Z", "start_time": "2023-11-06T00:20:43.529364Z" } }, "id": "1b8e5c1793fb2feb" }, { "cell_type": "code", "execution_count": 265, "outputs": [ { "data": { "text/plain": "'-71.0553792,42.3688272;-71.0688746,42.3576234;-71.2038948,42.4299758;-71.0754527,42.3565057;-71.1060828,42.3511927;-71.0969274,42.3446263;-71.130887,42.35304;-71.0620802,42.3579151;-71.1459593,42.3501823;-71.0586014,42.357357;-71.0572023,42.3587627;-71.0556268,42.36521;-71.1460435,42.3495825;-71.1217152,42.3426377;-71.0720926,42.3489004;-71.067859,42.3500079;-71.0632036,42.3556154;-71.1258765,42.331864;-71.1095021,42.3364675;-71.133103,42.3890049;-71.0620134,42.3248471;-71.0851891,42.3500031;-71.1123834,42.3360385;-71.066414,42.354296;-71.2273649,42.3145041;-71.0834061,42.341987;-71.0992038,42.3306454;-71.0990577,42.3381442;-71.0569649,42.3604952;-71.0949218,42.3419564;-71.0942861,42.3413301;-71.0498714,42.3256817;-71.0908104,42.329969;-71.0616035,42.3537983;-71.0359433,42.3485465;-71.0913583,42.3490205;-71.1000217,42.3323776;-71.1241295,42.3518397;-71.1618052,42.3245965;-71.0638101,42.3587772;-71.1625829,42.340795;-71.167854,42.4107892;-71.155555,42.3317473;-71.1227278,42.3965778;-71.3598149,42.3140229;-71.1126695,42.3836229;-71.0555003,42.3640137;-71.119149,42.3884;-71.0712561,42.3407613;-71.0561781,42.3668968;-71.0664019,42.3554589;-71.059228,42.359349;-71.0668408,42.3524116;-71.0872846,42.2961434;-71.062146,42.366198;-71.1427371,42.3433772;-71.1438455,42.3569102;-71.0651214,42.3553972;-71.0596124,42.3509517;-71.0359354,42.3478381;-71.1313443,42.3525708;-71.1284677,42.3631904;-71.061757,42.3691906;-71.119301,42.388547;-71.097883,42.381008;-71.1107166,42.3741209;-71.0609962,42.3803747;-71.0516339,42.3609921;-71.1194344,42.3754427;-71.0809932,42.3675275;-71.0545357,42.3597994;-71.1013044,42.3627462;-71.1108423,42.3838224;-71.1026937,42.3820702;-71.1189467,42.373465;-71.1208817,42.3732344;-71.0342146,42.316274;-71.0756902,42.3695046;-71.0678704,42.3701829;-71.0968274,42.3799095;-71.0656594,42.3718401;-71.094048,42.339381;-71.1854722,42.3621177;-71.1146697,42.3782386;-71.0935443,42.3817274;-71.0611749,42.3551807;-71.0906355,42.3616095;-71.1161887,42.3766442;-71.0962734,42.3627993;-71.1155576,42.3784629;-71.0949101,42.3797674;-71.1087411,42.3640287;-71.0554239,42.3739796;-71.09476,42.37736;-71.1014951,42.3614115;-71.1024769,42.3822934;-71.1011111,42.3636597;-71.0631664,42.3741694;-71.056823,42.361531;-71.0632852,42.2857047;-71.0637877,42.2845163;-71.0496839,42.3519736;-71.0454645,42.3162356;-71.0336324,42.3441918;-71.0487437,42.3508756;-71.0512911,42.3521821;-71.0013637,42.2075316;-71.0607764,42.3763541;-71.0374911,42.316031;-71.0125206,42.3378699;-71.0672898,42.3523158;-71.02832,42.2576602;-71.0502126,42.3516479;-71.0331956,42.3639107;-71.0432778,42.3528151;-71.0035279,42.2392354;-71.0470633,42.3537343;-71.0352443,42.3291218;-71.0898829,42.3463992;-71.0240951,42.2743442;-71.0234949,42.3358743;-70.985881,42.420226;-71.0005483,42.2454086;-71.0096371,42.3367603;-71.0447796,42.3509709;-71.0983169,42.3319001;-71.0092883,42.2763168;-71.0404428,42.3478375;-71.056908,42.361288;-71.0667744,42.3270498;-71.0119933,42.2806539;-71.0618764,42.4074484;-71.0612182,42.3986053;-71.0392667,42.3855456;-71.0515875,42.4025721;-70.9903023,42.3917606;-71.055873,42.4206339;-71.0433886,42.4222989;-71.06088,42.3761612;-71.0412802,42.3936888;-71.0714924,42.3968978;-71.0282154,42.3778389;-71.0350852,42.3809511;-71.0331398,42.3734483;-70.9693867,42.3895122;-71.0945712,42.3253252;-71.0280157,42.398422;-71.0155516,42.4114215;-70.993656,42.4110462;-71.0355621,42.3976519;-71.0056995,42.390191;-71.0589219,42.403759;-71.037937,42.3698284;-71.0386285,42.3903823;-71.0316196,42.4122481;-71.0328839,42.3861321;-71.0270609,42.4213082;-71.0366491,42.391236;-71.0361399,42.3649623;-71.0116946,42.3827415;-70.9973058,42.4183123;-71.1122037,42.4008442;-70.997123,42.390501;-71.0506461,42.41826;-71.0359889,42.3670906;-71.0414523,42.3649544;-71.0371343,42.3711266;-71.033703,42.3891835;-70.9799864,42.3803348;'" }, "execution_count": 265, "metadata": {}, "output_type": "execute_result" } ], "source": [ "utils.list_to_string(TotalList['gps'].values.tolist())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:19:09.317992Z", "start_time": "2023-11-06T00:19:09.308541Z" } }, "id": "2d83e5db093608d2" }, { "cell_type": "code", "execution_count": 268, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "106\n" ] }, { "data": { "text/plain": "'-71.0553792,42.3688272;-71.0688746,42.3576234;-71.0754527,42.3565057;-71.0620802,42.3579151;-71.0586014,42.357357;-71.0572023,42.3587627;-71.0556268,42.36521;-71.0720926,42.3489004;-71.067859,42.3500079;-71.0632036,42.3556154;-71.0620134,42.3248471;-71.066414,42.354296;-71.0569649,42.3604952;-71.0498714,42.3256817;-71.0616035,42.3537983;-71.0359433,42.3485465;-71.0638101,42.3587772;-71.0555003,42.3640137;-71.0712561,42.3407613;-71.0561781,42.3668968;-71.0664019,42.3554589;-71.059228,42.359349;-71.0668408,42.3524116;-71.062146,42.366198;-71.0651214,42.3553972;-71.0596124,42.3509517;-71.0359354,42.3478381;-71.061757,42.3691906;-71.0609962,42.3803747;-71.0516339,42.3609921;-71.0809932,42.3675275;-71.0545357,42.3597994;-71.0342146,42.316274;-71.0756902,42.3695046;-71.0678704,42.3701829;-71.0656594,42.3718401;-71.0611749,42.3551807;-71.0554239,42.3739796;-71.0631664,42.3741694;-71.056823,42.361531;-71.0632852,42.2857047;-71.0637877,42.2845163;-71.0496839,42.3519736;-71.0454645,42.3162356;-71.0336324,42.3441918;-71.0487437,42.3508756;-71.0512911,42.3521821;-71.0013637,42.2075316;-71.0607764,42.3763541;-71.0374911,42.316031;-71.0125206,42.3378699;-71.0672898,42.3523158;-71.02832,42.2576602;-71.0502126,42.3516479;-71.0331956,42.3639107;-71.0432778,42.3528151;-71.0035279,42.2392354;-71.0470633,42.3537343;-71.0352443,42.3291218;-71.0240951,42.2743442;-71.0234949,42.3358743;-70.985881,42.420226;-71.0005483,42.2454086;-71.0096371,42.3367603;-71.0447796,42.3509709;-71.0092883,42.2763168;-71.0404428,42.3478375;-71.056908,42.361288;-71.0667744,42.3270498;-71.0119933,42.2806539;-71.0618764,42.4074484;-71.0612182,42.3986053;-71.0392667,42.3855456;-71.0515875,42.4025721;-70.9903023,42.3917606;-71.055873,42.4206339;-71.0433886,42.4222989;-71.06088,42.3761612;-71.0412802,42.3936888;-71.0714924,42.3968978;-71.0282154,42.3778389;-71.0350852,42.3809511;-71.0331398,42.3734483;-70.9693867,42.3895122;-71.0280157,42.398422;-71.0155516,42.4114215;-70.993656,42.4110462;-71.0355621,42.3976519;-71.0056995,42.390191;-71.0589219,42.403759;-71.037937,42.3698284;-71.0386285,42.3903823;-71.0316196,42.4122481;-71.0328839,42.3861321;-71.0270609,42.4213082;-71.0366491,42.391236;-71.0361399,42.3649623;-71.0116946,42.3827415;-70.9973058,42.4183123;-70.997123,42.390501;-71.0506461,42.41826;-71.0359889,42.3670906;-71.0414523,42.3649544;-71.0371343,42.3711266;-71.033703,42.3891835;-70.9799864,42.3803348;'" }, "execution_count": 268, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Return the list of locations in each cluster\n", "print(len(TotalList[TotalList['cluster'] == 0]['gps'].values.tolist()))\n", "utils.list_to_string(TotalList[TotalList['cluster'] == 0]['gps'].values.tolist())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:25:20.703565Z", "start_time": "2023-11-06T00:25:20.690483Z" } }, "id": "89297f77828e8ed8" }, { "cell_type": "code", "execution_count": 269, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "63\n" ] }, { "data": { "text/plain": "'-71.2038948,42.4299758;-71.1060828,42.3511927;-71.0969274,42.3446263;-71.130887,42.35304;-71.1459593,42.3501823;-71.1460435,42.3495825;-71.1217152,42.3426377;-71.1258765,42.331864;-71.1095021,42.3364675;-71.133103,42.3890049;-71.0851891,42.3500031;-71.1123834,42.3360385;-71.2273649,42.3145041;-71.0834061,42.341987;-71.0992038,42.3306454;-71.0990577,42.3381442;-71.0949218,42.3419564;-71.0942861,42.3413301;-71.0908104,42.329969;-71.0913583,42.3490205;-71.1000217,42.3323776;-71.1241295,42.3518397;-71.1618052,42.3245965;-71.1625829,42.340795;-71.167854,42.4107892;-71.155555,42.3317473;-71.1227278,42.3965778;-71.3598149,42.3140229;-71.1126695,42.3836229;-71.119149,42.3884;-71.0872846,42.2961434;-71.1427371,42.3433772;-71.1438455,42.3569102;-71.1313443,42.3525708;-71.1284677,42.3631904;-71.119301,42.388547;-71.097883,42.381008;-71.1107166,42.3741209;-71.1194344,42.3754427;-71.1013044,42.3627462;-71.1108423,42.3838224;-71.1026937,42.3820702;-71.1189467,42.373465;-71.1208817,42.3732344;-71.0968274,42.3799095;-71.094048,42.339381;-71.1854722,42.3621177;-71.1146697,42.3782386;-71.0935443,42.3817274;-71.0906355,42.3616095;-71.1161887,42.3766442;-71.0962734,42.3627993;-71.1155576,42.3784629;-71.0949101,42.3797674;-71.1087411,42.3640287;-71.09476,42.37736;-71.1014951,42.3614115;-71.1024769,42.3822934;-71.1011111,42.3636597;-71.0898829,42.3463992;-71.0983169,42.3319001;-71.0945712,42.3253252;-71.1122037,42.4008442;'" }, "execution_count": 269, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(len(TotalList[TotalList['cluster'] == 1]['gps'].values.tolist()))\n", "utils.list_to_string(TotalList[TotalList['cluster'] == 1]['gps'].values.tolist())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-11-06T00:25:30.097229Z", "start_time": "2023-11-06T00:25:30.070409Z" } }, "id": "6ff82e29a0366d9e" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false }, "id": "7949bddd34b6731" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }