|
174 | 174 | "Define some functions that will be used repeatedly for visualization."
|
175 | 175 | ]
|
176 | 176 | },
|
| 177 | + { |
| 178 | + "source": [ |
| 179 | + "### 3D matplotlib (plus seaborn) charting with some data prep and optional center points" |
| 180 | + ], |
| 181 | + "cell_type": "markdown", |
| 182 | + "metadata": {} |
| 183 | + }, |
177 | 184 | {
|
178 | 185 | "cell_type": "code",
|
179 | 186 | "execution_count": null,
|
180 | 187 | "metadata": {},
|
181 | 188 | "outputs": [],
|
182 | 189 | "source": [
|
183 |
| - "# 3D matplotlib (plus seaborn) charting with some data prep and optional center points\n", |
184 | 190 | "def show_scatter_3d(df, x_name, y_name, z_name, predicted=None, centers=None,\n",
|
185 | 191 | " marker='o', cmap=None, edgecolors=None, alpha=0.3,\n",
|
186 | 192 | " elev=25, azim=10, show_colorbar=True,\n",
|
|
249 | 255 | " plt.scatter(center[0], center[1], marker=\"X\", s=300, color='red') "
|
250 | 256 | ]
|
251 | 257 | },
|
| 258 | + { |
| 259 | + "source": [ |
| 260 | + "### Plotly 3D scatter chart is almost a one-liner, but use this function to keep the params in one place" |
| 261 | + ], |
| 262 | + "cell_type": "markdown", |
| 263 | + "metadata": {} |
| 264 | + }, |
252 | 265 | {
|
253 | 266 | "cell_type": "code",
|
254 | 267 | "execution_count": null,
|
255 | 268 | "metadata": {},
|
256 | 269 | "outputs": [],
|
257 | 270 | "source": [
|
258 |
| - "# Plotly 3D scatter chart is almost a one-liner, but use this function to keep the params in one place\n", |
259 | 271 | "def plotly_scatter_3d(df, x, y, z, color=None):\n",
|
260 | 272 | " fig = px.scatter_3d(df, x=x, y=y, z=z, color=color,\n",
|
261 | 273 | " opacity=0.2, template='plotly_dark', color_continuous_scale=px.colors.qualitative.Set1)\n",
|
262 | 274 | " fig.show() "
|
263 | 275 | ]
|
264 | 276 | },
|
| 277 | + { |
| 278 | + "source": [ |
| 279 | + "### Use a stacked bar chart for an external evaluation of the churn cluster vs known churn risk" |
| 280 | + ], |
| 281 | + "cell_type": "markdown", |
| 282 | + "metadata": {} |
| 283 | + }, |
265 | 284 | {
|
266 | 285 | "cell_type": "code",
|
267 | 286 | "execution_count": null,
|
268 | 287 | "metadata": {},
|
269 | 288 | "outputs": [],
|
270 | 289 | "source": [
|
271 |
| - "# Use a stacked bar chart for an external evaluation of the churn cluster vs known churn risk\n", |
272 |
| - "\n", |
273 | 290 | "# Map the risk values to sortables (and still OK for the legend)\n",
|
274 | 291 | "risk_map = {'High': '2: High', 'Medium': '1: Medium', 'Low': '0: Low'}\n",
|
275 | 292 | " \n",
|
|
318 | 335 | "show_scatter_3d(blobs_df, 'X', 'Y', 'Z', predicted=blob_labels);"
|
319 | 336 | ]
|
320 | 337 | },
|
| 338 | + { |
| 339 | + "source": [ |
| 340 | + "#### This is the same thing we just showed with matplotlib, but now we have tooltips and we can zoom and rotate.\n", |
| 341 | + "#### Rotating the chart can be very helpful when clusters are overlapping in 3-dimensional space." |
| 342 | + ], |
| 343 | + "cell_type": "markdown", |
| 344 | + "metadata": {} |
| 345 | + }, |
321 | 346 | {
|
322 | 347 | "cell_type": "code",
|
323 | 348 | "execution_count": null,
|
324 | 349 | "metadata": {},
|
325 | 350 | "outputs": [],
|
326 | 351 | "source": [
|
327 |
| - "# This is the same thing we just showed with matplotlib, but now we have tooltips and we can zoom and rotate.\n", |
328 |
| - "# Rotating the chart can be very helpful when clusters are overlapping in 3-dimensional space.\n", |
329 | 352 | "plotly_scatter_3d(blobs_df, 'X', 'Y', 'Z', color='CLUSTER')"
|
330 | 353 | ]
|
331 | 354 | },
|
|
584 | 607 | "outliers_df = temp_df[temp_df['CLUSTER']==-1]"
|
585 | 608 | ]
|
586 | 609 | },
|
| 610 | + { |
| 611 | + "source": [ |
| 612 | + "Here we use matplotlib to chart the outliers recognized by the algorithm and the regions they form. This is based on our mean-shift algorithm." |
| 613 | + ], |
| 614 | + "cell_type": "markdown", |
| 615 | + "metadata": {} |
| 616 | + }, |
587 | 617 | {
|
588 | 618 | "cell_type": "code",
|
589 | 619 | "execution_count": null,
|
|
723 | 753 | "cell_type": "markdown",
|
724 | 754 | "metadata": {},
|
725 | 755 | "source": [
|
726 |
| - "## Hierarchical\n" |
| 756 | + "## Hierarchical Models\n", |
| 757 | + "\n", |
| 758 | + "- Builds a hierarchy of clusters\n", |
| 759 | + "\n", |
| 760 | + "- Starts with all the data points assigned to a cluster of their own\n", |
| 761 | + "\n", |
| 762 | + "- At each step, the two nearest clusters are merged into a single cluster\n", |
| 763 | + "\n", |
| 764 | + "- Terminates when there is only a single cluster left\n", |
| 765 | + "\n", |
| 766 | + "#### Agglomerative \n", |
| 767 | + "\n", |
| 768 | + "- Bottom-up approach to clustering\n", |
| 769 | + "\n", |
| 770 | + "- Start with many small clusters and merge them together to create bigger clusters\n" |
727 | 771 | ]
|
728 | 772 | },
|
729 | 773 | {
|
|
807 | 851 | "kernelspec": {
|
808 | 852 | "display_name": "Python 3.6.10 64-bit ('py36': conda)",
|
809 | 853 | "language": "python",
|
810 |
| - "name": "python_defaultSpec_1600174602311" |
| 854 | + "name": "python_defaultSpec_1600186116323" |
811 | 855 | },
|
812 | 856 | "language_info": {
|
813 | 857 | "codemirror_mode": {
|
|
0 commit comments