Skip to content

Commit 31cb205

Browse files
Updated some of the Exercise files
1 parent 19f03e1 commit 31cb205

File tree

394 files changed

+127346
-1028
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

394 files changed

+127346
-1028
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Estimating the Value of PI with Pyspark"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 2,
13+
"metadata": {},
14+
"outputs": [
15+
{
16+
"ename": "ModuleNotFoundError",
17+
"evalue": "No module named 'pyspark'",
18+
"output_type": "error",
19+
"traceback": [
20+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
21+
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
22+
"\u001b[1;32m<ipython-input-2-c15ae3402d12>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mpyspark\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
23+
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pyspark'"
24+
]
25+
}
26+
],
27+
"source": [
28+
"import pyspark"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": 1,
34+
"metadata": {},
35+
"outputs": [
36+
{
37+
"ename": "NameError",
38+
"evalue": "name 'SparkContext' is not defined",
39+
"output_type": "error",
40+
"traceback": [
41+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
42+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
43+
"\u001b[1;32m<ipython-input-1-357ae8606ddf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Reason why we have the getOrCreate code\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# http://stackoverflow.com/questions/28999332/how-to-access-sparkcontext-in-pyspark-script\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0msc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSparkContext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetOrCreate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
44+
"\u001b[1;31mNameError\u001b[0m: name 'SparkContext' is not defined"
45+
]
46+
}
47+
],
48+
"source": [
49+
"# SparkContext is not a builtin name: it must be imported from pyspark first.\n",
"# (The original cell failed with NameError: name 'SparkContext' is not defined.)\n",
"from pyspark import SparkContext\n",
"\n",
"# Use getOrCreate() because only one SparkContext may exist per JVM; this\n",
"# returns the running context if one exists instead of raising an error.\n",
"# See: http://stackoverflow.com/questions/28999332/how-to-access-sparkcontext-in-pyspark-script\n",
"sc = SparkContext.getOrCreate()\n"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"metadata": {
58+
"collapsed": true
59+
},
60+
"outputs": [],
61+
"source": [
62+
"import numpy as np\n",
"\n",
"# Seed the generator so the Monte Carlo run is reproducible across re-runs.\n",
"np.random.seed(42)\n",
"\n",
"# Number of random points scattered over the square [-1, 1) x [-1, 1).\n",
"TOTAL = 1000000\n",
"\n",
"# Each point is a 2-vector drawn uniformly from [-1, 1)^2; cache the RDD\n",
"# because it is counted here and reused by the plotting/estimation cell.\n",
"dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for _ in range(TOTAL)]).cache()\n",
"print(\"Number of random points:\", dots.count())\n",
"\n",
"# Per-coordinate summary statistics: for a uniform [-1, 1) sample the mean\n",
"# should be near 0 and the stdev near 1/sqrt(3) ~ 0.577.\n",
"stats = dots.stats()\n",
"print('Mean:', stats.mean())\n",
"print('stdev:', stats.stdev())\n"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"metadata": {
77+
"collapsed": true
78+
},
79+
"outputs": [],
80+
"source": [
81+
"%matplotlib inline\n",
"from operator import itemgetter\n",
"from matplotlib import pyplot as plt\n",
"\n",
"# A point lies inside the unit circle when its Euclidean norm is <= 1.\n",
"inCircle = lambda v: np.linalg.norm(v) <= 1.0\n",
"\n",
"# The actual pi estimate (the point of this notebook) uses ALL points:\n",
"# area(circle) / area(square) = pi/4, so pi ~ 4 * (points in circle) / TOTAL.\n",
"piEstimate = 4.0 * dots.filter(inCircle).count() / TOTAL\n",
"print('Estimated value of pi:', piEstimate)\n",
"\n",
"plt.figure(figsize = (10, 5))\n",
"\n",
"# Plot 1: a 1% sample of the points (plotting all of them would be too slow).\n",
"plt.subplot(1, 2, 1)\n",
"plt.title('Random points (1% sample)')\n",
"plt.xlim((-1.0, 1.0))\n",
"plt.ylim((-1.0, 1.0))\n",
"\n",
"sample = dots.sample(False, 0.01)\n",
"X = sample.map(itemgetter(0)).collect()\n",
"Y = sample.map(itemgetter(1)).collect()\n",
"plt.scatter(X, Y)\n",
"\n",
"# Plot 2: the same sample split into inside / outside the unit circle.\n",
"plt.subplot(1, 2, 2)\n",
"plt.title('Inside (red) vs. outside the unit circle')\n",
"plt.xlim((-1.0, 1.0))\n",
"plt.ylim((-1.0, 1.0))\n",
"\n",
"dotsIn = sample.filter(inCircle).cache()\n",
"dotsOut = sample.filter(lambda v: not inCircle(v)).cache()\n",
"\n",
"# inside circle\n",
"Xin = dotsIn.map(itemgetter(0)).collect()\n",
"Yin = dotsIn.map(itemgetter(1)).collect()\n",
"plt.scatter(Xin, Yin, color = 'r')\n",
"\n",
"# outside circle\n",
"Xout = dotsOut.map(itemgetter(0)).collect()\n",
"Yout = dotsOut.map(itemgetter(1)).collect()\n",
"plt.scatter(Xout, Yout)\n",
"plt.show()\n"
115+
]
116+
}
117+
],
118+
"metadata": {
119+
"kernelspec": {
120+
"display_name": "Python 3",
121+
"language": "python",
122+
"name": "python3"
123+
},
124+
"language_info": {
125+
"codemirror_mode": {
126+
"name": "ipython",
127+
"version": 3
128+
},
129+
"file_extension": ".py",
130+
"mimetype": "text/x-python",
131+
"name": "python",
132+
"nbconvert_exporter": "python",
133+
"pygments_lexer": "ipython3",
134+
"version": "3.6.1"
135+
}
136+
},
137+
"nbformat": 4,
138+
"nbformat_minor": 2
139+
}
Binary file not shown.
Binary file not shown.
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading

0 commit comments

Comments
 (0)