Skip to content

Commit 7a137e2

Browse files
committed
Added files to scikit learn, changes to pandas files, questions posed
1 parent 4161963 commit 7a137e2

16 files changed

+3553
-736
lines changed

02-Simulation/02 SciPy Basics.ipynb

+144-6
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,22 @@
11881188
"outputs": [],
11891189
"source": []
11901190
},
1191+
{
1192+
"cell_type": "markdown",
1193+
"metadata": {},
1194+
"source": [
1195+
"### Task 4\n",
1196+
"\n",
1197+
"**EDGE DETECTION TASK; FIND SUITABLE IMAGE**"
1198+
]
1199+
},
1200+
{
1201+
"cell_type": "code",
1202+
"execution_count": null,
1203+
"metadata": {},
1204+
"outputs": [],
1205+
"source": []
1206+
},
11911207
{
11921208
"cell_type": "markdown",
11931209
"metadata": {},
@@ -1217,7 +1233,7 @@
12171233
},
12181234
{
12191235
"cell_type": "code",
1220-
"execution_count": 225,
1236+
"execution_count": 2,
12211237
"metadata": {},
12221238
"outputs": [
12231239
{
@@ -1259,23 +1275,145 @@
12591275
},
12601276
{
12611277
"cell_type": "code",
1262-
"execution_count": 224,
1278+
"execution_count": 3,
1279+
"metadata": {},
1280+
"outputs": [],
1281+
"source": [
1282+
"D = A.sum(axis=0)"
1283+
]
1284+
},
1285+
{
1286+
"cell_type": "code",
1287+
"execution_count": 28,
1288+
"metadata": {},
1289+
"outputs": [],
1290+
"source": [
1291+
"L = sparse.diags(np.asarray(D)[0]) - A"
1292+
]
1293+
},
1294+
{
1295+
"cell_type": "markdown",
1296+
"metadata": {},
1297+
"source": [
1298+
"### Task 5\n",
1299+
"\n",
1300+
"Normalize the Laplacian matrix as follows:\n",
1301+
"\n",
1302+
"$$\n",
1303+
"L_{norm}=D^{-\\frac{1}{2}}LD^{-\\frac{1}{2}}\n",
1304+
"$$\n",
1305+
"\n",
1306+
"where $D^{-\\frac{1}{2}}$ is the diagonal matrix obtained by applying the reciprocal square root $\\frac{1}{\\sqrt{D_{ii}}}$ to every diagonal element $D_{ii}$ (this element-wise definition is valid because $D$ is a diagonal matrix). Calculate the eigenvalues and eigenvectors of this matrix and extract the second-smallest eigenvalue and its corresponding eigenvector. These are known as the **Fiedler number** and Fiedler vector, respectively."
1307+
]
1308+
},
1309+
{
1310+
"cell_type": "code",
1311+
"execution_count": 50,
1312+
"metadata": {},
1313+
"outputs": [
1314+
{
1315+
"name": "stderr",
1316+
"output_type": "stream",
1317+
"text": [
1318+
"//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in true_divide\n",
1319+
" if __name__ == '__main__':\n"
1320+
]
1321+
}
1322+
],
1323+
"source": [
1324+
"D_m = sparse.diags(-1. / np.asarray(np.sqrt(D))[0])\n",
1325+
"L_n = np.dot(D_m, np.dot(L, D_m))"
1326+
]
1327+
},
1328+
{
1329+
"cell_type": "code",
1330+
"execution_count": 54,
12631331
"metadata": {},
12641332
"outputs": [
12651333
{
12661334
"data": {
12671335
"text/plain": [
1268-
"matrix([[3., 3., 5., 4., 4., 4., 4., 3., 4., 5., 4., 2., 3., 4., 4., 3.,\n",
1269-
" 3., 2., 1., 2., 2., 0., 2., 2., 2., 1., 2., 1., 4., 1.]])"
1336+
"True"
12701337
]
12711338
},
1272-
"execution_count": 224,
1339+
"execution_count": 54,
12731340
"metadata": {},
12741341
"output_type": "execute_result"
12751342
}
12761343
],
12771344
"source": [
1278-
"A.sum(axis=0)"
1345+
"sparse.isspmatrix(L_n)"
1346+
]
1347+
},
1348+
{
1349+
"cell_type": "code",
1350+
"execution_count": 55,
1351+
"metadata": {},
1352+
"outputs": [],
1353+
"source": [
1354+
"import scipy"
1355+
]
1356+
},
1357+
{
1358+
"cell_type": "code",
1359+
"execution_count": 57,
1360+
"metadata": {},
1361+
"outputs": [],
1362+
"source": [
1363+
"np.linalg.eigvals?"
1364+
]
1365+
},
1366+
{
1367+
"cell_type": "code",
1368+
"execution_count": 62,
1369+
"metadata": {},
1370+
"outputs": [],
1371+
"source": [
1372+
"from scipy import linalg"
1373+
]
1374+
},
1375+
{
1376+
"cell_type": "code",
1377+
"execution_count": 64,
1378+
"metadata": {},
1379+
"outputs": [
1380+
{
1381+
"ename": "AttributeError",
1382+
"evalue": "module 'scipy.sparse' has no attribute 'linalg'",
1383+
"output_type": "error",
1384+
"traceback": [
1385+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1386+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
1387+
"\u001b[0;32m<ipython-input-64-5494f8ca95c2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinalg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meigh\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mL_n\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1388+
"\u001b[0;31mAttributeError\u001b[0m: module 'scipy.sparse' has no attribute 'linalg'"
1389+
]
1390+
}
1391+
],
1392+
"source": [
1393+
"sparse.linalg.eigh(L_n)"
1394+
]
1395+
},
1396+
{
1397+
"cell_type": "code",
1398+
"execution_count": 63,
1399+
"metadata": {},
1400+
"outputs": [
1401+
{
1402+
"ename": "ValueError",
1403+
"evalue": "Sparse matrices are not supported by this function. Perhaps one of the scipy.sparse.linalg functions would work instead.",
1404+
"output_type": "error",
1405+
"traceback": [
1406+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1407+
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
1408+
"\u001b[0;32m<ipython-input-63-32c28a44fd90>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlinalg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meigh\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mL_n\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1409+
"\u001b[0;32m//anaconda/lib/python3.5/site-packages/scipy/linalg/decomp.py\u001b[0m in \u001b[0;36meigh\u001b[0;34m(a, b, lower, eigvals_only, overwrite_a, overwrite_b, turbo, eigvals, type, check_finite)\u001b[0m\n\u001b[1;32m 372\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 373\u001b[0m \"\"\"\n\u001b[0;32m--> 374\u001b[0;31m \u001b[0ma1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_asarray_validated\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck_finite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcheck_finite\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 375\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0ma1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0ma1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'expected square matrix'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1410+
"\u001b[0;32m//anaconda/lib/python3.5/site-packages/scipy/_lib/_util.py\u001b[0m in \u001b[0;36m_asarray_validated\u001b[0;34m(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)\u001b[0m\n\u001b[1;32m 231\u001b[0m \u001b[0;34m'Perhaps one of the scipy.sparse.linalg functions '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m 'would work instead.')\n\u001b[0;32m--> 233\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 234\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmask_ok\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misMaskedArray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1411+
"\u001b[0;31mValueError\u001b[0m: Sparse matrices are not supported by this function. Perhaps one of the scipy.sparse.linalg functions would work instead."
1412+
]
1413+
}
1414+
],
1415+
"source": [
1416+
"linalg.eigh(L_n)"
12791417
]
12801418
},
12811419
{

02-Simulation/03 Dask Arrays.ipynb

+98-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,101 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Dask Arrays"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 15,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import dask.array as da\n",
17+
"import numpy as np"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": 16,
23+
"metadata": {},
24+
"outputs": [
25+
{
26+
"data": {
27+
"text/plain": [
28+
"[5.0, 6.0, 10.0, 20.0]"
29+
]
30+
},
31+
"execution_count": 16,
32+
"metadata": {},
33+
"output_type": "execute_result"
34+
}
35+
],
36+
"source": [
37+
"A = da.array([5., 6., 10., 20.])\n",
38+
"A"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 17,
44+
"metadata": {},
45+
"outputs": [],
46+
"source": [
47+
"A = np.random.randint(1000, size=(100000))"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": 20,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"A = da.array(A)"
57+
]
58+
},
59+
{
60+
"cell_type": "code",
61+
"execution_count": 21,
62+
"metadata": {},
63+
"outputs": [
64+
{
65+
"data": {
66+
"text/plain": [
67+
"array([967, 396, 125, ..., 268, 941, 132])"
68+
]
69+
},
70+
"execution_count": 21,
71+
"metadata": {},
72+
"output_type": "execute_result"
73+
}
74+
],
75+
"source": [
76+
"A"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": 22,
82+
"metadata": {},
83+
"outputs": [
84+
{
85+
"data": {
86+
"text/plain": [
87+
"50155984"
88+
]
89+
},
90+
"execution_count": 22,
91+
"metadata": {},
92+
"output_type": "execute_result"
93+
}
94+
],
95+
"source": [
96+
"A.sum()"
97+
]
98+
},
399
{
4100
"cell_type": "code",
5101
"execution_count": null,
@@ -10,7 +106,7 @@
10106
],
11107
"metadata": {
12108
"kernelspec": {
13-
"display_name": "Python 3",
109+
"display_name": "Python [default]",
14110
"language": "python",
15111
"name": "python3"
16112
},
@@ -24,7 +120,7 @@
24120
"name": "python",
25121
"nbconvert_exporter": "python",
26122
"pygments_lexer": "ipython3",
27-
"version": "3.6.6"
123+
"version": "3.5.6"
28124
}
29125
},
30126
"nbformat": 4,

03-Data/01 Pandas Basics.ipynb

+4-4
Original file line numberDiff line numberDiff line change
@@ -4002,7 +4002,7 @@
40024002
"cell_type": "code",
40034003
"execution_count": 59,
40044004
"metadata": {
4005-
"scrolled": true
4005+
"scrolled": false
40064006
},
40074007
"outputs": [
40084008
{
@@ -4059,7 +4059,7 @@
40594059
"source": [
40604060
"### Task 3. \n",
40614061
"\n",
4062-
"The *Gini coefficient* is a measure of dispersion usually related to represent an income or wealth distribution between individuals, which always exists between 0 and 1.\n",
4062+
"The **Gini coefficient** is a measure of dispersion usually used to represent the distribution of income or wealth among individuals; it always lies between 0 and 1.\n",
40634063
"\n",
40644064
"A Gini coefficient of 0 expresses perfect equality, whereas a coefficient of 1 expresses maximal inequality among values.\n",
40654065
"\n",
@@ -4108,7 +4108,7 @@
41084108
],
41094109
"metadata": {
41104110
"kernelspec": {
4111-
"display_name": "Python 3",
4111+
"display_name": "Python [default]",
41124112
"language": "python",
41134113
"name": "python3"
41144114
},
@@ -4122,7 +4122,7 @@
41224122
"name": "python",
41234123
"nbconvert_exporter": "python",
41244124
"pygments_lexer": "ipython3",
4125-
"version": "3.6.6"
4125+
"version": "3.5.6"
41264126
}
41274127
},
41284128
"nbformat": 4,

0 commit comments

Comments
 (0)