Skip to content

Commit 1fbeadb

Browse files
committed
make release-tag: Merge branch 'master' into stable
2 parents c5c1567 + e262a2a commit 1fbeadb

15 files changed

+1418
-460
lines changed

HISTORY.md

+8
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,13 @@
11
# History
22

3+
## 0.2.2 - 2019-12-10
4+
5+
### New Features
6+
7+
* Adapt evaluation to the different data types - [Issue #128](https://github.com/HDI-Project/SDV/issues/128) by @csala @JDTheRipperPC
8+
9+
* Extend `load_demo` functionality to load other datasets - [Issue #136](https://github.com/HDI-Project/SDV/issues/136) by @JDTheRipperPC
10+
311
## 0.2.1 - 2019-11-25
412

513
### New Features

examples/2. Quickstart - Single Table - Census.ipynb

+121 -96
Original file line number | Diff line number | Diff line change
@@ -309,23 +309,23 @@
309309
"name": "stderr",
310310
"output_type": "stream",
311311
"text": [
312-
"2019-11-25 16:36:01,497 - INFO - modeler - Modeling census\n",
313-
"2019-11-25 16:36:01,498 - INFO - metadata - Loading transformer NumericalTransformer for field age\n",
314-
"2019-11-25 16:36:01,499 - INFO - metadata - Loading transformer CategoricalTransformer for field workclass\n",
315-
"2019-11-25 16:36:01,499 - INFO - metadata - Loading transformer NumericalTransformer for field fnlwgt\n",
316-
"2019-11-25 16:36:01,499 - INFO - metadata - Loading transformer CategoricalTransformer for field education\n",
317-
"2019-11-25 16:36:01,500 - INFO - metadata - Loading transformer NumericalTransformer for field education-num\n",
318-
"2019-11-25 16:36:01,500 - INFO - metadata - Loading transformer CategoricalTransformer for field marital-status\n",
319-
"2019-11-25 16:36:01,500 - INFO - metadata - Loading transformer CategoricalTransformer for field occupation\n",
320-
"2019-11-25 16:36:01,501 - INFO - metadata - Loading transformer CategoricalTransformer for field relationship\n",
321-
"2019-11-25 16:36:01,501 - INFO - metadata - Loading transformer CategoricalTransformer for field race\n",
322-
"2019-11-25 16:36:01,501 - INFO - metadata - Loading transformer CategoricalTransformer for field sex\n",
323-
"2019-11-25 16:36:01,502 - INFO - metadata - Loading transformer NumericalTransformer for field capital-gain\n",
324-
"2019-11-25 16:36:01,502 - INFO - metadata - Loading transformer NumericalTransformer for field capital-loss\n",
325-
"2019-11-25 16:36:01,502 - INFO - metadata - Loading transformer NumericalTransformer for field hours-per-week\n",
326-
"2019-11-25 16:36:01,503 - INFO - metadata - Loading transformer CategoricalTransformer for field native-country\n",
327-
"2019-11-25 16:36:01,503 - INFO - metadata - Loading transformer CategoricalTransformer for field income\n",
328-
"2019-11-25 16:36:01,754 - INFO - modeler - Modeling Complete\n"
312+
"INFO - Modeling census\n",
313+
"INFO - Loading transformer NumericalTransformer for field age\n",
314+
"INFO - Loading transformer CategoricalTransformer for field workclass\n",
315+
"INFO - Loading transformer NumericalTransformer for field fnlwgt\n",
316+
"INFO - Loading transformer CategoricalTransformer for field education\n",
317+
"INFO - Loading transformer NumericalTransformer for field education-num\n",
318+
"INFO - Loading transformer CategoricalTransformer for field marital-status\n",
319+
"INFO - Loading transformer CategoricalTransformer for field occupation\n",
320+
"INFO - Loading transformer CategoricalTransformer for field relationship\n",
321+
"INFO - Loading transformer CategoricalTransformer for field race\n",
322+
"INFO - Loading transformer CategoricalTransformer for field sex\n",
323+
"INFO - Loading transformer NumericalTransformer for field capital-gain\n",
324+
"INFO - Loading transformer NumericalTransformer for field capital-loss\n",
325+
"INFO - Loading transformer NumericalTransformer for field hours-per-week\n",
326+
"INFO - Loading transformer CategoricalTransformer for field native-country\n",
327+
"INFO - Loading transformer CategoricalTransformer for field income\n",
328+
"INFO - Modeling Complete\n"
329329
]
330330
}
331331
],
@@ -382,90 +382,90 @@
382382
" <tbody>\n",
383383
" <tr>\n",
384384
" <th>0</th>\n",
385-
" <td>39</td>\n",
385+
" <td>35</td>\n",
386386
" <td>Private</td>\n",
387-
" <td>354328</td>\n",
388-
" <td>Bachelors</td>\n",
389-
" <td>12</td>\n",
390-
" <td>Married-civ-spouse</td>\n",
391-
" <td>Exec-managerial</td>\n",
392-
" <td>Husband</td>\n",
387+
" <td>207371</td>\n",
388+
" <td>7th-8th</td>\n",
389+
" <td>10</td>\n",
390+
" <td>Divorced</td>\n",
391+
" <td>Adm-clerical</td>\n",
392+
" <td>Wife</td>\n",
393393
" <td>White</td>\n",
394-
" <td>Male</td>\n",
395-
" <td>5731</td>\n",
396-
" <td>-416</td>\n",
397-
" <td>43</td>\n",
394+
" <td>Female</td>\n",
395+
" <td>931</td>\n",
396+
" <td>-620</td>\n",
397+
" <td>30</td>\n",
398398
" <td>United-States</td>\n",
399-
" <td>&gt;50K</td>\n",
399+
" <td>&lt;=50K</td>\n",
400400
" </tr>\n",
401401
" <tr>\n",
402402
" <th>1</th>\n",
403-
" <td>39</td>\n",
403+
" <td>26</td>\n",
404404
" <td>Private</td>\n",
405-
" <td>97345</td>\n",
405+
" <td>87833</td>\n",
406406
" <td>Some-college</td>\n",
407-
" <td>10</td>\n",
408-
" <td>Never-married</td>\n",
409-
" <td>Other-service</td>\n",
407+
" <td>9</td>\n",
408+
" <td>Widowed</td>\n",
409+
" <td>Exec-managerial</td>\n",
410410
" <td>Own-child</td>\n",
411411
" <td>White</td>\n",
412412
" <td>Female</td>\n",
413-
" <td>1512</td>\n",
414-
" <td>-253</td>\n",
415-
" <td>40</td>\n",
413+
" <td>-736</td>\n",
414+
" <td>109</td>\n",
415+
" <td>36</td>\n",
416416
" <td>United-States</td>\n",
417417
" <td>&lt;=50K</td>\n",
418418
" </tr>\n",
419419
" <tr>\n",
420420
" <th>2</th>\n",
421-
" <td>49</td>\n",
422-
" <td>Self-emp-not-inc</td>\n",
423-
" <td>122831</td>\n",
424-
" <td>Some-college</td>\n",
425-
" <td>9</td>\n",
421+
" <td>39</td>\n",
422+
" <td>Private</td>\n",
423+
" <td>127833</td>\n",
424+
" <td>HS-grad</td>\n",
425+
" <td>12</td>\n",
426426
" <td>Married-civ-spouse</td>\n",
427-
" <td>Sales</td>\n",
427+
" <td>Adm-clerical</td>\n",
428428
" <td>Not-in-family</td>\n",
429429
" <td>White</td>\n",
430430
" <td>Male</td>\n",
431-
" <td>-2067</td>\n",
432-
" <td>108</td>\n",
433-
" <td>44</td>\n",
431+
" <td>-2763</td>\n",
432+
" <td>-213</td>\n",
433+
" <td>34</td>\n",
434434
" <td>United-States</td>\n",
435435
" <td>&lt;=50K</td>\n",
436436
" </tr>\n",
437437
" <tr>\n",
438438
" <th>3</th>\n",
439-
" <td>35</td>\n",
440-
" <td>Private</td>\n",
441-
" <td>218362</td>\n",
442-
" <td>Some-college</td>\n",
443-
" <td>7</td>\n",
444-
" <td>Never-married</td>\n",
445-
" <td>Adm-clerical</td>\n",
446-
" <td>Own-child</td>\n",
439+
" <td>73</td>\n",
440+
" <td>Self-emp-not-inc</td>\n",
441+
" <td>159836</td>\n",
442+
" <td>HS-grad</td>\n",
443+
" <td>15</td>\n",
444+
" <td>Married-civ-spouse</td>\n",
445+
" <td>Craft-repair</td>\n",
446+
" <td>Husband</td>\n",
447447
" <td>White</td>\n",
448448
" <td>Male</td>\n",
449-
" <td>-1209</td>\n",
450-
" <td>99</td>\n",
451-
" <td>54</td>\n",
449+
" <td>-183</td>\n",
450+
" <td>115</td>\n",
451+
" <td>56</td>\n",
452452
" <td>United-States</td>\n",
453453
" <td>&lt;=50K</td>\n",
454454
" </tr>\n",
455455
" <tr>\n",
456456
" <th>4</th>\n",
457-
" <td>13</td>\n",
458-
" <td>Private</td>\n",
459-
" <td>146365</td>\n",
460-
" <td>HS-grad</td>\n",
461-
" <td>9</td>\n",
462-
" <td>Married-civ-spouse</td>\n",
463-
" <td>Adm-clerical</td>\n",
464-
" <td>Own-child</td>\n",
457+
" <td>50</td>\n",
458+
" <td>?</td>\n",
459+
" <td>318263</td>\n",
460+
" <td>Bachelors</td>\n",
461+
" <td>8</td>\n",
462+
" <td>Never-married</td>\n",
463+
" <td>Other-service</td>\n",
464+
" <td>Husband</td>\n",
465465
" <td>White</td>\n",
466-
" <td>Female</td>\n",
467-
" <td>8987</td>\n",
468-
" <td>338</td>\n",
466+
" <td>Male</td>\n",
467+
" <td>2797</td>\n",
468+
" <td>-334</td>\n",
469469
" <td>42</td>\n",
470470
" <td>United-States</td>\n",
471471
" <td>&lt;=50K</td>\n",
@@ -476,25 +476,25 @@
476476
],
477477
"text/plain": [
478478
" age workclass fnlwgt education education-num \\\n",
479-
"0 39 Private 354328 Bachelors 12 \n",
480-
"1 39 Private 97345 Some-college 10 \n",
481-
"2 49 Self-emp-not-inc 122831 Some-college 9 \n",
482-
"3 35 Private 218362 Some-college 7 \n",
483-
"4 13 Private 146365 HS-grad 9 \n",
479+
"0 35 Private 207371 7th-8th 10 \n",
480+
"1 26 Private 87833 Some-college 9 \n",
481+
"2 39 Private 127833 HS-grad 12 \n",
482+
"3 73 Self-emp-not-inc 159836 HS-grad 15 \n",
483+
"4 50 ? 318263 Bachelors 8 \n",
484484
"\n",
485485
" marital-status occupation relationship race sex \\\n",
486-
"0 Married-civ-spouse Exec-managerial Husband White Male \n",
487-
"1 Never-married Other-service Own-child White Female \n",
488-
"2 Married-civ-spouse Sales Not-in-family White Male \n",
489-
"3 Never-married Adm-clerical Own-child White Male \n",
490-
"4 Married-civ-spouse Adm-clerical Own-child White Female \n",
486+
"0 Divorced Adm-clerical Wife White Female \n",
487+
"1 Widowed Exec-managerial Own-child White Female \n",
488+
"2 Married-civ-spouse Adm-clerical Not-in-family White Male \n",
489+
"3 Married-civ-spouse Craft-repair Husband White Male \n",
490+
"4 Never-married Other-service Husband White Male \n",
491491
"\n",
492492
" capital-gain capital-loss hours-per-week native-country income \n",
493-
"0 5731 -416 43 United-States >50K \n",
494-
"1 1512 -253 40 United-States <=50K \n",
495-
"2 -2067 108 44 United-States <=50K \n",
496-
"3 -1209 99 54 United-States <=50K \n",
497-
"4 8987 338 42 United-States <=50K "
493+
"0 931 -620 30 United-States <=50K \n",
494+
"1 -736 109 36 United-States <=50K \n",
495+
"2 -2763 -213 34 United-States <=50K \n",
496+
"3 -183 115 56 United-States <=50K \n",
497+
"4 2797 -334 42 United-States <=50K "
498498
]
499499
},
500500
"execution_count": 7,
@@ -512,21 +512,46 @@
512512
"execution_count": 8,
513513
"metadata": {},
514514
"outputs": [
515-
{
516-
"name": "stderr",
517-
"output_type": "stream",
518-
"text": [
519-
"/home/xals/.virtualenvs/SDV/lib/python3.6/site-packages/scipy/stats/stats.py:248: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.\n",
520-
" \"values. nan values will be ignored.\", RuntimeWarning)\n"
521-
]
522-
},
523515
{
524516
"data": {
517+
"text/html": [
518+
"<div>\n",
519+
"<style scoped>\n",
520+
" .dataframe tbody tr th:only-of-type {\n",
521+
" vertical-align: middle;\n",
522+
" }\n",
523+
"\n",
524+
" .dataframe tbody tr th {\n",
525+
" vertical-align: top;\n",
526+
" }\n",
527+
"\n",
528+
" .dataframe thead th {\n",
529+
" text-align: right;\n",
530+
" }\n",
531+
"</style>\n",
532+
"<table border=\"1\" class=\"dataframe\">\n",
533+
" <thead>\n",
534+
" <tr style=\"text-align: right;\">\n",
535+
" <th></th>\n",
536+
" <th>mse</th>\n",
537+
" <th>r2_score</th>\n",
538+
" <th>rmse</th>\n",
539+
" </tr>\n",
540+
" </thead>\n",
541+
" <tbody>\n",
542+
" <tr>\n",
543+
" <th>0</th>\n",
544+
" <td>212670.957588</td>\n",
545+
" <td>0.999414</td>\n",
546+
" <td>461.162615</td>\n",
547+
" </tr>\n",
548+
" </tbody>\n",
549+
"</table>\n",
550+
"</div>"
551+
],
525552
"text/plain": [
526-
"mse 403.8003241991141\n",
527-
"rmse 20.094783507147174\n",
528-
"r2_score 0.9994524190196646\n",
529-
"dtype: object"
553+
" mse r2_score rmse\n",
554+
"0 212670.957588 0.999414 461.162615"
530555
]
531556
},
532557
"execution_count": 8,
@@ -539,7 +564,7 @@
539564
"\n",
540565
"samples = sdv.sample_all(len(tables['census']))\n",
541566
"\n",
542-
"evaluate(tables, samples).astype(str)"
567+
"evaluate(samples, real=tables, metadata=sdv.metadata)"
543568
]
544569
}
545570
],

0 commit comments

Comments
 (0)