|
309 | 309 | "name": "stderr",
|
310 | 310 | "output_type": "stream",
|
311 | 311 | "text": [
|
312 |
| - "2019-11-25 16:36:01,497 - INFO - modeler - Modeling census\n", |
313 |
| - "2019-11-25 16:36:01,498 - INFO - metadata - Loading transformer NumericalTransformer for field age\n", |
314 |
| - "2019-11-25 16:36:01,499 - INFO - metadata - Loading transformer CategoricalTransformer for field workclass\n", |
315 |
| - "2019-11-25 16:36:01,499 - INFO - metadata - Loading transformer NumericalTransformer for field fnlwgt\n", |
316 |
| - "2019-11-25 16:36:01,499 - INFO - metadata - Loading transformer CategoricalTransformer for field education\n", |
317 |
| - "2019-11-25 16:36:01,500 - INFO - metadata - Loading transformer NumericalTransformer for field education-num\n", |
318 |
| - "2019-11-25 16:36:01,500 - INFO - metadata - Loading transformer CategoricalTransformer for field marital-status\n", |
319 |
| - "2019-11-25 16:36:01,500 - INFO - metadata - Loading transformer CategoricalTransformer for field occupation\n", |
320 |
| - "2019-11-25 16:36:01,501 - INFO - metadata - Loading transformer CategoricalTransformer for field relationship\n", |
321 |
| - "2019-11-25 16:36:01,501 - INFO - metadata - Loading transformer CategoricalTransformer for field race\n", |
322 |
| - "2019-11-25 16:36:01,501 - INFO - metadata - Loading transformer CategoricalTransformer for field sex\n", |
323 |
| - "2019-11-25 16:36:01,502 - INFO - metadata - Loading transformer NumericalTransformer for field capital-gain\n", |
324 |
| - "2019-11-25 16:36:01,502 - INFO - metadata - Loading transformer NumericalTransformer for field capital-loss\n", |
325 |
| - "2019-11-25 16:36:01,502 - INFO - metadata - Loading transformer NumericalTransformer for field hours-per-week\n", |
326 |
| - "2019-11-25 16:36:01,503 - INFO - metadata - Loading transformer CategoricalTransformer for field native-country\n", |
327 |
| - "2019-11-25 16:36:01,503 - INFO - metadata - Loading transformer CategoricalTransformer for field income\n", |
328 |
| - "2019-11-25 16:36:01,754 - INFO - modeler - Modeling Complete\n" |
| 312 | + "INFO - Modeling census\n", |
| 313 | + "INFO - Loading transformer NumericalTransformer for field age\n", |
| 314 | + "INFO - Loading transformer CategoricalTransformer for field workclass\n", |
| 315 | + "INFO - Loading transformer NumericalTransformer for field fnlwgt\n", |
| 316 | + "INFO - Loading transformer CategoricalTransformer for field education\n", |
| 317 | + "INFO - Loading transformer NumericalTransformer for field education-num\n", |
| 318 | + "INFO - Loading transformer CategoricalTransformer for field marital-status\n", |
| 319 | + "INFO - Loading transformer CategoricalTransformer for field occupation\n", |
| 320 | + "INFO - Loading transformer CategoricalTransformer for field relationship\n", |
| 321 | + "INFO - Loading transformer CategoricalTransformer for field race\n", |
| 322 | + "INFO - Loading transformer CategoricalTransformer for field sex\n", |
| 323 | + "INFO - Loading transformer NumericalTransformer for field capital-gain\n", |
| 324 | + "INFO - Loading transformer NumericalTransformer for field capital-loss\n", |
| 325 | + "INFO - Loading transformer NumericalTransformer for field hours-per-week\n", |
| 326 | + "INFO - Loading transformer CategoricalTransformer for field native-country\n", |
| 327 | + "INFO - Loading transformer CategoricalTransformer for field income\n", |
| 328 | + "INFO - Modeling Complete\n" |
329 | 329 | ]
|
330 | 330 | }
|
331 | 331 | ],
|
|
382 | 382 | " <tbody>\n",
|
383 | 383 | " <tr>\n",
|
384 | 384 | " <th>0</th>\n",
|
385 |
| - " <td>39</td>\n", |
| 385 | + " <td>35</td>\n", |
386 | 386 | " <td>Private</td>\n",
|
387 |
| - " <td>354328</td>\n", |
388 |
| - " <td>Bachelors</td>\n", |
389 |
| - " <td>12</td>\n", |
390 |
| - " <td>Married-civ-spouse</td>\n", |
391 |
| - " <td>Exec-managerial</td>\n", |
392 |
| - " <td>Husband</td>\n", |
| 387 | + " <td>207371</td>\n", |
| 388 | + " <td>7th-8th</td>\n", |
| 389 | + " <td>10</td>\n", |
| 390 | + " <td>Divorced</td>\n", |
| 391 | + " <td>Adm-clerical</td>\n", |
| 392 | + " <td>Wife</td>\n", |
393 | 393 | " <td>White</td>\n",
|
394 |
| - " <td>Male</td>\n", |
395 |
| - " <td>5731</td>\n", |
396 |
| - " <td>-416</td>\n", |
397 |
| - " <td>43</td>\n", |
| 394 | + " <td>Female</td>\n", |
| 395 | + " <td>931</td>\n", |
| 396 | + " <td>-620</td>\n", |
| 397 | + " <td>30</td>\n", |
398 | 398 | " <td>United-States</td>\n",
|
399 |
| - " <td>>50K</td>\n", |
| 399 | + " <td><=50K</td>\n", |
400 | 400 | " </tr>\n",
|
401 | 401 | " <tr>\n",
|
402 | 402 | " <th>1</th>\n",
|
403 |
| - " <td>39</td>\n", |
| 403 | + " <td>26</td>\n", |
404 | 404 | " <td>Private</td>\n",
|
405 |
| - " <td>97345</td>\n", |
| 405 | + " <td>87833</td>\n", |
406 | 406 | " <td>Some-college</td>\n",
|
407 |
| - " <td>10</td>\n", |
408 |
| - " <td>Never-married</td>\n", |
409 |
| - " <td>Other-service</td>\n", |
| 407 | + " <td>9</td>\n", |
| 408 | + " <td>Widowed</td>\n", |
| 409 | + " <td>Exec-managerial</td>\n", |
410 | 410 | " <td>Own-child</td>\n",
|
411 | 411 | " <td>White</td>\n",
|
412 | 412 | " <td>Female</td>\n",
|
413 |
| - " <td>1512</td>\n", |
414 |
| - " <td>-253</td>\n", |
415 |
| - " <td>40</td>\n", |
| 413 | + " <td>-736</td>\n", |
| 414 | + " <td>109</td>\n", |
| 415 | + " <td>36</td>\n", |
416 | 416 | " <td>United-States</td>\n",
|
417 | 417 | " <td><=50K</td>\n",
|
418 | 418 | " </tr>\n",
|
419 | 419 | " <tr>\n",
|
420 | 420 | " <th>2</th>\n",
|
421 |
| - " <td>49</td>\n", |
422 |
| - " <td>Self-emp-not-inc</td>\n", |
423 |
| - " <td>122831</td>\n", |
424 |
| - " <td>Some-college</td>\n", |
425 |
| - " <td>9</td>\n", |
| 421 | + " <td>39</td>\n", |
| 422 | + " <td>Private</td>\n", |
| 423 | + " <td>127833</td>\n", |
| 424 | + " <td>HS-grad</td>\n", |
| 425 | + " <td>12</td>\n", |
426 | 426 | " <td>Married-civ-spouse</td>\n",
|
427 |
| - " <td>Sales</td>\n", |
| 427 | + " <td>Adm-clerical</td>\n", |
428 | 428 | " <td>Not-in-family</td>\n",
|
429 | 429 | " <td>White</td>\n",
|
430 | 430 | " <td>Male</td>\n",
|
431 |
| - " <td>-2067</td>\n", |
432 |
| - " <td>108</td>\n", |
433 |
| - " <td>44</td>\n", |
| 431 | + " <td>-2763</td>\n", |
| 432 | + " <td>-213</td>\n", |
| 433 | + " <td>34</td>\n", |
434 | 434 | " <td>United-States</td>\n",
|
435 | 435 | " <td><=50K</td>\n",
|
436 | 436 | " </tr>\n",
|
437 | 437 | " <tr>\n",
|
438 | 438 | " <th>3</th>\n",
|
439 |
| - " <td>35</td>\n", |
440 |
| - " <td>Private</td>\n", |
441 |
| - " <td>218362</td>\n", |
442 |
| - " <td>Some-college</td>\n", |
443 |
| - " <td>7</td>\n", |
444 |
| - " <td>Never-married</td>\n", |
445 |
| - " <td>Adm-clerical</td>\n", |
446 |
| - " <td>Own-child</td>\n", |
| 439 | + " <td>73</td>\n", |
| 440 | + " <td>Self-emp-not-inc</td>\n", |
| 441 | + " <td>159836</td>\n", |
| 442 | + " <td>HS-grad</td>\n", |
| 443 | + " <td>15</td>\n", |
| 444 | + " <td>Married-civ-spouse</td>\n", |
| 445 | + " <td>Craft-repair</td>\n", |
| 446 | + " <td>Husband</td>\n", |
447 | 447 | " <td>White</td>\n",
|
448 | 448 | " <td>Male</td>\n",
|
449 |
| - " <td>-1209</td>\n", |
450 |
| - " <td>99</td>\n", |
451 |
| - " <td>54</td>\n", |
| 449 | + " <td>-183</td>\n", |
| 450 | + " <td>115</td>\n", |
| 451 | + " <td>56</td>\n", |
452 | 452 | " <td>United-States</td>\n",
|
453 | 453 | " <td><=50K</td>\n",
|
454 | 454 | " </tr>\n",
|
455 | 455 | " <tr>\n",
|
456 | 456 | " <th>4</th>\n",
|
457 |
| - " <td>13</td>\n", |
458 |
| - " <td>Private</td>\n", |
459 |
| - " <td>146365</td>\n", |
460 |
| - " <td>HS-grad</td>\n", |
461 |
| - " <td>9</td>\n", |
462 |
| - " <td>Married-civ-spouse</td>\n", |
463 |
| - " <td>Adm-clerical</td>\n", |
464 |
| - " <td>Own-child</td>\n", |
| 457 | + " <td>50</td>\n", |
| 458 | + " <td>?</td>\n", |
| 459 | + " <td>318263</td>\n", |
| 460 | + " <td>Bachelors</td>\n", |
| 461 | + " <td>8</td>\n", |
| 462 | + " <td>Never-married</td>\n", |
| 463 | + " <td>Other-service</td>\n", |
| 464 | + " <td>Husband</td>\n", |
465 | 465 | " <td>White</td>\n",
|
466 |
| - " <td>Female</td>\n", |
467 |
| - " <td>8987</td>\n", |
468 |
| - " <td>338</td>\n", |
| 466 | + " <td>Male</td>\n", |
| 467 | + " <td>2797</td>\n", |
| 468 | + " <td>-334</td>\n", |
469 | 469 | " <td>42</td>\n",
|
470 | 470 | " <td>United-States</td>\n",
|
471 | 471 | " <td><=50K</td>\n",
|
|
476 | 476 | ],
|
477 | 477 | "text/plain": [
|
478 | 478 | " age workclass fnlwgt education education-num \\\n",
|
479 |
| - "0 39 Private 354328 Bachelors 12 \n", |
480 |
| - "1 39 Private 97345 Some-college 10 \n", |
481 |
| - "2 49 Self-emp-not-inc 122831 Some-college 9 \n", |
482 |
| - "3 35 Private 218362 Some-college 7 \n", |
483 |
| - "4 13 Private 146365 HS-grad 9 \n", |
| 479 | + "0 35 Private 207371 7th-8th 10 \n", |
| 480 | + "1 26 Private 87833 Some-college 9 \n", |
| 481 | + "2 39 Private 127833 HS-grad 12 \n", |
| 482 | + "3 73 Self-emp-not-inc 159836 HS-grad 15 \n", |
| 483 | + "4 50 ? 318263 Bachelors 8 \n", |
484 | 484 | "\n",
|
485 | 485 | " marital-status occupation relationship race sex \\\n",
|
486 |
| - "0 Married-civ-spouse Exec-managerial Husband White Male \n", |
487 |
| - "1 Never-married Other-service Own-child White Female \n", |
488 |
| - "2 Married-civ-spouse Sales Not-in-family White Male \n", |
489 |
| - "3 Never-married Adm-clerical Own-child White Male \n", |
490 |
| - "4 Married-civ-spouse Adm-clerical Own-child White Female \n", |
| 486 | + "0 Divorced Adm-clerical Wife White Female \n", |
| 487 | + "1 Widowed Exec-managerial Own-child White Female \n", |
| 488 | + "2 Married-civ-spouse Adm-clerical Not-in-family White Male \n", |
| 489 | + "3 Married-civ-spouse Craft-repair Husband White Male \n", |
| 490 | + "4 Never-married Other-service Husband White Male \n", |
491 | 491 | "\n",
|
492 | 492 | " capital-gain capital-loss hours-per-week native-country income \n",
|
493 |
| - "0 5731 -416 43 United-States >50K \n", |
494 |
| - "1 1512 -253 40 United-States <=50K \n", |
495 |
| - "2 -2067 108 44 United-States <=50K \n", |
496 |
| - "3 -1209 99 54 United-States <=50K \n", |
497 |
| - "4 8987 338 42 United-States <=50K " |
| 493 | + "0 931 -620 30 United-States <=50K \n", |
| 494 | + "1 -736 109 36 United-States <=50K \n", |
| 495 | + "2 -2763 -213 34 United-States <=50K \n", |
| 496 | + "3 -183 115 56 United-States <=50K \n", |
| 497 | + "4 2797 -334 42 United-States <=50K " |
498 | 498 | ]
|
499 | 499 | },
|
500 | 500 | "execution_count": 7,
|
|
512 | 512 | "execution_count": 8,
|
513 | 513 | "metadata": {},
|
514 | 514 | "outputs": [
|
515 |
| - { |
516 |
| - "name": "stderr", |
517 |
| - "output_type": "stream", |
518 |
| - "text": [ |
519 |
| - "/home/xals/.virtualenvs/SDV/lib/python3.6/site-packages/scipy/stats/stats.py:248: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.\n", |
520 |
| - " \"values. nan values will be ignored.\", RuntimeWarning)\n" |
521 |
| - ] |
522 |
| - }, |
523 | 515 | {
|
524 | 516 | "data": {
|
| 517 | + "text/html": [ |
| 518 | + "<div>\n", |
| 519 | + "<style scoped>\n", |
| 520 | + " .dataframe tbody tr th:only-of-type {\n", |
| 521 | + " vertical-align: middle;\n", |
| 522 | + " }\n", |
| 523 | + "\n", |
| 524 | + " .dataframe tbody tr th {\n", |
| 525 | + " vertical-align: top;\n", |
| 526 | + " }\n", |
| 527 | + "\n", |
| 528 | + " .dataframe thead th {\n", |
| 529 | + " text-align: right;\n", |
| 530 | + " }\n", |
| 531 | + "</style>\n", |
| 532 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 533 | + " <thead>\n", |
| 534 | + " <tr style=\"text-align: right;\">\n", |
| 535 | + " <th></th>\n", |
| 536 | + " <th>mse</th>\n", |
| 537 | + " <th>r2_score</th>\n", |
| 538 | + " <th>rmse</th>\n", |
| 539 | + " </tr>\n", |
| 540 | + " </thead>\n", |
| 541 | + " <tbody>\n", |
| 542 | + " <tr>\n", |
| 543 | + " <th>0</th>\n", |
| 544 | + " <td>212670.957588</td>\n", |
| 545 | + " <td>0.999414</td>\n", |
| 546 | + " <td>461.162615</td>\n", |
| 547 | + " </tr>\n", |
| 548 | + " </tbody>\n", |
| 549 | + "</table>\n", |
| 550 | + "</div>" |
| 551 | + ], |
525 | 552 | "text/plain": [
|
526 |
| - "mse 403.8003241991141\n", |
527 |
| - "rmse 20.094783507147174\n", |
528 |
| - "r2_score 0.9994524190196646\n", |
529 |
| - "dtype: object" |
| 553 | + " mse r2_score rmse\n", |
| 554 | + "0 212670.957588 0.999414 461.162615" |
530 | 555 | ]
|
531 | 556 | },
|
532 | 557 | "execution_count": 8,
|
|
539 | 564 | "\n",
|
540 | 565 | "samples = sdv.sample_all(len(tables['census']))\n",
|
541 | 566 | "\n",
|
542 |
| - "evaluate(tables, samples).astype(str)" |
| 567 | + "evaluate(samples, real=tables, metadata=sdv.metadata)" |
543 | 568 | ]
|
544 | 569 | }
|
545 | 570 | ],
|
|
0 commit comments