-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrandom_and_cross_validation_split_examples.py
48 lines (39 loc) · 1.54 KB
/
random_and_cross_validation_split_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
Author : Abdullah Al Masud\n
email : [email protected]\n
LICENSE : MIT License
"""
import pandas as pd
from sklearn.datasets import load_iris
import os
import sys
project_dir = os.getcwd()
sys.path.append(project_dir)
from msdlib import msd
# Build the example dataset that the splitting demonstrations below operate on
# (it will be divided into Train, Validation and Test sets).
loader = load_iris()

# Targets go into a Series; features go into a DataFrame whose columns are
# named after the feature names shipped with the loader.
label = pd.Series(data=loader['target'])
data = pd.DataFrame(
    data=loader['data'],
    columns=loader['feature_names'],
)

# Show the feature table before any splitting happens.
print(data)
# Splitting object
# Create the splitter from the feature table and its labels.
# NOTE(review): test_ratio=.1 presumably reserves 10% of the samples for the
# test set, and same_ratio=True presumably keeps class proportions alike across
# the resulting sets -- confirm both against the msdlib documentation.
splitter = msd.SplitDataset(data, label, same_ratio=True, test_ratio=.1)

print('applying random split...')
outdata = splitter.random_split(val_ratio=.2)

# Show which sets were produced and which entries the 'train' set carries.
print(outdata.keys())
print(outdata['train'].keys())

# Each set is read through its 'data', 'label' and 'index' entries; report the
# shape of all three. The loop body must be indented under the `for` header,
# otherwise the script fails with an IndentationError.
for _set in outdata:
    print('%s data, label and index shapes:' % _set,
          outdata[_set]['data'].shape, outdata[_set]['label'].shape, outdata[_set]['index'].shape)
print('\n\n applying cross validation split...')
outdata = splitter.cross_validation_split(fold=5)

# Show which sets were produced and which entries the 'train' set carries.
print(outdata.keys())
print(outdata['train'].keys())

# The 'test' set is read directly through its 'data', 'label' and 'index'
# entries, while every other set is indexed one level deeper by fold. The
# nesting below (for -> if/else -> inner for) must be expressed through
# indentation, otherwise the script fails with an IndentationError.
for _set in outdata:
    if _set == 'test':
        print('%s data, label and index shapes:' % _set,
              outdata[_set]['data'].shape, outdata[_set]['label'].shape, outdata[_set]['index'].shape)
    else:
        # The fold identifiers are taken from the keys of the 'label' entry.
        for fold in outdata[_set]['label'].keys():
            print('fold=%s; %s data, label and index shapes:' % (fold, _set),
                  outdata[_set]['data'][fold].shape, outdata[_set]['label'][fold].shape, outdata[_set]['index'][fold].shape)