-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsize-analysis-fast.py
102 lines (55 loc) · 2.25 KB
/
size-analysis-fast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python
# coding: utf-8
# In[3]:
import pandas as pd
# Load the pickled table that every later cell reads through the global `df`.
# NOTE(review): unpickling can execute arbitrary code - only load a file you trust.
df = pd.read_pickle("all_size_withyes.pkl")
# Notebook-style previews (first ten rows, then the shape). As bare expressions
# their values are discarded when this file runs as a plain script.
df[:10]
df.shape
# In[4]:
# Preview of the rows ordered by the "vmlinux" column. The sorted result is not
# assigned, so `df` itself keeps its original row order.
df.sort_values("vmlinux")
# In[5]:
# Names of the size columns selected from `df`: the raw "vmlinux" entry followed,
# for each compression method, by its "-bzImage", "-vmlinux" and unsuffixed
# variants (e.g. "GZIP-bzImage", "GZIP-vmlinux", "GZIP").
size_methods = ["vmlinux"] + [
    method + suffix
    for method in ("GZIP", "BZIP2", "LZMA", "XZ", "LZO", "LZ4")
    for suffix in ("-bzImage", "-vmlinux", "")
]
# In[7]:
import matplotlib.pyplot as plt
# One box plot per size measure, each drawn in its own figure.
# DataFrame.plot.box() opens a figure of its own when no `ax` is given, which
# would leave a separately created plt.figure() empty; creating the axes first
# and passing them in keeps exactly one figure per plot.
for column in ("vmlinux", "LZO", "BZIP2"):
    fig, ax = plt.subplots()
    df[[column]].plot.box(ax=ax)
    # Non-blocking so the script carries on to the next plot.
    plt.show(block=False)
# Summary statistics of the "vmlinux" column (only displayed when run as a
# notebook cell; as a bare expression the value is discarded in a script).
df['vmlinux'].describe()
# In[ ]:
# In[9]:
import scipy.stats
import seaborn as sns
import numpy as np
def color_negative_positive(val, pcolor="green", ncolor="red"):
    """
    Return the CSS ``color`` declaration for one numeric cell.

    Parameters
    ----------
    val : scalar number
        Value to classify by sign.
    pcolor : str
        Colour name used when ``val`` is strictly positive.
    ncolor : str
        Colour name used when ``val`` is strictly negative.

    Returns
    -------
    str
        ``'color: <name>'`` where the name is ``pcolor`` for ``val > 0``,
        ``ncolor`` for ``val < 0`` and ``black`` for zero or NaN.
    """
    if val > 0:
        color = pcolor
    elif val < 0:
        color = ncolor
    else:
        # Zero is neutral. NaN also lands here (every ordering comparison with
        # NaN is False), so a missing value is not painted as a negative one.
        color = 'black'
    return 'color: %s' % color
# Compression methods compared pairwise by compareCompress.
compress_methods = ["GZIP", "BZIP2", "LZMA", "XZ", "LZO", "LZ4"]


def compareCompress(size_measure_of_interest, data=None):
    """
    Build the pairwise table of average size differences between compression methods.

    Parameters
    ----------
    size_measure_of_interest : str
        Suffix appended to each compression method name to select the column
        to compare: "", "-vmlinux" or "-bzImage".
    data : pandas.DataFrame, optional
        Table with one column per ``method + suffix``. When omitted, the
        module-level ``df`` is used.

    Returns
    -------
    pandas.DataFrame
        Rows are labelled ``<method>o`` and columns ``<method>``. Each cell is
        the mean of (row method size / column method size), expressed as a
        percentage minus 100.
    """
    frame = df if data is None else data
    row_labels = [method + "o" for method in compress_methods]
    rCompressDiff = pd.DataFrame(index=row_labels, columns=compress_methods)
    for numerator in compress_methods:
        numerator_sizes = frame[numerator + size_measure_of_interest]
        for denominator in compress_methods:
            ratio = numerator_sizes / frame[denominator + size_measure_of_interest]
            # Write through a single .loc[row, col]: a chained .loc[row][col]
            # assigns into an intermediate object and may never reach the table.
            rCompressDiff.loc[numerator + "o", denominator] = (np.mean(ratio) * 100) - 100
    return rCompressDiff
#cmy = sns.light_palette("red", as_cmap=True)
# Pairwise comparison table for the unsuffixed columns (suffix ""), captioned and
# with each cell's text colour chosen by color_negative_positive. The styled
# object is not assigned, so it is only displayed when run as a notebook cell.
compareCompress("").style.set_caption('Difference (average in percentage) per compression methods').applymap(color_negative_positive)
# In[10]:
# Same styled comparison table, built from the "-vmlinux" suffixed columns.
# The styled object is not assigned, so it is only displayed in a notebook.
compareCompress("-vmlinux").style.set_caption('Difference (average in percentage) per compression methods, vmlinux').applymap(color_negative_positive)
# In[11]:
# Pairwise correlations between all size measures.
cm = sns.light_palette("green", as_cmap=True)
correlations = df[size_methods].corr()
# Styled view (caption + green gradient) for notebook/HTML rendering.
styled_correlations = correlations.style.set_caption('Correlations between size measures').background_gradient(cmap=cm)
# print() of a Styler shows only its object repr, so print the numeric table.
print(correlations)
# In[ ]: