Skip to content

Commit b27bf8b

Browse files
authored
Merge pull request #3187 from biolab/datainfo-filename
[ENH] Data Info display data set name
2 parents 9813602 + 41c5f3b commit b27bf8b

1 file changed

Lines changed: 81 additions & 69 deletions

File tree

Orange/widgets/data/owdatainfo.py

Lines changed: 81 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -33,61 +33,82 @@ class Inputs:
3333
def __init__(self):
3434
super().__init__()
3535

36-
self.data(None)
37-
self.data_set_size = self.features = self.meta_attributes = ""
38-
self.location = ""
39-
for box in ("Data Set Size", "Features", "Targets", "Meta Attributes",
40-
"Location", "Data Attributes"):
36+
self._clear_fields()
37+
38+
for box in ("Data Set Name", "Data Set Size", "Features", "Targets",
39+
"Meta Attributes", "Location", "Data Attributes"):
4140
name = box.lower().replace(" ", "_")
4241
bo = gui.vBox(self.controlArea, box,
4342
addSpace=False and box != "Meta Attributes")
4443
gui.label(bo, self, "%%(%s)s" % name)
4544

4645
# ensure the widget has some decent minimum width.
47-
self.targets = "Discrete outcome with 123 values"
46+
self.targets = "Categorical outcome with 123 values"
4847
self.layout().activate()
4948
# NOTE: The minimum width is set on the 'contained' widget and
5049
# not `self`. The layout will set a fixed size to `self` taking
5150
# into account the minimum constraints of the children (it would
5251
# override any minimum/fixed size set on `self`).
52+
self.targets = ""
5353
self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
5454
self.layout().setSizeConstraint(QtWidgets.QLayout.SetFixedSize)
5555

56-
self.targets = ""
57-
self.data_attributes = ""
58-
self.data_desc = None
5956

6057
@Inputs.data
6158
def data(self, data):
62-
def n_or_none(i):
63-
return i or "(none)"
59+
if data is None:
60+
self._clear_fields()
61+
else:
62+
self._set_fields(data)
63+
self._set_report(data)
64+
65+
def _clear_fields(self):
66+
self.data_set_name = ""
67+
self.data_set_size = ""
68+
self.features = self.targets = self.meta_attributes = ""
69+
self.location = ""
70+
self.data_desc = None
71+
self.data_attributes = ""
72+
73+
@staticmethod
74+
def _count(s, tpe):
75+
return sum(isinstance(x, tpe) for x in s)
6476

65-
def count(s, tpe):
66-
return sum(isinstance(x, tpe) for x in s)
77+
def _set_fields(self, data):
78+
def n_or_none(n):
79+
return n or "-"
6780

6881
def pack_table(info):
6982
return '<table>\n' + "\n".join(
70-
'<tr><td align="right" width="90">%s:</td>\n'
71-
'<td width="40">%s</td></tr>\n' % (d, textwrap.shorten(str(v), width=30, placeholder="..."))
83+
'<tr><td align="right" width="90">{}:</td>\n'
84+
'<td width="40">{}</td></tr>\n'.format(
85+
d,
86+
textwrap.shorten(str(v), width=30, placeholder="..."))
7287
for d, v in info
7388
) + "</table>\n"
7489

75-
if data is None:
76-
self.data_set_size = "No data"
77-
self.features = self.targets = self.meta_attributes = "None"
78-
self.location = ""
79-
self.data_desc = None
80-
self.data_attributes = ""
81-
return
90+
def pack_counts(s, include_non_primitive=False):
91+
if not s:
92+
return "None"
93+
return pack_table(
94+
(name, n_or_none(self._count(s, type_)))
95+
for name, type_ in (
96+
("Categorical", DiscreteVariable),
97+
("Numeric", ContinuousVariable),
98+
("Text", StringVariable))[:2 + include_non_primitive]
99+
)
100+
101+
domain = data.domain
102+
class_var = domain.class_var
82103

83104
sparseness = [s for s, m in (("features", data.X_density),
84105
("meta attributes", data.metas_density),
85106
("targets", data.Y_density)) if m() > 1]
86107
if sparseness:
87-
sparseness = "<p>Sparse representation: %s</p>" % ", ".join(sparseness)
108+
sparseness = "<p>Sparse representation: {}</p>"\
109+
.format(", ".join(sparseness))
88110
else:
89111
sparseness = ""
90-
domain = data.domain
91112
self.data_set_size = pack_table((
92113
("Rows", '~{}'.format(data.approx_len())),
93114
("Columns", len(domain)+len(domain.metas)))) + sparseness
@@ -99,56 +120,50 @@ def update_size():
99120

100121
threading.Thread(target=update_size).start()
101122

102-
if not domain.attributes:
103-
self.features = "None"
104-
else:
105-
disc_features = count(domain.attributes, DiscreteVariable)
106-
cont_features = count(domain.attributes, ContinuousVariable)
107-
self.features = pack_table((
108-
("Discrete", n_or_none(disc_features)),
109-
("Numeric", n_or_none(cont_features))
110-
))
111-
112-
if not domain.metas:
113-
self.meta_attributes = "None"
114-
else:
115-
disc_metas = count(domain.metas, DiscreteVariable)
116-
cont_metas = count(domain.metas, ContinuousVariable)
117-
str_metas = count(domain.metas, StringVariable)
118-
self.meta_attributes = pack_table((
119-
("Discrete", n_or_none(disc_metas)),
120-
("Numeric", n_or_none(cont_metas)),
121-
("Textual", n_or_none(str_metas))))
123+
self.data_set_name = getattr(data, "name", "N/A")
122124

123-
class_var = domain.class_var
125+
self.features = pack_counts(domain.attributes)
126+
self.meta_attributes = pack_counts(domain.metas, True)
124127
if class_var:
125128
if class_var.is_continuous:
126129
self.targets = "Numeric target variable"
127130
else:
128-
self.targets = "Discrete outcome with %i values" % \
129-
len(class_var.values)
131+
self.targets = "Categorical outcome with {} values"\
132+
.format(len(class_var.values))
130133
elif domain.class_vars:
131-
disc_class = count(domain.class_vars, DiscreteVariable)
132-
cont_class = count(domain.class_vars, ContinuousVariable)
134+
disc_class = self._count(domain.class_vars, DiscreteVariable)
135+
cont_class = self._count(domain.class_vars, ContinuousVariable)
133136
if not cont_class:
134-
self.targets = "Multi-target data,\n%i categorical targets" % \
135-
n_or_none(disc_class)
137+
self.targets = "Multi-target data,\n{} categorical targets"\
138+
.format(n_or_none(disc_class))
136139
elif not disc_class:
137-
self.targets = "Multi-target data,\n%i numeric targets" % \
138-
n_or_none(cont_class)
140+
self.targets = "Multi-target data,\n{} numeric targets"\
141+
.format(n_or_none(cont_class))
139142
else:
140-
self.targets = "<p>Multi-target data</p>\n" + pack_table(
141-
(("Categorical", disc_class), ("Numeric", cont_class)))
143+
self.targets = "<p>Multi-target data</p>\n" + \
144+
pack_counts(domain.class_vars)
145+
else:
146+
self.targets = "None"
147+
148+
if data.attributes:
149+
self.data_attributes = pack_table(data.attributes.items())
150+
else:
151+
self.data_attributes = ""
152+
153+
def _set_report(self, data):
154+
domain = data.domain
155+
count = self._count
142156

143157
self.data_desc = dd = OrderedDict()
158+
dd["Name"] = self.data_set_name
144159

145160
if SqlTable is not None and isinstance(data, SqlTable):
146161
connection_string = ' '.join(
147-
'%s=%s' % (key, value)
162+
'{}={}'.format(key, value)
148163
for key, value in data.connection_params.items()
149164
if value is not None and key != 'password')
150-
self.location = "Table '%s', using connection:\n%s" % (
151-
data.table_name, connection_string)
165+
self.location = "Table '{}', using connection:\n{}"\
166+
.format(data.table_name, connection_string)
152167
dd["Rows"] = data.approx_len()
153168
else:
154169
self.location = "Data is stored in memory"
@@ -157,9 +172,9 @@ def update_size():
157172
def join_if(items):
158173
return ", ".join(s.format(n) for s, n in items if n)
159174

160-
dd["Features"] = len(domain.attributes) and join_if((
161-
("{} categorical", disc_features),
162-
("{} numeric", cont_features)
175+
dd["Features"] = len(domain.attributes) > 0 and join_if((
176+
("{} categorical", count(domain.attributes, DiscreteVariable)),
177+
("{} numeric", count(domain.attributes, ContinuousVariable))
163178
))
164179
if domain.class_var:
165180
name = domain.class_var.name
@@ -168,22 +183,19 @@ def join_if(items):
168183
else:
169184
dd["Target"] = "numeric target '{}'".format(name)
170185
elif domain.class_vars:
186+
disc_class = count(domain.class_vars, DiscreteVariable)
187+
cont_class = count(domain.class_vars, ContinuousVariable)
171188
tt = ""
172189
if disc_class:
173190
tt += report.plural("{number} categorical outcome{s}", disc_class)
174191
if cont_class:
175192
tt += report.plural("{number} numeric target{s}", cont_class)
176193
dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
177-
("{} categorical", disc_metas),
178-
("{} numeric", cont_metas),
179-
("{} textual", str_metas)
194+
("{} categorical", count(domain.metas, DiscreteVariable)),
195+
("{} numeric", count(domain.metas, ContinuousVariable)),
196+
("{} text", count(domain.metas, StringVariable))
180197
))
181198

182-
if data.attributes:
183-
self.data_attributes = pack_table(data.attributes.items())
184-
else:
185-
self.data_attributes = ""
186-
187199
def send_report(self):
188200
if self.data_desc:
189201
self.report_items(self.data_desc)

0 commit comments

Comments
 (0)