Skip to content

Commit d9f557b

Browse files
TreeViewer: Fix different trees being displayed for same dataset
OWTreeViewer was not deterministic in showing trees. This happened because the layout was updated w.r.t the graph node edges, which were stored in a set object, which is unordered. I assume that a set was used because speed is important, so I've implemented edges as an ordered dict, to keep the speed benefits as well as an ordering.
1 parent 1b1813c commit d9f557b

3 files changed

Lines changed: 72 additions & 10 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
target d1 d2
2+
d d d
3+
class
4+
A 2 2
5+
A 2 2
6+
B 1 1
7+
B 1 1

Orange/widgets/visualize/owtreeviewer2d.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from collections import OrderedDict
2+
13
from AnyQt.QtGui import (
24
QBrush, QPen, QColor, QPainter, QPainterPath, QTransform
35
)
@@ -20,16 +22,20 @@
2022

2123
class GraphNode:
2224
def __init__(self, *_, **kwargs):
23-
self.edges = kwargs.get("edges", set())
25+
# Implement edges as an ordered dict to get the nice speed benefits as
26+
# well as adding ordering, which we need to make trees deterministic
27+
self.__edges = kwargs.get("edges", OrderedDict())
2428

2529
def graph_edges(self):
26-
return self.edges
30+
"""Get a list of the edges that stem from the node."""
31+
return self.__edges.keys()
2732

2833
def graph_add_edge(self, edge):
29-
self.edges.add(edge)
34+
"""Add an edge stemming from the node."""
35+
self.__edges[edge] = 0
3036

3137
def __iter__(self):
32-
for edge in self.edges:
38+
for edge in self.__edges.keys():
3339
yield edge.node2
3440

3541
def graph_nodes(self, atype=1):
@@ -185,7 +191,7 @@ def itemChange(self, change, value):
185191

186192
# noinspection PyCallByClass,PyTypeChecker
187193
def update_edge(self):
188-
for edge in self.edges:
194+
for edge in self.graph_edges():
189195
if edge.node1 is self:
190196
QTimer.singleShot(0, edge.update_ends)
191197
elif edge.node2 is self:
@@ -274,16 +280,17 @@ def fix_pos(self, node=None, x=10, y=10):
274280
self.update()
275281

276282
def _fix_pos(self, node, x, y):
283+
"""Fix the position of the tree stemming from the given node."""
277284
def brect(node):
285+
"""Get the bounding box of the parent rect and all its children."""
278286
return node.boundingRect() | node.childrenBoundingRect()
279287

280288
if node.branches and node.isOpen:
281289
for n in node.branches:
282-
x, ry = self._fix_pos(n, x,
283-
y + self._VSPACING + brect(node).height())
290+
x, ry = self._fix_pos(n, x, y + self._VSPACING + brect(node).height())
284291
x = (node.branches[0].pos().x() + node.branches[-1].pos().x()) / 2
285292
node.setPos(x, y)
286-
for e in node.edges:
293+
for e in node.graph_edges():
287294
e.update_ends()
288295
else:
289296
node.setPos(self.gx, y)

Orange/widgets/visualize/tests/test_owtreegraph.py

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# Test methods with long descriptive names can omit docstrings
22
# pylint: disable=missing-docstring
3+
from os import path
4+
35
from Orange.classification import TreeLearner
6+
from Orange.data import Table
47
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
5-
from Orange.widgets.visualize.owtreeviewer import \
6-
OWTreeGraph
8+
from Orange.widgets.visualize.owtreeviewer import OWTreeGraph
79

810

911
class TestOWTreeGraph(WidgetTest, WidgetOutputsTestMixin):
@@ -19,6 +21,12 @@ def setUpClass(cls):
1921
cls.signal_name = "Tree"
2022
cls.signal_data = cls.model
2123

24+
# Load a dataset that contains two variables with the same entropy
25+
data_same_entropy = Table(path.join(
26+
path.dirname(__file__), "../../tests/datasets/same_entropy.tab"))
27+
cls.data_same_entropy = tree(data_same_entropy)
28+
cls.data_same_entropy.instances = data_same_entropy
29+
2230
def setUp(self):
2331
self.widget = self.create_widget(OWTreeGraph)
2432

@@ -35,3 +43,43 @@ def test_target_class_changed(self):
3543
self.widget.color_combo.activated.emit(1)
3644
self.widget.color_combo.setCurrentIndex(1)
3745
self.assertNotEqual(nodes[0].toPlainText(), text)
46+
47+
def test_tree_determinism(self):
48+
"""Check that the tree is drawn identically upon receiving the same
49+
dataset with no parameter changes."""
50+
n_tries = 10
51+
52+
def _check_all_same(data):
53+
"""Check that all the elements within an iterable are identical."""
54+
iterator = iter(data)
55+
try:
56+
first = next(iterator)
57+
except StopIteration:
58+
return True
59+
return all(first == rest for rest in iterator)
60+
61+
# Make sure the tree are deterministic for iris
62+
scene_nodes = []
63+
for i in range(n_tries):
64+
self.send_signal(self.signal_name, self.signal_data)
65+
scene_nodes.append([n.pos() for n in self.widget.scene.nodes()])
66+
for node_row in zip(*scene_nodes):
67+
self.assertTrue(
68+
_check_all_same(node_row),
69+
"The tree was not drawn identically in the %d times it was "
70+
"sent to widget after receiving the iris dataset." % n_tries
71+
)
72+
73+
# Make sure trees are deterministic with data where some variables have
74+
# the same entropy
75+
scene_nodes = []
76+
for i in range(n_tries):
77+
self.send_signal(self.signal_name, self.data_same_entropy)
78+
scene_nodes.append([n.pos() for n in self.widget.scene.nodes()])
79+
for node_row in zip(*scene_nodes):
80+
self.assertTrue(
81+
_check_all_same(node_row),
82+
"The tree was not drawn identically in the %d times it was "
83+
"sent to widget after receiving a dataset with variables with "
84+
"same entropy." % n_tries
85+
)

0 commit comments

Comments
 (0)