From 1e633cdb377cca0b1332aae933ee0ff8de5022e2 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 13:47:14 +0200 Subject: [PATCH 001/105] WIP: add new learner concept and example notebook --- adaptive/learner/new_learnerND.py | 411 ++++++++++++++++++++++++++++++ proof-of-concept-learner.ipynb | 103 ++++++++ 2 files changed, 514 insertions(+) create mode 100644 adaptive/learner/new_learnerND.py create mode 100644 proof-of-concept-learner.ipynb diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py new file mode 100644 index 000000000..a5159502a --- /dev/null +++ b/adaptive/learner/new_learnerND.py @@ -0,0 +1,411 @@ +from math import sqrt +import itertools + +import numpy as np +import sortedcontainers + +from adaptive.learner.base_learner import BaseLearner +from adaptive.notebook_integration import ensure_holoviews + + +class Domain: + def insert_points(self, subdomain, n): + "Insert 'n' points into 'subdomain'." + + def insert_into(self, subdomain, x): + """Insert 'x' into 'subdomain'. + + Raises + ------ + ValueError : if x is outside of subdomain or on its boundary + """ + + def split_at(self, x): + """Split the domain at 'x'. + + Returns + ------- + old_subdomains : list of subdomains + The subdomains that were removed when splitting at 'x'. + new_subdomains : list of subdomains + The subdomains that were added when splitting at 'x'. + """ + + def which_subdomain(self, x): + """Return the subdomain that contains 'x'. + + Raises + ------ + ValueError: if x is on a subdomain boundary + """ + + def neighbors(self, subdomain, n=1): + "Return all neighboring subdomains up to degree 'n'." + + def subdomains(self): + "Return all the subdomains in the domain." + + def clear_subdomains(self): + "Remove all points from the interior of subdomains." + + def volume(self, subdomain): + "Return the volume of a subdomain." + + def subvolumes(self, subdomain): + "Return the volumes of the sub-subdomains." 
+ + +class Interval(Domain): + """A 1D domain (an interval). + + Subdomains are pairs of floats (a, b). + """ + + def __init__(self, a, b): + if a >= b: + raise ValueError("'a' must be less than 'b'") + + # If a sub-interval contains points in its interior, they are stored + # in 'sub_intervals' in a SortedList. + self.bounds = (a, b) + self.sub_intervals = dict() + self.points = sortedcontainers.SortedList([a, b]) + + def insert_points(self, subdomain, n, *, _check_membership=True): + if _check_membership and subdomain not in self: + raise ValueError("{} is not present in this interval".format(subdomain)) + try: + p = self.sub_intervals[subdomain] + except KeyError: # first point in the interior of this subdomain + a, b = subdomain + points = np.linspace(a, b, 2 + n) + self.sub_intervals[subdomain] = sortedcontainers.SortedList(points) + return [(x,) for x in points[1:-1]] + + # XXX: allow this + if n != 1: + raise ValueError("Can't add more than one point to a full subinterval") + + subsubdomains = zip(p, p.islice(1)) + a, b = max(subsubdomains, key=lambda ival: ival[1] - ival[0]) + m = (b - a) / 2 + p.add(m) + return [(m,)] + + def insert_into(self, subdomain, x, *, _check_membership=True): + x, = x + a, b = subdomain + if _check_membership: + if subdomain not in self: + raise ValueError("{} is not present in this interval".format(subdomain)) + if not (a < x < b): + raise ValueError("{} is not in ({}, {})".format(x, a, b)) + + try: + p = self.sub_intervals[subdomain] + except KeyError: + self.sub_intervals[subdomain] = sortedcontainers.SortedList([a, x, b]) + else: + p.add(x) + + def split_at(self, x, *, _check_membership=True): + x, = x + a, b = self.bounds + if _check_membership: + if not (a < x < b): + raise ValueError("Can only split at points within the interval") + if x in self.points: + raise ValueError("Cannot split at an existing point") + + p = self.points + i = p.bisect_left(x) + a, b = old_interval = p[i - 1], p[i] + new_intervals = [(a, x), (x, b)] 
+ + p.add(x) + try: + sub_points = self.sub_intervals.pop(old_interval) + except KeyError: + pass + else: # update sub_intervals + for ival in new_intervals: + new_sub_points = sortedcontainers.SortedList(sub_points.irange(*ival)) + if x not in new_sub_points: + new_sub_points.add(x) + if len(new_sub_points) > 2: + self.sub_intervals[ival] = new_sub_points + + return [old_interval], new_intervals + + def which_subdomain(self, x): + x, = x + a, b = self.bounds + if not (a <= x <= b): + raise ValueError("{} is outside the interval".format(x)) + p = self.points + i = p.bisect_left(x) + if p[i] == x: + raise ValueError("{} belongs to 2 subdomains".format(x)) + return (p[i], p[i + 1]) + + def __contains__(self, subdomain): + a, b = subdomain + try: + ia = self.points.index(a) + ib = self.points.index(b) + except ValueError: + return False + return ia + 1 == ib + + def neighbors(self, subdomain, n=1): + "Return all neighboring subdomains up to degree 'n'." + a, b = subdomain + p = self.points + ia = p.index(a) + neighbors = [] + for i in range(n): + if ia - i > 0: # left neighbor exists + neighbors.append((p[ia - i - 1], p[ia - i])) + if ia + i < len(p) - 2: # right neighbor exists + neighbors.append((p[ia + i + 1], p[ia + i + 2])) + return neighbors + + def points(self, subdomain): + "Return all the points that define a given subdomain." + try: + return self.sub_intervals[subdomain] + except KeyError: + return subdomain + + def subdomains(self): + "Return all the subdomains in the domain." + p = self.points + return zip(p, p.islice(1)) + + def clear_subdomains(self): + self.sub_intervals = dict() + + def volume(self, subdomain): + "Return the volume of a subdomain" + a, b = subdomain + return b - a + + def subvolumes(self, subdomain): + "Return the volumes of the sub-subdomains." 
+ try: + p = self.sub_intervals[subdomain] + except KeyError: + return [self.volume(subdomain)] + else: + return [self.volume(s) for s in zip(p, p.islice(1))] + + +class Queue: + "Priority queue supporting update and removal at arbitrary position." + + def __init__(self): + self.queue = sortedcontainers.SortedDict() + # 'self.queue' cannot be keyed only on priority, as there may be several + # items that have the same priority. To keep unique elements the key + # will be '(priority, self.n)', where 'self.n' is incremented whenever + # we add a new element. + self.n = 0 + # To efficiently support updating and removing items if their priority + # is unknown we have to keep the reverse map of 'self.queue'. Because + # items may not be hashable we cannot use a SortedDict, so we use a + # SortedList storing '(item, key)'. + self.items = sortedcontainers.SortedList() + + def peek(self): + "Return the item and priority at the front of the queue." + ((priority, _), item) = self.queue.peekitem() + return item, priority + + def pop(self): + "Remove and return the item and priority at the front of the queue." + (key, item) = self.queue.popitem() + i = self.items.index((item, key)) + del self.items[i] + priority, _ = key + return item, priority + + def insert(self, item, priority): + "Insert 'item' into the queue with the given priority." + key = (priority, self.n) + self.items.add((item, key)) + self.queue[key] = item + self.n += 1 + + def remove(self, item): + "Remove the 'item' from the queue." + i = self.items.bisect_left((item, ())) + should_be, key = self.items[i] + if item != should_be: + raise KeyError("item is not in queue") + + del self.queue[key] + del self.items[i] + + def update(self, item, new_priority): + """Update 'item' in the queue with the given priority. + + Raises + ------ + KeyError : if 'item' is not in the queue. 
+ """ + i = self.items.bisect_left((item, ())) + should_be, key = self.items[i] + if item != should_be: + raise KeyError("item is not in queue") + + _, n = key + new_key = (new_priority, n) + + del self.queue[key] + self.queue[new_key] = item + del self.items[i] + self.items.add((item, new_key)) + + +class LossFunction: + @property + def n_neighbors(self): + "The maximum degree of neighboring subdomains required." + + def __call__(self, domain, subdomain, data): + """Return the loss for 'subdomain' given 'data' + + Neighboring subdomains can be obtained with + 'domain.neighbors(subdomain, self.n_neighbors)'. + """ + + +class DistanceLoss(LossFunction): + @property + def n_neighbors(self): + return 0 + + def __call__(self, domain, subdomain, data): + # XXX: this is specialised to 1D + a, b = subdomain + ya, yb = data[(a,)], data[(b,)] + return sqrt((b - a) ** 2 + (yb - ya) ** 2) + + +def _scaled_loss(loss, domain, subdomain, data): + subvolumes = domain.subvolumes(subdomain) + max_relative_subvolume = max(subvolumes) / sum(subvolumes) + L_0 = loss(domain, subdomain, data) + return max_relative_subvolume * L_0 + + +class LearnerND(BaseLearner): + def __init__(self, f, bounds, loss=None): + + if len(bounds) == 1: + (a, b), = bounds + self.domain = Interval(a, b) + self.loss = loss or DistanceLoss() + else: + raise ValueError("Can only handle 1D functions for now") + + self.queue = Queue() + self.data = dict() + self.function = f + + # Evaluate boundary points right away to avoid handling edge + # cases in the ask and tell logic + bound_points = sorted(map(tuple, itertools.product(*bounds))) + for x in bound_points: + self.data[x] = f(x) + + try: + self.vdim = len(np.squeeze(self.data[x])) + except TypeError: # Trying to take the length of a number + self.vdim = 1 + + d, = self.domain.subdomains() + self.queue.insert(d, priority=self.loss(self.domain, d, self.data)) + + def ask(self, n, tell_pending=True): + if not tell_pending: + # XXX: handle this case + raise 
RuntimeError("tell_pending=False not supported yet") + new_points = [] + new_losses = [] + for _ in range(n): + subdomain, _ = self.queue.pop() + new_point, = self.domain.insert_points(subdomain, 1) + self.data[new_point] = None + new_loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + self.queue.insert(subdomain, priority=new_loss) + new_points.append(new_point) + new_losses.append(new_loss) + return new_points, new_losses + + def tell_pending(self, x): + self.data[x] = None + subdomain = self.domain.which_subdomain(x) + self.domain.insert_into(subdomain, x) + loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + self.queue.update(subdomain, priority=loss) + + def tell_many(self, xs, ys): + for x, y in zip(xs, ys): + self.data[x] = y + + old = set() + new = set() + for x in xs: + old_subdomains, new_subdomains = self.domain.split_at(x) + old.update(old_subdomains) + new.update(new_subdomains) + # remove any subdomains that were new at some point but are now old + new -= old + + for subdomain in old: + self.queue.remove(subdomain) + for subdomain in new: + loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + self.queue.insert(subdomain, priority=loss) + + if self.loss.n_neighbors > 0: + subdomains_to_update = sum( + (set(self.domain.neighbors(d, self.loss.n_neighbors)) for d in new), + set(), + ) + subdomains_to_update -= new + for subdomain in subdomains_to_update: + loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + self.queue.update(subdomain, priority=loss) + + def remove_unfinished(self): + self.data = {k: v for k, v in self.data.items() if v is not None} + self.domain.clear_subdomains() + + def loss(self): + _, loss = self.queue.peek() + return loss + + def plot(self): + # XXX: specialized to 1D + hv = ensure_holoviews() + + xs, ys = zip(*sorted(self.data.items())) if self.data else ([], []) + if self.vdim == 1: + p = hv.Path([]) * hv.Scatter((xs, ys)) + else: + p = hv.Path((xs, ys)) * 
hv.Scatter([]) + + # Plot with 5% empty margins such that the boundary points are visible + a, b = self.domain.bounds + margin = 0.05 * (b - a) + plot_bounds = (a - margin, b + margin) + + return p.redim(x=dict(range=plot_bounds)) + + def _get_data(self): + pass + + def _set_data(self, data): + pass diff --git a/proof-of-concept-learner.ipynb b/proof-of-concept-learner.ipynb new file mode 100644 index 000000000..ffa3a8cd9 --- /dev/null +++ b/proof-of-concept-learner.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "\n", + "from sortedcontainers import SortedList\n", + "import adaptive\n", + "from adaptive.learner.new_learnerND import LearnerND\n", + "\n", + "adaptive.notebook_extension()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "offset = random.uniform(-0.5, 0.5)\n", + "\n", + "def peak(x, offset=offset):\n", + " a = 0.02\n", + " return x + a**2 / (a**2 + (x - offset)**2)\n", + "\n", + "def peak2(x, offset=offset):\n", + " return peak(x[0], offset=offset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\\learner = adaptive.Learner1D(peak, (-1, 1))\n", + "adaptive.runner.simple(learner, goal=lambda l: len(l.data) > 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner2 = LearnerND(peak2, [(-1, 1)])\n", + "adaptive.runner.simple(learner2, goal=lambda l: len(l.data) > 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner.plot() + learner2.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%time adaptive.runner.simple(learner, goal=lambda l: len(l.data) > 10000)" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%time adaptive.runner.simple(learner2, goal=lambda l: len(l.data) > 10000)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "adaptive", + "language": "python", + "name": "adaptive" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From a5a8f5fb3dc1bdedb0dbc911e693988c64e4b956 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 14:12:25 +0200 Subject: [PATCH 002/105] WIP: correct implementation of unfinished point removal --- adaptive/learner/new_learnerND.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index a5159502a..89099c9f1 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -46,7 +46,12 @@ def subdomains(self): "Return all the subdomains in the domain." def clear_subdomains(self): - "Remove all points from the interior of subdomains." + """Remove all points from the interior of subdomains. + + Returns + ------- + subdomains : the subdomains who's interior points were removed + """ def volume(self, subdomain): "Return the volume of a subdomain." 
@@ -183,7 +188,9 @@ def subdomains(self): return zip(p, p.islice(1)) def clear_subdomains(self): + subdomains = list(self.sub_intervals.keys()) self.sub_intervals = dict() + return subdomains def volume(self, subdomain): "Return the volume of a subdomain" @@ -381,7 +388,11 @@ def tell_many(self, xs, ys): def remove_unfinished(self): self.data = {k: v for k, v in self.data.items() if v is not None} - self.domain.clear_subdomains() + cleared_subdomains = self.domain.clear_subdomains() + # Subdomains who had internal points removed need their losses updating + for subdomain in cleared_subdomains: + loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + self.queue.update(subdomain, priority=loss) def loss(self): _, loss = self.queue.peek() From d7cb56c1f12fd0fd83d5864e8f47782199539f4c Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 15:23:34 +0200 Subject: [PATCH 003/105] add necessary information to scale data for loss functions that want to --- adaptive/learner/new_learnerND.py | 38 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 89099c9f1..fac6a8770 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -39,6 +39,9 @@ def which_subdomain(self, x): ValueError: if x is on a subdomain boundary """ + def transform(self, x): + "Transform 'x' to the unit hypercube" + def neighbors(self, subdomain, n=1): "Return all neighboring subdomains up to degree 'n'." @@ -162,6 +165,10 @@ def __contains__(self, subdomain): return False return ia + 1 == ib + def transform(self, x): + a, b = self.bounds + return (x - a) / (b - a) + def neighbors(self, subdomain, n=1): "Return all neighboring subdomains up to degree 'n'." 
a, b = subdomain @@ -292,17 +299,17 @@ class DistanceLoss(LossFunction): def n_neighbors(self): return 0 - def __call__(self, domain, subdomain, data): + def __call__(self, domain, subdomain, codomain_bounds, data): # XXX: this is specialised to 1D a, b = subdomain ya, yb = data[(a,)], data[(b,)] return sqrt((b - a) ** 2 + (yb - ya) ** 2) -def _scaled_loss(loss, domain, subdomain, data): +def _scaled_loss(loss, domain, subdomain, codomain_bounds, data): subvolumes = domain.subvolumes(subdomain) max_relative_subvolume = max(subvolumes) / sum(subvolumes) - L_0 = loss(domain, subdomain, data) + L_0 = loss(domain, subdomain, codomain_bounds, data) return max_relative_subvolume * L_0 @@ -324,15 +331,23 @@ def __init__(self, f, bounds, loss=None): # cases in the ask and tell logic bound_points = sorted(map(tuple, itertools.product(*bounds))) for x in bound_points: + y = f(x) self.data[x] = f(x) + vals = list(self.data.values()) + self.codomain_bounds = ( + np.min(vals, axis=0), + np.max(vals, axis=0), + ) + try: self.vdim = len(np.squeeze(self.data[x])) except TypeError: # Trying to take the length of a number self.vdim = 1 d, = self.domain.subdomains() - self.queue.insert(d, priority=self.loss(self.domain, d, self.data)) + loss = self.loss(self.domain, d, self.codomain_bounds, self.data) + self.queue.insert(d, priority=loss) def ask(self, n, tell_pending=True): if not tell_pending: @@ -344,7 +359,8 @@ def ask(self, n, tell_pending=True): subdomain, _ = self.queue.pop() new_point, = self.domain.insert_points(subdomain, 1) self.data[new_point] = None - new_loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + new_loss = _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data) self.queue.insert(subdomain, priority=new_loss) new_points.append(new_point) new_losses.append(new_loss) @@ -354,7 +370,8 @@ def tell_pending(self, x): self.data[x] = None subdomain = self.domain.which_subdomain(x) self.domain.insert_into(subdomain, x) - loss = 
_scaled_loss(self.loss, self.domain, subdomain, self.data) + loss = _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data) self.queue.update(subdomain, priority=loss) def tell_many(self, xs, ys): @@ -373,7 +390,8 @@ def tell_many(self, xs, ys): for subdomain in old: self.queue.remove(subdomain) for subdomain in new: - loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + loss = _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data) self.queue.insert(subdomain, priority=loss) if self.loss.n_neighbors > 0: @@ -383,7 +401,8 @@ def tell_many(self, xs, ys): ) subdomains_to_update -= new for subdomain in subdomains_to_update: - loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + loss = _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data) self.queue.update(subdomain, priority=loss) def remove_unfinished(self): @@ -391,7 +410,8 @@ def remove_unfinished(self): cleared_subdomains = self.domain.clear_subdomains() # Subdomains who had internal points removed need their losses updating for subdomain in cleared_subdomains: - loss = _scaled_loss(self.loss, self.domain, subdomain, self.data) + loss = _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data) self.queue.update(subdomain, priority=loss) def loss(self): From a288b646a61808bbfcc4c72e2c45248e33ad8d37 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 16:19:40 +0200 Subject: [PATCH 004/105] start internal queue attributes with underscore --- adaptive/learner/new_learnerND.py | 68 +++++++++++++++++++------------ 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index fac6a8770..2fcfe6b4b 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -215,50 +215,66 @@ def subvolumes(self, subdomain): class Queue: - "Priority queue supporting update and removal 
at arbitrary position." + """Priority queue supporting update and removal at arbitrary position. - def __init__(self): - self.queue = sortedcontainers.SortedDict() - # 'self.queue' cannot be keyed only on priority, as there may be several + Parameters + ---------- + entries : iterable of (item, priority) + The initial data in the queue. Providing this is faster than + calling 'insert' a bunch of times. + """ + + def __init__(self, entries=()): + self._queue = sortedcontainers.SortedDict( + ((priority, n), item) + for n, (item, priority) in enumerate(entries) + ) + # 'self._queue' cannot be keyed only on priority, as there may be several # items that have the same priority. To keep unique elements the key - # will be '(priority, self.n)', where 'self.n' is incremented whenever + # will be '(priority, self._n)', where 'self._n' is incremented whenever # we add a new element. - self.n = 0 + self._n = len(self._queue) # To efficiently support updating and removing items if their priority - # is unknown we have to keep the reverse map of 'self.queue'. Because + # is unknown we have to keep the reverse map of 'self._queue'. Because # items may not be hashable we cannot use a SortedDict, so we use a # SortedList storing '(item, key)'. - self.items = sortedcontainers.SortedList() + self._items = sortedcontainers.SortedList( + ((v, k) for k, v in self._queue.items()) + ) + + def items(self): + "Return an iterator over the items in the queue in priority order." + return reversed(self._queue.values()) def peek(self): "Return the item and priority at the front of the queue." - ((priority, _), item) = self.queue.peekitem() + ((priority, _), item) = self._queue.peekitem() return item, priority def pop(self): "Remove and return the item and priority at the front of the queue." 
- (key, item) = self.queue.popitem() - i = self.items.index((item, key)) - del self.items[i] + (key, item) = self._queue.popitem() + i = self._items.index((item, key)) + del self._items[i] priority, _ = key return item, priority def insert(self, item, priority): "Insert 'item' into the queue with the given priority." - key = (priority, self.n) - self.items.add((item, key)) - self.queue[key] = item - self.n += 1 + key = (priority, self._n) + self._items.add((item, key)) + self._queue[key] = item + self._n += 1 def remove(self, item): "Remove the 'item' from the queue." - i = self.items.bisect_left((item, ())) - should_be, key = self.items[i] + i = self._items.bisect_left((item, ())) + should_be, key = self._items[i] if item != should_be: raise KeyError("item is not in queue") - del self.queue[key] - del self.items[i] + del self._queue[key] + del self._items[i] def update(self, item, new_priority): """Update 'item' in the queue with the given priority. @@ -267,18 +283,18 @@ def update(self, item, new_priority): ------ KeyError : if 'item' is not in the queue. 
""" - i = self.items.bisect_left((item, ())) - should_be, key = self.items[i] + i = self._items.bisect_left((item, ())) + should_be, key = self._items[i] if item != should_be: raise KeyError("item is not in queue") _, n = key new_key = (new_priority, n) - del self.queue[key] - self.queue[new_key] = item - del self.items[i] - self.items.add((item, new_key)) + del self._queue[key] + self._queue[new_key] = item + del self._items[i] + self._items.add((item, new_key)) class LossFunction: From f2d6125297aeba429186f6f7acf56c961a984544 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 16:21:13 +0200 Subject: [PATCH 005/105] recalculate losses when scale increases by a certain factor --- adaptive/learner/new_learnerND.py | 69 ++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 2fcfe6b4b..2362993f3 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -351,10 +351,12 @@ def __init__(self, f, bounds, loss=None): self.data[x] = f(x) vals = list(self.data.values()) - self.codomain_bounds = ( - np.min(vals, axis=0), - np.max(vals, axis=0), - ) + codomain_min = np.min(vals, axis=0) + codomain_max = np.max(vals, axis=0) + self.codomain_bounds = (codomain_min, codomain_max) + self.codomain_scale_at_last_update = codomain_max - codomain_min + + self.need_loss_update_factor = 1.1 try: self.vdim = len(np.squeeze(self.data[x])) @@ -394,6 +396,8 @@ def tell_many(self, xs, ys): for x, y in zip(xs, ys): self.data[x] = y + need_loss_update = self._update_codomain_bounds(ys) + old = set() new = set() for x in xs: @@ -405,21 +409,56 @@ def tell_many(self, xs, ys): for subdomain in old: self.queue.remove(subdomain) - for subdomain in new: - loss = _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data) - self.queue.insert(subdomain, priority=loss) - if self.loss.n_neighbors > 0: - 
subdomains_to_update = sum( - (set(self.domain.neighbors(d, self.loss.n_neighbors)) for d in new), - set(), + if need_loss_update: + # Need to recalculate all losses anyway + subdomains = itertools.chain(self.queue.items(), new) + self.queue = Queue( + (subdomain, _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data)) + for subdomain in itertools.chain(self.queue.items(), new) ) - subdomains_to_update -= new - for subdomain in subdomains_to_update: + else: + # Compute the losses for the new subdomains and re-compute the + # losses for the neighboring subdomains, if necessary. + for subdomain in new: loss = _scaled_loss(self.loss, self.domain, subdomain, self.codomain_bounds, self.data) - self.queue.update(subdomain, priority=loss) + self.queue.insert(subdomain, priority=loss) + + if self.loss.n_neighbors > 0: + subdomains_to_update = sum( + (set(self.domain.neighbors(d, self.loss.n_neighbors)) for d in new), + set(), + ) + subdomains_to_update -= new + for subdomain in subdomains_to_update: + loss = _scaled_loss(self.loss, self.domain, subdomain, + self.codomain_bounds, self.data) + self.queue.update(subdomain, priority=loss) + + def _update_codomain_bounds(self, ys): + mn, mx = self.codomain_bounds + if self.vdim == 1: + mn = min(mn, *ys) + mx = max(mx, *ys) + else: + mn = np.min(np.vstack([mn, ys]), axis=0) + mx = np.max(np.vstack([mx, ys]), axis=0) + self.codomain_bounds = (mn, mx) + + scale = mx - mn + + scale_factor = (scale / self.codomain_scale_at_last_update) + if self.vdim == 1: + need_loss_update = scale_factor > self.need_loss_update_factor + else: + need_loss_update = np.any(scale_factor > self.need_loss_update_factor) + if need_loss_update: + self.codomain_scale_at_last_update = scale + return True + else: + return False def remove_unfinished(self): self.data = {k: v for k, v in self.data.items() if v is not None} From 5d3c2b10a807a370cc44a4a8d2051ce2a5f41f23 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 
2019 16:31:16 +0200 Subject: [PATCH 006/105] blackify --- adaptive/learner/new_learnerND.py | 46 +++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 2362993f3..f60526dbd 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -226,8 +226,7 @@ class Queue: def __init__(self, entries=()): self._queue = sortedcontainers.SortedDict( - ((priority, n), item) - for n, (item, priority) in enumerate(entries) + ((priority, n), item) for n, (item, priority) in enumerate(entries) ) # 'self._queue' cannot be keyed only on priority, as there may be several # items that have the same priority. To keep unique elements the key @@ -377,8 +376,9 @@ def ask(self, n, tell_pending=True): subdomain, _ = self.queue.pop() new_point, = self.domain.insert_points(subdomain, 1) self.data[new_point] = None - new_loss = _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data) + new_loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) self.queue.insert(subdomain, priority=new_loss) new_points.append(new_point) new_losses.append(new_loss) @@ -388,8 +388,9 @@ def tell_pending(self, x): self.data[x] = None subdomain = self.domain.which_subdomain(x) self.domain.insert_into(subdomain, x) - loss = _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data) + loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) self.queue.update(subdomain, priority=loss) def tell_many(self, xs, ys): @@ -414,16 +415,25 @@ def tell_many(self, xs, ys): # Need to recalculate all losses anyway subdomains = itertools.chain(self.queue.items(), new) self.queue = Queue( - (subdomain, _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data)) + ( + subdomain, + _scaled_loss( + self.loss, + self.domain, + subdomain, + 
self.codomain_bounds, + self.data, + ), + ) for subdomain in itertools.chain(self.queue.items(), new) ) else: # Compute the losses for the new subdomains and re-compute the # losses for the neighboring subdomains, if necessary. for subdomain in new: - loss = _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data) + loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) self.queue.insert(subdomain, priority=loss) if self.loss.n_neighbors > 0: @@ -433,8 +443,13 @@ def tell_many(self, xs, ys): ) subdomains_to_update -= new for subdomain in subdomains_to_update: - loss = _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data) + loss = _scaled_loss( + self.loss, + self.domain, + subdomain, + self.codomain_bounds, + self.data, + ) self.queue.update(subdomain, priority=loss) def _update_codomain_bounds(self, ys): @@ -449,7 +464,7 @@ def _update_codomain_bounds(self, ys): scale = mx - mn - scale_factor = (scale / self.codomain_scale_at_last_update) + scale_factor = scale / self.codomain_scale_at_last_update if self.vdim == 1: need_loss_update = scale_factor > self.need_loss_update_factor else: @@ -465,8 +480,9 @@ def remove_unfinished(self): cleared_subdomains = self.domain.clear_subdomains() # Subdomains who had internal points removed need their losses updating for subdomain in cleared_subdomains: - loss = _scaled_loss(self.loss, self.domain, subdomain, - self.codomain_bounds, self.data) + loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) self.queue.update(subdomain, priority=loss) def loss(self): From 9db175a3969d2d517b11ea88d64691c00e19717d Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 20:58:05 +0200 Subject: [PATCH 007/105] make 1D points use floats rather than length-1 tuples --- adaptive/learner/new_learnerND.py | 13 ++++--------- proof-of-concept-learner.ipynb | 9 +++------ 2 files changed, 7 insertions(+), 15 
deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index f60526dbd..a98752413 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -88,7 +88,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): a, b = subdomain points = np.linspace(a, b, 2 + n) self.sub_intervals[subdomain] = sortedcontainers.SortedList(points) - return [(x,) for x in points[1:-1]] + return points[1:-1] # XXX: allow this if n != 1: @@ -98,10 +98,9 @@ def insert_points(self, subdomain, n, *, _check_membership=True): a, b = max(subsubdomains, key=lambda ival: ival[1] - ival[0]) m = (b - a) / 2 p.add(m) - return [(m,)] + return [m] def insert_into(self, subdomain, x, *, _check_membership=True): - x, = x a, b = subdomain if _check_membership: if subdomain not in self: @@ -117,7 +116,6 @@ def insert_into(self, subdomain, x, *, _check_membership=True): p.add(x) def split_at(self, x, *, _check_membership=True): - x, = x a, b = self.bounds if _check_membership: if not (a < x < b): @@ -146,7 +144,6 @@ def split_at(self, x, *, _check_membership=True): return [old_interval], new_intervals def which_subdomain(self, x): - x, = x a, b = self.bounds if not (a <= x <= b): raise ValueError("{} is outside the interval".format(x)) @@ -317,7 +314,7 @@ def n_neighbors(self): def __call__(self, domain, subdomain, codomain_bounds, data): # XXX: this is specialised to 1D a, b = subdomain - ya, yb = data[(a,)], data[(b,)] + ya, yb = data[a], data[b] return sqrt((b - a) ** 2 + (yb - ya) ** 2) @@ -332,7 +329,7 @@ class LearnerND(BaseLearner): def __init__(self, f, bounds, loss=None): if len(bounds) == 1: - (a, b), = bounds + (a, b), = bound_points, = bounds self.domain = Interval(a, b) self.loss = loss or DistanceLoss() else: @@ -344,9 +341,7 @@ def __init__(self, f, bounds, loss=None): # Evaluate boundary points right away to avoid handling edge # cases in the ask and tell logic - bound_points = sorted(map(tuple, 
itertools.product(*bounds))) for x in bound_points: - y = f(x) self.data[x] = f(x) vals = list(self.data.values()) diff --git a/proof-of-concept-learner.ipynb b/proof-of-concept-learner.ipynb index ffa3a8cd9..1c5bef079 100644 --- a/proof-of-concept-learner.ipynb +++ b/proof-of-concept-learner.ipynb @@ -25,10 +25,7 @@ "\n", "def peak(x, offset=offset):\n", " a = 0.02\n", - " return x + a**2 / (a**2 + (x - offset)**2)\n", - "\n", - "def peak2(x, offset=offset):\n", - " return peak(x[0], offset=offset)" + " return x + a**2 / (a**2 + (x - offset)**2)" ] }, { @@ -37,7 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "\\learner = adaptive.Learner1D(peak, (-1, 1))\n", + "learner = adaptive.Learner1D(peak, (-1, 1))\n", "adaptive.runner.simple(learner, goal=lambda l: len(l.data) > 50)" ] }, @@ -47,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "learner2 = LearnerND(peak2, [(-1, 1)])\n", + "learner2 = LearnerND(peak, [(-1, 1)])\n", "adaptive.runner.simple(learner2, goal=lambda l: len(l.data) > 50)" ] }, From cdb669d8362340da1cadf1bc2915a491f322db99 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 6 Oct 2019 20:58:26 +0200 Subject: [PATCH 008/105] correct point insertion logic --- adaptive/learner/new_learnerND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index a98752413..7304fe27a 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -96,7 +96,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): subsubdomains = zip(p, p.islice(1)) a, b = max(subsubdomains, key=lambda ival: ival[1] - ival[0]) - m = (b - a) / 2 + m = a + (b - a) / 2 p.add(m) return [m] From 042eba0ebcbc23606adedae25ac294e79d84d351 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 7 Oct 2019 11:18:48 +0200 Subject: [PATCH 009/105] add parallel example to proof of concept notebook --- proof-of-concept-learner.ipynb | 63 
++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/proof-of-concept-learner.ipynb b/proof-of-concept-learner.ipynb index 1c5bef079..f6bc8d15a 100644 --- a/proof-of-concept-learner.ipynb +++ b/proof-of-concept-learner.ipynb @@ -28,6 +28,13 @@ " return x + a**2 / (a**2 + (x - offset)**2)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The new implementation has the same API as the old one (except bounds)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -57,6 +64,13 @@ "learner.plot() + learner2.plot()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The new implementation is as fast as the old one" + ] + }, { "cell_type": "code", "execution_count": null, @@ -74,6 +88,55 @@ "source": [ "%time adaptive.runner.simple(learner2, goal=lambda l: len(l.data) > 10000)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Also works in parallel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "def peak(x, offset=offset):\n", + " time.sleep(0.5)\n", + " a = 0.02\n", + " return x + a**2 / (a**2 + (x - offset)**2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner2 = LearnerND(peak, [(-1, 1)])\n", + "runner = adaptive.Runner(learner2, goal=lambda l: len(l.data) > 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runner.live_info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runner.live_plot()" + ] } ], "metadata": { From 84cc671718da2c7a6b67eccb883235f76fc1ec36 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 7 Oct 2019 11:35:53 +0200 Subject: [PATCH 010/105] remove superfluous method and docstrings from Interval class --- 
adaptive/learner/new_learnerND.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 7304fe27a..a11d40a56 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -167,7 +167,6 @@ def transform(self, x): return (x - a) / (b - a) def neighbors(self, subdomain, n=1): - "Return all neighboring subdomains up to degree 'n'." a, b = subdomain p = self.points ia = p.index(a) @@ -179,15 +178,7 @@ def neighbors(self, subdomain, n=1): neighbors.append((p[ia + i + 1], p[ia + i + 2])) return neighbors - def points(self, subdomain): - "Return all the points that define a given subdomain." - try: - return self.sub_intervals[subdomain] - except KeyError: - return subdomain - def subdomains(self): - "Return all the subdomains in the domain." p = self.points return zip(p, p.islice(1)) @@ -197,12 +188,10 @@ def clear_subdomains(self): return subdomains def volume(self, subdomain): - "Return the volume of a subdomain" a, b = subdomain return b - a def subvolumes(self, subdomain): - "Return the volumes of the sub-subdomains." 
try: p = self.sub_intervals[subdomain] except KeyError: From 9c565ce63aafaa9997918d08236e70b0040e2973 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 7 Oct 2019 13:06:18 +0200 Subject: [PATCH 011/105] import SortedList and SortedDict and update 'insert_points' implementation --- adaptive/learner/new_learnerND.py | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index a11d40a56..c671a8e00 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -2,7 +2,7 @@ import itertools import numpy as np -import sortedcontainers +from sortedcontainers import SortedList, SortedDict from adaptive.learner.base_learner import BaseLearner from adaptive.notebook_integration import ensure_holoviews @@ -77,7 +77,7 @@ def __init__(self, a, b): # in 'sub_intervals' in a SortedList. self.bounds = (a, b) self.sub_intervals = dict() - self.points = sortedcontainers.SortedList([a, b]) + self.points = SortedList([a, b]) def insert_points(self, subdomain, n, *, _check_membership=True): if _check_membership and subdomain not in self: @@ -86,19 +86,19 @@ def insert_points(self, subdomain, n, *, _check_membership=True): p = self.sub_intervals[subdomain] except KeyError: # first point in the interior of this subdomain a, b = subdomain - points = np.linspace(a, b, 2 + n) - self.sub_intervals[subdomain] = sortedcontainers.SortedList(points) - return points[1:-1] + p = SortedList(subdomain) + self.sub_intervals[subdomain] = p - # XXX: allow this - if n != 1: - raise ValueError("Can't add more than one point to a full subinterval") + points = [] + subsubdomains = SortedList(zip(p, p.islice(1)), key=lambda iv: iv[1] - iv[0]) + for _ in range(n): + a, b = subsubdomains.pop() + m = a + (b - a) / 2 + subsubdomains.update([(a, m), (m, b)]) + points.append(m) + p.update(points) - subsubdomains = zip(p, p.islice(1)) - a, b = max(subsubdomains, 
key=lambda ival: ival[1] - ival[0]) - m = a + (b - a) / 2 - p.add(m) - return [m] + return points def insert_into(self, subdomain, x, *, _check_membership=True): a, b = subdomain @@ -111,7 +111,7 @@ def insert_into(self, subdomain, x, *, _check_membership=True): try: p = self.sub_intervals[subdomain] except KeyError: - self.sub_intervals[subdomain] = sortedcontainers.SortedList([a, x, b]) + self.sub_intervals[subdomain] = SortedList([a, x, b]) else: p.add(x) @@ -135,7 +135,7 @@ def split_at(self, x, *, _check_membership=True): pass else: # update sub_intervals for ival in new_intervals: - new_sub_points = sortedcontainers.SortedList(sub_points.irange(*ival)) + new_sub_points = SortedList(sub_points.irange(*ival)) if x not in new_sub_points: new_sub_points.add(x) if len(new_sub_points) > 2: @@ -211,7 +211,7 @@ class Queue: """ def __init__(self, entries=()): - self._queue = sortedcontainers.SortedDict( + self._queue = SortedDict( ((priority, n), item) for n, (item, priority) in enumerate(entries) ) # 'self._queue' cannot be keyed only on priority, as there may be several @@ -223,7 +223,7 @@ def __init__(self, entries=()): # is unknown we have to keep the reverse map of 'self._queue'. Because # items may not be hashable we cannot use a SortedDict, so we use a # SortedList storing '(item, key)'. - self._items = sortedcontainers.SortedList( + self._items = SortedList( ((v, k) for k, v in self._queue.items()) ) @@ -261,8 +261,8 @@ def remove(self, item): del self._queue[key] del self._items[i] - def update(self, item, new_priority): - """Update 'item' in the queue with the given priority. + def update(self, item, priority): + """Update 'item' in the queue to have the given priority. 
Raises ------ @@ -274,7 +274,7 @@ def update(self, item, new_priority): raise KeyError("item is not in queue") _, n = key - new_key = (new_priority, n) + new_key = (priority, n) del self._queue[key] self._queue[new_key] = item From 963fb071ab2f280af815980d961f2928fcb3973e Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 7 Oct 2019 17:13:27 +0200 Subject: [PATCH 012/105] fixup docstrings etc. --- adaptive/learner/new_learnerND.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index c671a8e00..412516970 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -10,25 +10,35 @@ class Domain: def insert_points(self, subdomain, n): - "Insert 'n' points into 'subdomain'." + """Insert 'n' points into 'subdomain'. + + May not return a point on the boundary of subdomain. + """ def insert_into(self, subdomain, x): """Insert 'x' into 'subdomain'. Raises ------ - ValueError : if x is outside of subdomain or on its boundary + ValueError : if x is outside subdomain or exists already + NotImplementedError : if x is on the boundary of subdomain """ def split_at(self, x): """Split the domain at 'x'. + Removes and adds subdomains. + Returns ------- old_subdomains : list of subdomains The subdomains that were removed when splitting at 'x'. new_subdomains : list of subdomains The subdomains that were added when splitting at 'x'. 
+ + Raises + ------ + ValueError : if x is outside of the domain or exists already """ def which_subdomain(self, x): @@ -36,7 +46,8 @@ def which_subdomain(self, x): Raises ------ - ValueError: if x is on a subdomain boundary + ValueError : if x is outside of the domain + NotImplementedError : if x is on a subdomain boundary """ def transform(self, x): @@ -80,17 +91,20 @@ def __init__(self, a, b): self.points = SortedList([a, b]) def insert_points(self, subdomain, n, *, _check_membership=True): + assert n > 0 if _check_membership and subdomain not in self: raise ValueError("{} is not present in this interval".format(subdomain)) try: p = self.sub_intervals[subdomain] - except KeyError: # first point in the interior of this subdomain + except KeyError: # No points yet in the interior of this subdomain a, b = subdomain p = SortedList(subdomain) self.sub_intervals[subdomain] = p + # Choose new points in the centre of the largest subdomain + # of this subinterval. points = [] - subsubdomains = SortedList(zip(p, p.islice(1)), key=lambda iv: iv[1] - iv[0]) + subsubdomains = SortedList(zip(p, p.islice(1)), key=self.volume) for _ in range(n): a, b = subsubdomains.pop() m = a + (b - a) / 2 @@ -150,7 +164,7 @@ def which_subdomain(self, x): p = self.points i = p.bisect_left(x) if p[i] == x: - raise ValueError("{} belongs to 2 subdomains".format(x)) + raise NotImplementedError("{} is on a subdomain boundary".format(x)) return (p[i], p[i + 1]) def __contains__(self, subdomain): From eb712b9be805d4bc7ad78e6a8513f4db0a9286bc Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 7 Oct 2019 17:14:29 +0200 Subject: [PATCH 013/105] add first implementation of ConvexHull domain --- adaptive/learner/new_learnerND.py | 285 ++++++++++++++++++++++++++++-- 1 file changed, 273 insertions(+), 12 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 412516970..9494a3906 100644 --- a/adaptive/learner/new_learnerND.py +++ 
b/adaptive/learner/new_learnerND.py @@ -1,10 +1,14 @@ from math import sqrt import itertools +from collections.abc import Iterable import numpy as np +import scipy.spatial +import scipy.interpolate from sortedcontainers import SortedList, SortedDict from adaptive.learner.base_learner import BaseLearner +from adaptive.learner.triangulation import Triangulation, simplex_volume_in_embedding from adaptive.notebook_integration import ensure_holoviews @@ -214,6 +218,174 @@ def subvolumes(self, subdomain): return [self.volume(s) for s in zip(p, p.islice(1))] +class ConvexHull(Domain): + """A convex hull domain in $ℝ^N$ (N >=2). + + Subdomains are simplices represented by integer tuples of length (N + 1). + """ + + def __init__(self, hull): + assert isinstance(hull, scipy.spatial.ConvexHull) + + self.bounds = hull + self.triangulation = Triangulation(hull.points[hull.vertices]) + # if a subdomain has interior points, then it appears as a key + # in 'sub_domains' and maps to a 'Triangulation' of the + # interior of the subdomain. 
+ self.sub_domains = dict() + + @property + def bounding_box(self): + hull_points = self.bounds.points[self.bounds.vertices] + return tuple(zip(hull_points.min(axis=0), hull_points.max(axis=0))) + + def insert_points(self, subdomain, n, *, _check_membership=True): + assert n > 0 + tri = self.triangulation + if _check_membership and subdomain not in tri.simplices: + raise ValueError("{} is not present in this domain".format(subdomain)) + try: + subtri = self.sub_domains[subdomain] + except KeyError: # No points in the interior of this subdomain yet + subtri = Triangulation([tri.vertices[x] for x in subdomain]) + self.sub_domains[subdomain] = subtri + + # Choose new points in the centre of the largest sub-subdomain + # of this subdomain + points = [] + for _ in range(n): + # O(N) in the number of sub-simplices, but typically we only have a few + largest_simplex = max(subtri.simplices, key=lambda s: subtri.volume(s)) + simplex_vertices = np.array([subtri.vertices[s] for s in largest_simplex]) + # XXX: choose the centre of the simplex. Update this to be able to handle + # choosing points on a face. 
This requires updating the ABC and having + # this method also return the subdomains that are affected by the new + # points + point = np.average(simplex_vertices, axis=0) + subtri.add_point(point, largest_simplex) + points.append(point) + + return [tuple(p) for p in points] + + def insert_into(self, subdomain, x, *, _check_membership=True): + tri = self.triangulation + if _check_membership: + if subdomain not in tri.simplices: + raise ValueError("{} is not present in this domain".format(subdomain)) + if not tri.point_in_simplex(x, subdomain): + raise ValueError("{} is not present in this subdomain".format(x)) + + try: + subtri = self.sub_domains[subdomain] + except KeyError: # No points in the interior of this subdomain yet + subtri = Triangulation([tri.vertices[x] for x in subdomain]) + self.sub_domains[subdomain] = subtri + + subtri.add_point(x) + + def split_at(self, x, *, _check_membership=True): + tri = self.triangulation + # XXX: O(N) in the number of simplices. As typically 'x' will have been + # obtained by 'insert_points' or by calling 'insert_into' we can keep + # a hashmap of x→simplex to make this lookup faster and fall back to + # 'locate_point' otherwise + simplex = tri.locate_point(x) + if not simplex: + raise ValueError("Can only split at points within the domain.") + + old_subdomains, new_subdomains = tri.add_point(x, simplex) + + if _check_membership: + assert not any(s in self.sub_domains for s in new_subdomains) + + # Re-assign all the interior points of 'old_subdomains' to 'new_subdomains' + + interior_points = [] + for d in old_subdomains: + try: + subtri = self.sub_domains.pop(d) + except KeyError: + continue + else: + # Get the points in the interior of the subtriangulation + verts = set(range(len(subtri.vertices))) - subtri.hull + assert verts + verts = np.array([subtri.vertices[i] for i in verts]) + # Remove 'x' if it is one of the points + verts = verts[np.all(verts != x, axis=1)] + interior_points.append(verts) + if interior_points: + 
interior_points = np.vstack(interior_points) + for p in interior_points: + # XXX: handle case where points lie on simplex boundaries + for subdomain in new_subdomains: + if tri.point_in_simplex(p, subdomain): + try: + subtri = self.sub_domains[subdomain] + except KeyError: # No points in this subdomain yet + subtri = Triangulation([tri.vertices[i] for i in subdomain]) + self.sub_domains[subdomain] = subtri + subtri.add_point(p) + break + else: + assert False, "{} was not in the interior of new simplices".format(x) + + return old_subdomains, new_subdomains + + def which_subdomain(self, x): + tri = self.triangulation + member = np.array([tri.point_in_simplex(x, s) for s in tri.simplices]) + n_simplices = member.sum() + if n_simplices < 1: + raise ValueError("{} is not in the domain".format(x)) + elif n_simplices == 1: + return member.argmax() + else: + raise ValueError("{} is on a subdomain boundary".format(x)) + + def transform(self, x): + # XXX: implement this + raise NotImplementedError() + + def neighbors(self, subdomain, n=1): + "Return all neighboring subdomains up to degree 'n'." + tri = self.triangulation + neighbors = {subdomain} + for _ in range(n): + for face in tri.faces(simplices=neighbors): + neighbors.update(tri.containing(face)) + neighbors.remove(subdomain) + return neighbors + + def subdomains(self): + "Return all the subdomains in the domain." + return self.triangulation.simplices + + def clear_subdomains(self): + """Remove all points from the interior of subdomains. + + Returns + ------- + subdomains : the subdomains who's interior points were removed + """ + sub_domains = list(self.sub_domains.keys()) + self.sub_domains = dict() + return sub_domains + + def volume(self, subdomain): + "Return the volume of a subdomain." + return self.triangulation.volume(subdomain) + + def subvolumes(self, subdomain): + "Return the volumes of the sub-subdomains." 
+ try: + subtri = self.sub_domains[subdomain] + except KeyError: + return [self.triangulation.volume(subdomain)] + else: + return [subtri.volume(s) for s in subtri.simplices] + + class Queue: """Priority queue supporting update and removal at arbitrary position. @@ -237,9 +409,7 @@ def __init__(self, entries=()): # is unknown we have to keep the reverse map of 'self._queue'. Because # items may not be hashable we cannot use a SortedDict, so we use a # SortedList storing '(item, key)'. - self._items = SortedList( - ((v, k) for k, v in self._queue.items()) - ) + self._items = SortedList(((v, k) for k, v in self._queue.items())) def items(self): "Return an iterator over the items in the queue in priority order." @@ -315,12 +485,28 @@ def n_neighbors(self): return 0 def __call__(self, domain, subdomain, codomain_bounds, data): - # XXX: this is specialised to 1D + assert isinstance(domain, Interval) a, b = subdomain ya, yb = data[a], data[b] return sqrt((b - a) ** 2 + (yb - ya) ** 2) +class EmbeddedVolumeLoss(LossFunction): + @property + def n_neighbors(self): + return 0 + + def __call__(self, domain, subdomain, codomain_bounds, data): + assert isinstance(domain, ConvexHull) + xs = [tuple(domain.triangulation.vertices[x]) for x in subdomain] + ys = [data[x] for x in xs] + if isinstance(ys[0], Iterable): + pts = [(*x, *y) for x, y in zip(xs, ys)] + else: + pts = [(*x, y) for x, y in zip(xs, ys)] + return simplex_volume_in_embedding(pts) + + def _scaled_loss(loss, domain, subdomain, codomain_bounds, data): subvolumes = domain.subvolumes(subdomain) max_relative_subvolume = max(subvolumes) / sum(subvolumes) @@ -332,11 +518,15 @@ class LearnerND(BaseLearner): def __init__(self, f, bounds, loss=None): if len(bounds) == 1: - (a, b), = bound_points, = bounds + (a, b), = (bound_points,) = bounds self.domain = Interval(a, b) self.loss = loss or DistanceLoss() + self.ndim = 1 else: - raise ValueError("Can only handle 1D functions for now") + bound_points = sorted(tuple(p) for p in 
itertools.product(*bounds)) + self.domain = ConvexHull(scipy.spatial.ConvexHull(bound_points)) + self.loss = loss or EmbeddedVolumeLoss() + self.ndim = len(bound_points[0]) self.queue = Queue() self.data = dict() @@ -360,9 +550,13 @@ def __init__(self, f, bounds, loss=None): except TypeError: # Trying to take the length of a number self.vdim = 1 - d, = self.domain.subdomains() - loss = self.loss(self.domain, d, self.codomain_bounds, self.data) - self.queue.insert(d, priority=loss) + for subdomain in self.domain.subdomains(): + # NOTE: could just call 'self.loss' here, as we *know* that each + # subdomain does not have internal points. + loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) + self.queue.insert(subdomain, priority=loss) def ask(self, n, tell_pending=True): if not tell_pending: @@ -411,7 +605,6 @@ def tell_many(self, xs, ys): if need_loss_update: # Need to recalculate all losses anyway - subdomains = itertools.chain(self.queue.items(), new) self.queue = Queue( ( subdomain, @@ -487,8 +680,76 @@ def loss(self): _, loss = self.queue.peek() return loss - def plot(self): - # XXX: specialized to 1D + def plot(self, **kwargs): + if isinstance(self.domain, Interval): + return self._plot_1d(**kwargs) + elif isinstance(self.domain, ConvexHull): + return self._plot_nd(**kwargs) + + def _plot_nd(self, n=None, tri_alpha=0): + # XXX: Copied from LearnerND. At the moment we reach deep into internal + # datastructures of self.domain. We should see what data we need and + # add APIs to 'Domain' to support this. + hv = ensure_holoviews() + if self.vdim > 1: + raise NotImplementedError( + "holoviews currently does not support", "3D surface plots in bokeh." + ) + if self.ndim != 2: + raise NotImplementedError( + "Only 2D plots are implemented: You can " + "plot a 2D slice with 'plot_slice'." 
+ ) + x, y = self.domain.bounding_box + lbrt = x[0], y[0], x[1], y[1] + + if len(self.data) >= 4: + if n is None: + # Calculate how many grid points are needed. + # factor from A=√3/4 * a² (equilateral triangle) + scale_factor = 1 # np.product(np.diag(self._transform)) + min_volume = min(map(self.domain.volume, self.domain.subdomains())) + a_sq = np.sqrt(scale_factor * min_volume) + n = max(10, int(0.658 / a_sq)) + + xs = ys = np.linspace(0, 1, n) + xs = xs * (x[1] - x[0]) + x[0] + ys = ys * (y[1] - y[0]) + y[0] + keys = np.array(list(self.data.keys())) + values = np.array(list(self.data.values())) + interpolator = scipy.interpolate.LinearNDInterpolator(keys, values) + z = interpolator(xs[:, None], ys[None, :]).squeeze() + + im = hv.Image(np.rot90(z), bounds=lbrt) + + if tri_alpha: + points = np.array( + [ + [self.domain.triangulation.vertices[i] for i in s] + for s in self.domain.subdomains() + ] + ) + points = np.pad( + points[:, [0, 1, 2, 0], :], + pad_width=((0, 0), (0, 1), (0, 0)), + mode="constant", + constant_values=np.nan, + ).reshape(-1, 2) + tris = hv.EdgePaths([points]) + else: + tris = hv.EdgePaths([]) + else: + im = hv.Image([], bounds=lbrt) + tris = hv.EdgePaths([]) + + im_opts = dict(cmap="viridis") + tri_opts = dict(line_width=0.5, alpha=tri_alpha) + no_hover = dict(plot=dict(inspection_policy=None, tools=[])) + + return im.opts(style=im_opts) * tris.opts(style=tri_opts, **no_hover) + + def _plot_1d(self): + assert isinstance(self.domain, Interval) hv = ensure_holoviews() xs, ys = zip(*sorted(self.data.items())) if self.data else ([], []) From 18a88bc1a068bc604f756b4055ae14826b517345 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Tue, 8 Oct 2019 10:38:24 +0200 Subject: [PATCH 014/105] make everything work --- adaptive/learner/new_learnerND.py | 238 +++++++++++++++++++++--------- 1 file changed, 166 insertions(+), 72 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 9494a3906..8d244e4d1 100644 
--- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -8,7 +8,12 @@ from sortedcontainers import SortedList, SortedDict from adaptive.learner.base_learner import BaseLearner -from adaptive.learner.triangulation import Triangulation, simplex_volume_in_embedding +from adaptive.learner.triangulation import ( + Triangulation, + simplex_volume_in_embedding, + circumsphere, + point_in_simplex, +) from adaptive.notebook_integration import ensure_holoviews @@ -16,16 +21,27 @@ class Domain: def insert_points(self, subdomain, n): """Insert 'n' points into 'subdomain'. - May not return a point on the boundary of subdomain. + Returns + ------- + affected_subdomains : Iterable of subdomains + If some points were added on the boundary of 'subdomain' + then they will also have been added to the neighboring + subdomains. """ def insert_into(self, subdomain, x): """Insert 'x' into 'subdomain'. + Returns + ------- + affected_subdomains : Iterable of subdomains + If some points were added on the boundary of 'subdomain' + then they will also have been added to the neighboring + subdomains. + Raises ------ ValueError : if x is outside subdomain or exists already - NotImplementedError : if x is on the boundary of subdomain """ def split_at(self, x): @@ -45,13 +61,17 @@ def split_at(self, x): ValueError : if x is outside of the domain or exists already """ - def which_subdomain(self, x): - """Return the subdomain that contains 'x'. + def which_subdomains(self, x): + """Return the subdomains that contains 'x'. + + Return + ------ + subdomains : Iterable of subdomains + The subdomains to which 'x' belongs. 
Raises ------ ValueError : if x is outside of the domain - NotImplementedError : if x is on a subdomain boundary """ def transform(self, x): @@ -93,6 +113,7 @@ def __init__(self, a, b): self.bounds = (a, b) self.sub_intervals = dict() self.points = SortedList([a, b]) + self.ndim = 1 def insert_points(self, subdomain, n, *, _check_membership=True): assert n > 0 @@ -116,7 +137,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): points.append(m) p.update(points) - return points + return points, [subdomain] def insert_into(self, subdomain, x, *, _check_membership=True): a, b = subdomain @@ -133,6 +154,8 @@ def insert_into(self, subdomain, x, *, _check_membership=True): else: p.add(x) + return [subdomain] + def split_at(self, x, *, _check_membership=True): a, b = self.bounds if _check_membership: @@ -161,15 +184,16 @@ def split_at(self, x, *, _check_membership=True): return [old_interval], new_intervals - def which_subdomain(self, x): + def which_subdomains(self, x): a, b = self.bounds if not (a <= x <= b): raise ValueError("{} is outside the interval".format(x)) p = self.points i = p.bisect_left(x) if p[i] == x: - raise NotImplementedError("{} is on a subdomain boundary".format(x)) - return (p[i], p[i + 1]) + # XXX + return (p[i - 1], p[i]), (p[i], p[i + 1]) + return [(p[i], p[i + 1])] def __contains__(self, subdomain): a, b = subdomain @@ -218,6 +242,53 @@ def subvolumes(self, subdomain): return [self.volume(s) for s in zip(p, p.islice(1))] +def _choose_point_in_simplex(simplex, transform=None): + if transform is not None: + simplex = np.dot(simplex, transform) + + # choose center if and only if the shape of the simplex is nice, + # otherwise: the center the longest edge + center, _radius = circumsphere(simplex) + if point_in_simplex(center, simplex): + point = np.average(simplex, axis=0) + edge = () + else: + distances = scipy.spatial.distance.pdist(simplex) + distance_matrix = scipy.spatial.distance.squareform(distances) + i, j = 
np.unravel_index(np.argmax(distance_matrix), distance_matrix.shape) + point = (simplex[i, :] + simplex[j, :]) / 2 + edge = (i, j) + + if transform is not None: + point = np.linalg.solve(transform, point) # undo the transform + + return point, edge + + +def _face(simplex, x, eps=1e-8): + # Given simplex [(N + 1, N) array] and a point [(N,) array] return + # the face that x belongs to, or the empty tuple if not on a face. + # The face is returned as a tuple of integers, the vertices of + # 'simplex' that bound the face. + raise NotImplementedError() + x0 = np.asarray(simplex[0], dtype=float) + vectors = np.asarray(simplex[1:], dtype=float) - x0 + t = np.linalg.solve(vectors.T, x - x0) + if not all(t > -eps) and sum(t) < 1 + eps: + raise ValueError("{} not in simplex".format(x)) + points = set(range(len(simplex))) + where, = np.where(t < eps) + for i in where: + points.remove(i + 1) + if 1 - eps < sum(t) < 1 + eps: + points.remove(0) + if len(points) == len(simplex): + # At a general point in the simplex + return () + else: + return tuple(sorted(points)) + + class ConvexHull(Domain): """A convex hull domain in $ℝ^N$ (N >=2). @@ -231,41 +302,58 @@ def __init__(self, hull): self.triangulation = Triangulation(hull.points[hull.vertices]) # if a subdomain has interior points, then it appears as a key # in 'sub_domains' and maps to a 'Triangulation' of the - # interior of the subdomain. + # interior of the subdomain. By definition the triangulation + # is over a simplex, and the first 'ndim + 1' points in the + # triangulation are the boundary points. 
self.sub_domains = dict() + self.ndim = self.bounds.points.shape[1] @property def bounding_box(self): hull_points = self.bounds.points[self.bounds.vertices] return tuple(zip(hull_points.min(axis=0), hull_points.max(axis=0))) + def _get_subtriangulation(self, subdomain): + try: + subtri = self.sub_domains[subdomain] + except KeyError: # No points in the interior of this subdomain yet + subtri = Triangulation([self.triangulation.vertices[x] for x in subdomain]) + self.sub_domains[subdomain] = subtri + return subtri + def insert_points(self, subdomain, n, *, _check_membership=True): assert n > 0 tri = self.triangulation if _check_membership and subdomain not in tri.simplices: raise ValueError("{} is not present in this domain".format(subdomain)) - try: - subtri = self.sub_domains[subdomain] - except KeyError: # No points in the interior of this subdomain yet - subtri = Triangulation([tri.vertices[x] for x in subdomain]) - self.sub_domains[subdomain] = subtri - # Choose new points in the centre of the largest sub-subdomain - # of this subdomain + subtri = self._get_subtriangulation(subdomain) + points = [] + affected_subdomains = {subdomain} for _ in range(n): # O(N) in the number of sub-simplices, but typically we only have a few - largest_simplex = max(subtri.simplices, key=lambda s: subtri.volume(s)) + largest_simplex = max(subtri.simplices, key=subtri.volume) simplex_vertices = np.array([subtri.vertices[s] for s in largest_simplex]) - # XXX: choose the centre of the simplex. Update this to be able to handle - # choosing points on a face. 
This requires updating the ABC and having - # this method also return the subdomains that are affected by the new - # points - point = np.average(simplex_vertices, axis=0) - subtri.add_point(point, largest_simplex) + point, face = _choose_point_in_simplex(simplex_vertices) + face = [largest_simplex[i] for i in face] points.append(point) - - return [tuple(p) for p in points] + subtri.add_point(point, largest_simplex) + # If we chose a point on a face (or edge) of 'subdomain' then we need to + # add it to the subtriangulations of the neighboring subdomains. + # The first 'ndim + 1' points are the boundary points of the subtriangulation + # because it is by definition a simplex. + if face and all(f < self.ndim + 1 for f in face): + # Translate vertex indices from subtriangulation to triangulation + face = [subdomain[f] for f in face] + # Loop over the simplices that contain 'face', skipping 'subdomain', + # which was already added above. + for sd in tri.containing(face): + if sd != subdomain: + self._get_subtriangulation(sd).add_point(point) + affected_subdomains.add(sd) + + return [tuple(p) for p in points], affected_subdomains def insert_into(self, subdomain, x, *, _check_membership=True): tri = self.triangulation @@ -275,14 +363,21 @@ def insert_into(self, subdomain, x, *, _check_membership=True): if not tri.point_in_simplex(x, subdomain): raise ValueError("{} is not present in this subdomain".format(x)) - try: - subtri = self.sub_domains[subdomain] - except KeyError: # No points in the interior of this subdomain yet - subtri = Triangulation([tri.vertices[x] for x in subdomain]) - self.sub_domains[subdomain] = subtri - + subtri = self._get_subtriangulation(subdomain) subtri.add_point(x) + affected_subdomains = set() + simplex = [tri.vertices[i] for i in subdomain] + face = [subdomain[i] for i in _face(simplex, x)] + if face: + for i in tri.containing(face): + sd = tri.simplices[i] + if sd != subdomain: + self._get_subtriangulation(sd).add_point(point) + 
affected_subdomains.add(sd) + + return affected_subdomains + def split_at(self, x, *, _check_membership=True): tri = self.triangulation # XXX: O(N) in the number of simplices. As typically 'x' will have been @@ -300,24 +395,31 @@ def split_at(self, x, *, _check_membership=True): # Re-assign all the interior points of 'old_subdomains' to 'new_subdomains' - interior_points = [] + # Keep the interior points as a set, because interior points on a shared face + # appear in the subtriangulations of both the neighboring simplices + interior_points = set() for d in old_subdomains: try: subtri = self.sub_domains.pop(d) except KeyError: continue else: - # Get the points in the interior of the subtriangulation - verts = set(range(len(subtri.vertices))) - subtri.hull - assert verts - verts = np.array([subtri.vertices[i] for i in verts]) + # Get the points in the interior of the subtriangulation. + # Because a subtriangulation is always defined over a simplex, + # the first ndim + 1 points are the boundary points + interior = set(range(self.ndim + 1, len(subtri.vertices))) + # If the subtriangulation was in 'self.sub_domains' there must + # be at least 1 interior point. + assert interior + interior = [subtri.vertices[i] for i in interior] # Remove 'x' if it is one of the points - verts = verts[np.all(verts != x, axis=1)] - interior_points.append(verts) - if interior_points: - interior_points = np.vstack(interior_points) + interior = [i for i in interior if i != x] + if interior: + interior_points.update(interior) for p in interior_points: - # XXX: handle case where points lie on simplex boundaries + # Try to add 'p' to all the new subdomains. It may belong to more than 1 + # if it lies on a subdomain boundary. 
+ p_was_added = False for subdomain in new_subdomains: if tri.point_in_simplex(p, subdomain): try: @@ -326,29 +428,28 @@ def split_at(self, x, *, _check_membership=True): subtri = Triangulation([tri.vertices[i] for i in subdomain]) self.sub_domains[subdomain] = subtri subtri.add_point(p) - break - else: - assert False, "{} was not in the interior of new simplices".format(x) + p_was_added = True + assert ( + p_was_added + ), "{} was not in the interior of any new simplices".format(x) return old_subdomains, new_subdomains - def which_subdomain(self, x): + def which_subdomains(self, x): tri = self.triangulation + # XXX: O(N) in the number of simplices member = np.array([tri.point_in_simplex(x, s) for s in tri.simplices]) n_simplices = member.sum() if n_simplices < 1: raise ValueError("{} is not in the domain".format(x)) - elif n_simplices == 1: - return member.argmax() - else: - raise ValueError("{} is on a subdomain boundary".format(x)) + which = np.argwhere(member).squeeze() + return [tri.simplices[i] for i in which] def transform(self, x): # XXX: implement this raise NotImplementedError() def neighbors(self, subdomain, n=1): - "Return all neighboring subdomains up to degree 'n'." tri = self.triangulation neighbors = {subdomain} for _ in range(n): @@ -358,26 +459,17 @@ def neighbors(self, subdomain, n=1): return neighbors def subdomains(self): - "Return all the subdomains in the domain." return self.triangulation.simplices def clear_subdomains(self): - """Remove all points from the interior of subdomains. - - Returns - ------- - subdomains : the subdomains who's interior points were removed - """ sub_domains = list(self.sub_domains.keys()) self.sub_domains = dict() return sub_domains def volume(self, subdomain): - "Return the volume of a subdomain." return self.triangulation.volume(subdomain) def subvolumes(self, subdomain): - "Return the volumes of the sub-subdomains." 
try: subtri = self.sub_domains[subdomain] except KeyError: @@ -565,13 +657,14 @@ def ask(self, n, tell_pending=True): new_points = [] new_losses = [] for _ in range(n): - subdomain, _ = self.queue.pop() - new_point, = self.domain.insert_points(subdomain, 1) + subdomain, _ = self.queue.peek() + (new_point,), affected_subdomains = self.domain.insert_points(subdomain, 1) self.data[new_point] = None - new_loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.insert(subdomain, priority=new_loss) + for subdomain in affected_subdomains: + new_loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) + self.queue.update(subdomain, priority=new_loss) new_points.append(new_point) new_losses.append(new_loss) return new_points, new_losses @@ -579,11 +672,12 @@ def ask(self, n, tell_pending=True): def tell_pending(self, x): self.data[x] = None subdomain = self.domain.which_subdomain(x) - self.domain.insert_into(subdomain, x) - loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.update(subdomain, priority=loss) + affected_subdomains = self.domain.insert_into(subdomain, x) + for subdomain in affected_subdomains: + loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) + self.queue.update(subdomain, priority=loss) def tell_many(self, xs, ys): for x, y in zip(xs, ys): From e3fd6cc7259150f8253076b0fdae9b1d258773a3 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Tue, 8 Oct 2019 16:50:57 +0200 Subject: [PATCH 015/105] make everything work again --- adaptive/learner/new_learnerND.py | 210 ++++++++++++++++++------------ 1 file changed, 127 insertions(+), 83 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 8d244e4d1..1fbd89a7a 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -29,8 +29,8 @@ def insert_points(self, 
subdomain, n): subdomains. """ - def insert_into(self, subdomain, x): - """Insert 'x' into 'subdomain'. + def insert(self, x): + """Insert 'x' into any subdomains to which it belongs. Returns ------- @@ -41,7 +41,7 @@ def insert_into(self, subdomain, x): Raises ------ - ValueError : if x is outside subdomain or exists already + ValueError : if x is outside the domain or exists already """ def split_at(self, x): @@ -139,19 +139,25 @@ def insert_points(self, subdomain, n, *, _check_membership=True): return points, [subdomain] - def insert_into(self, subdomain, x, *, _check_membership=True): - a, b = subdomain + def insert(self, x, *, _check_membership=True): if _check_membership: - if subdomain not in self: - raise ValueError("{} is not present in this interval".format(subdomain)) + a, b = self.bounds if not (a < x < b): - raise ValueError("{} is not in ({}, {})".format(x, a, b)) + raise ValueError("{} is outside of this interval".format(x)) + + p = self.points + i = p.bisect_left(x) + if p[i] == x: + raise ValueError("{} exists in this interval already".format(x)) + subdomain = p[i - 1], p[i] try: p = self.sub_intervals[subdomain] except KeyError: self.sub_intervals[subdomain] = SortedList([a, x, b]) else: + if x in p: + raise ValueError("{} exists in a subinterval already".format(x)) p.add(x) return [subdomain] @@ -190,9 +196,15 @@ def which_subdomains(self, x): raise ValueError("{} is outside the interval".format(x)) p = self.points i = p.bisect_left(x) - if p[i] == x: - # XXX - return (p[i - 1], p[i]), (p[i], p[i + 1]) + if p[i] != x: # general point inside a subinterval + return [(p[i - 1], p[i])] + else: # boundary of a subinterval + neighbors = [] + if i > 0: + neighbors.append((p[i - 1], p[i])) + if i < len(p) - 1: + neighbors.append((p[i], p[i + 1])) + return neighbors return [(p[i], p[i + 1])] def __contains__(self, subdomain): @@ -243,50 +255,44 @@ def subvolumes(self, subdomain): def _choose_point_in_simplex(simplex, transform=None): + """Choose a good 
point at which to split a simplex. + + Parameters + ---------- + simplex : (n+1, n) array + The simplex vertices + transform : (n, n) array + The linear transform to apply to the simplex vertices + before determining which point to choose. Must be + invertible. + + Returns + ------- + point : (n,) array + The point that was chosen in the simplex + face : tuple of int + If the chosen point was + """ if transform is not None: simplex = np.dot(simplex, transform) - # choose center if and only if the shape of the simplex is nice, + # Choose center only if the shape of the simplex is nice, # otherwise: the center the longest edge center, _radius = circumsphere(simplex) if point_in_simplex(center, simplex): point = np.average(simplex, axis=0) - edge = () + face = () else: distances = scipy.spatial.distance.pdist(simplex) distance_matrix = scipy.spatial.distance.squareform(distances) i, j = np.unravel_index(np.argmax(distance_matrix), distance_matrix.shape) point = (simplex[i, :] + simplex[j, :]) / 2 - edge = (i, j) + face = (i, j) if transform is not None: point = np.linalg.solve(transform, point) # undo the transform - return point, edge - - -def _face(simplex, x, eps=1e-8): - # Given simplex [(N + 1, N) array] and a point [(N,) array] return - # the face that x belongs to, or the empty tuple if not on a face. - # The face is returned as a tuple of integers, the vertices of - # 'simplex' that bound the face. 
- raise NotImplementedError() - x0 = np.asarray(simplex[0], dtype=float) - vectors = np.asarray(simplex[1:], dtype=float) - x0 - t = np.linalg.solve(vectors.T, x - x0) - if not all(t > -eps) and sum(t) < 1 + eps: - raise ValueError("{} not in simplex".format(x)) - points = set(range(len(simplex))) - where, = np.where(t < eps) - for i in where: - points.remove(i + 1) - if 1 - eps < sum(t) < 1 + eps: - points.remove(0) - if len(points) == len(simplex): - # At a general point in the simplex - return () - else: - return tuple(sorted(points)) + return point, face class ConvexHull(Domain): @@ -342,7 +348,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): # If we chose a point on a face (or edge) of 'subdomain' then we need to # add it to the subtriangulations of the neighboring subdomains. # The first 'ndim + 1' points are the boundary points of the subtriangulation - # because it is by definition a simplex. + # because it is a simplex by definition. if face and all(f < self.ndim + 1 for f in face): # Translate vertex indices from subtriangulation to triangulation face = [subdomain[f] for f in face] @@ -355,26 +361,17 @@ def insert_points(self, subdomain, n, *, _check_membership=True): return [tuple(p) for p in points], affected_subdomains - def insert_into(self, subdomain, x, *, _check_membership=True): + def insert(self, x, *, _check_membership=True): tri = self.triangulation - if _check_membership: - if subdomain not in tri.simplices: - raise ValueError("{} is not present in this domain".format(subdomain)) - if not tri.point_in_simplex(x, subdomain): - raise ValueError("{} is not present in this subdomain".format(x)) - - subtri = self._get_subtriangulation(subdomain) - subtri.add_point(x) - - affected_subdomains = set() - simplex = [tri.vertices[i] for i in subdomain] - face = [subdomain[i] for i in _face(simplex, x)] - if face: - for i in tri.containing(face): - sd = tri.simplices[i] - if sd != subdomain: - 
self._get_subtriangulation(sd).add_point(point) - affected_subdomains.add(sd) + # O(N) in the number of simplices + affected_subdomains = self.which_subdomains(x) + if not affected_subdomains: + raise ValueError("{} is not present in this domain".format(x)) + for subdomain in affected_subdomains: + subtri = self._get_subtriangulation(subdomain) + if x in subtri.vertices: # O(N) in the number of vertices + raise ValueError("{} exists in a subinterval already".format(x)) + subtri.add_point(x) return affected_subdomains @@ -396,7 +393,8 @@ def split_at(self, x, *, _check_membership=True): # Re-assign all the interior points of 'old_subdomains' to 'new_subdomains' # Keep the interior points as a set, because interior points on a shared face - # appear in the subtriangulations of both the neighboring simplices + # appear in the subtriangulations of both the neighboring simplices, and we + # don't want those points to appear twice. interior_points = set() for d in old_subdomains: try: @@ -404,18 +402,14 @@ def split_at(self, x, *, _check_membership=True): except KeyError: continue else: - # Get the points in the interior of the subtriangulation. - # Because a subtriangulation is always defined over a simplex, - # the first ndim + 1 points are the boundary points + # Get all points in the subtriangulation except the boundary + # points. Because a subtriangulation is always defined over + # a simplex, the first ndim + 1 points are the boundary points. interior = set(range(self.ndim + 1, len(subtri.vertices))) - # If the subtriangulation was in 'self.sub_domains' there must - # be at least 1 interior point. - assert interior interior = [subtri.vertices[i] for i in interior] # Remove 'x' if it is one of the points interior = [i for i in interior if i != x] - if interior: - interior_points.update(interior) + interior_points.update(interior) for p in interior_points: # Try to add 'p' to all the new subdomains. It may belong to more than 1 # if it lies on a subdomain boundary. 
@@ -438,12 +432,10 @@ def split_at(self, x, *, _check_membership=True): def which_subdomains(self, x): tri = self.triangulation # XXX: O(N) in the number of simplices - member = np.array([tri.point_in_simplex(x, s) for s in tri.simplices]) - n_simplices = member.sum() - if n_simplices < 1: + subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] + if not subdomains: raise ValueError("{} is not in the domain".format(x)) - which = np.argwhere(member).squeeze() - return [tri.simplices[i] for i in which] + return subdomains def transform(self, x): # XXX: implement this @@ -453,7 +445,7 @@ def neighbors(self, subdomain, n=1): tri = self.triangulation neighbors = {subdomain} for _ in range(n): - for face in tri.faces(simplices=neighbors): + for face in list(tri.faces(simplices=neighbors)): neighbors.update(tri.containing(face)) neighbors.remove(subdomain) return neighbors @@ -599,6 +591,56 @@ def __call__(self, domain, subdomain, codomain_bounds, data): return simplex_volume_in_embedding(pts) +class TriangleLoss(LossFunction): + @property + def n_neighbors(self): + return 1 + + def __call__(self, domain, subdomain, codomain_bounds, data): + assert isinstance(domain, ConvexHull) + loss = EmbeddedVolumeLoss() + neighbors = domain.neighbors(subdomain, self.n_neighbors) + if not neighbors: + return 0 + neighbor_points = set.union(*(set(n) - set(subdomain) for n in neighbors)) + + neighbor_points = [domain.triangulation.vertices[p] for p in neighbor_points] + + simplex = [domain.triangulation.vertices[p] for p in subdomain] + + z = data[simplex[0]] + if isinstance(z, Iterable): + s = [(*x, *data[x]) for x in simplex] + n = [(*x, *data[x]) for x in neighbor_points] + else: + s = [(*x, data[x]) for x in simplex] + n = [(*x, data[x]) for x in neighbor_points] + + return sum(simplex_volume_in_embedding([*s, neigh]) for neigh in n) / len( + neighbors + ) + + +class CurvatureLoss(LossFunction): + def __init__(self, exploration=0.05): + self.exploration = exploration 
+ + @property + def n_neighbors(self): + return 1 + + def __call__(self, domain, subdomain, codomain_bounds, data): + dim = domain.ndim + + loss_input_volume = domain.volume(subdomain) + triangle_loss = TriangleLoss() + + loss_curvature = triangle_loss(domain, subdomain, codomain_bounds, data) + return ( + loss_curvature + self.exploration * loss_input_volume ** ((2 + dim) / dim) + ) ** (1 / (2 + dim)) + + def _scaled_loss(loss, domain, subdomain, codomain_bounds, data): subvolumes = domain.subvolumes(subdomain) max_relative_subvolume = max(subvolumes) / sum(subvolumes) @@ -651,9 +693,6 @@ def __init__(self, f, bounds, loss=None): self.queue.insert(subdomain, priority=loss) def ask(self, n, tell_pending=True): - if not tell_pending: - # XXX: handle this case - raise RuntimeError("tell_pending=False not supported yet") new_points = [] new_losses = [] for _ in range(n): @@ -667,12 +706,16 @@ def ask(self, n, tell_pending=True): self.queue.update(subdomain, priority=new_loss) new_points.append(new_point) new_losses.append(new_loss) + + if not tell_pending: + # XXX: (1) revert subdomain losses to their original values + # (2) remove points from 'self.domain' + raise NotImplementedError("tell_pending=False not supported yet") return new_points, new_losses def tell_pending(self, x): self.data[x] = None - subdomain = self.domain.which_subdomain(x) - affected_subdomains = self.domain.insert_into(subdomain, x) + affected_subdomains = self.domain.insert(x) for subdomain in affected_subdomains: loss = _scaled_loss( self.loss, self.domain, subdomain, self.codomain_bounds, self.data @@ -722,10 +765,11 @@ def tell_many(self, xs, ys): self.queue.insert(subdomain, priority=loss) if self.loss.n_neighbors > 0: - subdomains_to_update = sum( - (set(self.domain.neighbors(d, self.loss.n_neighbors)) for d in new), - set(), - ) + subdomains_to_update = set() + for subdomain in new: + subdomains_to_update.update( + self.domain.neighbors(subdomain, self.loss.n_neighbors) + ) 
subdomains_to_update -= new for subdomain in subdomains_to_update: loss = _scaled_loss( From 632faf9d3e98546c64f3a9f46f0e36ebfeb02cce Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Tue, 8 Oct 2019 17:47:16 +0200 Subject: [PATCH 016/105] update Interval class --- adaptive/learner/new_learnerND.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 1fbd89a7a..5b001d8b6 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -142,14 +142,14 @@ def insert_points(self, subdomain, n, *, _check_membership=True): def insert(self, x, *, _check_membership=True): if _check_membership: a, b = self.bounds - if not (a < x < b): + if not (a <= x <= b): raise ValueError("{} is outside of this interval".format(x)) p = self.points i = p.bisect_left(x) if p[i] == x: raise ValueError("{} exists in this interval already".format(x)) - subdomain = p[i - 1], p[i] + subdomain = (p[i - 1], p[i]) try: p = self.sub_intervals[subdomain] From bd574ab6d2cfbbcb6d3dc7a41ce6dd8762f8a9cd Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Tue, 8 Oct 2019 17:47:39 +0200 Subject: [PATCH 017/105] add 'remove' method to Domain --- adaptive/learner/new_learnerND.py | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 5b001d8b6..ee6e5a090 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -44,6 +44,20 @@ def insert(self, x): ValueError : if x is outside the domain or exists already """ + def remove(self, x): + """Remove 'x' from any subdomains to which it belongs. + + Returns + ------- + affected_subdomains : Iterable of subdomains + The subdomains from which 'x' was removed. 
+ + Raises + ------ + ValueError : if x is a subdomain vertex + ValueError : if x is not in any subdomain + """ + def split_at(self, x): """Split the domain at 'x'. @@ -162,6 +176,26 @@ def insert(self, x, *, _check_membership=True): return [subdomain] + def remove(self, x, *, _check_membership=True): + if _check_membership: + a, b = self.bounds + if not (a <= x <= b): + raise ValueError("{} is outside of this interval".format(x)) + + p = self.points + i = p.bisect_left(x) + if p[i] == x: + raise ValueError("Cannot remove subdomain vertices") + subdomain = (p[i - 1], p[i]) + + try: + sub_points = self.sub_domains[subdomain] + except KeyError: + raise ValueError("{} not in any subdomain".format(x)) + else: + sub_points.remove(x) + return [subdomain] + def split_at(self, x, *, _check_membership=True): a, b = self.bounds if _check_membership: @@ -375,6 +409,24 @@ def insert(self, x, *, _check_membership=True): return affected_subdomains + def remove(self, x): + affected_subdomains = self.which_subdomains(x) + for subdomains in affected_subdomains: + # Check that it's not a vertex of the subdomain + if any(x == tri.vertices[i] for i in subdomain): + raise ValueError("Cannot remove subdomain vertices") + try: + subtri = self.sub_domains[subdomains] + except KeyError: + raise ValueError("{} not present in any subdomain".format(x)) + else: + if x not in subtri.vertices: + raise ValueError("{} not present in any subdomain".format(x)) + # Rebuild the subtriangulation from scratch + self.sub_domains[subdomain] = Triangulation( + [v for v in subtri.vertices if v != x] + ) + def split_at(self, x, *, _check_membership=True): tri = self.triangulation # XXX: O(N) in the number of simplices. 
As typically 'x' will have been From 3e27066fcfbfd205472ffea15a5d95942c91f229 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Tue, 8 Oct 2019 17:47:52 +0200 Subject: [PATCH 018/105] implement 'ask(tell_pending=False)' --- adaptive/learner/new_learnerND.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index ee6e5a090..c3bbf0b5c 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -760,9 +760,16 @@ def ask(self, n, tell_pending=True): new_losses.append(new_loss) if not tell_pending: - # XXX: (1) revert subdomain losses to their original values - # (2) remove points from 'self.domain' - raise NotImplementedError("tell_pending=False not supported yet") + + affected_subdomains = set() + for point in new_points: + del self.data[point] + affected_subdomains.update(self.domain.remove(point)) + for subdomain in affected_subdomains: + new_loss = _scaled_loss( + self.loss, self.domain, subdomain, self.codomain_bounds, self.data + ) + self.queue.update(subdomain, priority=new_loss) return new_points, new_losses def tell_pending(self, x): From 772a5f4a43b56685336e922152a630bb01ce7657 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Tue, 8 Oct 2019 17:54:39 +0200 Subject: [PATCH 019/105] flakify --- adaptive/learner/new_learnerND.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index c3bbf0b5c..07c1da75e 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -396,8 +396,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): return [tuple(p) for p in points], affected_subdomains def insert(self, x, *, _check_membership=True): - tri = self.triangulation - # O(N) in the number of simplices + # XXX: O(N) in the number of simplices affected_subdomains = self.which_subdomains(x) if not 
affected_subdomains: raise ValueError("{} is not present in this domain".format(x)) @@ -410,13 +409,14 @@ def insert(self, x, *, _check_membership=True): return affected_subdomains def remove(self, x): + # XXX: O(N) in the number of simplices affected_subdomains = self.which_subdomains(x) - for subdomains in affected_subdomains: + for subdomain in affected_subdomains: # Check that it's not a vertex of the subdomain - if any(x == tri.vertices[i] for i in subdomain): + if any(x == self.triangulation.vertices[i] for i in subdomain): raise ValueError("Cannot remove subdomain vertices") try: - subtri = self.sub_domains[subdomains] + subtri = self.sub_domains[subdomain] except KeyError: raise ValueError("{} not present in any subdomain".format(x)) else: @@ -650,7 +650,6 @@ def n_neighbors(self): def __call__(self, domain, subdomain, codomain_bounds, data): assert isinstance(domain, ConvexHull) - loss = EmbeddedVolumeLoss() neighbors = domain.neighbors(subdomain, self.n_neighbors) if not neighbors: return 0 From 053ba2406ec4e62595d521506516066b9d3d405c Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 11:11:51 +0200 Subject: [PATCH 020/105] small refactors and comments --- adaptive/learner/new_learnerND.py | 35 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 07c1da75e..9092f2ba0 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -130,13 +130,13 @@ def __init__(self, a, b): self.ndim = 1 def insert_points(self, subdomain, n, *, _check_membership=True): - assert n > 0 + if n <= 0: + raise ValueError("n must be positive") if _check_membership and subdomain not in self: raise ValueError("{} is not present in this interval".format(subdomain)) try: p = self.sub_intervals[subdomain] except KeyError: # No points yet in the interior of this subdomain - a, b = subdomain p = SortedList(subdomain) 
self.sub_intervals[subdomain] = p @@ -199,13 +199,13 @@ def remove(self, x, *, _check_membership=True): def split_at(self, x, *, _check_membership=True): a, b = self.bounds if _check_membership: - if not (a < x < b): + if not (a <= x <= b): raise ValueError("Can only split at points within the interval") - if x in self.points: - raise ValueError("Cannot split at an existing point") p = self.points i = p.bisect_left(x) + if p[i] == x: + raise ValueError("Cannot split at an existing point") a, b = old_interval = p[i - 1], p[i] new_intervals = [(a, x), (x, b)] @@ -214,12 +214,16 @@ def split_at(self, x, *, _check_membership=True): sub_points = self.sub_intervals.pop(old_interval) except KeyError: pass - else: # update sub_intervals + else: + # Update subintervals for ival in new_intervals: new_sub_points = SortedList(sub_points.irange(*ival)) if x not in new_sub_points: + # This should add 'x' to the start or the end new_sub_points.add(x) if len(new_sub_points) > 2: + # We don't store subintervals if they don't contain + # any points in their interior. 
self.sub_intervals[ival] = new_sub_points return [old_interval], new_intervals @@ -230,16 +234,17 @@ def which_subdomains(self, x): raise ValueError("{} is outside the interval".format(x)) p = self.points i = p.bisect_left(x) - if p[i] != x: # general point inside a subinterval + if p[i] != x: + # general point inside a subinterval return [(p[i - 1], p[i])] - else: # boundary of a subinterval + else: + # boundary of a subinterval neighbors = [] if i > 0: neighbors.append((p[i - 1], p[i])) if i < len(p) - 1: neighbors.append((p[i], p[i + 1])) return neighbors - return [(p[i], p[i + 1])] def __contains__(self, subdomain): a, b = subdomain @@ -362,13 +367,17 @@ def _get_subtriangulation(self, subdomain): return subtri def insert_points(self, subdomain, n, *, _check_membership=True): - assert n > 0 + if n <= 0: + raise ValueError("n must be positive") tri = self.triangulation if _check_membership and subdomain not in tri.simplices: raise ValueError("{} is not present in this domain".format(subdomain)) subtri = self._get_subtriangulation(subdomain) + # Choose the largest volume sub-simplex and insert a point into it. + # Also insert the point into neighboring subdomains if it was chosen + # on the subdomain boundary. points = [] affected_subdomains = {subdomain} for _ in range(n): @@ -376,13 +385,13 @@ def insert_points(self, subdomain, n, *, _check_membership=True): largest_simplex = max(subtri.simplices, key=subtri.volume) simplex_vertices = np.array([subtri.vertices[s] for s in largest_simplex]) point, face = _choose_point_in_simplex(simplex_vertices) - face = [largest_simplex[i] for i in face] points.append(point) subtri.add_point(point, largest_simplex) # If we chose a point on a face (or edge) of 'subdomain' then we need to # add it to the subtriangulations of the neighboring subdomains. - # The first 'ndim + 1' points are the boundary points of the subtriangulation - # because it is a simplex by definition. 
+ # This check relies on the fact that the first 'ndim + 1' points in the + # subtriangulation are the boundary points. + face = [largest_simplex[i] for i in face] if face and all(f < self.ndim + 1 for f in face): # Translate vertex indices from subtriangulation to triangulation face = [subdomain[f] for f in face] From 5e36e24356eab1e01cdf0f7a0fcfa959b79e0fa7 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 13:25:01 +0200 Subject: [PATCH 021/105] rename bound_points to boundary_points --- adaptive/learner/new_learnerND.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 9092f2ba0..996bf372a 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -712,16 +712,17 @@ class LearnerND(BaseLearner): def __init__(self, f, bounds, loss=None): if len(bounds) == 1: - (a, b), = (bound_points,) = bounds + (a, b), = (boundary_points,) = bounds self.domain = Interval(a, b) self.loss = loss or DistanceLoss() self.ndim = 1 else: - bound_points = sorted(tuple(p) for p in itertools.product(*bounds)) - self.domain = ConvexHull(scipy.spatial.ConvexHull(bound_points)) + boundary_points = sorted(tuple(p) for p in itertools.product(*bounds)) + self.domain = ConvexHull(scipy.spatial.ConvexHull(boundary_points)) self.loss = loss or EmbeddedVolumeLoss() - self.ndim = len(bound_points[0]) + self.ndim = len(boundary_points[0]) + self.boundary_points = boundary_points self.queue = Queue() self.data = dict() self.function = f From fd6bce47895a4742719bc5c02b5d04f7891efde2 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 13:25:43 +0200 Subject: [PATCH 022/105] do not evaluate boundary points in __init__ --- adaptive/learner/new_learnerND.py | 75 ++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 
996bf372a..e464544b8 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -725,35 +725,73 @@ def __init__(self, f, bounds, loss=None): self.boundary_points = boundary_points self.queue = Queue() self.data = dict() + self.need_loss_update_factor = 1.1 self.function = f + self.n_asked = 0 + + self._initialized = False + self.vdim = None + self.codomain_bounds = None + self.codomain_scale_at_last_update = None - # Evaluate boundary points right away to avoid handling edge - # cases in the ask and tell logic - for x in bound_points: - self.data[x] = f(x) + # No points are yet evaluated, so we assign equal loss to each subdomain. + # We will not split the subdomains until we have data for all the boundary + # points. + for subdomain in self.domain.subdomains(): + self.queue.insert(subdomain, priority=1) - vals = list(self.data.values()) + def _finalize_initialization(self): + assert all(self.data.get(x) is not None for x in self.boundary_points) + + vals = [x for x in self.data.values() if x is not None] codomain_min = np.min(vals, axis=0) codomain_max = np.max(vals, axis=0) self.codomain_bounds = (codomain_min, codomain_max) self.codomain_scale_at_last_update = codomain_max - codomain_min - self.need_loss_update_factor = 1.1 - try: - self.vdim = len(np.squeeze(self.data[x])) + self.vdim = len(np.squeeze(self.data[self.boundary_points[0]])) except TypeError: # Trying to take the length of a number self.vdim = 1 + for x, y in self.data.items(): + if y is None: + continue + if x in self.boundary_points: + continue + self.domain.split_at(x) + + self.queue = Queue() for subdomain in self.domain.subdomains(): - # NOTE: could just call 'self.loss' here, as we *know* that each - # subdomain does not have internal points. 
loss = _scaled_loss( self.loss, self.domain, subdomain, self.codomain_bounds, self.data ) self.queue.insert(subdomain, priority=loss) + self._initialized = True + def ask(self, n, tell_pending=True): + loss = self.loss if self._initialized else lambda *_: 1 + if self.n_asked >= len(self.boundary_points): + points, losses = self._ask(n, tell_pending, loss) + else: + points = self.boundary_points[self.n_asked:self.n_asked + n] + losses = [float('inf')] * len(points) + if tell_pending: + for x in points: + self.data[x] = None + n_extra = n - len(points) + if n_extra > 0: + extra_points, extra_losses = self._ask(n_extra, tell_pending, loss) + points += tuple(extra_points) + losses += tuple(extra_losses) + + if tell_pending: + self.n_asked += n + + return points, losses + + def _ask(self, n, tell_pending, loss): new_points = [] new_losses = [] for _ in range(n): @@ -762,21 +800,20 @@ def ask(self, n, tell_pending=True): self.data[new_point] = None for subdomain in affected_subdomains: new_loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data + loss, self.domain, subdomain, self.codomain_bounds, self.data ) self.queue.update(subdomain, priority=new_loss) new_points.append(new_point) new_losses.append(new_loss) if not tell_pending: - affected_subdomains = set() for point in new_points: del self.data[point] affected_subdomains.update(self.domain.remove(point)) for subdomain in affected_subdomains: new_loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data + loss, self.domain, subdomain, self.codomain_bounds, self.data ) self.queue.update(subdomain, priority=new_loss) return new_points, new_losses @@ -794,6 +831,11 @@ def tell_many(self, xs, ys): for x, y in zip(xs, ys): self.data[x] = y + if not self._initialized: + if all(self.data.get(x) is not None for x in self.boundary_points): + self._finalize_initialization() + return + need_loss_update = self._update_codomain_bounds(ys) old = set() @@ -876,11 
+918,12 @@ def remove_unfinished(self): self.data = {k: v for k, v in self.data.items() if v is not None} cleared_subdomains = self.domain.clear_subdomains() # Subdomains who had internal points removed need their losses updating + loss = self.loss if self._initialized else lambda *_: 1 for subdomain in cleared_subdomains: - loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data + new_loss = _scaled_loss( + loss, self.domain, subdomain, self.codomain_bounds, self.data ) - self.queue.update(subdomain, priority=loss) + self.queue.update(subdomain, priority=new_loss) def loss(self): _, loss = self.queue.peek() From be07de9e81b58b0901e7a227cc04b7f59c2db2d2 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 13:35:31 +0200 Subject: [PATCH 023/105] put pending points in separate data structure from data --- adaptive/learner/new_learnerND.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index e464544b8..d382fb506 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -724,7 +724,8 @@ def __init__(self, f, bounds, loss=None): self.boundary_points = boundary_points self.queue = Queue() - self.data = dict() + self.data = dict() # Contains the evaluated data only + self.pending_points = set() self.need_loss_update_factor = 1.1 self.function = f self.n_asked = 0 @@ -741,9 +742,9 @@ def __init__(self, f, bounds, loss=None): self.queue.insert(subdomain, priority=1) def _finalize_initialization(self): - assert all(self.data.get(x) is not None for x in self.boundary_points) + assert all(x in self.data for x in self.boundary_points) - vals = [x for x in self.data.values() if x is not None] + vals = list(self.data.values()) codomain_min = np.min(vals, axis=0) codomain_max = np.max(vals, axis=0) self.codomain_bounds = (codomain_min, codomain_max) @@ -754,9 +755,7 @@ def 
_finalize_initialization(self): except TypeError: # Trying to take the length of a number self.vdim = 1 - for x, y in self.data.items(): - if y is None: - continue + for x in self.data: if x in self.boundary_points: continue self.domain.split_at(x) @@ -779,7 +778,7 @@ def ask(self, n, tell_pending=True): losses = [float('inf')] * len(points) if tell_pending: for x in points: - self.data[x] = None + self.pending_points.add(x) n_extra = n - len(points) if n_extra > 0: extra_points, extra_losses = self._ask(n_extra, tell_pending, loss) @@ -797,19 +796,20 @@ def _ask(self, n, tell_pending, loss): for _ in range(n): subdomain, _ = self.queue.peek() (new_point,), affected_subdomains = self.domain.insert_points(subdomain, 1) - self.data[new_point] = None + self.pending_points.add(new_point) for subdomain in affected_subdomains: new_loss = _scaled_loss( loss, self.domain, subdomain, self.codomain_bounds, self.data ) self.queue.update(subdomain, priority=new_loss) new_points.append(new_point) + # XXX: this is not correct; need to set loss to loss of 'subdomain' new_losses.append(new_loss) if not tell_pending: affected_subdomains = set() for point in new_points: - del self.data[point] + self.pending_points.remove(point) affected_subdomains.update(self.domain.remove(point)) for subdomain in affected_subdomains: new_loss = _scaled_loss( @@ -819,7 +819,7 @@ def _ask(self, n, tell_pending, loss): return new_points, new_losses def tell_pending(self, x): - self.data[x] = None + self.pending_points.add(x) affected_subdomains = self.domain.insert(x) for subdomain in affected_subdomains: loss = _scaled_loss( @@ -832,7 +832,7 @@ def tell_many(self, xs, ys): self.data[x] = y if not self._initialized: - if all(self.data.get(x) is not None for x in self.boundary_points): + if all(x in self.data for x in self.boundary_points): self._finalize_initialization() return @@ -915,7 +915,7 @@ def _update_codomain_bounds(self, ys): return False def remove_unfinished(self): - self.data = {k: v for 
k, v in self.data.items() if v is not None} + self.pending_points = set() cleared_subdomains = self.domain.clear_subdomains() # Subdomains who had internal points removed need their losses updating loss = self.loss if self._initialized else lambda *_: 1 From d07b9c3e4d974d0ac5026943dfca26cabe296098 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 14:21:22 +0200 Subject: [PATCH 024/105] replace '_scaled_loss' with 'priority' Be more explicit about the difference between the loss and the priority --- adaptive/learner/new_learnerND.py | 83 ++++++++++--------------------- 1 file changed, 25 insertions(+), 58 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index d382fb506..da453e1b4 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -701,13 +701,6 @@ def __call__(self, domain, subdomain, codomain_bounds, data): ) ** (1 / (2 + dim)) -def _scaled_loss(loss, domain, subdomain, codomain_bounds, data): - subvolumes = domain.subvolumes(subdomain) - max_relative_subvolume = max(subvolumes) / sum(subvolumes) - L_0 = loss(domain, subdomain, codomain_bounds, data) - return max_relative_subvolume * L_0 - - class LearnerND(BaseLearner): def __init__(self, f, bounds, loss=None): @@ -735,11 +728,8 @@ def __init__(self, f, bounds, loss=None): self.codomain_bounds = None self.codomain_scale_at_last_update = None - # No points are yet evaluated, so we assign equal loss to each subdomain. - # We will not split the subdomains until we have data for all the boundary - # points. 
for subdomain in self.domain.subdomains(): - self.queue.insert(subdomain, priority=1) + self.queue.insert(subdomain, priority=self.priority(subdomain)) def _finalize_initialization(self): assert all(x in self.data for x in self.boundary_points) @@ -762,26 +752,33 @@ def _finalize_initialization(self): self.queue = Queue() for subdomain in self.domain.subdomains(): - loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.insert(subdomain, priority=loss) + self.queue.insert(subdomain, priority=self.priority(subdomain)) self._initialized = True + def priority(self, subdomain): + if self._initialized: + L_0 = self.loss(self.domain, subdomain, self.codomain_bounds, self.data) + else: + # Before we're initialized we don't have enough data to calculate losses, + # so we just assign the same loss to each subdomain + L_0 = 1 + subvolumes = self.domain.subvolumes(subdomain) + return (max(subvolumes) / sum(subvolumes)) * L_0 + def ask(self, n, tell_pending=True): - loss = self.loss if self._initialized else lambda *_: 1 if self.n_asked >= len(self.boundary_points): - points, losses = self._ask(n, tell_pending, loss) + points, losses = self._ask(n, tell_pending) else: points = self.boundary_points[self.n_asked:self.n_asked + n] + # The boundary points should always be evaluated with the highest priority losses = [float('inf')] * len(points) if tell_pending: for x in points: self.pending_points.add(x) n_extra = n - len(points) if n_extra > 0: - extra_points, extra_losses = self._ask(n_extra, tell_pending, loss) + extra_points, extra_losses = self._ask(n_extra, tell_pending) points += tuple(extra_points) losses += tuple(extra_losses) @@ -790,7 +787,7 @@ def ask(self, n, tell_pending=True): return points, losses - def _ask(self, n, tell_pending, loss): + def _ask(self, n, tell_pending): new_points = [] new_losses = [] for _ in range(n): @@ -798,9 +795,7 @@ def _ask(self, n, tell_pending, loss): (new_point,), affected_subdomains 
= self.domain.insert_points(subdomain, 1) self.pending_points.add(new_point) for subdomain in affected_subdomains: - new_loss = _scaled_loss( - loss, self.domain, subdomain, self.codomain_bounds, self.data - ) + new_loss = self.priority(subdomain) self.queue.update(subdomain, priority=new_loss) new_points.append(new_point) # XXX: this is not correct; need to set loss to loss of 'subdomain' @@ -812,20 +807,14 @@ def _ask(self, n, tell_pending, loss): self.pending_points.remove(point) affected_subdomains.update(self.domain.remove(point)) for subdomain in affected_subdomains: - new_loss = _scaled_loss( - loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.update(subdomain, priority=new_loss) + self.queue.update(subdomain, priority=self.priority(subdomain)) return new_points, new_losses def tell_pending(self, x): self.pending_points.add(x) affected_subdomains = self.domain.insert(x) for subdomain in affected_subdomains: - loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.update(subdomain, priority=loss) + self.queue.update(subdomain, priority=self.priority(subdomain)) def tell_many(self, xs, ys): for x, y in zip(xs, ys): @@ -853,26 +842,14 @@ def tell_many(self, xs, ys): if need_loss_update: # Need to recalculate all losses anyway self.queue = Queue( - ( - subdomain, - _scaled_loss( - self.loss, - self.domain, - subdomain, - self.codomain_bounds, - self.data, - ), - ) + (subdomain, self.priority(subdomain)) for subdomain in itertools.chain(self.queue.items(), new) ) else: # Compute the losses for the new subdomains and re-compute the # losses for the neighboring subdomains, if necessary. 
for subdomain in new: - loss = _scaled_loss( - self.loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.insert(subdomain, priority=loss) + self.queue.insert(subdomain, priority=self.priority(subdomain)) if self.loss.n_neighbors > 0: subdomains_to_update = set() @@ -882,14 +859,7 @@ def tell_many(self, xs, ys): ) subdomains_to_update -= new for subdomain in subdomains_to_update: - loss = _scaled_loss( - self.loss, - self.domain, - subdomain, - self.codomain_bounds, - self.data, - ) - self.queue.update(subdomain, priority=loss) + self.queue.update(subdomain, priority=self.priority(subdomain)) def _update_codomain_bounds(self, ys): mn, mx = self.codomain_bounds @@ -917,15 +887,12 @@ def _update_codomain_bounds(self, ys): def remove_unfinished(self): self.pending_points = set() cleared_subdomains = self.domain.clear_subdomains() - # Subdomains who had internal points removed need their losses updating - loss = self.loss if self._initialized else lambda *_: 1 + # Subdomains who had internal points removed need their priority updating for subdomain in cleared_subdomains: - new_loss = _scaled_loss( - loss, self.domain, subdomain, self.codomain_bounds, self.data - ) - self.queue.update(subdomain, priority=new_loss) + self.queue.update(subdomain, priority=self.priority(subdomain)) def loss(self): + # XXX: update this to return the *loss*, rather than the priority _, loss = self.queue.peek() return loss From df667df6545fc7e6aa6fbd801699e11c82e952e1 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 14:55:03 +0200 Subject: [PATCH 025/105] rename 'self.loss' to 'self.loss_function' --- adaptive/learner/new_learnerND.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index da453e1b4..7d4e5b60b 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -707,12 +707,12 @@ def __init__(self, f, bounds, 
loss=None): if len(bounds) == 1: (a, b), = (boundary_points,) = bounds self.domain = Interval(a, b) - self.loss = loss or DistanceLoss() + self.loss_function = loss or DistanceLoss() self.ndim = 1 else: boundary_points = sorted(tuple(p) for p in itertools.product(*bounds)) self.domain = ConvexHull(scipy.spatial.ConvexHull(boundary_points)) - self.loss = loss or EmbeddedVolumeLoss() + self.loss_function = loss or EmbeddedVolumeLoss() self.ndim = len(boundary_points[0]) self.boundary_points = boundary_points @@ -758,7 +758,7 @@ def _finalize_initialization(self): def priority(self, subdomain): if self._initialized: - L_0 = self.loss(self.domain, subdomain, self.codomain_bounds, self.data) + L_0 = self.loss_function(self.domain, subdomain, self.codomain_bounds, self.data) else: # Before we're initialized we don't have enough data to calculate losses, # so we just assign the same loss to each subdomain @@ -851,11 +851,11 @@ def tell_many(self, xs, ys): for subdomain in new: self.queue.insert(subdomain, priority=self.priority(subdomain)) - if self.loss.n_neighbors > 0: + if self.loss_function.n_neighbors > 0: subdomains_to_update = set() for subdomain in new: subdomains_to_update.update( - self.domain.neighbors(subdomain, self.loss.n_neighbors) + self.domain.neighbors(subdomain, self.loss_function.n_neighbors) ) subdomains_to_update -= new for subdomain in subdomains_to_update: From 5377438944497ca400259fe4ff04d86d2d81f2dd Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 14:59:44 +0200 Subject: [PATCH 026/105] store priorities as (priority, loss) This allows us to simply implement self.loss() by iterating over the priorities in the queue --- adaptive/learner/new_learnerND.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 7d4e5b60b..c571a3820 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ 
-560,6 +560,10 @@ def items(self): "Return an iterator over the items in the queue in priority order." return reversed(self._queue.values()) + def priorities(self): + "Return an iterator over the priorities in the queue." + return reversed(self._queue) + def peek(self): "Return the item and priority at the front of the queue." ((priority, _), item) = self._queue.peekitem() @@ -764,7 +768,9 @@ def priority(self, subdomain): # so we just assign the same loss to each subdomain L_0 = 1 subvolumes = self.domain.subvolumes(subdomain) - return (max(subvolumes) / sum(subvolumes)) * L_0 + # We use this tuple as the priority, and we also store the loss directly so that + # we can easily look it up later. + return ((max(subvolumes) / sum(subvolumes)) * L_0, L_0) def ask(self, n, tell_pending=True): if self.n_asked >= len(self.boundary_points): @@ -891,10 +897,18 @@ def remove_unfinished(self): for subdomain in cleared_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) - def loss(self): - # XXX: update this to return the *loss*, rather than the priority - _, loss = self.queue.peek() - return loss + def loss(self, real=True): + if real: + # NOTE: O(N) in the number of subintervals, but with a low prefactor. + # We have to do this because the queue is sorted in *priority* + # order, and it's possible that a subinterval with a high loss + # may have a low priority (if there are many pending points). + return max(loss for _, loss in self.queue.priorities()) + else: + # This depends on the implementation of 'self.priority'. Currently + # it returns a tuple (priority, loss). 
+ _, (priority, _) = self.queue.peek() + return priority def plot(self, **kwargs): if isinstance(self.domain, Interval): From fec432ba03f1114e1ec5d885a61a1db16d95c064 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 15:32:14 +0200 Subject: [PATCH 027/105] docstring update --- adaptive/learner/new_learnerND.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index c571a3820..7be8b49a0 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -35,9 +35,7 @@ def insert(self, x): Returns ------- affected_subdomains : Iterable of subdomains - If some points were added on the boundary of 'subdomain' - then they will also have been added to the neighboring - subdomains. + The subdomains to which 'x' was added. Raises ------ From 61bf9dfdc978e181bb618add191540ed2292aa86 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 15:32:28 +0200 Subject: [PATCH 028/105] correctly compute point priorities --- adaptive/learner/new_learnerND.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 7be8b49a0..d044d18d6 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -793,17 +793,15 @@ def ask(self, n, tell_pending=True): def _ask(self, n, tell_pending): new_points = [] - new_losses = [] + point_priorities = [] for _ in range(n): subdomain, _ = self.queue.peek() (new_point,), affected_subdomains = self.domain.insert_points(subdomain, 1) self.pending_points.add(new_point) for subdomain in affected_subdomains: - new_loss = self.priority(subdomain) - self.queue.update(subdomain, priority=new_loss) + self.queue.update(subdomain, priority=self.priority(subdomain)) new_points.append(new_point) - # XXX: this is not correct; need to set loss to loss of 'subdomain' - new_losses.append(new_loss) + 
point_priorities.append(self.priority(subdomain)) if not tell_pending: affected_subdomains = set() @@ -812,7 +810,7 @@ def _ask(self, n, tell_pending): affected_subdomains.update(self.domain.remove(point)) for subdomain in affected_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) - return new_points, new_losses + return new_points, point_priorities def tell_pending(self, x): self.pending_points.add(x) From ce4d9d2e03135d3d53130335b4934affe821d08d Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 15:49:38 +0200 Subject: [PATCH 029/105] refactor comments and fix minor logic bug --- adaptive/learner/new_learnerND.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index d044d18d6..8621a854c 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -736,6 +736,8 @@ def __init__(self, f, bounds, loss=None): def _finalize_initialization(self): assert all(x in self.data for x in self.boundary_points) + self._initialized = True + vals = list(self.data.values()) codomain_min = np.min(vals, axis=0) codomain_max = np.max(vals, axis=0) @@ -756,14 +758,13 @@ def _finalize_initialization(self): for subdomain in self.domain.subdomains(): self.queue.insert(subdomain, priority=self.priority(subdomain)) - self._initialized = True def priority(self, subdomain): if self._initialized: L_0 = self.loss_function(self.domain, subdomain, self.codomain_bounds, self.data) else: - # Before we're initialized we don't have enough data to calculate losses, - # so we just assign the same loss to each subdomain + # Before we have all the boundary points we can't calculate losses because we + # do not have enough data. We just assign a constant loss to each subdomain. 
L_0 = 1 subvolumes = self.domain.subvolumes(subdomain) # We use this tuple as the priority, and we also store the loss directly so that @@ -842,14 +843,14 @@ def tell_many(self, xs, ys): self.queue.remove(subdomain) if need_loss_update: - # Need to recalculate all losses anyway + # Need to recalculate all priorities anyway self.queue = Queue( (subdomain, self.priority(subdomain)) for subdomain in itertools.chain(self.queue.items(), new) ) else: - # Compute the losses for the new subdomains and re-compute the - # losses for the neighboring subdomains, if necessary. + # Compute the priorities for the new subdomains and re-compute the + # priorities for the neighboring subdomains, if necessary. for subdomain in new: self.queue.insert(subdomain, priority=self.priority(subdomain)) From ccabce38bd846bbed5be16f537dfd7b2279a6d96 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 16:12:06 +0200 Subject: [PATCH 030/105] introduce a dict of losses as an optimization --- adaptive/learner/new_learnerND.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 8621a854c..03acafc27 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -725,6 +725,15 @@ def __init__(self, f, bounds, loss=None): self.function = f self.n_asked = 0 + # As an optimization we keep a map from subdomain to loss. + # This is updated in 'self.priority' whenever the loss function is evaluated + # for a new subdomain. 'self.tell_many' removes subdomains from here when + # they are split, and also removes neighboring subdomains from here (to force + # a loss function recomputation) + self.losses = dict() + + # We must wait until the boundary points have been evaluated before we can + # set these attributes. 
self._initialized = False self.vdim = None self.codomain_bounds = None @@ -758,10 +767,13 @@ def _finalize_initialization(self): for subdomain in self.domain.subdomains(): self.queue.insert(subdomain, priority=self.priority(subdomain)) - def priority(self, subdomain): if self._initialized: - L_0 = self.loss_function(self.domain, subdomain, self.codomain_bounds, self.data) + if subdomain in self.losses: + L_0 = self.losses[subdomain] + else: + L_0 = self.loss_function(self.domain, subdomain, self.codomain_bounds, self.data) + self.losses[subdomain] = L_0 else: # Before we have all the boundary points we can't calculate losses because we # do not have enough data. We just assign a constant loss to each subdomain. @@ -841,6 +853,7 @@ def tell_many(self, xs, ys): for subdomain in old: self.queue.remove(subdomain) + del self.losses[subdomain] if need_loss_update: # Need to recalculate all priorities anyway @@ -862,6 +875,7 @@ def tell_many(self, xs, ys): ) subdomains_to_update -= new for subdomain in subdomains_to_update: + del self.losses[subdomain] # Force loss recomputation self.queue.update(subdomain, priority=self.priority(subdomain)) def _update_codomain_bounds(self, ys): From 1434f69f1d5f816f36e4393fd410e5e2d4e3cd20 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 16:16:32 +0200 Subject: [PATCH 031/105] clarify the ordering of queried items and priorities --- adaptive/learner/new_learnerND.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 03acafc27..99b1fa404 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -555,11 +555,11 @@ def __init__(self, entries=()): self._items = SortedList(((v, k) for k, v in self._queue.items())) def items(self): - "Return an iterator over the items in the queue in priority order." + "Return an iterator over the items in the queue in arbitrary order." 
return reversed(self._queue.values()) def priorities(self): - "Return an iterator over the priorities in the queue." + "Return an iterator over the priorities in the queue in arbitrary order." return reversed(self._queue) def peek(self): From cdc6d498e7d9cd8308c3605727a75e5625d2cc46 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 16:27:02 +0200 Subject: [PATCH 032/105] don't store a tuple as priority, use 'self.losses' from with 'self.loss' --- adaptive/learner/new_learnerND.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 99b1fa404..e739a0ba9 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -778,10 +778,9 @@ def priority(self, subdomain): # Before we have all the boundary points we can't calculate losses because we # do not have enough data. We just assign a constant loss to each subdomain. L_0 = 1 + subvolumes = self.domain.subvolumes(subdomain) - # We use this tuple as the priority, and we also store the loss directly so that - # we can easily look it up later. - return ((max(subvolumes) / sum(subvolumes)) * L_0, L_0) + return (max(subvolumes) / sum(subvolumes)) * L_0 def ask(self, n, tell_pending=True): if self.n_asked >= len(self.boundary_points): @@ -914,11 +913,11 @@ def loss(self, real=True): # We have to do this because the queue is sorted in *priority* # order, and it's possible that a subinterval with a high loss # may have a low priority (if there are many pending points). - return max(loss for _, loss in self.queue.priorities()) + return max(self.losses.values()) else: # This depends on the implementation of 'self.priority'. Currently # it returns a tuple (priority, loss). 
- _, (priority, _) = self.queue.peek() + _, priority = self.queue.peek() return priority def plot(self, **kwargs): From 166a912799e39b1c4c7cd7eed596ca841074d134 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 16:27:35 +0200 Subject: [PATCH 033/105] comments --- adaptive/learner/new_learnerND.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index e739a0ba9..2cd621cb8 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -813,6 +813,9 @@ def _ask(self, n, tell_pending): for subdomain in affected_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) new_points.append(new_point) + # TODO: don't call 'priority' again here: we already called it above, we just + # need to identify 'subdomin' within 'affected_subdomains'. Maybe change + # the API of 'Domain.insert_points' to not return 'subdomain'... point_priorities.append(self.priority(subdomain)) if not tell_pending: @@ -822,6 +825,7 @@ def _ask(self, n, tell_pending): affected_subdomains.update(self.domain.remove(point)) for subdomain in affected_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) + return new_points, point_priorities def tell_pending(self, x): From ba5d207b852b79b7946ef0cc9bcc7da34a550d83 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 17:21:00 +0200 Subject: [PATCH 034/105] make points a tuple as they should be --- adaptive/learner/new_learnerND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 2cd621cb8..6cd7fc133 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -329,7 +329,7 @@ def _choose_point_in_simplex(simplex, transform=None): if transform is not None: point = np.linalg.solve(transform, point) # undo the transform - return point, face + return tuple(point), face 
class ConvexHull(Domain): From 52fed32284444e94e601d1a0ad1f5a9a888fbf9e Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 18:34:36 +0200 Subject: [PATCH 035/105] correctly update pending points and data --- adaptive/learner/new_learnerND.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 6cd7fc133..49d9d1921 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -835,8 +835,8 @@ def tell_pending(self, x): self.queue.update(subdomain, priority=self.priority(subdomain)) def tell_many(self, xs, ys): - for x, y in zip(xs, ys): - self.data[x] = y + self.data.update(zip(xs, ys)) + self.pending_points -= set(xs) if not self._initialized: if all(x in self.data for x in self.boundary_points): From 5ea9b563a1a36c2ff5b9a89f47b23919fd091291 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 9 Oct 2019 18:36:36 +0200 Subject: [PATCH 036/105] update example notebook --- proof-of-concept-learner.ipynb | 185 +++++++++++++++++++++++++++++++-- 1 file changed, 174 insertions(+), 11 deletions(-) diff --git a/proof-of-concept-learner.ipynb b/proof-of-concept-learner.ipynb index f6bc8d15a..19fcc1b41 100644 --- a/proof-of-concept-learner.ipynb +++ b/proof-of-concept-learner.ipynb @@ -7,10 +7,14 @@ "outputs": [], "source": [ "import random\n", + "import math\n", + "import numpy as np\n", "\n", "from sortedcontainers import SortedList\n", "import adaptive\n", - "from adaptive.learner.new_learnerND import LearnerND\n", + "from adaptive.learner.new_learnerND import LearnerND, CurvatureLoss\n", + "\n", + "import holoviews as hv\n", "\n", "adaptive.notebook_extension()" ] @@ -55,15 +59,6 @@ "adaptive.runner.simple(learner2, goal=lambda l: len(l.data) > 50)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "learner.plot() + learner2.plot()" - ] - }, { "cell_type": "markdown", 
"metadata": {}, @@ -135,7 +130,175 @@ "metadata": {}, "outputs": [], "source": [ - "runner.live_plot()" + "runner.live_plot(update_interval=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Works for ND input also" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ring_of_fire(xy, d=0.75):\n", + " a = 0.2\n", + " x, y = xy\n", + " return x + math.exp(-(x ** 2 + y ** 2 - d ** 2) ** 2 / a ** 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner3 = adaptive.LearnerND(ring_of_fire, [(-1, 1), (-1, 1)],\n", + " loss_per_simplex=adaptive.learner.learnerND.curvature_loss_function())\n", + "adaptive.runner.simple(learner3, goal=lambda l: len(l.data) > 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner4 = adaptive.Learner2D(ring_of_fire, [(-1, 1), (-1, 1)])\n", + "adaptive.runner.simple(learner4, goal=lambda l: len(l.data) > 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner5 = LearnerND(ring_of_fire, [(-1, 1), (-1, 1)])\n", + "adaptive.runner.simple(learner5, goal=lambda l: len(l.data) > 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner6 = LearnerND(ring_of_fire, [(-1, 1), (-1, 1)], loss=CurvatureLoss())\n", + "adaptive.runner.simple(learner6, goal=lambda l: len(l.data) > 50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the points are not exactly the same as for the old LearnerND.\n", + "\n", + "This is because for the moment it is illegal to insert points onto the boundary of an existing simplex.\n", + "\n", + "We shall lift this restriction soon." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner3.plot(tri_alpha=0.5) + learner4.plot(tri_alpha=0.5) + learner5.plot(tri_alpha=0.5) + learner6.plot(tri_alpha=0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%time adaptive.runner.simple(learner3, goal=lambda l: len(l.data) > 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%time adaptive.runner.simple(learner4, goal=lambda l: len(l.data) > 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%time adaptive.runner.simple(learner5, goal=lambda l: len(l.data) > 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%time adaptive.runner.simple(learner6, goal=lambda l: len(l.data) > 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner3.plot(tri_alpha=0.5) + learner4.plot(tri_alpha=0.5) + learner5.plot(tri_alpha=0.5) + learner6.plot(tri_alpha=0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "def ring_of_fire(xy, d=0.75):\n", + " time.sleep(0.5)\n", + " a = 0.2\n", + " x, y = xy\n", + " return x + math.exp(-(x ** 2 + y ** 2 - d ** 2) ** 2 / a ** 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner6 = LearnerND(ring_of_fire, [(-1, 1), (-1, 1)], loss=CurvatureLoss())\n", + "runner = adaptive.Runner(learner6, log=True, goal=lambda l: len(l.data) > 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runner.live_info()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runner.live_plot(plotter=lambda l: l.plot(tri_alpha=0.5), update_interval=0.5)" ] } ], From 224d9ce6ca1098f53939b3b19425a595cee60e04 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Fri, 11 Oct 2019 15:36:44 +0200 Subject: [PATCH 037/105] implement methods 'vertices' and 'subpoints' for 'Domain' --- adaptive/learner/new_learnerND.py | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 49d9d1921..488f071ff 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -86,6 +86,9 @@ def which_subdomains(self, x): ValueError : if x is outside of the domain """ + def vertices(self): + """Returns the vertices of the domain.""" + def transform(self, x): "Transform 'x' to the unit hypercube" @@ -95,6 +98,9 @@ def neighbors(self, subdomain, n=1): def subdomains(self): "Return all the subdomains in the domain." + def subpoints(self, subdomain): + "Return all points in the interior of a subdomain." + def clear_subdomains(self): """Remove all points from the interior of subdomains. 
@@ -253,6 +259,9 @@ def __contains__(self, subdomain): return False return ia + 1 == ib + def vertices(self): + return self.points + def transform(self, x): a, b = self.bounds return (x - a) / (b - a) @@ -273,6 +282,17 @@ def subdomains(self): p = self.points return zip(p, p.islice(1)) + def subpoints(self, subdomain, *, _check_membership=True): + if _check_membership and subdomain not in self: + raise ValueError("{} is not present in this interval".format(subdomain)) + try: + p = self.sub_intervals[subdomain] + except KeyError: + return [] + else: + # subinterval points contain the vertex points + return p[1:-1] + def clear_subdomains(self): subdomains = list(self.sub_intervals.keys()) self.sub_intervals = dict() @@ -512,6 +532,21 @@ def neighbors(self, subdomain, n=1): def subdomains(self): return self.triangulation.simplices + def vertices(self): + return self.triangulation.vertices + + def subpoints(self, subdomain, *, _check_membership=True): + if _check_membership and subdomain not in self: + raise ValueError("{} is not present in this domain".format(subdomain)) + try: + subtri = self.sub_domains[subdomain] + except KeyError: + return [] + else: + # Subtriangulations are, by definition, over simplices. 
This means + # that the first ndim + 1 points are the simplex vertices, which we skip + return subtri.vertices[self.ndim + 1:] + def clear_subdomains(self): sub_domains = list(self.sub_domains.keys()) self.sub_domains = dict() From bbed8c6e30143d1cd44d46942fae488c09ecdb8a Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Fri, 11 Oct 2019 15:37:10 +0200 Subject: [PATCH 038/105] abstract out choosing points in an interval --- adaptive/learner/new_learnerND.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 488f071ff..da7b2a6a8 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -116,6 +116,13 @@ def subvolumes(self, subdomain): "Return the volumes of the sub-subdomains." +def _choose_point_in_subinterval(a, b): + m = a + (b - a) / 2 + if not a < m < b: + raise ValueError("{} cannot be split further".format(subinterval)) + return m + + class Interval(Domain): """A 1D domain (an interval). 
@@ -150,7 +157,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): subsubdomains = SortedList(zip(p, p.islice(1)), key=self.volume) for _ in range(n): a, b = subsubdomains.pop() - m = a + (b - a) / 2 + m = _choose_point_in_subinterval(a, b) subsubdomains.update([(a, m), (m, b)]) points.append(m) p.update(points) From 0f59184c72f9f010a63d83aa33aeb38904395bab Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Fri, 11 Oct 2019 15:37:42 +0200 Subject: [PATCH 039/105] catch interface bugs --- adaptive/learner/new_learnerND.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index da7b2a6a8..9611b0e61 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -200,7 +200,7 @@ def remove(self, x, *, _check_membership=True): subdomain = (p[i - 1], p[i]) try: - sub_points = self.sub_domains[subdomain] + sub_points = self.sub_intervals[subdomain] except KeyError: raise ValueError("{} not in any subdomain".format(x)) else: @@ -430,6 +430,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): return [tuple(p) for p in points], affected_subdomains def insert(self, x, *, _check_membership=True): + x = tuple(x) # XXX: O(N) in the number of simplices affected_subdomains = self.which_subdomains(x) if not affected_subdomains: @@ -443,6 +444,7 @@ def insert(self, x, *, _check_membership=True): return affected_subdomains def remove(self, x): + x = tuple(x) # XXX: O(N) in the number of simplices affected_subdomains = self.which_subdomains(x) for subdomain in affected_subdomains: @@ -462,6 +464,7 @@ def remove(self, x): ) def split_at(self, x, *, _check_membership=True): + x = tuple(x) tri = self.triangulation # XXX: O(N) in the number of simplices. 
As typically 'x' will have been # obtained by 'insert_points' or by calling 'insert_into' we can keep @@ -516,6 +519,7 @@ def split_at(self, x, *, _check_membership=True): return old_subdomains, new_subdomains def which_subdomains(self, x): + x = tuple(x) tri = self.triangulation # XXX: O(N) in the number of simplices subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] From 3d88c2cfe4356737d421c38255f9cb05388f822f Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Fri, 11 Oct 2019 15:38:00 +0200 Subject: [PATCH 040/105] implement '__contains__' for 'ConvexHull' --- adaptive/learner/new_learnerND.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 9611b0e61..0e2c53bbe 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -527,6 +527,9 @@ def which_subdomains(self, x): raise ValueError("{} is not in the domain".format(x)) return subdomains + def __contains__(self, subdomain): + return subdomain in self.triangulation.simplices + def transform(self, x): # XXX: implement this raise NotImplementedError() From c570cd5c99f08fc75a316f3816dd37ba25dd85c2 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Fri, 11 Oct 2019 15:38:20 +0200 Subject: [PATCH 041/105] use subdomain volume rather than constant for the loss before data is added --- adaptive/learner/new_learnerND.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 0e2c53bbe..a828e537b 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -825,8 +825,8 @@ def priority(self, subdomain): self.losses[subdomain] = L_0 else: # Before we have all the boundary points we can't calculate losses because we - # do not have enough data. We just assign a constant loss to each subdomain. - L_0 = 1 + # do not have enough data. 
We just assign the subdomain volume as the loss. + L_0 = self.domain.volume(subdomain) subvolumes = self.domain.subvolumes(subdomain) return (max(subvolumes) / sum(subvolumes)) * L_0 From 207ac471abe603c3bc5be556300005ce4b1a26bf Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 13:31:54 +0200 Subject: [PATCH 042/105] correct implementation of queue --- adaptive/learner/new_learnerND.py | 43 +++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index a828e537b..f78b4c3a2 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -578,6 +578,10 @@ def subvolumes(self, subdomain): return [subtri.volume(s) for s in subtri.simplices] +class Empty(KeyError): + pass + + class Queue: """Priority queue supporting update and removal at arbitrary position. @@ -590,19 +594,23 @@ class Queue: def __init__(self, entries=()): self._queue = SortedDict( - ((priority, n), item) for n, (item, priority) in enumerate(entries) + ((priority, -n), item) for n, (item, priority) in enumerate(entries) ) # 'self._queue' cannot be keyed only on priority, as there may be several # items that have the same priority. To keep unique elements the key - # will be '(priority, self._n)', where 'self._n' is incremented whenever - # we add a new element. - self._n = len(self._queue) + # will be '(priority, self._n)', where 'self._n' is decremented whenever + # we add a new element. 'self._n' is negative so that elements with equal + # priority are sorted by insertion order. + self._n = -len(self._queue) # To efficiently support updating and removing items if their priority # is unknown we have to keep the reverse map of 'self._queue'. Because # items may not be hashable we cannot use a SortedDict, so we use a # SortedList storing '(item, key)'. 
self._items = SortedList(((v, k) for k, v in self._queue.items())) + def __len__(self): + return len(self._queue) + def items(self): "Return an iterator over the items in the queue in arbitrary order." return reversed(self._queue.values()) @@ -613,11 +621,13 @@ def priorities(self): def peek(self): "Return the item and priority at the front of the queue." + self._check_nonempty() ((priority, _), item) = self._queue.peekitem() return item, priority def pop(self): "Remove and return the item and priority at the front of the queue." + self._check_nonempty() (key, item) = self._queue.popitem() i = self._items.index((item, key)) del self._items[i] @@ -629,15 +639,26 @@ def insert(self, item, priority): key = (priority, self._n) self._items.add((item, key)) self._queue[key] = item - self._n += 1 + self._n -= 1 - def remove(self, item): - "Remove the 'item' from the queue." + def _check_nonempty(self): + if not self._queue: + raise Empty() + + def _find_first(self, item): + self._check_nonempty() i = self._items.bisect_left((item, ())) - should_be, key = self._items[i] + try: + should_be, key = self._items[i] + except IndexError: + raise KeyError("item is not in queue") if item != should_be: raise KeyError("item is not in queue") + return i, key + def remove(self, item): + "Remove the 'item' from the queue." + i, key = self._find_first(item) del self._queue[key] del self._items[i] @@ -648,11 +669,7 @@ def update(self, item, priority): ------ KeyError : if 'item' is not in the queue. 
""" - i = self._items.bisect_left((item, ())) - should_be, key = self._items[i] - if item != should_be: - raise KeyError("item is not in queue") - + i, key = self._find_first(item) _, n = key new_key = (priority, n) From f1fdfeebb6ef0c5f5664a2805546614bf87d8ff0 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 13:33:55 +0200 Subject: [PATCH 043/105] black --- adaptive/learner/new_learnerND.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index f78b4c3a2..66a2c08d3 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -559,7 +559,7 @@ def subpoints(self, subdomain, *, _check_membership=True): else: # Subtriangulations are, by definition, over simplices. This means # that the first ndim + 1 points are the simplex vertices, which we skip - return subtri.vertices[self.ndim + 1:] + return subtri.vertices[self.ndim + 1 :] def clear_subdomains(self): sub_domains = list(self.sub_domains.keys()) @@ -838,7 +838,9 @@ def priority(self, subdomain): if subdomain in self.losses: L_0 = self.losses[subdomain] else: - L_0 = self.loss_function(self.domain, subdomain, self.codomain_bounds, self.data) + L_0 = self.loss_function( + self.domain, subdomain, self.codomain_bounds, self.data + ) self.losses[subdomain] = L_0 else: # Before we have all the boundary points we can't calculate losses because we @@ -852,9 +854,9 @@ def ask(self, n, tell_pending=True): if self.n_asked >= len(self.boundary_points): points, losses = self._ask(n, tell_pending) else: - points = self.boundary_points[self.n_asked:self.n_asked + n] + points = self.boundary_points[self.n_asked : self.n_asked + n] # The boundary points should always be evaluated with the highest priority - losses = [float('inf')] * len(points) + losses = [float("inf")] * len(points) if tell_pending: for x in points: self.pending_points.add(x) From bc38a7b0d039ee3c91e2c040ed78889344ab1f7a 
Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 13:38:27 +0200 Subject: [PATCH 044/105] fix neighbor detection when adding points We now directly check whether an added point lies on a boundary of the subdomain, and then query the neighbors of the boundary. --- adaptive/learner/new_learnerND.py | 141 +++++++++++++++++++++++++----- 1 file changed, 120 insertions(+), 21 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 66a2c08d3..11793b326 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -1,8 +1,10 @@ from math import sqrt import itertools +import functools from collections.abc import Iterable import numpy as np +import scipy.linalg import scipy.spatial import scipy.interpolate from sortedcontainers import SortedList, SortedDict @@ -334,8 +336,6 @@ def _choose_point_in_simplex(simplex, transform=None): ------- point : (n,) array The point that was chosen in the simplex - face : tuple of int - If the chosen point was """ if transform is not None: simplex = np.dot(simplex, transform) @@ -345,18 +345,116 @@ def _choose_point_in_simplex(simplex, transform=None): center, _radius = circumsphere(simplex) if point_in_simplex(center, simplex): point = np.average(simplex, axis=0) - face = () else: distances = scipy.spatial.distance.pdist(simplex) distance_matrix = scipy.spatial.distance.squareform(distances) i, j = np.unravel_index(np.argmax(distance_matrix), distance_matrix.shape) point = (simplex[i, :] + simplex[j, :]) / 2 - face = (i, j) if transform is not None: point = np.linalg.solve(transform, point) # undo the transform - return tuple(point), face + return tuple(point) + + +def _simplex_facets(ndim): + """Return the facets of a simplex in 'ndim' dimensions + + A simplex in 'ndim' dimensions consists of 'ndim + 1' points + [0, ndim + 1) + + Parameters + ---------- + ndim : positive int + + Returns + ------- + facets : Iterable of integer tuples + Contains 
'ndim + 1' tuples, and each tuple contains + 'ndim' integers. + """ + return itertools.combinations(range(ndim + 1), ndim) + + +def _boundary_equations(simplex): + """Return the set of equations defining the boundary of a simplex + + This is slower than using scipy.spatial.ConvexHull, however the ordering + of the equations is not clear for that case. + + Care is not taken to orient the facets to point out of the simplex; the + equations should only be used for verifying if a point lies on a boundary, + rather than if it lies inside the simplex. + + >>> simplex = [(0, 0), (1, 0), (0, 1)] + >>> A, b = _boundary_equations(simplex) + >>> x = [0.5, 0] + >>> which_boundary = np.isclose(A @ x + b, 0) + >>> # facet #0 is the line between (0, 0) and (1, 0) + >>> assert which_boundary[0] == True + + Parameters + ---------- + simplex : (N + 1, N) float array-like + The vertices of an N-dimensional simplex. + + Returns + ------- + A : (N + 1, N) float array + Each row is a normal vector to a facet of 'simplex'. + The facets are in the same order as returned by + '_simplex_facets(N)'. + b : (N + 1,) float array + Each element is the offset from the origin of the + corresponding facet of 'simplex' + """ + points = np.asarray(simplex) + ndim = points.shape[1] + assert points.shape == (ndim + 1, ndim) + A = np.empty((ndim + 1, ndim), dtype=float) + b = np.empty((ndim + 1), dtype=float) + for i, (x0, *v) in enumerate(_simplex_facets(ndim)): + facet_tangent_space = points[list(v)] - points[x0] + facet_normal = scipy.linalg.null_space(facet_tangent_space).squeeze() + A[i, :] = facet_normal + b[i] = np.dot(points[x0], facet_normal) + return A, b + + +def _on_which_boundary(equations, x, eps=1e-8): + """Returns the simplex boundary on which 'x' is found. + + >>> simplex = [(0., 0.), (2., 0.), (0., 4.)] + >>> eq = _boundary_equations(simplex) + >>> x = [0.5, 0.] + >>> _on_which_boundary(eq, x) == (0, 1) + >>> assert boundary == (0, 1) + >>> x = [2., 0.] 
+ >>> _on_which_boundary(eq, x) == (1,) + + Parameters + ---------- + equations : the output of _boundary_equations + The equations defining a simplex in 'N' dimensions + x : (N,) float array-like + + Returns + ------- + None if 'x' is not on a simplex boundary. + Otherwise, returns a tuple containing integers defining + the boundary on which 'x' is found. + """ + ndim = len(x) + A, b = equations + assert len(b) == ndim + 1 + on_boundary = np.isclose(A @ x + b, 0, atol=1e-8) + if not any(on_boundary): + return None + # The point is on the boundary of all the following facets + facets = [facet for i, facet in enumerate(_simplex_facets(ndim)) if on_boundary[i]] + # If the point is on the boundary of more than 1 facet, then it is on a lower-dimension facet. + boundary_facet = set.intersection(*map(set, facets)) + return tuple(sorted(boundary_facet)) class ConvexHull(Domain): @@ -387,7 +485,11 @@ def _get_subtriangulation(self, subdomain): try: subtri = self.sub_domains[subdomain] except KeyError: # No points in the interior of this subdomain yet - subtri = Triangulation([self.triangulation.vertices[x] for x in subdomain]) + points = [self.triangulation.vertices[x] for x in subdomain] + subtri = Triangulation(points) + subtri.on_which_boundary = functools.partial( + _on_which_boundary, _boundary_equations(points) + ) self.sub_domains[subdomain] = subtri return subtri @@ -409,23 +511,20 @@ def insert_points(self, subdomain, n, *, _check_membership=True): # O(N) in the number of sub-simplices, but typically we only have a few largest_simplex = max(subtri.simplices, key=subtri.volume) simplex_vertices = np.array([subtri.vertices[s] for s in largest_simplex]) - point, face = _choose_point_in_simplex(simplex_vertices) + point = _choose_point_in_simplex(simplex_vertices) points.append(point) subtri.add_point(point, largest_simplex) - # If we chose a point on a face (or edge) of 'subdomain' then we need to - # add it to the subtriangulations of the neighboring subdomains. 
- # This check relies on the fact that the first 'ndim + 1' points in the - # subtriangulation are the boundary points. - face = [largest_simplex[i] for i in face] - if face and all(f < self.ndim + 1 for f in face): - # Translate vertex indices from subtriangulation to triangulation - face = [subdomain[f] for f in face] - # Loop over the simplices that contain 'face', skipping 'subdomain', - # which was already added above. - for sd in tri.containing(face): - if sd != subdomain: - self._get_subtriangulation(sd).add_point(point) - affected_subdomains.add(sd) + # If the point was added to a boundary of the subdomain we should + # add it to the neighboring subdomains. + boundary = subtri.on_which_boundary(point) + if boundary is not None: + # Convert subtriangulation indices to triangulation indices + boundary = tuple(sorted(subdomain[i] for i in boundary)) + neighbors = set(tri.containing(boundary)) + neighbors.remove(subdomain) + for sd in neighbors: + self._get_subtriangulation(sd).add_point(point) + affected_subdomains.update(neighbors) return [tuple(p) for p in points], affected_subdomains From b3c394d6d3828f7915b2feaa17d4e978e8c5ab15 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 15:08:52 +0200 Subject: [PATCH 045/105] abstract out making new subtriangulations --- adaptive/learner/new_learnerND.py | 33 +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 11793b326..581d76db6 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -457,6 +457,17 @@ def _on_which_boundary(equations, x, eps=1e-8): return tuple(sorted(boundary_facet)) +def _make_new_subtriangulation(points): + points = np.asarray(points) + ndim = points.shape[1] + boundary_points = points[:ndim + 1] + subtri = Triangulation(points) + subtri.on_which_boundary = functools.partial( + _on_which_boundary, 
_boundary_equations(boundary_points) + ) + return subtri + + class ConvexHull(Domain): """A convex hull domain in $ℝ^N$ (N >=2). @@ -486,10 +497,7 @@ def _get_subtriangulation(self, subdomain): subtri = self.sub_domains[subdomain] except KeyError: # No points in the interior of this subdomain yet points = [self.triangulation.vertices[x] for x in subdomain] - subtri = Triangulation(points) - subtri.on_which_boundary = functools.partial( - _on_which_boundary, _boundary_equations(points) - ) + subtri = _make_new_subtriangulation(points) self.sub_domains[subdomain] = subtri return subtri @@ -557,10 +565,13 @@ def remove(self, x): else: if x not in subtri.vertices: raise ValueError("{} not present in any subdomain".format(x)) - # Rebuild the subtriangulation from scratch - self.sub_domains[subdomain] = Triangulation( - [v for v in subtri.vertices if v != x] - ) + points = [v for v in subtri.vertices if v != x] + if len(points) == self.ndim + 1: + # No more points inside the subdomain + del self.sub_domains[subdomain] + else: + # Rebuild the subtriangulation from scratch + self.sub_domains[subdomain] = _make_new_subtriangulation(points) def split_at(self, x, *, _check_membership=True): x = tuple(x) @@ -604,11 +615,7 @@ def split_at(self, x, *, _check_membership=True): p_was_added = False for subdomain in new_subdomains: if tri.point_in_simplex(p, subdomain): - try: - subtri = self.sub_domains[subdomain] - except KeyError: # No points in this subdomain yet - subtri = Triangulation([tri.vertices[i] for i in subdomain]) - self.sub_domains[subdomain] = subtri + subtri = self._get_subtriangulation(subdomain) subtri.add_point(p) p_was_added = True assert ( From f38e903cd6bd95f3c69986919b58e0a7c0482522 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 15:47:39 +0200 Subject: [PATCH 046/105] black --- adaptive/learner/new_learnerND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py 
b/adaptive/learner/new_learnerND.py index 581d76db6..b3f349f40 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -460,7 +460,7 @@ def _on_which_boundary(equations, x, eps=1e-8): def _make_new_subtriangulation(points): points = np.asarray(points) ndim = points.shape[1] - boundary_points = points[:ndim + 1] + boundary_points = points[: ndim + 1] subtri = Triangulation(points) subtri.on_which_boundary = functools.partial( _on_which_boundary, _boundary_equations(boundary_points) From 0cff46778e65a2b71d8ea056759007a5fda360f6 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 15:48:46 +0200 Subject: [PATCH 047/105] add map from subpoints to subdomains to speed up triangulation splitting --- adaptive/learner/new_learnerND.py | 81 ++++++++++++++++++------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index b3f349f40..25379a44f 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -1,6 +1,7 @@ from math import sqrt import itertools import functools +from collections import defaultdict from collections.abc import Iterable import numpy as np @@ -487,6 +488,12 @@ def __init__(self, hull): self.sub_domains = dict() self.ndim = self.bounds.points.shape[1] + # As an optimization we store any points inserted with 'insert_points' + # and 'insert' and point to the subdomains to which they belong. 
This + # allows 'which_subdomains' and 'split_at' to work faster when given points + # that were previously added with 'insert' or 'insert_points' + self.subpoints_to_subdomains = defaultdict(set) + @property def bounding_box(self): hull_points = self.bounds.points[self.bounds.vertices] @@ -522,6 +529,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): point = _choose_point_in_simplex(simplex_vertices) points.append(point) subtri.add_point(point, largest_simplex) + self.subpoints_to_subdomains[point].add(subdomain) # If the point was added to a boundary of the subdomain we should # add it to the neighboring subdomains. boundary = subtri.on_which_boundary(point) @@ -533,6 +541,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): for sd in neighbors: self._get_subtriangulation(sd).add_point(point) affected_subdomains.update(neighbors) + self.subpoints_to_subdomains[point].update(neighbors) return [tuple(p) for p in points], affected_subdomains @@ -547,44 +556,42 @@ def insert(self, x, *, _check_membership=True): if x in subtri.vertices: # O(N) in the number of vertices raise ValueError("{} exists in a subinterval already".format(x)) subtri.add_point(x) + self.subpoints_to_subdomains[x].update(affected_subdomains) return affected_subdomains def remove(self, x): x = tuple(x) - # XXX: O(N) in the number of simplices - affected_subdomains = self.which_subdomains(x) + try: + affected_subdomains = self.subpoints_to_subdomains.pop(x) + except KeyError: + raise ValueError("Can only remove points inside subdomains") for subdomain in affected_subdomains: # Check that it's not a vertex of the subdomain - if any(x == self.triangulation.vertices[i] for i in subdomain): - raise ValueError("Cannot remove subdomain vertices") - try: - subtri = self.sub_domains[subdomain] - except KeyError: - raise ValueError("{} not present in any subdomain".format(x)) + subtri = self.sub_domains[subdomain] + assert x in subtri.vertices + points = [v for v in 
subtri.vertices if v != x] + if len(points) == self.ndim + 1: + # No more points inside the subdomain + del self.sub_domains[subdomain] else: - if x not in subtri.vertices: - raise ValueError("{} not present in any subdomain".format(x)) - points = [v for v in subtri.vertices if v != x] - if len(points) == self.ndim + 1: - # No more points inside the subdomain - del self.sub_domains[subdomain] - else: - # Rebuild the subtriangulation from scratch - self.sub_domains[subdomain] = _make_new_subtriangulation(points) + # Rebuild the subtriangulation from scratch + self.sub_domains[subdomain] = _make_new_subtriangulation(points) def split_at(self, x, *, _check_membership=True): x = tuple(x) tri = self.triangulation - # XXX: O(N) in the number of simplices. As typically 'x' will have been - # obtained by 'insert_points' or by calling 'insert_into' we can keep - # a hashmap of x→simplex to make this lookup faster and fall back to - # 'locate_point' otherwise - simplex = tri.locate_point(x) - if not simplex: - raise ValueError("Can only split at points within the domain.") + try: + containing_subdomains = self.subpoints_to_subdomains.pop(x) + # Only need a single subdomaing 'x' to make 'tri.add_point' fast. + subdomain = next(iter(containing_subdomains)) + except KeyError: + # XXX: O(N) in the number of simplices. + subdomain = tri.locate_point(x) + if not subdomain: + raise ValueError("Can only split at points within the domain.") - old_subdomains, new_subdomains = tri.add_point(x, simplex) + old_subdomains, new_subdomains = tri.add_point(x, subdomain) if _check_membership: assert not any(s in self.sub_domains for s in new_subdomains) @@ -604,10 +611,12 @@ def split_at(self, x, *, _check_membership=True): # Get all points in the subtriangulation except the boundary # points. Because a subtriangulation is always defined over # a simplex, the first ndim + 1 points are the boundary points. 
- interior = set(range(self.ndim + 1, len(subtri.vertices))) - interior = [subtri.vertices[i] for i in interior] - # Remove 'x' if it is one of the points - interior = [i for i in interior if i != x] + interior = [v for v in subtri.vertices[self.ndim + 1 :] if v != x] + for v in interior: + s = self.subpoints_to_subdomains[v] + s.remove(d) + if not s: + del self.subpoints_to_subdomains[v] interior_points.update(interior) for p in interior_points: # Try to add 'p' to all the new subdomains. It may belong to more than 1 @@ -617,6 +626,7 @@ def split_at(self, x, *, _check_membership=True): if tri.point_in_simplex(p, subdomain): subtri = self._get_subtriangulation(subdomain) subtri.add_point(p) + self.subpoints_to_subdomains[p].add(subdomain) p_was_added = True assert ( p_was_added @@ -627,11 +637,14 @@ def split_at(self, x, *, _check_membership=True): def which_subdomains(self, x): x = tuple(x) tri = self.triangulation - # XXX: O(N) in the number of simplices - subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] - if not subdomains: - raise ValueError("{} is not in the domain".format(x)) - return subdomains + if x in self.subpoints_to_subdomains: + subdomains = self.subpoints_to_subdomains[x] + else: + # XXX: O(N) in the number of simplices + subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] + if not subdomains: + raise ValueError("{} is not in the domain".format(x)) + return list(subdomains) def __contains__(self, subdomain): return subdomain in self.triangulation.simplices From 5d5402b81ff7ca5d340f884f92114b351cacef59 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 16:29:40 +0200 Subject: [PATCH 048/105] separate Queue and Domain into separate modules --- adaptive/domain.py | 700 ++++++++++++++++++++++++++ adaptive/learner/new_learnerND.py | 796 +----------------------------- adaptive/priority_queue.py | 102 ++++ 3 files changed, 807 insertions(+), 791 deletions(-) create mode 100644 adaptive/domain.py create 
mode 100644 adaptive/priority_queue.py diff --git a/adaptive/domain.py b/adaptive/domain.py new file mode 100644 index 000000000..1b99027d5 --- /dev/null +++ b/adaptive/domain.py @@ -0,0 +1,700 @@ +import abc +import functools +import itertools +from collections import defaultdict + +import numpy as np +import scipy.linalg +import scipy.spatial +from sortedcontainers import SortedDict, SortedList + +from adaptive.learner.triangulation import ( + Triangulation, + circumsphere, + point_in_simplex, + simplex_volume_in_embedding, +) + +__all__ = ["Domain", "Interval", "ConvexHull"] + + +class Domain(metaclass=abc.ABCMeta): + @abc.abstractmethod + def insert_points(self, subdomain, n): + """Insert 'n' points into 'subdomain'. + + Returns + ------- + affected_subdomains : Iterable of subdomains + If some points were added on the boundary of 'subdomain' + then they will also have been added to the neighboring + subdomains. + """ + + @abc.abstractmethod + def insert(self, x): + """Insert 'x' into any subdomains to which it belongs. + + Returns + ------- + affected_subdomains : Iterable of subdomains + The subdomains to which 'x' was added. + + Raises + ------ + ValueError : if x is outside the domain or exists already + """ + + @abc.abstractmethod + def remove(self, x): + """Remove 'x' from any subdomains to which it belongs. + + Returns + ------- + affected_subdomains : Iterable of subdomains + The subdomains from which 'x' was removed. + + Raises + ------ + ValueError : if x is a subdomain vertex + ValueError : if x is not in any subdomain + """ + + @abc.abstractmethod + def split_at(self, x): + """Split the domain at 'x'. + + Removes and adds subdomains. + + Returns + ------- + old_subdomains : list of subdomains + The subdomains that were removed when splitting at 'x'. + new_subdomains : list of subdomains + The subdomains that were added when splitting at 'x'. 
+ + Raises + ------ + ValueError : if x is outside of the domain or exists already + """ + + @abc.abstractmethod + def which_subdomains(self, x): + """Return the subdomains that contains 'x'. + + Return + ------ + subdomains : Iterable of subdomains + The subdomains to which 'x' belongs. + + Raises + ------ + ValueError : if x is outside of the domain + """ + + @abc.abstractmethod + def vertices(self): + """Returns the vertices of the domain.""" + + @abc.abstractmethod + def neighbors(self, subdomain, n=1): + "Return all neighboring subdomains up to degree 'n'." + + @abc.abstractmethod + def subdomains(self): + "Return all the subdomains in the domain." + + @abc.abstractmethod + def subpoints(self, subdomain): + "Return all points in the interior of a subdomain." + + @abc.abstractmethod + def clear_subdomains(self): + """Remove all points from the interior of subdomains. + + Returns + ------- + subdomains : the subdomains who's interior points were removed + """ + + @abc.abstractmethod + def volume(self, subdomain): + "Return the volume of a subdomain." + + @abc.abstractmethod + def subvolumes(self, subdomain): + "Return the volumes of the sub-subdomains." + + +def _choose_point_in_subinterval(a, b): + m = a + (b - a) / 2 + if not a < m < b: + raise ValueError("{} cannot be split further".format((a, b))) + return m + + +class Interval(Domain): + """A 1D domain (an interval). + + Subdomains are pairs of floats (a, b). + """ + + def __init__(self, a, b): + if a >= b: + raise ValueError("'a' must be less than 'b'") + + # If a sub-interval contains points in its interior, they are stored + # in 'sub_intervals' in a SortedList. 
+ self.bounds = (a, b) + self.sub_intervals = dict() + self.points = SortedList([a, b]) + self.ndim = 1 + + def insert_points(self, subdomain, n, *, _check_membership=True): + if n <= 0: + raise ValueError("n must be positive") + if _check_membership and subdomain not in self: + raise ValueError("{} is not present in this interval".format(subdomain)) + try: + p = self.sub_intervals[subdomain] + except KeyError: # No points yet in the interior of this subdomain + p = SortedList(subdomain) + self.sub_intervals[subdomain] = p + + # Choose new points in the centre of the largest subdomain + # of this subinterval. + points = [] + subsubdomains = SortedList(zip(p, p.islice(1)), key=self.volume) + for _ in range(n): + a, b = subsubdomains.pop() + m = _choose_point_in_subinterval(a, b) + subsubdomains.update([(a, m), (m, b)]) + points.append(m) + p.update(points) + + return points, [subdomain] + + def insert(self, x, *, _check_membership=True): + if _check_membership: + a, b = self.bounds + if not (a <= x <= b): + raise ValueError("{} is outside of this interval".format(x)) + + p = self.points + i = p.bisect_left(x) + if p[i] == x: + raise ValueError("{} exists in this interval already".format(x)) + subdomain = (p[i - 1], p[i]) + + try: + p = self.sub_intervals[subdomain] + except KeyError: + self.sub_intervals[subdomain] = SortedList([a, x, b]) + else: + if x in p: + raise ValueError("{} exists in a subinterval already".format(x)) + p.add(x) + + return [subdomain] + + def remove(self, x, *, _check_membership=True): + if _check_membership: + a, b = self.bounds + if not (a <= x <= b): + raise ValueError("{} is outside of this interval".format(x)) + + p = self.points + i = p.bisect_left(x) + if p[i] == x: + raise ValueError("Cannot remove subdomain vertices") + subdomain = (p[i - 1], p[i]) + + try: + sub_points = self.sub_intervals[subdomain] + except KeyError: + raise ValueError("{} not in any subdomain".format(x)) + else: + sub_points.remove(x) + return [subdomain] + + def 
split_at(self, x, *, _check_membership=True): + a, b = self.bounds + if _check_membership: + if not (a <= x <= b): + raise ValueError("Can only split at points within the interval") + + p = self.points + i = p.bisect_left(x) + if p[i] == x: + raise ValueError("Cannot split at an existing point") + a, b = old_interval = p[i - 1], p[i] + new_intervals = [(a, x), (x, b)] + + p.add(x) + try: + sub_points = self.sub_intervals.pop(old_interval) + except KeyError: + pass + else: + # Update subintervals + for ival in new_intervals: + new_sub_points = SortedList(sub_points.irange(*ival)) + if x not in new_sub_points: + # This should add 'x' to the start or the end + new_sub_points.add(x) + if len(new_sub_points) > 2: + # We don't store subintervals if they don't contain + # any points in their interior. + self.sub_intervals[ival] = new_sub_points + + return [old_interval], new_intervals + + def which_subdomains(self, x): + a, b = self.bounds + if not (a <= x <= b): + raise ValueError("{} is outside the interval".format(x)) + p = self.points + i = p.bisect_left(x) + if p[i] != x: + # general point inside a subinterval + return [(p[i - 1], p[i])] + else: + # boundary of a subinterval + neighbors = [] + if i > 0: + neighbors.append((p[i - 1], p[i])) + if i < len(p) - 1: + neighbors.append((p[i], p[i + 1])) + return neighbors + + def __contains__(self, subdomain): + a, b = subdomain + try: + ia = self.points.index(a) + ib = self.points.index(b) + except ValueError: + return False + return ia + 1 == ib + + def vertices(self): + return self.points + + def neighbors(self, subdomain, n=1): + a, b = subdomain + p = self.points + ia = p.index(a) + neighbors = [] + for i in range(n): + if ia - i > 0: # left neighbor exists + neighbors.append((p[ia - i - 1], p[ia - i])) + if ia + i < len(p) - 2: # right neighbor exists + neighbors.append((p[ia + i + 1], p[ia + i + 2])) + return neighbors + + def subdomains(self): + p = self.points + return zip(p, p.islice(1)) + + def subpoints(self, 
subdomain, *, _check_membership=True): + if _check_membership and subdomain not in self: + raise ValueError("{} is not present in this interval".format(subdomain)) + try: + p = self.sub_intervals[subdomain] + except KeyError: + return [] + else: + # subinterval points contain the vertex points + return p[1:-1] + + def clear_subdomains(self): + subdomains = list(self.sub_intervals.keys()) + self.sub_intervals = dict() + return subdomains + + def volume(self, subdomain): + a, b = subdomain + return b - a + + def subvolumes(self, subdomain): + try: + p = self.sub_intervals[subdomain] + except KeyError: + return [self.volume(subdomain)] + else: + return [self.volume(s) for s in zip(p, p.islice(1))] + + +def _choose_point_in_simplex(simplex, transform=None): + """Choose a good point at which to split a simplex. + + Parameters + ---------- + simplex : (n+1, n) array + The simplex vertices + transform : (n, n) array + The linear transform to apply to the simplex vertices + before determining which point to choose. Must be + invertible. 
+ + Returns + ------- + point : (n,) array + The point that was chosen in the simplex + """ + if transform is not None: + simplex = np.dot(simplex, transform) + + # Choose center only if the shape of the simplex is nice, + # otherwise: the center the longest edge + center, _radius = circumsphere(simplex) + if point_in_simplex(center, simplex): + point = np.average(simplex, axis=0) + else: + distances = scipy.spatial.distance.pdist(simplex) + distance_matrix = scipy.spatial.distance.squareform(distances) + i, j = np.unravel_index(np.argmax(distance_matrix), distance_matrix.shape) + point = (simplex[i, :] + simplex[j, :]) / 2 + + if transform is not None: + point = np.linalg.solve(transform, point) # undo the transform + + return tuple(point) + + +def _simplex_facets(ndim): + """Return the facets of a simplex in 'ndim' dimensions + + A simplex in 'ndim' dimensions consists of 'ndim + 1' points + [0, ndim + 1) + + Parameters + ---------- + ndim : positive int + + Returns + ------- + facets : Iterable of integer tuples + Contains 'ndim + 1' tuples, and each tuple contains + 'ndim' integers. + """ + return itertools.combinations(range(ndim + 1), ndim) + + +def _boundary_equations(simplex): + """Return the set of equations defining the boundary of a simplex + + This is slower than using scipy.spatial.ConvexHull, however the ordering + of the equations is not clear for that case. + + Care is not taken to orient the facets to point out of the simplex; the + equations should only be used for verifying if a point lies on a boundary, + rather than if it lies inside the simplex. + + >>> simplex = [(0, 0), (1, 0), (0, 1)] + >>> A, b = _boundary_equations(simplex) + >>> x = [0.5, 0] + >>> which_boundary = np.isclose(A @ x + b, 0) + >>> # facet #0 is the line between (0, 0) and (1, 0) + >>> assert which_boundary[0] == True + + Parameters + ---------- + simplex : (N + 1, N) float array-like + The vertices of an N-dimensional simplex. 
+ + Returns + ------- + A : (N + 1, N) float array + Each row is a normal vector to a facet of 'simplex'. + The facets are in the same order as returned by + '_simplex_facets(N)'. + b : (N + 1,) float array + Each element is the offset from the origin of the + corresponding facet of 'simplex' + """ + points = np.asarray(simplex) + ndim = points.shape[1] + assert points.shape == (ndim + 1, ndim) + A = np.empty((ndim + 1, ndim), dtype=float) + b = np.empty((ndim + 1), dtype=float) + for i, (x0, *v) in enumerate(_simplex_facets(ndim)): + facet_tangent_space = points[list(v)] - points[x0] + facet_normal = scipy.linalg.null_space(facet_tangent_space).squeeze() + A[i, :] = facet_normal + b[i] = np.dot(points[x0], facet_normal) + return A, b + + +def _on_which_boundary(equations, x, eps=1e-8): + """Returns the simplex boundary on which 'x' is found. + + >>> simplex = [(0., 0.), (2., 0.), (0., 4.)] + >>> eq = _boundary_equations(simplex) + >>> x = [0.5, 0.] + >>> _on_which_boundary(eq, x) == (0, 1) + >>> assert boundary == (0, 1) + >>> x = [2., 0.] + >>> _on_which_boundary(eq, x) == (1,) + + Parameters + ---------- + equations : the output of _boundary_equations + The equations defining a simplex in 'N' dimensions + x : (N,) float array-like + + Returns + ------- + None if 'x' is not on a simplex boundary. + Otherwise, returns a tuple containing integers defining + the boundary on which 'x' is found. + """ + ndim = len(x) + A, b = equations + assert len(b) == ndim + 1 + on_boundary = np.isclose(A @ x + b, 0, atol=1e-8) + if not any(on_boundary): + return None + # The point is on the boundary of all the following facets + facets = [facet for i, facet in enumerate(_simplex_facets(ndim)) if on_boundary[i]] + # If the point is on the boundary of more than 1 facet, then it is on a lower-dimension facet. 
+ boundary_facet = set.intersection(*map(set, facets)) + return tuple(sorted(boundary_facet)) + + +def _make_new_subtriangulation(points): + points = np.asarray(points) + ndim = points.shape[1] + boundary_points = points[: ndim + 1] + subtri = Triangulation(points) + subtri.on_which_boundary = functools.partial( + _on_which_boundary, _boundary_equations(boundary_points) + ) + return subtri + + +class ConvexHull(Domain): + """A convex hull domain in $ℝ^N$ (N >=2). + + Subdomains are simplices represented by integer tuples of length (N + 1). + """ + + def __init__(self, points): + hull = scipy.spatial.ConvexHull(points) + + self.bounds = hull + self.triangulation = Triangulation(hull.points[hull.vertices]) + # if a subdomain has interior points, then it appears as a key + # in 'sub_domains' and maps to a 'Triangulation' of the + # interior of the subdomain. By definition the triangulation + # is over a simplex, and the first 'ndim + 1' points in the + # triangulation are the boundary points. + self.sub_domains = dict() + self.ndim = self.bounds.points.shape[1] + + # As an optimization we store any points inserted with 'insert_points' + # and 'insert' and point to the subdomains to which they belong. 
This + # allows 'which_subdomains' and 'split_at' to work faster when given points + # that were previously added with 'insert' or 'insert_points' + self.subpoints_to_subdomains = defaultdict(set) + + @property + def bounding_box(self): + hull_points = self.bounds.points[self.bounds.vertices] + return tuple(zip(hull_points.min(axis=0), hull_points.max(axis=0))) + + def _get_subtriangulation(self, subdomain): + try: + subtri = self.sub_domains[subdomain] + except KeyError: # No points in the interior of this subdomain yet + points = [self.triangulation.vertices[x] for x in subdomain] + subtri = _make_new_subtriangulation(points) + self.sub_domains[subdomain] = subtri + return subtri + + def insert_points(self, subdomain, n, *, _check_membership=True): + if n <= 0: + raise ValueError("n must be positive") + tri = self.triangulation + if _check_membership and subdomain not in tri.simplices: + raise ValueError("{} is not present in this domain".format(subdomain)) + + subtri = self._get_subtriangulation(subdomain) + + # Choose the largest volume sub-simplex and insert a point into it. + # Also insert the point into neighboring subdomains if it was chosen + # on the subdomain boundary. + points = [] + affected_subdomains = {subdomain} + for _ in range(n): + # O(N) in the number of sub-simplices, but typically we only have a few + largest_simplex = max(subtri.simplices, key=subtri.volume) + simplex_vertices = np.array([subtri.vertices[s] for s in largest_simplex]) + point = _choose_point_in_simplex(simplex_vertices) + points.append(point) + subtri.add_point(point, largest_simplex) + self.subpoints_to_subdomains[point].add(subdomain) + # If the point was added to a boundary of the subdomain we should + # add it to the neighboring subdomains. 
+ boundary = subtri.on_which_boundary(point) + if boundary is not None: + # Convert subtriangulation indices to triangulation indices + boundary = tuple(sorted(subdomain[i] for i in boundary)) + neighbors = set(tri.containing(boundary)) + neighbors.remove(subdomain) + for sd in neighbors: + self._get_subtriangulation(sd).add_point(point) + affected_subdomains.update(neighbors) + self.subpoints_to_subdomains[point].update(neighbors) + + return [tuple(p) for p in points], affected_subdomains + + def insert(self, x, *, _check_membership=True): + x = tuple(x) + # XXX: O(N) in the number of simplices + affected_subdomains = self.which_subdomains(x) + if not affected_subdomains: + raise ValueError("{} is not present in this domain".format(x)) + for subdomain in affected_subdomains: + subtri = self._get_subtriangulation(subdomain) + if x in subtri.vertices: # O(N) in the number of vertices + raise ValueError("{} exists in a subinterval already".format(x)) + subtri.add_point(x) + self.subpoints_to_subdomains[x].update(affected_subdomains) + + return affected_subdomains + + def remove(self, x): + x = tuple(x) + try: + affected_subdomains = self.subpoints_to_subdomains.pop(x) + except KeyError: + raise ValueError("Can only remove points inside subdomains") + for subdomain in affected_subdomains: + # Check that it's not a vertex of the subdomain + subtri = self.sub_domains[subdomain] + assert x in subtri.vertices + points = [v for v in subtri.vertices if v != x] + if len(points) == self.ndim + 1: + # No more points inside the subdomain + del self.sub_domains[subdomain] + else: + # Rebuild the subtriangulation from scratch + self.sub_domains[subdomain] = _make_new_subtriangulation(points) + + def split_at(self, x, *, _check_membership=True): + x = tuple(x) + tri = self.triangulation + try: + containing_subdomains = self.subpoints_to_subdomains.pop(x) + # Only need a single subdomaing 'x' to make 'tri.add_point' fast. 
+ subdomain = next(iter(containing_subdomains)) + except KeyError: + # XXX: O(N) in the number of simplices. + subdomain = tri.locate_point(x) + if not subdomain: + raise ValueError("Can only split at points within the domain.") + + old_subdomains, new_subdomains = tri.add_point(x, subdomain) + + if _check_membership: + assert not any(s in self.sub_domains for s in new_subdomains) + + # Re-assign all the interior points of 'old_subdomains' to 'new_subdomains' + + # Keep the interior points as a set, because interior points on a shared face + # appear in the subtriangulations of both the neighboring simplices, and we + # don't want those points to appear twice. + interior_points = set() + for d in old_subdomains: + try: + subtri = self.sub_domains.pop(d) + except KeyError: + continue + else: + # Get all points in the subtriangulation except the boundary + # points. Because a subtriangulation is always defined over + # a simplex, the first ndim + 1 points are the boundary points. + interior = [v for v in subtri.vertices[self.ndim + 1 :] if v != x] + for v in interior: + s = self.subpoints_to_subdomains[v] + s.remove(d) + if not s: + del self.subpoints_to_subdomains[v] + interior_points.update(interior) + for p in interior_points: + # Try to add 'p' to all the new subdomains. It may belong to more than 1 + # if it lies on a subdomain boundary. 
+ p_was_added = False + for subdomain in new_subdomains: + if tri.point_in_simplex(p, subdomain): + subtri = self._get_subtriangulation(subdomain) + subtri.add_point(p) + self.subpoints_to_subdomains[p].add(subdomain) + p_was_added = True + assert ( + p_was_added + ), "{} was not in the interior of any new simplices".format(x) + + return old_subdomains, new_subdomains + + def which_subdomains(self, x): + x = tuple(x) + tri = self.triangulation + if x in self.subpoints_to_subdomains: + subdomains = self.subpoints_to_subdomains[x] + else: + # XXX: O(N) in the number of simplices + subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] + if not subdomains: + raise ValueError("{} is not in the domain".format(x)) + return list(subdomains) + + def __contains__(self, subdomain): + return subdomain in self.triangulation.simplices + + def transform(self, x): + # XXX: implement this + raise NotImplementedError() + + def neighbors(self, subdomain, n=1): + tri = self.triangulation + neighbors = {subdomain} + for _ in range(n): + for face in list(tri.faces(simplices=neighbors)): + neighbors.update(tri.containing(face)) + neighbors.remove(subdomain) + return neighbors + + def subdomains(self): + return self.triangulation.simplices + + def vertices(self): + return self.triangulation.vertices + + def subpoints(self, subdomain, *, _check_membership=True): + if _check_membership and subdomain not in self: + raise ValueError("{} is not present in this domain".format(subdomain)) + try: + subtri = self.sub_domains[subdomain] + except KeyError: + return [] + else: + # Subtriangulations are, by definition, over simplices. 
This means + # that the first ndim + 1 points are the simplex vertices, which we skip + return subtri.vertices[self.ndim + 1 :] + + def clear_subdomains(self): + sub_domains = list(self.sub_domains.keys()) + self.sub_domains = dict() + return sub_domains + + def volume(self, subdomain): + return self.triangulation.volume(subdomain) + + def subvolumes(self, subdomain): + try: + subtri = self.sub_domains[subdomain] + except KeyError: + return [self.triangulation.volume(subdomain)] + else: + return [subtri.volume(s) for s in subtri.simplices] diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 25379a44f..98c2cccab 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -1,801 +1,15 @@ -from math import sqrt import itertools -import functools -from collections import defaultdict from collections.abc import Iterable +from math import sqrt import numpy as np -import scipy.linalg -import scipy.spatial import scipy.interpolate -from sortedcontainers import SortedList, SortedDict from adaptive.learner.base_learner import BaseLearner -from adaptive.learner.triangulation import ( - Triangulation, - simplex_volume_in_embedding, - circumsphere, - point_in_simplex, -) +from adaptive.learner.triangulation import simplex_volume_in_embedding from adaptive.notebook_integration import ensure_holoviews - - -class Domain: - def insert_points(self, subdomain, n): - """Insert 'n' points into 'subdomain'. - - Returns - ------- - affected_subdomains : Iterable of subdomains - If some points were added on the boundary of 'subdomain' - then they will also have been added to the neighboring - subdomains. - """ - - def insert(self, x): - """Insert 'x' into any subdomains to which it belongs. - - Returns - ------- - affected_subdomains : Iterable of subdomains - The subdomains to which 'x' was added. 
- - Raises - ------ - ValueError : if x is outside the domain or exists already - """ - - def remove(self, x): - """Remove 'x' from any subdomains to which it belongs. - - Returns - ------- - affected_subdomains : Iterable of subdomains - The subdomains from which 'x' was removed. - - Raises - ------ - ValueError : if x is a subdomain vertex - ValueError : if x is not in any subdomain - """ - - def split_at(self, x): - """Split the domain at 'x'. - - Removes and adds subdomains. - - Returns - ------- - old_subdomains : list of subdomains - The subdomains that were removed when splitting at 'x'. - new_subdomains : list of subdomains - The subdomains that were added when splitting at 'x'. - - Raises - ------ - ValueError : if x is outside of the domain or exists already - """ - - def which_subdomains(self, x): - """Return the subdomains that contains 'x'. - - Return - ------ - subdomains : Iterable of subdomains - The subdomains to which 'x' belongs. - - Raises - ------ - ValueError : if x is outside of the domain - """ - - def vertices(self): - """Returns the vertices of the domain.""" - - def transform(self, x): - "Transform 'x' to the unit hypercube" - - def neighbors(self, subdomain, n=1): - "Return all neighboring subdomains up to degree 'n'." - - def subdomains(self): - "Return all the subdomains in the domain." - - def subpoints(self, subdomain): - "Return all points in the interior of a subdomain." - - def clear_subdomains(self): - """Remove all points from the interior of subdomains. - - Returns - ------- - subdomains : the subdomains who's interior points were removed - """ - - def volume(self, subdomain): - "Return the volume of a subdomain." - - def subvolumes(self, subdomain): - "Return the volumes of the sub-subdomains." - - -def _choose_point_in_subinterval(a, b): - m = a + (b - a) / 2 - if not a < m < b: - raise ValueError("{} cannot be split further".format(subinterval)) - return m - - -class Interval(Domain): - """A 1D domain (an interval). 
- - Subdomains are pairs of floats (a, b). - """ - - def __init__(self, a, b): - if a >= b: - raise ValueError("'a' must be less than 'b'") - - # If a sub-interval contains points in its interior, they are stored - # in 'sub_intervals' in a SortedList. - self.bounds = (a, b) - self.sub_intervals = dict() - self.points = SortedList([a, b]) - self.ndim = 1 - - def insert_points(self, subdomain, n, *, _check_membership=True): - if n <= 0: - raise ValueError("n must be positive") - if _check_membership and subdomain not in self: - raise ValueError("{} is not present in this interval".format(subdomain)) - try: - p = self.sub_intervals[subdomain] - except KeyError: # No points yet in the interior of this subdomain - p = SortedList(subdomain) - self.sub_intervals[subdomain] = p - - # Choose new points in the centre of the largest subdomain - # of this subinterval. - points = [] - subsubdomains = SortedList(zip(p, p.islice(1)), key=self.volume) - for _ in range(n): - a, b = subsubdomains.pop() - m = _choose_point_in_subinterval(a, b) - subsubdomains.update([(a, m), (m, b)]) - points.append(m) - p.update(points) - - return points, [subdomain] - - def insert(self, x, *, _check_membership=True): - if _check_membership: - a, b = self.bounds - if not (a <= x <= b): - raise ValueError("{} is outside of this interval".format(x)) - - p = self.points - i = p.bisect_left(x) - if p[i] == x: - raise ValueError("{} exists in this interval already".format(x)) - subdomain = (p[i - 1], p[i]) - - try: - p = self.sub_intervals[subdomain] - except KeyError: - self.sub_intervals[subdomain] = SortedList([a, x, b]) - else: - if x in p: - raise ValueError("{} exists in a subinterval already".format(x)) - p.add(x) - - return [subdomain] - - def remove(self, x, *, _check_membership=True): - if _check_membership: - a, b = self.bounds - if not (a <= x <= b): - raise ValueError("{} is outside of this interval".format(x)) - - p = self.points - i = p.bisect_left(x) - if p[i] == x: - raise 
ValueError("Cannot remove subdomain vertices") - subdomain = (p[i - 1], p[i]) - - try: - sub_points = self.sub_intervals[subdomain] - except KeyError: - raise ValueError("{} not in any subdomain".format(x)) - else: - sub_points.remove(x) - return [subdomain] - - def split_at(self, x, *, _check_membership=True): - a, b = self.bounds - if _check_membership: - if not (a <= x <= b): - raise ValueError("Can only split at points within the interval") - - p = self.points - i = p.bisect_left(x) - if p[i] == x: - raise ValueError("Cannot split at an existing point") - a, b = old_interval = p[i - 1], p[i] - new_intervals = [(a, x), (x, b)] - - p.add(x) - try: - sub_points = self.sub_intervals.pop(old_interval) - except KeyError: - pass - else: - # Update subintervals - for ival in new_intervals: - new_sub_points = SortedList(sub_points.irange(*ival)) - if x not in new_sub_points: - # This should add 'x' to the start or the end - new_sub_points.add(x) - if len(new_sub_points) > 2: - # We don't store subintervals if they don't contain - # any points in their interior. 
- self.sub_intervals[ival] = new_sub_points - - return [old_interval], new_intervals - - def which_subdomains(self, x): - a, b = self.bounds - if not (a <= x <= b): - raise ValueError("{} is outside the interval".format(x)) - p = self.points - i = p.bisect_left(x) - if p[i] != x: - # general point inside a subinterval - return [(p[i - 1], p[i])] - else: - # boundary of a subinterval - neighbors = [] - if i > 0: - neighbors.append((p[i - 1], p[i])) - if i < len(p) - 1: - neighbors.append((p[i], p[i + 1])) - return neighbors - - def __contains__(self, subdomain): - a, b = subdomain - try: - ia = self.points.index(a) - ib = self.points.index(b) - except ValueError: - return False - return ia + 1 == ib - - def vertices(self): - return self.points - - def transform(self, x): - a, b = self.bounds - return (x - a) / (b - a) - - def neighbors(self, subdomain, n=1): - a, b = subdomain - p = self.points - ia = p.index(a) - neighbors = [] - for i in range(n): - if ia - i > 0: # left neighbor exists - neighbors.append((p[ia - i - 1], p[ia - i])) - if ia + i < len(p) - 2: # right neighbor exists - neighbors.append((p[ia + i + 1], p[ia + i + 2])) - return neighbors - - def subdomains(self): - p = self.points - return zip(p, p.islice(1)) - - def subpoints(self, subdomain, *, _check_membership=True): - if _check_membership and subdomain not in self: - raise ValueError("{} is not present in this interval".format(subdomain)) - try: - p = self.sub_intervals[subdomain] - except KeyError: - return [] - else: - # subinterval points contain the vertex points - return p[1:-1] - - def clear_subdomains(self): - subdomains = list(self.sub_intervals.keys()) - self.sub_intervals = dict() - return subdomains - - def volume(self, subdomain): - a, b = subdomain - return b - a - - def subvolumes(self, subdomain): - try: - p = self.sub_intervals[subdomain] - except KeyError: - return [self.volume(subdomain)] - else: - return [self.volume(s) for s in zip(p, p.islice(1))] - - -def 
_choose_point_in_simplex(simplex, transform=None): - """Choose a good point at which to split a simplex. - - Parameters - ---------- - simplex : (n+1, n) array - The simplex vertices - transform : (n, n) array - The linear transform to apply to the simplex vertices - before determining which point to choose. Must be - invertible. - - Returns - ------- - point : (n,) array - The point that was chosen in the simplex - """ - if transform is not None: - simplex = np.dot(simplex, transform) - - # Choose center only if the shape of the simplex is nice, - # otherwise: the center the longest edge - center, _radius = circumsphere(simplex) - if point_in_simplex(center, simplex): - point = np.average(simplex, axis=0) - else: - distances = scipy.spatial.distance.pdist(simplex) - distance_matrix = scipy.spatial.distance.squareform(distances) - i, j = np.unravel_index(np.argmax(distance_matrix), distance_matrix.shape) - point = (simplex[i, :] + simplex[j, :]) / 2 - - if transform is not None: - point = np.linalg.solve(transform, point) # undo the transform - - return tuple(point) - - -def _simplex_facets(ndim): - """Return the facets of a simplex in 'ndim' dimensions - - A simplex in 'ndim' dimensions consists of 'ndim + 1' points - [0, ndim + 1) - - Parameters - ---------- - ndim : positive int - - Returns - ------- - facets : Iterable of integer tuples - Contains 'ndim + 1' tuples, and each tuple contains - 'ndim' integers. - """ - return itertools.combinations(range(ndim + 1), ndim) - - -def _boundary_equations(simplex): - """Return the set of equations defining the boundary of a simplex - - This is slower than using scipy.spatial.ConvexHull, however the ordering - of the equations is not clear for that case. - - Care is not taken to orient the facets to point out of the simplex; the - equations should only be used for verifying if a point lies on a boundary, - rather than if it lies inside the simplex. 
- - >>> simplex = [(0, 0), (1, 0), (0, 1)] - >>> A, b = _boundary_equations(simplex) - >>> x = [0.5, 0] - >>> which_boundary = np.isclose(A @ x + b, 0) - >>> # facet #0 is the line between (0, 0) and (1, 0) - >>> assert which_boundary[0] == True - - Parameters - ---------- - simplex : (N + 1, N) float array-like - The vertices of an N-dimensional simplex. - - Returns - ------- - A : (N + 1, N) float array - Each row is a normal vector to a facet of 'simplex'. - The facets are in the same order as returned by - '_simplex_facets(N)'. - b : (N + 1,) float array - Each element is the offset from the origin of the - corresponding facet of 'simplex' - """ - points = np.asarray(simplex) - ndim = points.shape[1] - assert points.shape == (ndim + 1, ndim) - A = np.empty((ndim + 1, ndim), dtype=float) - b = np.empty((ndim + 1), dtype=float) - for i, (x0, *v) in enumerate(_simplex_facets(ndim)): - facet_tangent_space = points[list(v)] - points[x0] - facet_normal = scipy.linalg.null_space(facet_tangent_space).squeeze() - A[i, :] = facet_normal - b[i] = np.dot(points[x0], facet_normal) - return A, b - - -def _on_which_boundary(equations, x, eps=1e-8): - """Returns the simplex boundary on which 'x' is found. - - >>> simplex = [(0., 0.), (2., 0.), (0., 4.)] - >>> eq = _boundary_equations(simplex) - >>> x = [0.5, 0.] - >>> _on_which_boundary(eq, x) == (0, 1) - >>> assert boundary == (0, 1) - >>> x = [2., 0.] - >>> _on_which_boundary(eq, x) == (1,) - - Parameters - ---------- - equations : the output of _boundary_equations - The equations defining a simplex in 'N' dimensions - x : (N,) float array-like - - Returns - ------- - None if 'x' is not on a simplex boundary. - Otherwise, returns a tuple containing integers defining - the boundary on which 'x' is found. 
- """ - ndim = len(x) - A, b = equations - assert len(b) == ndim + 1 - on_boundary = np.isclose(A @ x + b, 0, atol=1e-8) - if not any(on_boundary): - return None - # The point is on the boundary of all the following facets - facets = [facet for i, facet in enumerate(_simplex_facets(ndim)) if on_boundary[i]] - # If the point is on the boundary of more than 1 facet, then it is on a lower-dimension facet. - boundary_facet = set.intersection(*map(set, facets)) - return tuple(sorted(boundary_facet)) - - -def _make_new_subtriangulation(points): - points = np.asarray(points) - ndim = points.shape[1] - boundary_points = points[: ndim + 1] - subtri = Triangulation(points) - subtri.on_which_boundary = functools.partial( - _on_which_boundary, _boundary_equations(boundary_points) - ) - return subtri - - -class ConvexHull(Domain): - """A convex hull domain in $ℝ^N$ (N >=2). - - Subdomains are simplices represented by integer tuples of length (N + 1). - """ - - def __init__(self, hull): - assert isinstance(hull, scipy.spatial.ConvexHull) - - self.bounds = hull - self.triangulation = Triangulation(hull.points[hull.vertices]) - # if a subdomain has interior points, then it appears as a key - # in 'sub_domains' and maps to a 'Triangulation' of the - # interior of the subdomain. By definition the triangulation - # is over a simplex, and the first 'ndim + 1' points in the - # triangulation are the boundary points. - self.sub_domains = dict() - self.ndim = self.bounds.points.shape[1] - - # As an optimization we store any points inserted with 'insert_points' - # and 'insert' and point to the subdomains to which they belong. 
This - # allows 'which_subdomains' and 'split_at' to work faster when given points - # that were previously added with 'insert' or 'insert_points' - self.subpoints_to_subdomains = defaultdict(set) - - @property - def bounding_box(self): - hull_points = self.bounds.points[self.bounds.vertices] - return tuple(zip(hull_points.min(axis=0), hull_points.max(axis=0))) - - def _get_subtriangulation(self, subdomain): - try: - subtri = self.sub_domains[subdomain] - except KeyError: # No points in the interior of this subdomain yet - points = [self.triangulation.vertices[x] for x in subdomain] - subtri = _make_new_subtriangulation(points) - self.sub_domains[subdomain] = subtri - return subtri - - def insert_points(self, subdomain, n, *, _check_membership=True): - if n <= 0: - raise ValueError("n must be positive") - tri = self.triangulation - if _check_membership and subdomain not in tri.simplices: - raise ValueError("{} is not present in this domain".format(subdomain)) - - subtri = self._get_subtriangulation(subdomain) - - # Choose the largest volume sub-simplex and insert a point into it. - # Also insert the point into neighboring subdomains if it was chosen - # on the subdomain boundary. - points = [] - affected_subdomains = {subdomain} - for _ in range(n): - # O(N) in the number of sub-simplices, but typically we only have a few - largest_simplex = max(subtri.simplices, key=subtri.volume) - simplex_vertices = np.array([subtri.vertices[s] for s in largest_simplex]) - point = _choose_point_in_simplex(simplex_vertices) - points.append(point) - subtri.add_point(point, largest_simplex) - self.subpoints_to_subdomains[point].add(subdomain) - # If the point was added to a boundary of the subdomain we should - # add it to the neighboring subdomains. 
- boundary = subtri.on_which_boundary(point) - if boundary is not None: - # Convert subtriangulation indices to triangulation indices - boundary = tuple(sorted(subdomain[i] for i in boundary)) - neighbors = set(tri.containing(boundary)) - neighbors.remove(subdomain) - for sd in neighbors: - self._get_subtriangulation(sd).add_point(point) - affected_subdomains.update(neighbors) - self.subpoints_to_subdomains[point].update(neighbors) - - return [tuple(p) for p in points], affected_subdomains - - def insert(self, x, *, _check_membership=True): - x = tuple(x) - # XXX: O(N) in the number of simplices - affected_subdomains = self.which_subdomains(x) - if not affected_subdomains: - raise ValueError("{} is not present in this domain".format(x)) - for subdomain in affected_subdomains: - subtri = self._get_subtriangulation(subdomain) - if x in subtri.vertices: # O(N) in the number of vertices - raise ValueError("{} exists in a subinterval already".format(x)) - subtri.add_point(x) - self.subpoints_to_subdomains[x].update(affected_subdomains) - - return affected_subdomains - - def remove(self, x): - x = tuple(x) - try: - affected_subdomains = self.subpoints_to_subdomains.pop(x) - except KeyError: - raise ValueError("Can only remove points inside subdomains") - for subdomain in affected_subdomains: - # Check that it's not a vertex of the subdomain - subtri = self.sub_domains[subdomain] - assert x in subtri.vertices - points = [v for v in subtri.vertices if v != x] - if len(points) == self.ndim + 1: - # No more points inside the subdomain - del self.sub_domains[subdomain] - else: - # Rebuild the subtriangulation from scratch - self.sub_domains[subdomain] = _make_new_subtriangulation(points) - - def split_at(self, x, *, _check_membership=True): - x = tuple(x) - tri = self.triangulation - try: - containing_subdomains = self.subpoints_to_subdomains.pop(x) - # Only need a single subdomaing 'x' to make 'tri.add_point' fast. 
- subdomain = next(iter(containing_subdomains)) - except KeyError: - # XXX: O(N) in the number of simplices. - subdomain = tri.locate_point(x) - if not subdomain: - raise ValueError("Can only split at points within the domain.") - - old_subdomains, new_subdomains = tri.add_point(x, subdomain) - - if _check_membership: - assert not any(s in self.sub_domains for s in new_subdomains) - - # Re-assign all the interior points of 'old_subdomains' to 'new_subdomains' - - # Keep the interior points as a set, because interior points on a shared face - # appear in the subtriangulations of both the neighboring simplices, and we - # don't want those points to appear twice. - interior_points = set() - for d in old_subdomains: - try: - subtri = self.sub_domains.pop(d) - except KeyError: - continue - else: - # Get all points in the subtriangulation except the boundary - # points. Because a subtriangulation is always defined over - # a simplex, the first ndim + 1 points are the boundary points. - interior = [v for v in subtri.vertices[self.ndim + 1 :] if v != x] - for v in interior: - s = self.subpoints_to_subdomains[v] - s.remove(d) - if not s: - del self.subpoints_to_subdomains[v] - interior_points.update(interior) - for p in interior_points: - # Try to add 'p' to all the new subdomains. It may belong to more than 1 - # if it lies on a subdomain boundary. 
- p_was_added = False - for subdomain in new_subdomains: - if tri.point_in_simplex(p, subdomain): - subtri = self._get_subtriangulation(subdomain) - subtri.add_point(p) - self.subpoints_to_subdomains[p].add(subdomain) - p_was_added = True - assert ( - p_was_added - ), "{} was not in the interior of any new simplices".format(x) - - return old_subdomains, new_subdomains - - def which_subdomains(self, x): - x = tuple(x) - tri = self.triangulation - if x in self.subpoints_to_subdomains: - subdomains = self.subpoints_to_subdomains[x] - else: - # XXX: O(N) in the number of simplices - subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] - if not subdomains: - raise ValueError("{} is not in the domain".format(x)) - return list(subdomains) - - def __contains__(self, subdomain): - return subdomain in self.triangulation.simplices - - def transform(self, x): - # XXX: implement this - raise NotImplementedError() - - def neighbors(self, subdomain, n=1): - tri = self.triangulation - neighbors = {subdomain} - for _ in range(n): - for face in list(tri.faces(simplices=neighbors)): - neighbors.update(tri.containing(face)) - neighbors.remove(subdomain) - return neighbors - - def subdomains(self): - return self.triangulation.simplices - - def vertices(self): - return self.triangulation.vertices - - def subpoints(self, subdomain, *, _check_membership=True): - if _check_membership and subdomain not in self: - raise ValueError("{} is not present in this domain".format(subdomain)) - try: - subtri = self.sub_domains[subdomain] - except KeyError: - return [] - else: - # Subtriangulations are, by definition, over simplices. 
This means - # that the first ndim + 1 points are the simplex vertices, which we skip - return subtri.vertices[self.ndim + 1 :] - - def clear_subdomains(self): - sub_domains = list(self.sub_domains.keys()) - self.sub_domains = dict() - return sub_domains - - def volume(self, subdomain): - return self.triangulation.volume(subdomain) - - def subvolumes(self, subdomain): - try: - subtri = self.sub_domains[subdomain] - except KeyError: - return [self.triangulation.volume(subdomain)] - else: - return [subtri.volume(s) for s in subtri.simplices] - - -class Empty(KeyError): - pass - - -class Queue: - """Priority queue supporting update and removal at arbitrary position. - - Parameters - ---------- - entries : iterable of (item, priority) - The initial data in the queue. Providing this is faster than - calling 'insert' a bunch of times. - """ - - def __init__(self, entries=()): - self._queue = SortedDict( - ((priority, -n), item) for n, (item, priority) in enumerate(entries) - ) - # 'self._queue' cannot be keyed only on priority, as there may be several - # items that have the same priority. To keep unique elements the key - # will be '(priority, self._n)', where 'self._n' is decremented whenever - # we add a new element. 'self._n' is negative so that elements with equal - # priority are sorted by insertion order. - self._n = -len(self._queue) - # To efficiently support updating and removing items if their priority - # is unknown we have to keep the reverse map of 'self._queue'. Because - # items may not be hashable we cannot use a SortedDict, so we use a - # SortedList storing '(item, key)'. - self._items = SortedList(((v, k) for k, v in self._queue.items())) - - def __len__(self): - return len(self._queue) - - def items(self): - "Return an iterator over the items in the queue in arbitrary order." - return reversed(self._queue.values()) - - def priorities(self): - "Return an iterator over the priorities in the queue in arbitrary order." 
- return reversed(self._queue) - - def peek(self): - "Return the item and priority at the front of the queue." - self._check_nonempty() - ((priority, _), item) = self._queue.peekitem() - return item, priority - - def pop(self): - "Remove and return the item and priority at the front of the queue." - self._check_nonempty() - (key, item) = self._queue.popitem() - i = self._items.index((item, key)) - del self._items[i] - priority, _ = key - return item, priority - - def insert(self, item, priority): - "Insert 'item' into the queue with the given priority." - key = (priority, self._n) - self._items.add((item, key)) - self._queue[key] = item - self._n -= 1 - - def _check_nonempty(self): - if not self._queue: - raise Empty() - - def _find_first(self, item): - self._check_nonempty() - i = self._items.bisect_left((item, ())) - try: - should_be, key = self._items[i] - except IndexError: - raise KeyError("item is not in queue") - if item != should_be: - raise KeyError("item is not in queue") - return i, key - - def remove(self, item): - "Remove the 'item' from the queue." - i, key = self._find_first(item) - del self._queue[key] - del self._items[i] - - def update(self, item, priority): - """Update 'item' in the queue to have the given priority. - - Raises - ------ - KeyError : if 'item' is not in the queue. 
- """ - i, key = self._find_first(item) - _, n = key - new_key = (priority, n) - - del self._queue[key] - self._queue[new_key] = item - del self._items[i] - self._items.add((item, new_key)) +from adaptive.priority_queue import Queue +from adaptive.domain import Interval, ConvexHull class LossFunction: @@ -898,7 +112,7 @@ def __init__(self, f, bounds, loss=None): self.ndim = 1 else: boundary_points = sorted(tuple(p) for p in itertools.product(*bounds)) - self.domain = ConvexHull(scipy.spatial.ConvexHull(boundary_points)) + self.domain = ConvexHull(boundary_points) self.loss_function = loss or EmbeddedVolumeLoss() self.ndim = len(boundary_points[0]) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py new file mode 100644 index 000000000..ad65cc515 --- /dev/null +++ b/adaptive/priority_queue.py @@ -0,0 +1,102 @@ +from sortedcontainers import SortedDict, SortedList + + +class Empty(KeyError): + pass + + +class Queue: + """Priority queue supporting update and removal at arbitrary position. + + Parameters + ---------- + entries : iterable of (item, priority) + The initial data in the queue. Providing this is faster than + calling 'insert' a bunch of times. + """ + + def __init__(self, entries=()): + self._queue = SortedDict( + ((priority, -n), item) for n, (item, priority) in enumerate(entries) + ) + # 'self._queue' cannot be keyed only on priority, as there may be several + # items that have the same priority. To keep unique elements the key + # will be '(priority, self._n)', where 'self._n' is decremented whenever + # we add a new element. 'self._n' is negative so that elements with equal + # priority are sorted by insertion order. + self._n = -len(self._queue) + # To efficiently support updating and removing items if their priority + # is unknown we have to keep the reverse map of 'self._queue'. Because + # items may not be hashable we cannot use a SortedDict, so we use a + # SortedList storing '(item, key)'. 
+ self._items = SortedList(((v, k) for k, v in self._queue.items())) + + def __len__(self): + return len(self._queue) + + def items(self): + "Return an iterator over the items in the queue in arbitrary order." + return reversed(self._queue.values()) + + def priorities(self): + "Return an iterator over the priorities in the queue in arbitrary order." + return reversed(self._queue) + + def peek(self): + "Return the item and priority at the front of the queue." + self._check_nonempty() + ((priority, _), item) = self._queue.peekitem() + return item, priority + + def pop(self): + "Remove and return the item and priority at the front of the queue." + self._check_nonempty() + (key, item) = self._queue.popitem() + i = self._items.index((item, key)) + del self._items[i] + priority, _ = key + return item, priority + + def insert(self, item, priority): + "Insert 'item' into the queue with the given priority." + key = (priority, self._n) + self._items.add((item, key)) + self._queue[key] = item + self._n -= 1 + + def _check_nonempty(self): + if not self._queue: + raise Empty() + + def _find_first(self, item): + self._check_nonempty() + i = self._items.bisect_left((item, ())) + try: + should_be, key = self._items[i] + except IndexError: + raise KeyError("item is not in queue") + if item != should_be: + raise KeyError("item is not in queue") + return i, key + + def remove(self, item): + "Remove the 'item' from the queue." + i, key = self._find_first(item) + del self._queue[key] + del self._items[i] + + def update(self, item, priority): + """Update 'item' in the queue to have the given priority. + + Raises + ------ + KeyError : if 'item' is not in the queue. 
+ """ + i, key = self._find_first(item) + _, n = key + new_key = (priority, n) + + del self._queue[key] + self._queue[new_key] = item + del self._items[i] + self._items.add((item, new_key)) From 95c0381dbea4a18c8ca2a98dde84cbd00184a202 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 16:32:17 +0200 Subject: [PATCH 049/105] add tests for priority queue and domains --- adaptive/tests/domain_utils.py | 160 +++++++++++++++++++++ adaptive/tests/test_domain.py | 195 ++++++++++++++++++++++++++ adaptive/tests/test_priority_queue.py | 82 +++++++++++ 3 files changed, 437 insertions(+) create mode 100644 adaptive/tests/domain_utils.py create mode 100644 adaptive/tests/test_domain.py create mode 100644 adaptive/tests/test_priority_queue.py diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py new file mode 100644 index 000000000..5570b71aa --- /dev/null +++ b/adaptive/tests/domain_utils.py @@ -0,0 +1,160 @@ +import itertools + +import numpy as np +import scipy.linalg +import scipy.spatial + +import hypothesis.strategies as st +from adaptive.learner.new_learnerND import ConvexHull, Interval +from adaptive.learner.triangulation import point_in_simplex +from hypothesis.extra import numpy as hynp + + +def reflections(ndim): + return map(np.diag, itertools.product([1, -1], repeat=ndim)) + + +reals = st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False) +positive_reals = st.floats(min_value=1E-3, max_value=100, allow_nan=False, allow_infinity=False) + + +@st.composite +def point(draw, ndim): + return draw(reals if ndim == 1 else st.tuples(*[reals] * ndim)) + + +def unique_vectors(xs): + xs = np.asarray(xs) + if len(xs.shape) == 1: + xs = xs[:, None] + c = np.max(np.linalg.norm(xs, axis=1)) + if c == 0: + return False + d = scipy.spatial.distance_matrix(xs, xs) + d = np.extract(1 - np.identity(d.shape[0]), d) + return not np.any(d < 1E-3 / c) + + +@st.composite +def point_inside_simplex(draw, simplex): + simplex = 
np.asarray(simplex) + dim = simplex.shape[1] + # Set the numpy random seed + draw(st.random_module()) + # Generate a point in the unit simplex. + # https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex + # We avoid using Hypothesis to generate the points as it typically chooses + # very annoying points, which we want to avoid testing for now. + xb = np.random.rand(dim) + xb = np.array(sorted(xb)) + xb[1:] = xb[1:] - xb[:-1] + # Transform into the simplex we need + v0, vecs = simplex[0], simplex[1:] - simplex[0] + x = tuple(v0 + (vecs.T @ xb)) + return x + + +@st.composite +def points_inside(draw, domain, n): + kwargs = dict(allow_nan=False, allow_infinity=False, exclude_min=True, exclude_max=True) + if isinstance(domain, Interval): + a, b = domain.bounds + eps = (b - a) * 1E-2 + x = st.floats(min_value=(a + eps), max_value=(b - eps), **kwargs) + else: + assert isinstance(domain, ConvexHull) + tri = domain.triangulation + simplices = list(tri.simplices) + simplex = draw(st.sampled_from(simplices)) + vertices = [tri.vertices[s] for s in simplex] + x = point_inside_simplex(vertices) + + xs = st.tuples(*[x] * n).filter(unique_vectors) + return draw(xs) + + +@st.composite +def point_inside(draw, domain): + return draw(points_inside(domain, 1))[0] + + +@st.composite +def a_few_points_inside(draw, domain): + n = draw(st.integers(3, 20)) + return draw(points_inside(domain, n)) + + +@st.composite +def point_outside(draw, domain): + kwargs = dict(allow_nan=False, allow_infinity=False) + if isinstance(domain, Interval): + a, b = domain.bounds + length = b - a + x = ( + st.floats(a - 10 * length, a, **kwargs) + | st.floats(b, b + 10 * length, **kwargs) + ) + else: + assert isinstance(domain, ConvexHull) + hull = domain.bounds + # Generate point between bounding box and bounding box * 10 + points = hull.points[hull.vertices] + x = st.tuples(*[ + (st.floats(min_value=a - 10 * (b - a), max_value=a, **kwargs) + | st.floats(min_value=b, max_value=b + 10 * (b - 
a), **kwargs) + ) + for a, b in zip(points.min(axis=0), points.max(axis=0)) + ]) + + return draw(x) + + +@st.composite +def point_on_shared_face(draw, domain, dim): + # Return a point that is shared by at least 2 subdomains + assert isinstance(domain, ConvexHull) + assert 0 < dim < domain.ndim + + tri = domain.triangulation + + for face in tri.faces(dim + 1): + containing_subdomains = tri.containing(face) + n_neighbors = len(containing_subdomains) + if len(containing_subdomains) > 1: + break + + vertices = np.array([tri.vertices[i] for i in face]) + + f = st.floats(1E-3, 1 - 1E-3, allow_nan=False, allow_infinity=False) + xb = draw(st.tuples(*[f] * dim)) + + x = tuple(vertices[0] + xb @ (vertices[1:] - vertices[0])) + + assert all(tri.point_in_simplex(x, s) for s in containing_subdomains) + + return x + + +@st.composite +def make_random_domain(draw, ndim, fill=True): + if ndim == 1: + limits = draw(st.tuples(reals, reals).map(sorted).filter(lambda x: x[0] < x[1])) + domain = Interval(*limits) + else: + points = draw(hynp.arrays(np.float, (10, ndim), elements=reals, unique=True) + .filter(unique_vectors)) + domain = ConvexHull(points) + return domain + + +@st.composite +def make_hypercube_domain(draw, ndim, fill=True): + if ndim == 1: + limit = draw(positive_reals) + subdomain = Interval(-limit, limit) + else: + x = draw(positive_reals) + point = np.full(ndim, x) + boundary_points = [r @ point for r in reflections(ndim)] + subdomain = ConvexHull(boundary_points) + return subdomain diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py new file mode 100644 index 000000000..7fb9d84aa --- /dev/null +++ b/adaptive/tests/test_domain.py @@ -0,0 +1,195 @@ +import itertools + +import numpy as np + +import hypothesis.strategies as st +import pytest +from adaptive.tests.domain_utils import ( + a_few_points_inside, + make_hypercube_domain, + point_inside, + point_outside, + point_on_shared_face, +) +from hypothesis import given, settings + + 
+@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_getting_points_are_unique(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + points = [] + for subdomain in domain.subdomains(): + p, _ = domain.insert_points(subdomain, 10) + assert len(p) == len(set(p)) + points.extend(p) + assert len(points) == len(set(points)) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_sum_subvolumes_equals_volume(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + for x in xs: + domain.split_at(x) + for subdomain in domain.subdomains(): + assert np.isclose(domain.volume(subdomain), sum(domain.subvolumes(subdomain))) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_split_at_vertex_raises(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x = data.draw(point_inside(domain)) + domain.split_at(x) + with pytest.raises(ValueError): + domain.split_at(x) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_inserting_point_twice_raises(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x = data.draw(point_inside(domain)) + domain.insert(x) + with pytest.raises(ValueError): + domain.insert(x) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_insert_points_outside_domain_raises(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x = data.draw(point_outside(domain)) + with pytest.raises(ValueError): + domain.insert(x) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_split_at_point_outside_domain_raises(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x = data.draw(point_outside(domain)) + with pytest.raises(ValueError): + domain.insert(x) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_removing_domain_vertex_raises(data, ndim): + domain = 
data.draw(make_hypercube_domain(ndim)) + x = data.draw(point_inside(domain)) + domain.split_at(x) + with pytest.raises(ValueError): + domain.remove(x) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_removing_nonexistant_point_raises(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x = data.draw(point_inside(domain)) + with pytest.raises(ValueError): + domain.remove(x) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_splitting_at_point_adds_to_vertices(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + for x in xs: + domain.split_at(x) + vertices = set(domain.vertices()) + assert all(x in vertices for x in xs) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_inserting_points_adds_to_subpoints(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + subdomains = dict() + for x in xs: + subdomains[x] = domain.insert(x) + for x in xs: + for subdomain in subdomains[x]: + assert x in domain.subpoints(subdomain) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_inserting_then_removing_points_removes_from_subpoints(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + subdomains = dict() + for x in xs: + domain.insert(x) + for x in xs: + domain.remove(x) + assert not any(domain.subpoints(s) for s in domain.subdomains()) + + +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +@settings(deadline=300) +def test_inserting_then_splitting_at_points_removes_from_subpoints(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + subdomains = dict() + for x in xs: + domain.insert(x) + for x in xs: + domain.split_at(x) + assert not any(domain.subpoints(s) for s in domain.subdomains()) + 
+ +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_clear_subdomains_removes_all_points(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + for x in xs: + domain.insert(x) + assert len(xs) == sum(len(domain.subpoints(s)) for s in domain.subdomains()) + domain.clear_subdomains() + assert 0 == sum(len(domain.subpoints(s)) for s in domain.subdomains()) + + +### Interval tests + + +### ConvexHull tests + + +@pytest.mark.parametrize("ndim", [2, 3]) +@given(data=st.data()) +def test_inserting_point_on_boundary_adds_to_all_subtriangulations(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + for x in xs: + domain.split_at(x) + x = data.draw(point_on_shared_face(domain, 1)) + affected_subdomains = domain.insert(x) + assert all(x in set(domain.subpoints(s)) for s in affected_subdomains) + + +@pytest.mark.parametrize("ndim", [2, 3]) +@given(data=st.data()) +def test_split_at_reassigns_all_internal_points(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + xs = data.draw(a_few_points_inside(domain)) + + for x in xs: + domain.insert(x) + _, new_subdomains = domain.split_at(xs[0]) + subpoints = set.union(*(set(domain.subpoints(s)) for s in new_subdomains)) + assert set(xs[1:]) == subpoints diff --git a/adaptive/tests/test_priority_queue.py b/adaptive/tests/test_priority_queue.py new file mode 100644 index 000000000..7ae3a9724 --- /dev/null +++ b/adaptive/tests/test_priority_queue.py @@ -0,0 +1,82 @@ +from hypothesis import given, assume +import hypothesis.strategies as st +import pytest + +from adaptive.priority_queue import Queue, Empty + + +item = st.floats(allow_nan=False) +priority = st.floats(allow_nan=False) +items = st.lists(st.tuples(item, priority)) + + +@given(items, item) +def test_remove_nonexisting_item_raises(items, missing_item): + if items: + i, p = zip(*items) + assume(missing_item not in i) + q = 
Queue(items) + with pytest.raises(KeyError): + q.remove(missing_item) + + +@given(items, item) +def test_update_nonexisting_item_raises(items, missing_item): + if items: + i, p = zip(*items) + assume(missing_item not in i) + q = Queue(items) + with pytest.raises(KeyError): + q.update(missing_item, 0) + + +@given(items, item) +def test_remove_item_inserted_twice_removes_lowest_priority(items, missing_item): + if items: + i, p = zip(*items) + assume(missing_item not in i) + q = Queue(items) + + q.insert(missing_item, 0) + q.insert(missing_item, 1) + q.remove(missing_item) # should remove priority 0 item + # Get 'missing_item' out of the queue + t = None + while t != missing_item: + t, prio = q.pop() + assert prio == 1 + +@given(items) +def test_all_items_in_queue(items): + if items: + values, _= zip(*items) + else: + values = [] + q = Queue(items) + assert sorted(values) == sorted(q.items()) + + +@given(items) +def test_pop_gives_max(items): + q = Queue(items) + if items: + l = len(q) + should_pop = max(items, key=lambda x: x[1]) + assert should_pop == q.pop() + assert len(q) == l - 1 + else: + with pytest.raises(Empty): + q.pop() + + +@given(items) +def test_peek_gives_max(items): + q = Queue(items) + if items: + l = len(q) + should_peek = max(items, key=lambda x: x[1]) + assert should_peek == q.peek() + assert len(q) == l + else: + with pytest.raises(Empty): + q.peek() From d191e3c6e3aea8abb82a9c017b8ddaa1ffb4b014 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 16:39:09 +0200 Subject: [PATCH 050/105] add hypothesis to test requirements --- test-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test-requirements.txt b/test-requirements.txt index 193a0531f..fc78090fe 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -3,4 +3,5 @@ pytest pytest-cov pytest-randomly pytest-timeout +hypothesis[numpy,pytest] pre_commit From b70eea89e8c876b084bdcf66741e9c4bbbd1a7e1 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 
12 Oct 2019 16:42:01 +0200 Subject: [PATCH 051/105] rename 'sub_domains' to 'sub_triangulations' --- adaptive/domain.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index 1b99027d5..f57187954 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -484,11 +484,11 @@ def __init__(self, points): self.bounds = hull self.triangulation = Triangulation(hull.points[hull.vertices]) # if a subdomain has interior points, then it appears as a key - # in 'sub_domains' and maps to a 'Triangulation' of the + # in 'sub_triangulations' and maps to a 'Triangulation' of the # interior of the subdomain. By definition the triangulation # is over a simplex, and the first 'ndim + 1' points in the # triangulation are the boundary points. - self.sub_domains = dict() + self.sub_triangulations = dict() self.ndim = self.bounds.points.shape[1] # As an optimization we store any points inserted with 'insert_points' @@ -504,11 +504,11 @@ def bounding_box(self): def _get_subtriangulation(self, subdomain): try: - subtri = self.sub_domains[subdomain] + subtri = self.sub_triangulations[subdomain] except KeyError: # No points in the interior of this subdomain yet points = [self.triangulation.vertices[x] for x in subdomain] subtri = _make_new_subtriangulation(points) - self.sub_domains[subdomain] = subtri + self.sub_triangulations[subdomain] = subtri return subtri def insert_points(self, subdomain, n, *, _check_membership=True): @@ -571,15 +571,15 @@ def remove(self, x): raise ValueError("Can only remove points inside subdomains") for subdomain in affected_subdomains: # Check that it's not a vertex of the subdomain - subtri = self.sub_domains[subdomain] + subtri = self.sub_triangulations[subdomain] assert x in subtri.vertices points = [v for v in subtri.vertices if v != x] if len(points) == self.ndim + 1: # No more points inside the subdomain - del self.sub_domains[subdomain] + del 
self.sub_triangulations[subdomain] else: # Rebuild the subtriangulation from scratch - self.sub_domains[subdomain] = _make_new_subtriangulation(points) + self.sub_triangulations[subdomain] = _make_new_subtriangulation(points) def split_at(self, x, *, _check_membership=True): x = tuple(x) @@ -597,7 +597,7 @@ def split_at(self, x, *, _check_membership=True): old_subdomains, new_subdomains = tri.add_point(x, subdomain) if _check_membership: - assert not any(s in self.sub_domains for s in new_subdomains) + assert not any(s in self.sub_triangulations for s in new_subdomains) # Re-assign all the interior points of 'old_subdomains' to 'new_subdomains' @@ -607,7 +607,7 @@ def split_at(self, x, *, _check_membership=True): interior_points = set() for d in old_subdomains: try: - subtri = self.sub_domains.pop(d) + subtri = self.sub_triangulations.pop(d) except KeyError: continue else: @@ -675,7 +675,7 @@ def subpoints(self, subdomain, *, _check_membership=True): if _check_membership and subdomain not in self: raise ValueError("{} is not present in this domain".format(subdomain)) try: - subtri = self.sub_domains[subdomain] + subtri = self.sub_triangulations[subdomain] except KeyError: return [] else: @@ -684,16 +684,16 @@ def subpoints(self, subdomain, *, _check_membership=True): return subtri.vertices[self.ndim + 1 :] def clear_subdomains(self): - sub_domains = list(self.sub_domains.keys()) - self.sub_domains = dict() - return sub_domains + sub_triangulations = list(self.sub_triangulations.keys()) + self.sub_triangulations = dict() + return sub_triangulations def volume(self, subdomain): return self.triangulation.volume(subdomain) def subvolumes(self, subdomain): try: - subtri = self.sub_domains[subdomain] + subtri = self.sub_triangulations[subdomain] except KeyError: return [self.triangulation.volume(subdomain)] else: From 4305536cdef5d2f120b6d65bd8e42ad9a0293697 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 16:53:24 +0200 Subject: [PATCH 052/105] flake 
--- adaptive/domain.py | 11 ++----- adaptive/tests/domain_utils.py | 45 ++++++++++++++++----------- adaptive/tests/test_domain.py | 4 --- adaptive/tests/test_priority_queue.py | 11 ++++--- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index f57187954..a12385661 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -6,14 +6,9 @@ import numpy as np import scipy.linalg import scipy.spatial -from sortedcontainers import SortedDict, SortedList - -from adaptive.learner.triangulation import ( - Triangulation, - circumsphere, - point_in_simplex, - simplex_volume_in_embedding, -) +from sortedcontainers import SortedList + +from adaptive.learner.triangulation import Triangulation, circumsphere, point_in_simplex __all__ = ["Domain", "Interval", "ConvexHull"] diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py index 5570b71aa..2dbf7e7d6 100644 --- a/adaptive/tests/domain_utils.py +++ b/adaptive/tests/domain_utils.py @@ -6,7 +6,6 @@ import hypothesis.strategies as st from adaptive.learner.new_learnerND import ConvexHull, Interval -from adaptive.learner.triangulation import point_in_simplex from hypothesis.extra import numpy as hynp @@ -15,7 +14,9 @@ def reflections(ndim): reals = st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False) -positive_reals = st.floats(min_value=1E-3, max_value=100, allow_nan=False, allow_infinity=False) +positive_reals = st.floats( + min_value=1e-3, max_value=100, allow_nan=False, allow_infinity=False +) @st.composite @@ -32,7 +33,7 @@ def unique_vectors(xs): return False d = scipy.spatial.distance_matrix(xs, xs) d = np.extract(1 - np.identity(d.shape[0]), d) - return not np.any(d < 1E-3 / c) + return not np.any(d < 1e-3 / c) @st.composite @@ -56,10 +57,12 @@ def point_inside_simplex(draw, simplex): @st.composite def points_inside(draw, domain, n): - kwargs = dict(allow_nan=False, allow_infinity=False, exclude_min=True, exclude_max=True) + 
kwargs = dict( + allow_nan=False, allow_infinity=False, exclude_min=True, exclude_max=True + ) if isinstance(domain, Interval): a, b = domain.bounds - eps = (b - a) * 1E-2 + eps = (b - a) * 1e-2 x = st.floats(min_value=(a + eps), max_value=(b - eps), **kwargs) else: assert isinstance(domain, ConvexHull) @@ -90,21 +93,23 @@ def point_outside(draw, domain): if isinstance(domain, Interval): a, b = domain.bounds length = b - a - x = ( - st.floats(a - 10 * length, a, **kwargs) - | st.floats(b, b + 10 * length, **kwargs) - ) + before_domain = st.floats(a - 10 * length, a, **kwargs) + after_domain = st.floats(b, b + 10 * length, **kwargs) + x = before_domain | after_domain else: assert isinstance(domain, ConvexHull) hull = domain.bounds # Generate point between bounding box and bounding box * 10 points = hull.points[hull.vertices] - x = st.tuples(*[ - (st.floats(min_value=a - 10 * (b - a), max_value=a, **kwargs) - | st.floats(min_value=b, max_value=b + 10 * (b - a), **kwargs) - ) - for a, b in zip(points.min(axis=0), points.max(axis=0)) - ]) + x = st.tuples( + *[ + ( + st.floats(min_value=a - 10 * (b - a), max_value=a, **kwargs) + | st.floats(min_value=b, max_value=b + 10 * (b - a), **kwargs) + ) + for a, b in zip(points.min(axis=0), points.max(axis=0)) + ] + ) return draw(x) @@ -119,13 +124,12 @@ def point_on_shared_face(draw, domain, dim): for face in tri.faces(dim + 1): containing_subdomains = tri.containing(face) - n_neighbors = len(containing_subdomains) if len(containing_subdomains) > 1: break vertices = np.array([tri.vertices[i] for i in face]) - f = st.floats(1E-3, 1 - 1E-3, allow_nan=False, allow_infinity=False) + f = st.floats(1e-3, 1 - 1e-3, allow_nan=False, allow_infinity=False) xb = draw(st.tuples(*[f] * dim)) x = tuple(vertices[0] + xb @ (vertices[1:] - vertices[0])) @@ -141,8 +145,11 @@ def make_random_domain(draw, ndim, fill=True): limits = draw(st.tuples(reals, reals).map(sorted).filter(lambda x: x[0] < x[1])) domain = Interval(*limits) else: - points = 
draw(hynp.arrays(np.float, (10, ndim), elements=reals, unique=True) - .filter(unique_vectors)) + points = draw( + hynp.arrays(np.float, (10, ndim), elements=reals, unique=True).filter( + unique_vectors + ) + ) domain = ConvexHull(points) return domain diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py index 7fb9d84aa..f4f380578 100644 --- a/adaptive/tests/test_domain.py +++ b/adaptive/tests/test_domain.py @@ -1,5 +1,3 @@ -import itertools - import numpy as np import hypothesis.strategies as st @@ -127,7 +125,6 @@ def test_inserting_then_removing_points_removes_from_subpoints(data, ndim): domain = data.draw(make_hypercube_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) - subdomains = dict() for x in xs: domain.insert(x) for x in xs: @@ -142,7 +139,6 @@ def test_inserting_then_splitting_at_points_removes_from_subpoints(data, ndim): domain = data.draw(make_hypercube_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) - subdomains = dict() for x in xs: domain.insert(x) for x in xs: diff --git a/adaptive/tests/test_priority_queue.py b/adaptive/tests/test_priority_queue.py index 7ae3a9724..e6c0b178d 100644 --- a/adaptive/tests/test_priority_queue.py +++ b/adaptive/tests/test_priority_queue.py @@ -46,10 +46,11 @@ def test_remove_item_inserted_twice_removes_lowest_priority(items, missing_item) t, prio = q.pop() assert prio == 1 + @given(items) def test_all_items_in_queue(items): if items: - values, _= zip(*items) + values, _ = zip(*items) else: values = [] q = Queue(items) @@ -60,10 +61,10 @@ def test_all_items_in_queue(items): def test_pop_gives_max(items): q = Queue(items) if items: - l = len(q) + lq = len(q) should_pop = max(items, key=lambda x: x[1]) assert should_pop == q.pop() - assert len(q) == l - 1 + assert len(q) == lq - 1 else: with pytest.raises(Empty): q.pop() @@ -73,10 +74,10 @@ def test_pop_gives_max(items): def test_peek_gives_max(items): q = Queue(items) if items: - l = len(q) + lq = len(q) should_peek = max(items, 
key=lambda x: x[1]) assert should_peek == q.peek() - assert len(q) == l + assert len(q) == lq else: with pytest.raises(Empty): q.peek() From faf68cc4b58a2a97daa105ad215fd328a0eff893 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 16:53:32 +0200 Subject: [PATCH 053/105] add __all__ to domain.py --- adaptive/priority_queue.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py index ad65cc515..4e000a39f 100644 --- a/adaptive/priority_queue.py +++ b/adaptive/priority_queue.py @@ -1,5 +1,7 @@ from sortedcontainers import SortedDict, SortedList +__all__ = ["Empty", "Queue"] + class Empty(KeyError): pass From 5377e691ff407e437c2fe0069152f52579287604 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 16:54:47 +0200 Subject: [PATCH 054/105] use math module --- adaptive/learner/new_learnerND.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 98c2cccab..14ff429e3 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -1,6 +1,6 @@ import itertools from collections.abc import Iterable -from math import sqrt +import math import numpy as np import scipy.interpolate @@ -34,7 +34,7 @@ def __call__(self, domain, subdomain, codomain_bounds, data): assert isinstance(domain, Interval) a, b = subdomain ya, yb = data[a], data[b] - return sqrt((b - a) ** 2 + (yb - ya) ** 2) + return math.sqrt((b - a) ** 2 + (yb - ya) ** 2) class EmbeddedVolumeLoss(LossFunction): @@ -189,7 +189,7 @@ def ask(self, n, tell_pending=True): else: points = self.boundary_points[self.n_asked : self.n_asked + n] # The boundary points should always be evaluated with the highest priority - losses = [float("inf")] * len(points) + losses = [math.inf] * len(points) if tell_pending: for x in points: self.pending_points.add(x) From 4b7f390ce460482e9d5f95ed019dc871e8180fe6 Mon Sep 17 00:00:00 2001 
From: Joseph Weston Date: Sat, 12 Oct 2019 17:11:34 +0200 Subject: [PATCH 055/105] docstring formatting --- adaptive/domain.py | 62 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index a12385661..a91944c29 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -359,9 +359,6 @@ def _choose_point_in_simplex(simplex, transform=None): def _simplex_facets(ndim): """Return the facets of a simplex in 'ndim' dimensions - A simplex in 'ndim' dimensions consists of 'ndim + 1' points - [0, ndim + 1) - Parameters ---------- ndim : positive int @@ -371,6 +368,14 @@ def _simplex_facets(ndim): facets : Iterable of integer tuples Contains 'ndim + 1' tuples, and each tuple contains 'ndim' integers. + + Examples + -------- + In 2D a simplex is a triangle (3 points) and the facets are the lines + joining these points: + + >>> list(_simplex_facets(2)) + [(0, 1), (0, 2), (1, 2)] """ return itertools.combinations(range(ndim + 1), ndim) @@ -378,20 +383,6 @@ def _simplex_facets(ndim): def _boundary_equations(simplex): """Return the set of equations defining the boundary of a simplex - This is slower than using scipy.spatial.ConvexHull, however the ordering - of the equations is not clear for that case. - - Care is not taken to orient the facets to point out of the simplex; the - equations should only be used for verifying if a point lies on a boundary, - rather than if it lies inside the simplex. 
- - >>> simplex = [(0, 0), (1, 0), (0, 1)] - >>> A, b = _boundary_equations(simplex) - >>> x = [0.5, 0] - >>> which_boundary = np.isclose(A @ x + b, 0) - >>> # facet #0 is the line between (0, 0) and (1, 0) - >>> assert which_boundary[0] == True - Parameters ---------- simplex : (N + 1, N) float array-like @@ -406,6 +397,25 @@ def _boundary_equations(simplex): b : (N + 1,) float array Each element is the offset from the origin of the corresponding facet of 'simplex' + + Notes + ----- + + This is slower than using scipy.spatial.ConvexHull, however the ordering + of the equations as returned by scipy.spatial.ConvexHull is not clear. + + Care is not taken to orient the facets to point out of the simplex; the + equations should only be used for verifying if a point lies on a boundary, + rather than if it lies inside the simplex. + + Examples + -------- + >>> simplex = [(0, 0), (1, 0), (0, 1)] + >>> A, b = _boundary_equations(simplex) + >>> x = [0.5, 0] + >>> which_boundary = np.isclose(A @ x + b, 0) + >>> # facet #0 is the line between (0, 0) and (1, 0) + >>> assert which_boundary[0] == True """ points = np.asarray(simplex) ndim = points.shape[1] @@ -423,14 +433,6 @@ def _boundary_equations(simplex): def _on_which_boundary(equations, x, eps=1e-8): """Returns the simplex boundary on which 'x' is found. - >>> simplex = [(0., 0.), (2., 0.), (0., 4.)] - >>> eq = _boundary_equations(simplex) - >>> x = [0.5, 0.] - >>> _on_which_boundary(eq, x) == (0, 1) - >>> assert boundary == (0, 1) - >>> x = [2., 0.] - >>> _on_which_boundary(eq, x) == (1,) - Parameters ---------- equations : the output of _boundary_equations @@ -442,6 +444,16 @@ def _on_which_boundary(equations, x, eps=1e-8): None if 'x' is not on a simplex boundary. Otherwise, returns a tuple containing integers defining the boundary on which 'x' is found. + + Examples + -------- + >>> simplex = [(0., 0.), (2., 0.), (0., 4.)] + >>> eq = _boundary_equations(simplex) + >>> x = [0.5, 0.] 
+ >>> _on_which_boundary(eq, x) == (0, 1) + >>> assert boundary == (0, 1) + >>> x = [2., 0.] + >>> _on_which_boundary(eq, x) == (1,) """ ndim = len(x) A, b = equations From 9ed92e8f9f2bb772f3f847c663ef082cf0040591 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 17:12:20 +0200 Subject: [PATCH 056/105] increase allowed time for tests to complete --- adaptive/tests/test_domain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py index f4f380578..f012ca073 100644 --- a/adaptive/tests/test_domain.py +++ b/adaptive/tests/test_domain.py @@ -14,6 +14,7 @@ @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) +@settings(deadline=500) def test_getting_points_are_unique(data, ndim): domain = data.draw(make_hypercube_domain(ndim)) points = [] @@ -134,7 +135,7 @@ def test_inserting_then_removing_points_removes_from_subpoints(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) -@settings(deadline=300) +@settings(deadline=500) def test_inserting_then_splitting_at_points_removes_from_subpoints(data, ndim): domain = data.draw(make_hypercube_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) From 6e95d687f2d988b3fd7b580b2abb511e2d9730d6 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 17:44:12 +0200 Subject: [PATCH 057/105] remove Queue.priorities method --- adaptive/priority_queue.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py index 4e000a39f..546114bf7 100644 --- a/adaptive/priority_queue.py +++ b/adaptive/priority_queue.py @@ -40,10 +40,6 @@ def items(self): "Return an iterator over the items in the queue in arbitrary order." return reversed(self._queue.values()) - def priorities(self): - "Return an iterator over the priorities in the queue in arbitrary order." 
- return reversed(self._queue) - def peek(self): "Return the item and priority at the front of the queue." self._check_nonempty() From 57107620b999a39824cf071de6d1c082676efbc9 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 17:45:41 +0200 Subject: [PATCH 058/105] Queue.items() returns items in priority order --- adaptive/priority_queue.py | 2 +- adaptive/tests/test_priority_queue.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py index 546114bf7..3466a6c2a 100644 --- a/adaptive/priority_queue.py +++ b/adaptive/priority_queue.py @@ -37,7 +37,7 @@ def __len__(self): return len(self._queue) def items(self): - "Return an iterator over the items in the queue in arbitrary order." + "Return an iterator over the items in the queue in priority order." return reversed(self._queue.values()) def peek(self): diff --git a/adaptive/tests/test_priority_queue.py b/adaptive/tests/test_priority_queue.py index e6c0b178d..60464f87f 100644 --- a/adaptive/tests/test_priority_queue.py +++ b/adaptive/tests/test_priority_queue.py @@ -49,12 +49,9 @@ def test_remove_item_inserted_twice_removes_lowest_priority(items, missing_item) @given(items) def test_all_items_in_queue(items): - if items: - values, _ = zip(*items) - else: - values = [] - q = Queue(items) - assert sorted(values) == sorted(q.items()) + # Items should be sorted from largest priority to smallest + sorted_items = [item for item, _ in sorted(items, key=lambda x: -x[1])] + assert sorted_items == list(Queue(items).items()) @given(items) From 3b2d9cd551a83582ebd85ae26a3aaa9064cab3f9 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 19:03:20 +0200 Subject: [PATCH 059/105] make LossFunction an ABC --- adaptive/learner/new_learnerND.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 14ff429e3..465c5a59b 100644 --- 
a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -1,3 +1,4 @@ +import abc import itertools from collections.abc import Iterable import math @@ -12,11 +13,12 @@ from adaptive.domain import Interval, ConvexHull -class LossFunction: - @property +class LossFunction(metaclass=abc.ABCMeta): + @abc.abtractproperty def n_neighbors(self): "The maximum degree of neighboring subdomains required." + @abc.abstractmethod def __call__(self, domain, subdomain, data): """Return the loss for 'subdomain' given 'data' From fc348b26ffcb43588bb3127528e4f2f91396adf1 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 19:04:09 +0200 Subject: [PATCH 060/105] allow the learner to take a ConvexHull as bounds --- adaptive/learner/new_learnerND.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 465c5a59b..9b5ea064e 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -4,6 +4,7 @@ import math import numpy as np +import scipy.spatial import scipy.interpolate from adaptive.learner.base_learner import BaseLearner @@ -113,7 +114,10 @@ def __init__(self, f, bounds, loss=None): self.loss_function = loss or DistanceLoss() self.ndim = 1 else: - boundary_points = sorted(tuple(p) for p in itertools.product(*bounds)) + if isinstance(bounds, scipy.spatial.ConvexHull): + boundary_points = bounds.points[bounds.vertices] + else: + boundary_points = sorted(tuple(p) for p in itertools.product(*bounds)) self.domain = ConvexHull(boundary_points) self.loss_function = loss or EmbeddedVolumeLoss() self.ndim = len(boundary_points[0]) From 953f62d48491f80dddf99a27b39d7a69d1b21ee4 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 19:06:03 +0200 Subject: [PATCH 061/105] add more comments to the learner --- adaptive/learner/new_learnerND.py | 79 ++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 12 
deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 9b5ea064e..f1535c341 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -106,6 +106,21 @@ def __call__(self, domain, subdomain, codomain_bounds, data): class LearnerND(BaseLearner): + """Learns a function 'f: ℝ^N → ℝ^m'. + + Parameters + --------- + f : callable + The function to learn. Must take a tuple of N real parameters and return a real + number or an arraylike of length M. + bounds : list of 2-tuples or `scipy.spatial.ConvexHull` + A list ``[(a_1, b_1), (a_2, b_2), ..., (a_N, b_N)]`` describing a bounding box + in N dimensions, or a convex hull that defines the boundary of the domain. + loss : callable, optional + An instance of a subclass of `LossFunction` that describes the loss + of a subdomain. + """ + def __init__(self, f, bounds, loss=None): if len(bounds) == 1: @@ -123,27 +138,43 @@ def __init__(self, f, bounds, loss=None): self.ndim = len(boundary_points[0]) self.boundary_points = boundary_points - self.queue = Queue() self.data = dict() # Contains the evaluated data only self.pending_points = set() - self.need_loss_update_factor = 1.1 self.function = f + + # We keep a running total of the number of points that were asked for. + # This is used in 'ask' to detect if we should return the boundary points. self.n_asked = 0 + # The loss function may depend on the "scale" (i.e. the difference between + # the maximum and the minimum) of the function values, in addition to the + # function values themselves. In order to take into account this "global" + # information we recompute the losses for all subdomains when the scale + # changes by more than this factor from the last time we recomputed all + # the losses. + self.need_loss_update_factor = 1.1 + # As an optimization we keep a map from subdomain to loss. # This is updated in 'self.priority' whenever the loss function is evaluated # for a new subdomain. 
'self.tell_many' removes subdomains from here when # they are split, and also removes neighboring subdomains from here (to force - # a loss function recomputation) + # a loss function recomputation). self.losses = dict() # We must wait until the boundary points have been evaluated before we can # set these attributes. self._initialized = False + # The dimension of the output space. self.vdim = None + # The maximum and minimum values of 'f' seen thus far. self.codomain_bounds = None + # The difference between the maximum and minimum of 'f' at the last + # time all the losses were recomputed. self.codomain_scale_at_last_update = None + # A priority queue of subdomains, which is used to determine where to add + # points. + self.queue = Queue() for subdomain in self.domain.subdomains(): self.queue.insert(subdomain, priority=self.priority(subdomain)) @@ -163,16 +194,24 @@ def _finalize_initialization(self): except TypeError: # Trying to take the length of a number self.vdim = 1 + # Generate new subdomains using any evaluated points for x in self.data: if x in self.boundary_points: continue self.domain.split_at(x) + # Recompute all the losses from scratch self.queue = Queue() + self.losses = dict() for subdomain in self.domain.subdomains(): self.queue.insert(subdomain, priority=self.priority(subdomain)) + @property + def npoints(self): + return len(self.data) + def priority(self, subdomain): + # Compute the loss of 'subdomain' if self._initialized: if subdomain in self.losses: L_0 = self.losses[subdomain] @@ -186,6 +225,9 @@ def priority(self, subdomain): # do not have enough data. We just assign the subdomain volume as the loss. L_0 = self.domain.volume(subdomain) + # Scale the subdomain loss by the maximum relative volume of its own subdomains + # (those formed of pending points within the subdomain). If there are no pending + # points in the subdomain then the scaling is 1 and the priority is just the loss. 
subvolumes = self.domain.subvolumes(subdomain) return (max(subvolumes) / sum(subvolumes)) * L_0 @@ -213,6 +255,9 @@ def ask(self, n, tell_pending=True): def _ask(self, n, tell_pending): new_points = [] point_priorities = [] + # Insert a point into the subdomain at the front of the queue, and update the + # priorities of that subdomain and any neighbors (if the point was added on + # a subdomain boundary). for _ in range(n): subdomain, _ = self.queue.peek() (new_point,), affected_subdomains = self.domain.insert_points(subdomain, 1) @@ -225,6 +270,8 @@ def _ask(self, n, tell_pending): # the API of 'Domain.insert_points' to not return 'subdomain'... point_priorities.append(self.priority(subdomain)) + # Remove all the points we just added and update the priorities of any subdomains + # we touched. if not tell_pending: affected_subdomains = set() for point in new_points: @@ -258,7 +305,7 @@ def tell_many(self, xs, ys): old_subdomains, new_subdomains = self.domain.split_at(x) old.update(old_subdomains) new.update(new_subdomains) - # remove any subdomains that were new at some point but are now old + # Remove any subdomains that were new at some point but are now old. new -= old for subdomain in old: @@ -266,17 +313,18 @@ def tell_many(self, xs, ys): del self.losses[subdomain] if need_loss_update: - # Need to recalculate all priorities anyway self.queue = Queue( (subdomain, self.priority(subdomain)) for subdomain in itertools.chain(self.queue.items(), new) ) else: - # Compute the priorities for the new subdomains and re-compute the - # priorities for the neighboring subdomains, if necessary. + # Insert the newly created subdomains into the queue. for subdomain in new: self.queue.insert(subdomain, priority=self.priority(subdomain)) + # If the loss function depends on data in neighboring subdomains then + # we must recompute the priorities of all neighboring subdomains of + # the subdomains we just added. 
if self.loss_function.n_neighbors > 0: subdomains_to_update = set() for subdomain in new: @@ -289,6 +337,8 @@ def tell_many(self, xs, ys): self.queue.update(subdomain, priority=self.priority(subdomain)) def _update_codomain_bounds(self, ys): + # Update the codomain bounds: the minimum and the maximum values that the + # learner has seen thus far. mn, mx = self.codomain_bounds if self.vdim == 1: mn = min(mn, *ys) @@ -299,12 +349,17 @@ def _update_codomain_bounds(self, ys): self.codomain_bounds = (mn, mx) scale = mx - mn - + # How much has the scale of the outputs changed since the last time + # we recomputed the losses? scale_factor = scale / self.codomain_scale_at_last_update + + # We need to recompute all losses if the scale has increased by more + # than a certain factor since the last time we recomputed all the losses if self.vdim == 1: need_loss_update = scale_factor > self.need_loss_update_factor else: need_loss_update = np.any(scale_factor > self.need_loss_update_factor) + if need_loss_update: self.codomain_scale_at_last_update = scale return True @@ -314,7 +369,7 @@ def _update_codomain_bounds(self, ys): def remove_unfinished(self): self.pending_points = set() cleared_subdomains = self.domain.clear_subdomains() - # Subdomains who had internal points removed need their priority updating + # Subdomains that had points removed need their priority updating for subdomain in cleared_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) @@ -323,11 +378,11 @@ def loss(self, real=True): # NOTE: O(N) in the number of subintervals, but with a low prefactor. # We have to do this because the queue is sorted in *priority* # order, and it's possible that a subinterval with a high loss - # may have a low priority (if there are many pending points). + # may have a low priority (if it has many pending points). return max(self.losses.values()) else: - # This depends on the implementation of 'self.priority'. 
Currently - # it returns a tuple (priority, loss). + # The 'not real' loss (which takes pending points into account) is + # just the priority in the subdomain queue. _, priority = self.queue.peek() return priority From ebcd6f2fee4d8b34ccc1f9cc7a1fd68031d78a31 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 19:10:54 +0200 Subject: [PATCH 062/105] show that the new learner works for ND output --- proof-of-concept-learner.ipynb | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/proof-of-concept-learner.ipynb b/proof-of-concept-learner.ipynb index 19fcc1b41..d6f0d00e6 100644 --- a/proof-of-concept-learner.ipynb +++ b/proof-of-concept-learner.ipynb @@ -300,6 +300,44 @@ "source": [ "runner.live_plot(plotter=lambda l: l.plot(tri_alpha=0.5), update_interval=0.5)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Works for ND output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ring_of_fire2(xy, d=0.75):\n", + " a = 0.2\n", + " x, y = xy\n", + " z = x + math.exp(-(x ** 2 + y ** 2 - d ** 2) ** 2 / a ** 4)\n", + " return [z, z]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "learner7 = LearnerND(ring_of_fire2, [(-1, 1), (-1, 1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adaptive.runner.simple(learner7, goal=lambda l: len(l.data)> 100)" + ] } ], "metadata": { From 6c07070ad0f94b769e0c30e984f182c48e7dc3a0 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 19:25:28 +0200 Subject: [PATCH 063/105] typo --- adaptive/learner/new_learnerND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index f1535c341..48613c1f0 100644 --- a/adaptive/learner/new_learnerND.py +++ 
b/adaptive/learner/new_learnerND.py @@ -15,7 +15,7 @@ class LossFunction(metaclass=abc.ABCMeta): - @abc.abtractproperty + @abc.abstractproperty def n_neighbors(self): "The maximum degree of neighboring subdomains required." From 64a6d0fd4d6b86d237a7d40bfabf331b4ddd88a4 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 20:18:25 +0200 Subject: [PATCH 064/105] correct sign in simplex boundary equations --- adaptive/domain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index a91944c29..43f738576 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -426,7 +426,7 @@ def _boundary_equations(simplex): facet_tangent_space = points[list(v)] - points[x0] facet_normal = scipy.linalg.null_space(facet_tangent_space).squeeze() A[i, :] = facet_normal - b[i] = np.dot(points[x0], facet_normal) + b[i] = -np.dot(points[x0], facet_normal) return A, b From 9624f7fe146bdf8fb481f03b59bd45f979c759b2 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 21:35:34 +0200 Subject: [PATCH 065/105] correct abstract loss function interface --- adaptive/learner/new_learnerND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 48613c1f0..c215bfba0 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -20,7 +20,7 @@ def n_neighbors(self): "The maximum degree of neighboring subdomains required." 
@abc.abstractmethod - def __call__(self, domain, subdomain, data): + def __call__(self, domain, subdomain, codomain_bounds, data): """Return the loss for 'subdomain' given 'data' Neighboring subdomains can be obtained with From 0055d573d29bbbb6fdcb20191a32af3f8c61a69b Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 21:36:09 +0200 Subject: [PATCH 066/105] rename need_loss_udate_factor --- adaptive/learner/new_learnerND.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index c215bfba0..d910e7b5b 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -152,7 +152,7 @@ def __init__(self, f, bounds, loss=None): # information we recompute the losses for all subdomains when the scale # changes by more than this factor from the last time we recomputed all # the losses. - self.need_loss_update_factor = 1.1 + self._recompute_losses_factor = 1.1 # As an optimization we keep a map from subdomain to loss. 
# This is updated in 'self.priority' whenever the loss function is evaluated @@ -356,9 +356,9 @@ def _update_codomain_bounds(self, ys): # We need to recompute all losses if the scale has increased by more # than a certain factor since the last time we recomputed all the losses if self.vdim == 1: - need_loss_update = scale_factor > self.need_loss_update_factor + need_loss_update = scale_factor > self._recompute_losses_factor else: - need_loss_update = np.any(scale_factor > self.need_loss_update_factor) + need_loss_update = np.any(scale_factor > self._recompute_losses_factor) if need_loss_update: self.codomain_scale_at_last_update = scale From cc1f83fbfabe62c31dd7c5fcc8c7019967361421 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 21:37:23 +0200 Subject: [PATCH 067/105] correct boundary point choosing logic --- adaptive/learner/new_learnerND.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index d910e7b5b..68e2e203d 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -142,10 +142,6 @@ def __init__(self, f, bounds, loss=None): self.pending_points = set() self.function = f - # We keep a running total of the number of points that were asked for. - # This is used in 'ask' to detect if we should return the boundary points. - self.n_asked = 0 - # The loss function may depend on the "scale" (i.e. the difference between # the maximum and the minimum) of the function values, in addition to the # function values themselves. 
In order to take into account this "global" @@ -232,11 +228,17 @@ def priority(self, subdomain): return (max(subvolumes) / sum(subvolumes)) * L_0 def ask(self, n, tell_pending=True): - if self.n_asked >= len(self.boundary_points): + if self._initialized: points, losses = self._ask(n, tell_pending) else: - points = self.boundary_points[self.n_asked : self.n_asked + n] - # The boundary points should always be evaluated with the highest priority + # Give priority to boundary points, but don't include points that + # we have data for or have already asked for. + points = [ + x + for x in self.boundary_points + if x not in self.data and x not in self.pending_points + ] + # infinite loss so that the boundary points are prioritized losses = [math.inf] * len(points) if tell_pending: for x in points: @@ -247,9 +249,6 @@ def ask(self, n, tell_pending=True): points += tuple(extra_points) losses += tuple(extra_losses) - if tell_pending: - self.n_asked += n - return points, losses def _ask(self, n, tell_pending): From 19ab936169149731babb1b691b11e7fee2b8b7fd Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 21:37:41 +0200 Subject: [PATCH 068/105] make point adding idempotent in learner --- adaptive/learner/new_learnerND.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 68e2e203d..52b4a272d 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -288,6 +288,11 @@ def tell_pending(self, x): self.queue.update(subdomain, priority=self.priority(subdomain)) def tell_many(self, xs, ys): + # Filter out points that are already present + if all(x in self.data for x in xs): + return + xs, ys = zip(*((x, y) for x, y in zip(xs, ys) if x not in self.data)) + self.data.update(zip(xs, ys)) self.pending_points -= set(xs) From a0bea12a01f4306ea47a7806686563a69375d63a Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:09:54 +0200 Subject: 
[PATCH 069/105] fix bugs in 1D domain --- adaptive/domain.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index 43f738576..71df7c50e 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -179,7 +179,7 @@ def insert(self, x, *, _check_membership=True): i = p.bisect_left(x) if p[i] == x: raise ValueError("{} exists in this interval already".format(x)) - subdomain = (p[i - 1], p[i]) + subdomain = (a, b) = p[i - 1], p[i] try: p = self.sub_intervals[subdomain] @@ -210,6 +210,8 @@ def remove(self, x, *, _check_membership=True): raise ValueError("{} not in any subdomain".format(x)) else: sub_points.remove(x) + if len(sub_points) == 2: + del self.sub_intervals[subdomain] return [subdomain] def split_at(self, x, *, _check_membership=True): @@ -588,6 +590,8 @@ def remove(self, x): # Rebuild the subtriangulation from scratch self.sub_triangulations[subdomain] = _make_new_subtriangulation(points) + return affected_subdomains + def split_at(self, x, *, _check_membership=True): x = tuple(x) tri = self.triangulation From 1a8408912b53d0473fcb90c7bc12ebd4ee496f6d Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:10:11 +0200 Subject: [PATCH 070/105] fix bug in tell_pending --- adaptive/learner/new_learnerND.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 52b4a272d..061dea840 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -282,7 +282,16 @@ def _ask(self, n, tell_pending): return new_points, point_priorities def tell_pending(self, x): + if x in self.data: + raise ValueError("Data already present for point {}".format(x)) + self.pending_points.add(x) + + # We cannot 'insert' a boundary point into the domain because it already + # exists as a vertex. This does not affect the queue ordering. 
+ if not self._initialized and x in self.boundary_points: + return + affected_subdomains = self.domain.insert(x) for subdomain in affected_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) From bf643624713fe388c84df91f43236cc13d7eb9d0 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:16:15 +0200 Subject: [PATCH 071/105] catch edge case in loss --- adaptive/learner/new_learnerND.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 061dea840..3aa81caf8 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -388,6 +388,8 @@ def remove_unfinished(self): def loss(self, real=True): if real: + if not self.losses: + return math.inf # NOTE: O(N) in the number of subintervals, but with a low prefactor. # We have to do this because the queue is sorted in *priority* # order, and it's possible that a subinterval with a high loss From f17117349f60d44cd28d42a9956efa2d6a55fd07 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:16:26 +0200 Subject: [PATCH 072/105] implement data saving --- adaptive/learner/new_learnerND.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 3aa81caf8..3b17f3adc 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -487,7 +487,8 @@ def _plot_1d(self): return p.redim(x=dict(range=plot_bounds)) def _get_data(self): - pass + return self.data def _set_data(self, data): - pass + if data: + self.tell_many(*zip(*data.items())) From 6111a7c38d0ed90a9384d5f28ae8d0ea3ac9fbfa Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:21:19 +0200 Subject: [PATCH 073/105] explicitly set inf --- adaptive/learner/new_learnerND.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py 
b/adaptive/learner/new_learnerND.py index 3b17f3adc..f65f83521 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -364,7 +364,10 @@ def _update_codomain_bounds(self, ys): scale = mx - mn # How much has the scale of the outputs changed since the last time # we recomputed the losses? - scale_factor = scale / self.codomain_scale_at_last_update + if self.codomain_scale_at_last_upate == 0: + scale_factor = math.inf + else: + scale_factor = scale / self.codomain_scale_at_last_update # We need to recompute all losses if the scale has increased by more # than a certain factor since the last time we recomputed all the losses From 4ede935e79596bee2b0a008260b6bbe73c0f6c1e Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:35:41 +0200 Subject: [PATCH 074/105] add new learner to some tests (those that it passes) --- adaptive/learner/new_learnerND.py | 2 +- adaptive/tests/test_learners.py | 26 +++++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index f65f83521..498d36fa6 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -364,7 +364,7 @@ def _update_codomain_bounds(self, ys): scale = mx - mn # How much has the scale of the outputs changed since the last time # we recomputed the losses? 
- if self.codomain_scale_at_last_upate == 0: + if self.codomain_scale_at_last_update == 0: scale_factor = math.inf else: scale_factor = scale / self.codomain_scale_at_last_update diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index bf84fcc14..fc405cfe0 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -26,6 +26,7 @@ LearnerND, SequenceLearner, ) +from adaptive.learner.new_learnerND import LearnerND as NewLearnerND from adaptive.runner import simple try: @@ -112,11 +113,13 @@ def maybe_skip(learner): @learn_with(Learner1D, bounds=(-1, 1)) +@learn_with(NewLearnerND, bounds=[(-1, 1)]) def quadratic(x, m: uniform(0, 10), b: uniform(0, 1)): return m * x ** 2 + b @learn_with(Learner1D, bounds=(-1, 1)) +@learn_with(NewLearnerND, bounds=[(-1, 1)]) @learn_with(SequenceLearner, sequence=np.linspace(-1, 1, 201)) def linear_with_peak(x, d: uniform(-1, 1)): a = 0.01 @@ -124,6 +127,7 @@ def linear_with_peak(x, d: uniform(-1, 1)): @learn_with(LearnerND, bounds=((-1, 1), (-1, 1))) +@learn_with(NewLearnerND, bounds=((-1, 1), (-1, 1))) @learn_with(Learner2D, bounds=((-1, 1), (-1, 1))) @learn_with(SequenceLearner, sequence=np.random.rand(1000, 2)) def ring_of_fire(xy, d: uniform(0.2, 1)): @@ -133,6 +137,7 @@ def ring_of_fire(xy, d: uniform(0.2, 1)): @learn_with(LearnerND, bounds=((-1, 1), (-1, 1), (-1, 1))) +@learn_with(NewLearnerND, bounds=((-1, 1), (-1, 1), (-1, 1))) @learn_with(SequenceLearner, sequence=np.random.rand(1000, 3)) def sphere_of_fire(xyz, d: uniform(0.2, 1)): a = 0.2 @@ -242,6 +247,7 @@ def test_uniform_sampling2D(learner_type, f, learner_kwargs): (Learner1D, (-1, 1)), (Learner2D, [(-1, 1), (-1, 1)]), (LearnerND, [(-1, 1), (-1, 1), (-1, 1)]), + (NewLearnerND, [(-1, 1), (-1, 1), (-1, 1)]), ], ) def test_learner_accepts_lists(learner_type, bounds): @@ -252,7 +258,7 @@ def f(x): simple(learner, goal=lambda l: l.npoints > 10) -@run_with(Learner1D, Learner2D, LearnerND, SequenceLearner) 
+@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND, SequenceLearner) def test_adding_existing_data_is_idempotent(learner_type, f, learner_kwargs): """Adding already existing data is an idempotent operation. @@ -299,7 +305,14 @@ def test_adding_existing_data_is_idempotent(learner_type, f, learner_kwargs): # XXX: This *should* pass (https://github.com/python-adaptive/adaptive/issues/55) # but we xfail it now, as Learner2D will be deprecated anyway -@run_with(Learner1D, xfail(Learner2D), LearnerND, AverageLearner, SequenceLearner) +@run_with( + Learner1D, + xfail(Learner2D), + LearnerND, + NewLearnerND, + AverageLearner, + SequenceLearner, +) def test_adding_non_chosen_data(learner_type, f, learner_kwargs): """Adding data for a point that was not returned by 'ask'.""" # XXX: learner, control and bounds are not defined @@ -383,7 +396,7 @@ def test_point_adding_order_is_irrelevant(learner_type, f, learner_kwargs): # XXX: the Learner2D fails with ~50% chance # see https://github.com/python-adaptive/adaptive/issues/55 -@run_with(Learner1D, xfail(Learner2D), LearnerND, AverageLearner) +@run_with(Learner1D, xfail(Learner2D), LearnerND, NewLearnerND, AverageLearner) def test_expected_loss_improvement_is_less_than_total_loss( learner_type, f, learner_kwargs ): @@ -460,6 +473,7 @@ def test_learner_performance_is_invariant_under_scaling( Learner1D, Learner2D, LearnerND, + NewLearnerND, AverageLearner, SequenceLearner, with_all_loss_functions=False, @@ -504,6 +518,7 @@ def test_balancing_learner(learner_type, f, learner_kwargs): Learner1D, Learner2D, LearnerND, + NewLearnerND, AverageLearner, maybe_skip(SKOptLearner), IntegratorLearner, @@ -535,6 +550,7 @@ def test_saving(learner_type, f, learner_kwargs): Learner1D, Learner2D, LearnerND, + NewLearnerND, AverageLearner, maybe_skip(SKOptLearner), IntegratorLearner, @@ -606,7 +622,7 @@ def test_saving_with_datasaver(learner_type, f, learner_kwargs): @pytest.mark.xfail -@run_with(Learner1D, Learner2D, LearnerND) 
+@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) def test_convergence_for_arbitrary_ordering(learner_type, f, learner_kwargs): """Learners that are learning the same function should converge to the same result "eventually" if given the same data, regardless @@ -618,7 +634,7 @@ def test_convergence_for_arbitrary_ordering(learner_type, f, learner_kwargs): @pytest.mark.xfail -@run_with(Learner1D, Learner2D, LearnerND) +@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) def test_learner_subdomain(learner_type, f, learner_kwargs): """Learners that never receive data outside of a subdomain should perform 'similarly' to learners defined on that subdomain only.""" From 6908ec28e7ab39abbb26046333644ea8cd5ba716 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:36:00 +0200 Subject: [PATCH 075/105] add xfailing tests for new learner This will be rectified in future commits --- adaptive/tests/test_learners.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index fc405cfe0..6e037d7bc 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -422,7 +422,7 @@ def test_expected_loss_improvement_is_less_than_total_loss( # XXX: This *should* pass (https://github.com/python-adaptive/adaptive/issues/55) # but we xfail it now, as Learner2D will be deprecated anyway -@run_with(Learner1D, xfail(Learner2D), LearnerND) +@run_with(Learner1D, xfail(Learner2D), LearnerND, xfail(NewLearnerND)) def test_learner_performance_is_invariant_under_scaling( learner_type, f, learner_kwargs ): @@ -621,6 +621,25 @@ def test_saving_with_datasaver(learner_type, f, learner_kwargs): os.remove(path) +@run_with(Learner1D, Learner2D, LearnerND, xfail(NewLearnerND)) +def test_adding_data_outside_of_bounds(learner_type, f, learner_kwargs): + # Just test this does not throw an error for now + f = generate_random_parametrization(f) + learner = 
learner_type(f, **learner_kwargs) + + points, _ = learner.ask(20) + learner.tell_many(points, [learner.function(x) for x in points]) + + points, _ = learner.ask(10) + points = 1e5 * np.asarray( + points + ) # outside the bounds for all the test functions we have + if len(points.shape) > 1: + points = [tuple(x) for x in points] + + learner.tell_many(points, [learner.function(x) for x in points]) + + @pytest.mark.xfail @run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) def test_convergence_for_arbitrary_ordering(learner_type, f, learner_kwargs): From 484cebff1376c253dcd7e280bbc69039fb49195f Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:40:35 +0200 Subject: [PATCH 076/105] correctly set inf --- adaptive/learner/new_learnerND.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 498d36fa6..f148bad00 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -364,7 +364,7 @@ def _update_codomain_bounds(self, ys): scale = mx - mn # How much has the scale of the outputs changed since the last time # we recomputed the losses? 
- if self.codomain_scale_at_last_update == 0: + if np.any(self.codomain_scale_at_last_update == 0): scale_factor = math.inf else: scale_factor = scale / self.codomain_scale_at_last_update From 460bd2e8931b83e9d6efce62e1b94e74b44bdc85 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 22:41:35 +0200 Subject: [PATCH 077/105] replace Domain.__contains__ with Domain.contains_subdomain --- adaptive/domain.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index 71df7c50e..28d264214 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -148,7 +148,7 @@ def __init__(self, a, b): def insert_points(self, subdomain, n, *, _check_membership=True): if n <= 0: raise ValueError("n must be positive") - if _check_membership and subdomain not in self: + if _check_membership and not self.contains_subdomain(subdomain): raise ValueError("{} is not present in this interval".format(subdomain)) try: p = self.sub_intervals[subdomain] @@ -264,7 +264,7 @@ def which_subdomains(self, x): neighbors.append((p[i], p[i + 1])) return neighbors - def __contains__(self, subdomain): + def contains_subdomain(self, subdomain): a, b = subdomain try: ia = self.points.index(a) @@ -293,7 +293,7 @@ def subdomains(self): return zip(p, p.islice(1)) def subpoints(self, subdomain, *, _check_membership=True): - if _check_membership and subdomain not in self: + if _check_membership and self.contains_subdomain(subdomain): raise ValueError("{} is not present in this interval".format(subdomain)) try: p = self.sub_intervals[subdomain] @@ -660,7 +660,7 @@ def which_subdomains(self, x): raise ValueError("{} is not in the domain".format(x)) return list(subdomains) - def __contains__(self, subdomain): + def contains_subdomain(self, subdomain): return subdomain in self.triangulation.simplices def transform(self, x): @@ -683,7 +683,7 @@ def vertices(self): return self.triangulation.vertices def subpoints(self, subdomain, *, 
_check_membership=True): - if _check_membership and subdomain not in self: + if _check_membership and not self.contains_subdomain(subdomain): raise ValueError("{} is not present in this domain".format(subdomain)) try: subtri = self.sub_triangulations[subdomain] From 929a9fdababc4cbefa78190c482a6656173a8641 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 23:12:10 +0200 Subject: [PATCH 078/105] correct implementation of points outside domain --- adaptive/tests/domain_utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py index 2dbf7e7d6..4d20af0c8 100644 --- a/adaptive/tests/domain_utils.py +++ b/adaptive/tests/domain_utils.py @@ -88,13 +88,13 @@ def a_few_points_inside(draw, domain): @st.composite -def point_outside(draw, domain): +def points_outside(draw, domain, n): kwargs = dict(allow_nan=False, allow_infinity=False) if isinstance(domain, Interval): a, b = domain.bounds length = b - a - before_domain = st.floats(a - 10 * length, a, **kwargs) - after_domain = st.floats(b, b + 10 * length, **kwargs) + before_domain = st.floats(a - 10 * length, a, exclude_max=True, **kwargs) + after_domain = st.floats(b, b + 10 * length, exclude_min=True, **kwargs) x = before_domain | after_domain else: assert isinstance(domain, ConvexHull) @@ -104,14 +104,20 @@ def point_outside(draw, domain): x = st.tuples( *[ ( - st.floats(min_value=a - 10 * (b - a), max_value=a, **kwargs) - | st.floats(min_value=b, max_value=b + 10 * (b - a), **kwargs) + st.floats(a - 10 * (b - a), a, exclude_max=True, **kwargs) + | st.floats(b, b + 10 * (b - a), exclude_min=True, **kwargs) ) for a, b in zip(points.min(axis=0), points.max(axis=0)) ] ) - return draw(x) + xs = st.tuples(*[x] * n).filter(unique_vectors) + return draw(xs) + + +@st.composite +def point_outside(draw, domain): + return draw(points_outside(domain, 1))[0] @st.composite From 369637920413050233603fc78d0e1d499ace5ca1 
Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sat, 12 Oct 2019 23:12:28 +0200 Subject: [PATCH 079/105] implement method 'Domain.encloses' for finding points inside a domain --- adaptive/domain.py | 30 ++++++++++++++++++++++++++++++ adaptive/tests/test_domain.py | 22 +++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index 28d264214..7bb77e199 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -87,6 +87,21 @@ def which_subdomains(self, x): ValueError : if x is outside of the domain """ + @abc.abstractmethod + def encloses(self, points): + """Return whether the domain encloses the points + + Parameters + ---------- + points : a point or sequence of points + + Returns + ------- + Boolean (if a single point was provided) or an array of booleans + (if a sequence of points was provided) that is True when + the domain encloses the point. + """ + @abc.abstractmethod def vertices(self): """Returns the vertices of the domain.""" @@ -276,6 +291,14 @@ def contains_subdomain(self, subdomain): def vertices(self): return self.points + def encloses(self, points): + a, b = self.bounds + points = np.asarray(points) + if points.shape == (): # single point + return a <= points <= b + else: + return np.logical_and(a <= points, points <= b) + def neighbors(self, subdomain, n=1): a, b = subdomain p = self.points @@ -679,6 +702,13 @@ def neighbors(self, subdomain, n=1): def subdomains(self): return self.triangulation.simplices + def encloses(self, points): + points = np.asarray(points).T + A, b = self.bounds.equations[:, :-1], self.bounds.equations[:, -1:] + if len(points.shape) == 1: + points = points[:, None] + return np.all(A @ points + b <= 0, axis=0) + def vertices(self): return self.triangulation.vertices diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py index f012ca073..abf7322ec 100644 --- a/adaptive/tests/test_domain.py +++ b/adaptive/tests/test_domain.py @@ -6,8 +6,10 
@@ a_few_points_inside, make_hypercube_domain, point_inside, - point_outside, point_on_shared_face, + point_outside, + points_inside, + points_outside, ) from hypothesis import given, settings @@ -66,6 +68,24 @@ def test_insert_points_outside_domain_raises(data, ndim): domain.insert(x) +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_encloses(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + + xin = data.draw(point_inside(domain)) + assert domain.encloses(xin) + + xout = data.draw(point_outside(domain)) + assert not domain.encloses(xout) + + xins = data.draw(points_inside(domain, 20)) + assert np.all(domain.encloses(xins)) + + xouts = data.draw(points_outside(domain, 20)) + assert not np.any(domain.encloses(xouts)) + + @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_split_at_point_outside_domain_raises(data, ndim): From 7ff9372b7b83c954190423112895b9329c660b0a Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 09:37:49 +0200 Subject: [PATCH 080/105] allow NewLearnerND to accept points not in the domain These are completely ignored in the learning algorithm --- adaptive/learner/new_learnerND.py | 22 +++++++++++++++++----- adaptive/tests/test_learners.py | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index f148bad00..e4ff11aa0 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -190,11 +190,15 @@ def _finalize_initialization(self): except TypeError: # Trying to take the length of a number self.vdim = 1 - # Generate new subdomains using any evaluated points - for x in self.data: - if x in self.boundary_points: - continue - self.domain.split_at(x) + # Generate new subdomains using any evaluated points, skipping the boundary + # points (these are already vertices in the domain) and discarding any points + # that are outside the domain. 
+ xs = list(x for x in self.data.keys() if x not in self.boundary_points) + if xs: + xs = np.array(xs) + xs = xs[self.domain.encloses(xs)] + for x in xs: + self.domain.split_at(x) # Recompute all the losses from scratch self.queue = Queue() @@ -310,6 +314,14 @@ def tell_many(self, xs, ys): self._finalize_initialization() return + # Filter out any points that are outside the domain. These still appear in + # 'self.data', but they are not added to the domain, and so have no effect + # on the learning. + are_inside = self.domain.encloses(xs) + if not np.any(are_inside): + return + xs, ys = zip(*((x, y) for x, y, inside in zip(xs, ys, are_inside) if inside)) + need_loss_update = self._update_codomain_bounds(ys) old = set() diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index 6e037d7bc..4546f2cb6 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -621,7 +621,7 @@ def test_saving_with_datasaver(learner_type, f, learner_kwargs): os.remove(path) -@run_with(Learner1D, Learner2D, LearnerND, xfail(NewLearnerND)) +@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) def test_adding_data_outside_of_bounds(learner_type, f, learner_kwargs): # Just test this does not throw an error for now f = generate_random_parametrization(f) From 3979872f7d02706d0a14c0c81710293ac94b3437 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 09:38:52 +0200 Subject: [PATCH 081/105] make sure we can ask for more points after adding data outside the interval --- adaptive/tests/test_learners.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index 4546f2cb6..3351dd647 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -639,6 +639,8 @@ def test_adding_data_outside_of_bounds(learner_type, f, learner_kwargs): learner.tell_many(points, [learner.function(x) for x in points]) + learner.ask(10) + @pytest.mark.xfail 
@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) From bb38de353fef1ccdc89a01c9c89a2e3f77a636ec Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 10:29:27 +0200 Subject: [PATCH 082/105] add new learner to datasaving test --- adaptive/tests/test_learners.py | 1 + 1 file changed, 1 insertion(+) diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index 3351dd647..43bca0fd3 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -589,6 +589,7 @@ def fname(learner): Learner1D, Learner2D, LearnerND, + NewLearnerND, AverageLearner, maybe_skip(SKOptLearner), IntegratorLearner, From 60e0a0cca1d2fd828b13cbddfd07429ad2e63034 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 10:53:14 +0200 Subject: [PATCH 083/105] correct conditional --- adaptive/domain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index 7bb77e199..c31577594 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -316,7 +316,7 @@ def subdomains(self): return zip(p, p.islice(1)) def subpoints(self, subdomain, *, _check_membership=True): - if _check_membership and self.contains_subdomain(subdomain): + if _check_membership and not self.contains_subdomain(subdomain): raise ValueError("{} is not present in this interval".format(subdomain)) try: p = self.sub_intervals[subdomain] From ceaa87d6a4c41e017d782d86ff9429a5e99b1041 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 10:53:41 +0200 Subject: [PATCH 084/105] make 'points_inside' select points from more than 1 subdomain Previously all selected points would be from a single subdomain. 
--- adaptive/tests/domain_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py index 4d20af0c8..5e6b878d7 100644 --- a/adaptive/tests/domain_utils.py +++ b/adaptive/tests/domain_utils.py @@ -38,6 +38,7 @@ def unique_vectors(xs): @st.composite def point_inside_simplex(draw, simplex): + simplex = draw(simplex) simplex = np.asarray(simplex) dim = simplex.shape[1] # Set the numpy random seed @@ -68,9 +69,10 @@ def points_inside(draw, domain, n): assert isinstance(domain, ConvexHull) tri = domain.triangulation simplices = list(tri.simplices) - simplex = draw(st.sampled_from(simplices)) - vertices = [tri.vertices[s] for s in simplex] - x = point_inside_simplex(vertices) + simplex = st.sampled_from(simplices).map( + lambda simplex: [tri.vertices[s] for s in simplex] + ) + x = point_inside_simplex(simplex) xs = st.tuples(*[x] * n).filter(unique_vectors) return draw(xs) From a13711ae948e337895f998f3bbeeb9b05495baec Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 10:58:42 +0200 Subject: [PATCH 085/105] correct name of domain test --- adaptive/tests/test_domain.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py index abf7322ec..5ef4d4dfc 100644 --- a/adaptive/tests/test_domain.py +++ b/adaptive/tests/test_domain.py @@ -88,13 +88,22 @@ def test_encloses(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) -def test_split_at_point_outside_domain_raises(data, ndim): +def test_insert_point_outside_domain_raises(data, ndim): domain = data.draw(make_hypercube_domain(ndim)) x = data.draw(point_outside(domain)) with pytest.raises(ValueError): domain.insert(x) +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_split_at_point_outside_domain_raises(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x = 
data.draw(point_outside(domain)) + with pytest.raises(ValueError): + domain.split_at(x) + + @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_removing_domain_vertex_raises(data, ndim): From fbbe44ac3474cf7f9488a783de590a72522e93ce Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 11:06:30 +0200 Subject: [PATCH 086/105] rename point loss to point priority --- adaptive/learner/new_learnerND.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index e4ff11aa0..b11ba16f9 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -233,7 +233,7 @@ def priority(self, subdomain): def ask(self, n, tell_pending=True): if self._initialized: - points, losses = self._ask(n, tell_pending) + points, point_priorities = self._ask(n, tell_pending) else: # Give priority to boundary points, but don't include points that # we have data for or have already asked for. 
@@ -242,18 +242,18 @@ def ask(self, n, tell_pending=True): for x in self.boundary_points if x not in self.data and x not in self.pending_points ] - # infinite loss so that the boundary points are prioritized - losses = [math.inf] * len(points) + # Infinite priority so that the boundary points are prioritized + point_priorities = [math.inf] * len(points) if tell_pending: for x in points: self.pending_points.add(x) n_extra = n - len(points) if n_extra > 0: - extra_points, extra_losses = self._ask(n_extra, tell_pending) + extra_points, extra_point_priorities = self._ask(n_extra, tell_pending) points += tuple(extra_points) - losses += tuple(extra_losses) + point_priorities += tuple(extra_point_priorities) - return points, losses + return points, point_priorities def _ask(self, n, tell_pending): new_points = [] From d227e1bf70ec56f8fe79877ee80c08871c1eba73 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 11:08:56 +0200 Subject: [PATCH 087/105] do 1 operation per line only --- adaptive/learner/new_learnerND.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index b11ba16f9..ca7abba2d 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -279,7 +279,8 @@ def _ask(self, n, tell_pending): affected_subdomains = set() for point in new_points: self.pending_points.remove(point) - affected_subdomains.update(self.domain.remove(point)) + sd = self.domain.remove(point) + affected_subdomains.update(sd) for subdomain in affected_subdomains: self.queue.update(subdomain, priority=self.priority(subdomain)) From 9f8774a915c4f640df4ff037f53764879c0e2664 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 17:07:43 +0200 Subject: [PATCH 088/105] correct docstring --- adaptive/domain.py | 1 - 1 file changed, 1 deletion(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index c31577594..e3a740e0c 100644 --- a/adaptive/domain.py 
+++ b/adaptive/domain.py @@ -476,7 +476,6 @@ def _on_which_boundary(equations, x, eps=1e-8): >>> eq = _boundary_equations(simplex) >>> x = [0.5, 0.] >>> _on_which_boundary(eq, x) == (0, 1) - >>> assert boundary == (0, 1) >>> x = [2., 0.] >>> _on_which_boundary(eq, x) == (1,) """ From 6bdbe88780bbaae9f8f18da7a8137fed2cb91cc5 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 17:32:26 +0200 Subject: [PATCH 089/105] correct test that all internal points are reassigned when splitting --- adaptive/tests/test_domain.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py index 5ef4d4dfc..cc737672f 100644 --- a/adaptive/tests/test_domain.py +++ b/adaptive/tests/test_domain.py @@ -189,10 +189,28 @@ def test_clear_subdomains_removes_all_points(data, ndim): assert 0 == sum(len(domain.subpoints(s)) for s in domain.subdomains()) -### Interval tests +@pytest.mark.parametrize("ndim", [1, 2, 3]) +@given(data=st.data()) +def test_split_at_reassigns_all_internal_points(data, ndim): + domain = data.draw(make_hypercube_domain(ndim)) + x_split, *xs = data.draw(a_few_points_inside(domain)) + + for x in xs: + domain.insert(x) + + # The subdomains where the points were assigned initially + subpoints = {s: set(domain.subpoints(s)) for s in domain.subdomains()} + # Sanity check; all the inserted points are in *some* subdomain + assert set.union(*subpoints.values()) == set(xs) + + old_subdomains, new_subdomains = domain.split_at(x_split) + + old_subpoints = set.union(*(subpoints[s] for s in old_subdomains)) + new_subpoints = set.union(*(set(domain.subpoints(s)) for s in new_subdomains)) + assert old_subpoints == new_subpoints -### ConvexHull tests +### ConvexHull-specific tests @pytest.mark.parametrize("ndim", [2, 3]) @@ -206,16 +224,3 @@ def test_inserting_point_on_boundary_adds_to_all_subtriangulations(data, ndim): x = data.draw(point_on_shared_face(domain, 1)) 
affected_subdomains = domain.insert(x) assert all(x in set(domain.subpoints(s)) for s in affected_subdomains) - - -@pytest.mark.parametrize("ndim", [2, 3]) -@given(data=st.data()) -def test_split_at_reassigns_all_internal_points(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) - xs = data.draw(a_few_points_inside(domain)) - - for x in xs: - domain.insert(x) - _, new_subdomains = domain.split_at(xs[0]) - subpoints = set.union(*(set(domain.subpoints(s)) for s in new_subdomains)) - assert set(xs[1:]) == subpoints From 32518d1e87c437cdf4bd7c1376318f16264d8b20 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 17:34:26 +0200 Subject: [PATCH 090/105] run all domain tests over random domains, rather than hypercubes --- adaptive/tests/test_domain.py | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/adaptive/tests/test_domain.py b/adaptive/tests/test_domain.py index cc737672f..a2435f36a 100644 --- a/adaptive/tests/test_domain.py +++ b/adaptive/tests/test_domain.py @@ -4,7 +4,7 @@ import pytest from adaptive.tests.domain_utils import ( a_few_points_inside, - make_hypercube_domain, + make_random_domain, point_inside, point_on_shared_face, point_outside, @@ -18,7 +18,7 @@ @given(data=st.data()) @settings(deadline=500) def test_getting_points_are_unique(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) points = [] for subdomain in domain.subdomains(): p, _ = domain.insert_points(subdomain, 10) @@ -30,7 +30,7 @@ def test_getting_points_are_unique(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_sum_subvolumes_equals_volume(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) for x in xs: @@ -42,7 +42,7 @@ def test_sum_subvolumes_equals_volume(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) 
@given(data=st.data()) def test_split_at_vertex_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = data.draw(point_inside(domain)) domain.split_at(x) with pytest.raises(ValueError): @@ -52,7 +52,7 @@ def test_split_at_vertex_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_inserting_point_twice_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = data.draw(point_inside(domain)) domain.insert(x) with pytest.raises(ValueError): @@ -62,7 +62,7 @@ def test_inserting_point_twice_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_insert_points_outside_domain_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = data.draw(point_outside(domain)) with pytest.raises(ValueError): domain.insert(x) @@ -71,7 +71,7 @@ def test_insert_points_outside_domain_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_encloses(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xin = data.draw(point_inside(domain)) assert domain.encloses(xin) @@ -89,7 +89,7 @@ def test_encloses(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_insert_point_outside_domain_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = data.draw(point_outside(domain)) with pytest.raises(ValueError): domain.insert(x) @@ -98,7 +98,7 @@ def test_insert_point_outside_domain_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_split_at_point_outside_domain_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = 
data.draw(point_outside(domain)) with pytest.raises(ValueError): domain.split_at(x) @@ -107,7 +107,7 @@ def test_split_at_point_outside_domain_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_removing_domain_vertex_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = data.draw(point_inside(domain)) domain.split_at(x) with pytest.raises(ValueError): @@ -117,7 +117,7 @@ def test_removing_domain_vertex_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_removing_nonexistant_point_raises(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x = data.draw(point_inside(domain)) with pytest.raises(ValueError): domain.remove(x) @@ -126,7 +126,7 @@ def test_removing_nonexistant_point_raises(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_splitting_at_point_adds_to_vertices(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) for x in xs: @@ -138,7 +138,7 @@ def test_splitting_at_point_adds_to_vertices(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_inserting_points_adds_to_subpoints(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) subdomains = dict() @@ -152,7 +152,7 @@ def test_inserting_points_adds_to_subpoints(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_inserting_then_removing_points_removes_from_subpoints(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) for x in xs: @@ -166,7 +166,7 @@ def 
test_inserting_then_removing_points_removes_from_subpoints(data, ndim): @given(data=st.data()) @settings(deadline=500) def test_inserting_then_splitting_at_points_removes_from_subpoints(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) for x in xs: @@ -179,7 +179,7 @@ def test_inserting_then_splitting_at_points_removes_from_subpoints(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_clear_subdomains_removes_all_points(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) for x in xs: @@ -192,7 +192,7 @@ def test_clear_subdomains_removes_all_points(data, ndim): @pytest.mark.parametrize("ndim", [1, 2, 3]) @given(data=st.data()) def test_split_at_reassigns_all_internal_points(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) x_split, *xs = data.draw(a_few_points_inside(domain)) for x in xs: @@ -216,7 +216,7 @@ def test_split_at_reassigns_all_internal_points(data, ndim): @pytest.mark.parametrize("ndim", [2, 3]) @given(data=st.data()) def test_inserting_point_on_boundary_adds_to_all_subtriangulations(data, ndim): - domain = data.draw(make_hypercube_domain(ndim)) + domain = data.draw(make_random_domain(ndim)) xs = data.draw(a_few_points_inside(domain)) for x in xs: From 5d3434db2a2f2a41059161fd5b8084343054cfca Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 17:48:28 +0200 Subject: [PATCH 091/105] update priority queue docstring --- adaptive/priority_queue.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py index 3466a6c2a..9e70a213c 100644 --- a/adaptive/priority_queue.py +++ b/adaptive/priority_queue.py @@ -41,13 +41,23 @@ def items(self): return 
reversed(self._queue.values()) def peek(self): - "Return the item and priority at the front of the queue." + """Return the item and priority at the front of the queue. + + Raises + ------ + Empty : if the queue is empty + """ self._check_nonempty() ((priority, _), item) = self._queue.peekitem() return item, priority def pop(self): - "Remove and return the item and priority at the front of the queue." + """Remove and return the item and priority at the front of the queue. + + Raises + ------ + Empty : if the queue is empty + """ self._check_nonempty() (key, item) = self._queue.popitem() i = self._items.index((item, key)) @@ -78,7 +88,12 @@ def _find_first(self, item): return i, key def remove(self, item): - "Remove the 'item' from the queue." + """Remove the 'item' from the queue. + + Raises + ------ + KeyError : if 'item' is not in the queue. + """ i, key = self._find_first(item) del self._queue[key] del self._items[i] From 6efc2507ef131b277c4b3cd60bda40af22e8ef92 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Sun, 13 Oct 2019 17:48:46 +0200 Subject: [PATCH 092/105] neater ordering --- adaptive/priority_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py index 9e70a213c..8da1b748c 100644 --- a/adaptive/priority_queue.py +++ b/adaptive/priority_queue.py @@ -110,6 +110,6 @@ def update(self, item, priority): new_key = (priority, n) del self._queue[key] - self._queue[new_key] = item del self._items[i] + self._queue[new_key] = item self._items.add((item, new_key)) From 06003cb85ee7d3479549efe5e6c91e475ea168a4 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 14 Oct 2019 12:16:07 +0200 Subject: [PATCH 093/105] add clarifying comment about adding points to neighboring subdomains --- adaptive/domain.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index e3a740e0c..358c21987 100644 --- a/adaptive/domain.py +++ 
b/adaptive/domain.py @@ -565,7 +565,9 @@ def insert_points(self, subdomain, n, *, _check_membership=True): subtri.add_point(point, largest_simplex) self.subpoints_to_subdomains[point].add(subdomain) # If the point was added to a boundary of the subdomain we should - # add it to the neighboring subdomains. + # add it to the neighboring subdomains. If we do not do this, then + # if 'insert_points' is called for the neighboring subdomains, it is + # possible that 'point' may be returned, which is inconsistent. boundary = subtri.on_which_boundary(point) if boundary is not None: # Convert subtriangulation indices to triangulation indices From b4a48145370a08650dd57ce82d13b338046b9fd1 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Mon, 14 Oct 2019 12:21:31 +0200 Subject: [PATCH 094/105] add shortcuts when creating triangulations with exactly 1 simplex This case is very common when inserting 1 point and then splitting at the same point. This change gives a ~25% speedup to the new LearnerND. 
--- adaptive/domain.py | 3 ++- adaptive/learner/triangulation.py | 29 +++++++++++++++++------------ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index 358c21987..f3aa66061 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -496,7 +496,8 @@ def _make_new_subtriangulation(points): points = np.asarray(points) ndim = points.shape[1] boundary_points = points[: ndim + 1] - subtri = Triangulation(points) + # _check_vertices=False to speed up the initial triangulation + subtri = Triangulation(points, _check_vertices=False) subtri.on_which_boundary = functools.partial( _on_which_boundary, _boundary_equations(boundary_points) ) diff --git a/adaptive/learner/triangulation.py b/adaptive/learner/triangulation.py index bb2482ce6..a54eb1571 100644 --- a/adaptive/learner/triangulation.py +++ b/adaptive/learner/triangulation.py @@ -266,7 +266,7 @@ class Triangulation: or more simplices in the """ - def __init__(self, coords): + def __init__(self, coords, *, _check_vertices=True): if not is_iterable_and_sized(coords): raise TypeError("Please provide a 2-dimensional list of points") coords = list(coords) @@ -287,23 +287,28 @@ def __init__(self, coords): raise ValueError("Please provide at least one simplex") coords = list(map(tuple, coords)) - vectors = np.subtract(coords[1:], coords[0]) - if np.linalg.matrix_rank(vectors) < dim: - raise ValueError( - "Initial simplex has zero volumes " - "(the points are linearly dependent)" - ) + if _check_vertices: + vectors = np.subtract(coords[1:], coords[0]) + if np.linalg.matrix_rank(vectors) < dim: + raise ValueError( + "Initial simplex has zero volumes " + "(the points are linearly dependent)" + ) self.vertices = list(coords) self.simplices = set() # initialise empty set for each vertex self.vertex_to_simplices = [set() for _ in coords] - # find a Delaunay triangulation to start with, then we will throw it - # away and continue with our own algorithm - initial_tri = 
scipy.spatial.Delaunay(coords) - for simplex in initial_tri.simplices: - self.add_simplex(simplex) + if len(coords) == dim + 1: + # There is just a single simplex + self.add_simplex(tuple(range(dim + 1))) + else: + # find a Delaunay triangulation to start with, then we will throw it + # away and continue with our own algorithm + initial_tri = scipy.spatial.Delaunay(coords) + for simplex in initial_tri.simplices: + self.add_simplex(simplex) def delete_simplex(self, simplex): simplex = tuple(sorted(simplex)) From 52175f08e4e0a267a8102d51cb8883b2ae48133b Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 15:43:15 +0200 Subject: [PATCH 095/105] fix bug where we receive more points than we asked for --- adaptive/learner/new_learnerND.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index ca7abba2d..6129cd44a 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -242,6 +242,8 @@ def ask(self, n, tell_pending=True): for x in self.boundary_points if x not in self.data and x not in self.pending_points ] + # Make sure we don't give more points than asked for + points = points[:n] # Infinite priority so that the boundary points are prioritized point_priorities = [math.inf] * len(points) if tell_pending: From cdf26a7c63d1eef52f29b145a27f4649d48bcf98 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 16:15:17 +0200 Subject: [PATCH 096/105] add learner tests that simulate the runner and randomly asking/telling --- adaptive/tests/test_learners.py | 96 ++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index 43bca0fd3..3b0721ab7 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -11,6 +11,8 @@ import shutil import tempfile +import hypothesis.strategies as st +import hypothesis.stateful as 
stateful import numpy as np import pytest import scipy.spatial @@ -622,7 +624,7 @@ def test_saving_with_datasaver(learner_type, f, learner_kwargs): os.remove(path) -@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) +@run_with(Learner1D, Learner2D, LearnerND, NewLearnerND, with_all_loss_functions=False) def test_adding_data_outside_of_bounds(learner_type, f, learner_kwargs): # Just test this does not throw an error for now f = generate_random_parametrization(f) @@ -643,6 +645,98 @@ def test_adding_data_outside_of_bounds(learner_type, f, learner_kwargs): learner.ask(10) +# Hypothesis RuleBasedStateMachine does not allow for parametrization so we have to +# wrap it in a parametrized test +@run_with( + Learner1D, + Learner2D, + LearnerND, + NewLearnerND, + SequenceLearner, + AverageLearner, + with_all_loss_functions=False, +) +def test_simulate_runner(learner_type, f, learner_kwargs): + + g = generate_random_parametrization(f) + + # This simulates the current algorithm used by the Runner, i.e. ask for + # 'ncores' points and then tell the results one at a time and ask for + # one more point. 
+ class Machine(stateful.RuleBasedStateMachine): + def __init__(self): + super().__init__() + self.data = dict() + self.learner = learner_type(g, **learner_kwargs) + + pending = stateful.Bundle("pending") + + @stateful.invariant() + def learner_contains_all_data(self): + # TODO: add more invariants that should be true for all learners + assert self.data == self.learner.data + + @stateful.initialize(target=pending, ncores=st.integers(1, 10)) + def init_learner(self, ncores): + points, _ = self.learner.ask(ncores) + data = [(x, self.learner.function(x)) for x in points] + return stateful.multiple(*data) + + @stateful.rule(target=pending, xy=stateful.consumes(pending)) + def ask_and_tell(self, xy): + x, y = xy + self.learner.tell(x, y) + self.data[x] = y + (x,), _ = self.learner.ask(1) + return (x, self.learner.function(x)) + + Machine.TestCase().runTest() + + +# Hypothesis RuleBasedStateMachine does not allow for parametrization so we have to +# wrap it in a parametrized test +@run_with( + Learner1D, + Learner2D, + LearnerND, + NewLearnerND, + SequenceLearner, + AverageLearner, + with_all_loss_functions=False, +) +def test_randomly_ask_tell(learner_type, f, learner_kwargs): + + g = generate_random_parametrization(f) + + # This simulates a strategy where we ask for a random number of points, + # and then tell a random selection of all the points we've asked for so far + class Machine(stateful.RuleBasedStateMachine): + def __init__(self): + super().__init__() + self.data = dict() + # We just test on the most trivial function we can + self.learner = learner_type(g, **learner_kwargs) + + pending = stateful.Bundle("pending") + + @stateful.invariant() + def learner_contains_all_data(self): + assert self.data == self.learner.data + + @stateful.rule(target=pending, n=st.integers(1, 10)) + def ask(self, n): + points, _ = self.learner.ask(n) + return stateful.multiple(*[(x, self.learner.function(x)) for x in points]) + + @stateful.rule(xys=st.lists(stateful.consumes(pending), 
min_size=1)) + def tell(self, xys): + xs, ys = zip(*xys) + self.learner.tell_many(xs, ys) + self.data.update(xys) + + Machine.TestCase().runTest() + + @pytest.mark.xfail @run_with(Learner1D, Learner2D, LearnerND, NewLearnerND) def test_convergence_for_arbitrary_ordering(learner_type, f, learner_kwargs): From 011c5f3a85c3895f39ca1cbb9f7c23f8e2b4d730 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 16:32:25 +0200 Subject: [PATCH 097/105] create queue on initialization rather than calling 'insert' --- adaptive/learner/new_learnerND.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index 6129cd44a..e0ac6573b 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -201,10 +201,11 @@ def _finalize_initialization(self): self.domain.split_at(x) # Recompute all the losses from scratch - self.queue = Queue() self.losses = dict() - for subdomain in self.domain.subdomains(): - self.queue.insert(subdomain, priority=self.priority(subdomain)) + self.queue = Queue( + (subdomain, self.priority(subdomain)) + for subdomain in self.domain.subdomains() + ) @property def npoints(self): From 51901cc8cbe609ade57aa6ea0f4d6d742378b6f0 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 16:45:50 +0200 Subject: [PATCH 098/105] suppress context when raising when an item was not in a queue --- adaptive/priority_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/priority_queue.py b/adaptive/priority_queue.py index 8da1b748c..2509b3cac 100644 --- a/adaptive/priority_queue.py +++ b/adaptive/priority_queue.py @@ -82,7 +82,7 @@ def _find_first(self, item): try: should_be, key = self._items[i] except IndexError: - raise KeyError("item is not in queue") + raise KeyError("item is not in queue") from None if item != should_be: raise KeyError("item is not in queue") return i, key From 
d401cbb39b5eff0448dedd6b976287cd1864aa42 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 17:17:51 +0200 Subject: [PATCH 099/105] fix bug in subdomain updating logic when telling data Before this fix when telling more than 1 point it was possible that we would attempt to remove subdomains from the queue that were only produced "temporarily" when adding points. --- adaptive/learner/new_learnerND.py | 32 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index e0ac6573b..f208b6844 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -328,27 +328,33 @@ def tell_many(self, xs, ys): need_loss_update = self._update_codomain_bounds(ys) - old = set() - new = set() + to_remove = set() + to_add = set() for x in xs: - old_subdomains, new_subdomains = self.domain.split_at(x) - old.update(old_subdomains) - new.update(new_subdomains) - # Remove any subdomains that were new at some point but are now old. - new -= old - - for subdomain in old: + old_subdomains, new_subdomains = map(set, self.domain.split_at(x)) + # Subdomains that were added in a prior iteration of this loop, + # but which have now been removed to make way for others. + temp_subdomains = to_add.intersection(old_subdomains) + # We no longer want to add subdomains that have now been removed, + # and we want to add the new subdomains. + to_add -= temp_subdomains + to_add.update(new_subdomains) + # We do not want to remove subdomains that were produced on a + # prior iteration of this loop, as these will not be in the queue. 
+ to_remove.update(old_subdomains - temp_subdomains) + + for subdomain in to_remove: self.queue.remove(subdomain) del self.losses[subdomain] if need_loss_update: self.queue = Queue( (subdomain, self.priority(subdomain)) - for subdomain in itertools.chain(self.queue.items(), new) + for subdomain in itertools.chain(self.queue.items(), to_add) ) else: # Insert the newly created subdomains into the queue. - for subdomain in new: + for subdomain in to_add: self.queue.insert(subdomain, priority=self.priority(subdomain)) # If the loss function depends on data in neighboring subdomains then @@ -356,11 +362,11 @@ def tell_many(self, xs, ys): # the subdomains we just added. if self.loss_function.n_neighbors > 0: subdomains_to_update = set() - for subdomain in new: + for subdomain in to_add: subdomains_to_update.update( self.domain.neighbors(subdomain, self.loss_function.n_neighbors) ) - subdomains_to_update -= new + subdomains_to_update -= to_add for subdomain in subdomains_to_update: del self.losses[subdomain] # Force loss recomputation self.queue.update(subdomain, priority=self.priority(subdomain)) From 445c6f1be4ca684588d5f3df1bd90d4ad9cac81a Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 17:20:41 +0200 Subject: [PATCH 100/105] improve comment --- adaptive/learner/new_learnerND.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adaptive/learner/new_learnerND.py b/adaptive/learner/new_learnerND.py index f208b6844..6237a717c 100644 --- a/adaptive/learner/new_learnerND.py +++ b/adaptive/learner/new_learnerND.py @@ -368,7 +368,9 @@ def tell_many(self, xs, ys): ) subdomains_to_update -= to_add for subdomain in subdomains_to_update: - del self.losses[subdomain] # Force loss recomputation + # We have more data, so we must force a loss recomputation by + # removing the subdomain from the loss cache. 
+ del self.losses[subdomain] self.queue.update(subdomain, priority=self.priority(subdomain)) def _update_codomain_bounds(self, ys): From d41b355e554ba57c57396d687f3330496934c672 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 18:29:51 +0200 Subject: [PATCH 101/105] blackify domain.py --- adaptive/domain.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/adaptive/domain.py b/adaptive/domain.py index f3aa66061..c512382c9 100644 --- a/adaptive/domain.py +++ b/adaptive/domain.py @@ -139,7 +139,7 @@ def subvolumes(self, subdomain): def _choose_point_in_subinterval(a, b): m = a + (b - a) / 2 if not a < m < b: - raise ValueError("{} cannot be split further".format((a, b))) + raise ValueError(f"{(a, b)} cannot be split further") return m @@ -164,7 +164,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): if n <= 0: raise ValueError("n must be positive") if _check_membership and not self.contains_subdomain(subdomain): - raise ValueError("{} is not present in this interval".format(subdomain)) + raise ValueError(f"{subdomain} is not present in this interval") try: p = self.sub_intervals[subdomain] except KeyError: # No points yet in the interior of this subdomain @@ -188,12 +188,12 @@ def insert(self, x, *, _check_membership=True): if _check_membership: a, b = self.bounds if not (a <= x <= b): - raise ValueError("{} is outside of this interval".format(x)) + raise ValueError(f"{x} is outside of this interval") p = self.points i = p.bisect_left(x) if p[i] == x: - raise ValueError("{} exists in this interval already".format(x)) + raise ValueError(f"{x} exists in this interval already") subdomain = (a, b) = p[i - 1], p[i] try: @@ -202,7 +202,7 @@ def insert(self, x, *, _check_membership=True): self.sub_intervals[subdomain] = SortedList([a, x, b]) else: if x in p: - raise ValueError("{} exists in a subinterval already".format(x)) + raise ValueError(f"{x} exists in a subinterval already") 
p.add(x) return [subdomain] @@ -211,7 +211,7 @@ def remove(self, x, *, _check_membership=True): if _check_membership: a, b = self.bounds if not (a <= x <= b): - raise ValueError("{} is outside of this interval".format(x)) + raise ValueError(f"{x} is outside of this interval") p = self.points i = p.bisect_left(x) @@ -222,7 +222,7 @@ def remove(self, x, *, _check_membership=True): try: sub_points = self.sub_intervals[subdomain] except KeyError: - raise ValueError("{} not in any subdomain".format(x)) + raise ValueError(f"{x} not in any subdomain") else: sub_points.remove(x) if len(sub_points) == 2: @@ -264,7 +264,7 @@ def split_at(self, x, *, _check_membership=True): def which_subdomains(self, x): a, b = self.bounds if not (a <= x <= b): - raise ValueError("{} is outside the interval".format(x)) + raise ValueError(f"{x} is outside the interval") p = self.points i = p.bisect_left(x) if p[i] != x: @@ -317,7 +317,7 @@ def subdomains(self): def subpoints(self, subdomain, *, _check_membership=True): if _check_membership and not self.contains_subdomain(subdomain): - raise ValueError("{} is not present in this interval".format(subdomain)) + raise ValueError(f"{subdomain} is not present in this interval") try: p = self.sub_intervals[subdomain] except KeyError: @@ -548,7 +548,7 @@ def insert_points(self, subdomain, n, *, _check_membership=True): raise ValueError("n must be positive") tri = self.triangulation if _check_membership and subdomain not in tri.simplices: - raise ValueError("{} is not present in this domain".format(subdomain)) + raise ValueError(f"{subdomain} is not present in this domain") subtri = self._get_subtriangulation(subdomain) @@ -587,11 +587,11 @@ def insert(self, x, *, _check_membership=True): # XXX: O(N) in the number of simplices affected_subdomains = self.which_subdomains(x) if not affected_subdomains: - raise ValueError("{} is not present in this domain".format(x)) + raise ValueError(f"{x} is not present in this domain") for subdomain in 
affected_subdomains: subtri = self._get_subtriangulation(subdomain) if x in subtri.vertices: # O(N) in the number of vertices - raise ValueError("{} exists in a subinterval already".format(x)) + raise ValueError(f"{x} exists in a subinterval already") subtri.add_point(x) self.subpoints_to_subdomains[x].update(affected_subdomains) @@ -667,9 +667,7 @@ def split_at(self, x, *, _check_membership=True): subtri.add_point(p) self.subpoints_to_subdomains[p].add(subdomain) p_was_added = True - assert ( - p_was_added - ), "{} was not in the interior of any new simplices".format(x) + assert p_was_added, f"{x} was not in the interior of any new simplices" return old_subdomains, new_subdomains @@ -682,7 +680,7 @@ def which_subdomains(self, x): # XXX: O(N) in the number of simplices subdomains = [s for s in tri.simplices if tri.point_in_simplex(x, s)] if not subdomains: - raise ValueError("{} is not in the domain".format(x)) + raise ValueError(f"{x} is not in the domain") return list(subdomains) def contains_subdomain(self, subdomain): @@ -716,7 +714,7 @@ def vertices(self): def subpoints(self, subdomain, *, _check_membership=True): if _check_membership and not self.contains_subdomain(subdomain): - raise ValueError("{} is not present in this domain".format(subdomain)) + raise ValueError(f"{subdomain} is not present in this domain") try: subtri = self.sub_triangulations[subdomain] except KeyError: From 7430143324a49bccf7e19ebb53a9af4bf8e5857b Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Wed, 16 Oct 2019 18:30:36 +0200 Subject: [PATCH 102/105] check that points matrix is well conditioned --- adaptive/tests/domain_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py index 5e6b878d7..24a529ed8 100644 --- a/adaptive/tests/domain_utils.py +++ b/adaptive/tests/domain_utils.py @@ -33,7 +33,7 @@ def unique_vectors(xs): return False d = scipy.spatial.distance_matrix(xs, xs) d = np.extract(1 - 
np.identity(d.shape[0]), d) - return not np.any(d < 1e-3 / c) + return not np.any(d < 1e-3 / c) and np.linalg.cond(xs) < 1e5 @st.composite From 6c0cafad48449863235e55da4de8c464471cc52e Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Thu, 17 Oct 2019 09:43:09 +0200 Subject: [PATCH 103/105] remove stateful invariant checks These are not catching an interesting class of bugs for now. We should reintroduce these when we come up with a firmer learner API. --- adaptive/tests/test_learners.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py index 3b0721ab7..4d5fd6aed 100644 --- a/adaptive/tests/test_learners.py +++ b/adaptive/tests/test_learners.py @@ -666,15 +666,11 @@ def test_simulate_runner(learner_type, f, learner_kwargs): class Machine(stateful.RuleBasedStateMachine): def __init__(self): super().__init__() - self.data = dict() self.learner = learner_type(g, **learner_kwargs) pending = stateful.Bundle("pending") - @stateful.invariant() - def learner_contains_all_data(self): - # TODO: add more invariants that should be true for all learners - assert self.data == self.learner.data + # TODO: add some invariant checking here @stateful.initialize(target=pending, ncores=st.integers(1, 10)) def init_learner(self, ncores): @@ -686,7 +682,6 @@ def init_learner(self, ncores): def ask_and_tell(self, xy): x, y = xy self.learner.tell(x, y) - self.data[x] = y (x,), _ = self.learner.ask(1) return (x, self.learner.function(x)) @@ -713,15 +708,11 @@ def test_randomly_ask_tell(learner_type, f, learner_kwargs): class Machine(stateful.RuleBasedStateMachine): def __init__(self): super().__init__() - self.data = dict() - # We just test on the most trivial function we can self.learner = learner_type(g, **learner_kwargs) pending = stateful.Bundle("pending") - @stateful.invariant() - def learner_contains_all_data(self): - assert self.data == self.learner.data + # TODO: add some invariant 
checking here @stateful.rule(target=pending, n=st.integers(1, 10)) def ask(self, n): @@ -732,7 +723,6 @@ def ask(self, n): def tell(self, xys): xs, ys = zip(*xys) self.learner.tell_many(xs, ys) - self.data.update(xys) Machine.TestCase().runTest() From 9ffd7bc164cf99f27b71bcfb0723d207cc9434b8 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Thu, 17 Oct 2019 10:16:29 +0200 Subject: [PATCH 104/105] make random domains by sampling points uniformly in the unit hypercube This does not test very degenerate domains, but for the moment we just want to test that everything works for domains defined over hulls with different numbers of points. Adaptive will typically not be used on very degenerate domains. --- adaptive/tests/domain_utils.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py index 24a529ed8..705b9e101 100644 --- a/adaptive/tests/domain_utils.py +++ b/adaptive/tests/domain_utils.py @@ -6,7 +6,6 @@ import hypothesis.strategies as st from adaptive.learner.new_learnerND import ConvexHull, Interval -from hypothesis.extra import numpy as hynp def reflections(ndim): @@ -153,11 +152,10 @@ def make_random_domain(draw, ndim, fill=True): limits = draw(st.tuples(reals, reals).map(sorted).filter(lambda x: x[0] < x[1])) domain = Interval(*limits) else: - points = draw( - hynp.arrays(np.float, (10, ndim), elements=reals, unique=True).filter( - unique_vectors - ) - ) + # Set the numpy random seed + draw(st.random_module()) + # Generate points in a hypercube around the origin + points = np.random.rand(10, ndim) - 0.5 domain = ConvexHull(points) return domain From 587d0272b833f8d651eeb4c936087cb93ce9d769 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Thu, 17 Oct 2019 11:33:04 +0200 Subject: [PATCH 105/105] simplify utilities for generating domains and points inside/outside them We now use numpy.random for generating random points, which produces much less degenerate examples than 
using Hypothesis' "floats" strategy. At this point we don't want to test super awkward cases. --- adaptive/tests/domain_utils.py | 108 +++++++++++++-------------------- 1 file changed, 43 insertions(+), 65 deletions(-) diff --git a/adaptive/tests/domain_utils.py b/adaptive/tests/domain_utils.py index 705b9e101..0e472d406 100644 --- a/adaptive/tests/domain_utils.py +++ b/adaptive/tests/domain_utils.py @@ -1,47 +1,25 @@ import itertools import numpy as np -import scipy.linalg -import scipy.spatial import hypothesis.strategies as st from adaptive.learner.new_learnerND import ConvexHull, Interval +# This module contains utilities for producing domains and points inside and outside of them. +# Because we typically do not want to test very degenerate cases (e.g. points that are almost +# coincident, very large or very small) we prefer generating points in the interval [0, 1) +# using numpy.random, rather than drawing from Hypothesis' "floats" strategy. + +# Return an iterator that yields matrices reflecting in the cartesian +# coordinate axes in 'ndim' dimensions. 
def reflections(ndim): return map(np.diag, itertools.product([1, -1], repeat=ndim)) -reals = st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False) -positive_reals = st.floats( - min_value=1e-3, max_value=100, allow_nan=False, allow_infinity=False -) - - -@st.composite -def point(draw, ndim): - return draw(reals if ndim == 1 else st.tuples(*[reals] * ndim)) - - -def unique_vectors(xs): - xs = np.asarray(xs) - if len(xs.shape) == 1: - xs = xs[:, None] - c = np.max(np.linalg.norm(xs, axis=1)) - if c == 0: - return False - d = scipy.spatial.distance_matrix(xs, xs) - d = np.extract(1 - np.identity(d.shape[0]), d) - return not np.any(d < 1e-3 / c) and np.linalg.cond(xs) < 1e5 - - -@st.composite -def point_inside_simplex(draw, simplex): - simplex = draw(simplex) +def point_inside_simplex(simplex): simplex = np.asarray(simplex) dim = simplex.shape[1] - # Set the numpy random seed - draw(st.random_module()) # Generate a point in the unit simplex. # https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex # We avoid using Hypothesis to generate the points as it typically chooses @@ -57,13 +35,11 @@ def point_inside_simplex(draw, simplex): @st.composite def points_inside(draw, domain, n): - kwargs = dict( - allow_nan=False, allow_infinity=False, exclude_min=True, exclude_max=True - ) + # Set the numpy random seed + draw(st.random_module()) if isinstance(domain, Interval): a, b = domain.bounds - eps = (b - a) * 1e-2 - x = st.floats(min_value=(a + eps), max_value=(b - eps), **kwargs) + return a + (b - a) * np.random.rand(n) else: assert isinstance(domain, ConvexHull) tri = domain.triangulation @@ -71,10 +47,11 @@ def points_inside(draw, domain, n): simplex = st.sampled_from(simplices).map( lambda simplex: [tri.vertices[s] for s in simplex] ) - x = point_inside_simplex(simplex) - - xs = st.tuples(*[x] * n).filter(unique_vectors) - return draw(xs) + # "point_inside_simplex" uses the numpy RNG, and we set the seed above. 
+ # Together this means we're almost guaranteed not to get coinciding points. + # Note that we draw from the 'simplex' strategy on each iteration, so we + # distribute the points between the different simplices in the domain. + return [tuple(point_inside_simplex(draw(simplex))) for _ in range(n)] @st.composite @@ -90,30 +67,26 @@ def a_few_points_inside(draw, domain): @st.composite def points_outside(draw, domain, n): - kwargs = dict(allow_nan=False, allow_infinity=False) + # set numpy random seed + draw(st.random_module()) + if isinstance(domain, Interval): a, b = domain.bounds - length = b - a - before_domain = st.floats(a - 10 * length, a, exclude_max=True, **kwargs) - after_domain = st.floats(b, b + 10 * length, exclude_min=True, **kwargs) - x = before_domain | after_domain + ndim = 1 else: assert isinstance(domain, ConvexHull) hull = domain.bounds - # Generate point between bounding box and bounding box * 10 points = hull.points[hull.vertices] - x = st.tuples( - *[ - ( - st.floats(a - 10 * (b - a), a, exclude_max=True, **kwargs) - | st.floats(b, b + 10 * (b - a), exclude_min=True, **kwargs) - ) - for a, b in zip(points.min(axis=0), points.max(axis=0)) - ] - ) + ndim = points.shape[1] + a, b = points.min(axis=0)[None, :], points.max(axis=0)[None, :] - xs = st.tuples(*[x] * n).filter(unique_vectors) - return draw(xs) + # Generate a point outside the bounding box of the domain. 
+ center = (a + b) / 2 + border = (b - a) / 2 + r = border + 10 * border * np.random.rand(n, ndim) + quadrant = np.sign(np.random.rand(n, ndim) - 0.5) + assert not np.any(quadrant == 0) + return center + quadrant * r @st.composite @@ -127,6 +100,9 @@ def point_on_shared_face(draw, domain, dim): assert isinstance(domain, ConvexHull) assert 0 < dim < domain.ndim + # Set the numpy random seed + draw(st.random_module()) + tri = domain.triangulation for face in tri.faces(dim + 1): @@ -136,8 +112,7 @@ def point_on_shared_face(draw, domain, dim): vertices = np.array([tri.vertices[i] for i in face]) - f = st.floats(1e-3, 1 - 1e-3, allow_nan=False, allow_infinity=False) - xb = draw(st.tuples(*[f] * dim)) + xb = np.random.rand(dim) x = tuple(vertices[0] + xb @ (vertices[1:] - vertices[0])) @@ -148,12 +123,13 @@ def point_on_shared_face(draw, domain, dim): @st.composite def make_random_domain(draw, ndim, fill=True): + # Set the numpy random seed + draw(st.random_module()) + if ndim == 1: - limits = draw(st.tuples(reals, reals).map(sorted).filter(lambda x: x[0] < x[1])) - domain = Interval(*limits) + a, b = sorted(np.random.rand(2) - 0.5) + domain = Interval(a, b) else: - # Set the numpy random seed - draw(st.random_module()) # Generate points in a hypercube around the origin points = np.random.rand(10, ndim) - 0.5 domain = ConvexHull(points) @@ -162,12 +138,14 @@ def make_random_domain(draw, ndim, fill=True): @st.composite def make_hypercube_domain(draw, ndim, fill=True): + # Set the numpy random seed + draw(st.random_module()) + limit = np.random.rand() + if ndim == 1: - limit = draw(positive_reals) subdomain = Interval(-limit, limit) else: - x = draw(positive_reals) - point = np.full(ndim, x) + point = np.full(ndim, limit) boundary_points = [r @ point for r in reflections(ndim)] subdomain = ConvexHull(boundary_points) return subdomain