Skip to content

Commit 1887192

Browse files
authored
Merge pull request #26993 from dschwen/moose-kdtree-pickle
Serialize ValueCache
2 parents 49a1772 + 2f534a9 commit 1887192

File tree

3 files changed

+291
-98
lines changed

3 files changed

+291
-98
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# ValueCache
2+
3+
The `ValueCache<T>` class can cache value of type `T` indexed by n-dimensional vectors. Retrieval is performed using a kd-tree data structure (using nanoflann).
4+
5+
## Persistence
6+
7+
When the object is constructed using the
8+
9+
```C++
10+
ValueCache(const std::string & file_name, std::size_t in_dim, std::size_t max_leaf_size = 10);
11+
```
12+
13+
constructor, the state of the cache is saved to a file upon destruction of the cache object (usually at the end of a simulation), and read in at construction of the cache object. Thus the cached knowledge from a previous simulation will be available in future runs.
14+
15+
!alert warning
16+
It is up to the developer to ensure that the cache state from previous runs is applicable to the following runs when the persistence feature is used!
17+
18+
Note that overloads for `dataLoad` and `dataStore` for type `T` must be implemented. If full persistence is not desired the cache object can still be declared as restartable.

framework/include/utils/ValueCache.h

Lines changed: 216 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,25 @@
1010
#pragma once
1111

1212
#include "MooseTypes.h"
13+
#include "MooseUtils.h"
14+
#include "DataIO.h"
15+
#include "libmesh/int_range.h"
1316
#include "libmesh/nanoflann.hpp"
17+
#include <cstdlib>
18+
#include <fstream>
19+
#include <iostream>
1420
#include <memory>
1521
#include <vector>
1622
#include <tuple>
23+
#include <optional>
24+
#include <exception>
25+
26+
template <typename T>
27+
class ValueCache;
28+
template <typename T>
29+
void dataStore(std::ostream & stream, ValueCache<T> & c, void * context);
30+
template <typename T>
31+
void dataLoad(std::istream & stream, ValueCache<T> & c, void * context);
1732

1833
/**
1934
* ValueCache is a generic helper template to implement an unstructured data
@@ -27,56 +42,103 @@ template <typename T>
2742
class ValueCache
2843
{
2944
public:
45+
/// Construct a ValueCache with indices in in_dim dimensions
3046
ValueCache(std::size_t in_dim, std::size_t max_leaf_size = 10);
3147

48+
/// Same as above, but provide a filename for storing/restoring the cache content between runs
49+
ValueCache(const std::string & file_name, std::size_t in_dim, std::size_t max_leaf_size = 10);
50+
51+
/// Object destructor. If the cache was constructed with a file name, it gets written here.
52+
~ValueCache();
53+
54+
/// insert a new value out_value at the position in_val
3255
void insert(const std::vector<Real> & in_val, const T & out_val);
56+
57+
/// get a single neighbor of in_val along with stored values and distances
58+
std::tuple<const std::vector<Real> &, const T &, Real>
59+
getNeighbor(const std::vector<Real> & in_val);
60+
61+
/// get a list of up to k neighbors of in_val along with stored values and distances
62+
std::vector<std::tuple<const std::vector<Real> &, const T &, Real>>
63+
getNeighbors(const std::vector<Real> & in_val, const std::size_t k);
64+
65+
/// return the number of cache entries
3366
std::size_t size();
34-
bool guess(const std::vector<Real> & in_val, T & out_val, Real & distance_sqr);
35-
std::vector<std::tuple<std::vector<Real> &, T &, Real>>
36-
kNearestNeighbors(const std::vector<Real> & in_val, const std::size_t k);
67+
68+
/// remove all data from the cache
69+
void clear();
70+
71+
///@{ Nanoflann interface functions
72+
std::size_t kdtree_get_point_count() const;
73+
Real kdtree_get_pt(const std::size_t idx, const std::size_t dim) const;
74+
template <class BBOX>
75+
bool kdtree_get_bbox(BBOX & bb) const;
76+
///@}
3777

3878
protected:
39-
struct PointCloud
40-
{
41-
PointCloud(std::size_t in_dim) : _in_dim(in_dim) {}
42-
43-
std::vector<std::vector<Real>> _pts;
44-
const std::size_t _in_dim;
45-
46-
inline size_t kdtree_get_point_count() const { return _pts.size(); }
47-
inline Real kdtree_get_pt(const std::size_t idx, const std::size_t dim) const
48-
{
49-
return _pts[idx][dim];
50-
}
51-
template <class BBOX>
52-
bool kdtree_get_bbox(BBOX & /* bb */) const
53-
{
54-
return false;
55-
}
56-
} _point_cloud;
79+
/// rebuild the kd-tree from scratch and update the bounding box
80+
void rebuildTree();
5781

5882
using KdTreeT =
59-
nanoflann::KDTreeSingleIndexDynamicAdaptor<nanoflann::L2_Simple_Adaptor<Real, PointCloud>,
60-
PointCloud,
83+
nanoflann::KDTreeSingleIndexDynamicAdaptor<nanoflann::L2_Simple_Adaptor<Real, ValueCache<T>>,
84+
ValueCache<T>,
6185
-1,
6286
std::size_t>;
6387

88+
std::vector<std::pair<std::vector<Real>, T>> _location_data;
6489
std::unique_ptr<KdTreeT> _kd_tree;
65-
std::vector<T> _data;
6690

6791
const std::size_t _in_dim;
6892
const std::size_t _max_leaf_size;
6993
const std::size_t _max_subtrees;
94+
95+
/// file name for persistent store/restore of the cache
96+
std::optional<std::string> _persistent_storage_file;
97+
98+
/// bounding box (updated upon insertion)
99+
std::vector<std::pair<Real, Real>> _bbox;
100+
101+
friend void dataStore<T>(std::ostream & stream, ValueCache<T> & c, void * context);
102+
friend void dataLoad<T>(std::istream & stream, ValueCache<T> & c, void * context);
70103
};
71104

72105
template <typename T>
73106
ValueCache<T>::ValueCache(std::size_t in_dim, std::size_t max_leaf_size)
74-
: _point_cloud(in_dim),
75-
_kd_tree(nullptr),
76-
_in_dim(in_dim),
77-
_max_leaf_size(max_leaf_size),
78-
_max_subtrees(100)
107+
: _kd_tree(nullptr), _in_dim(in_dim), _max_leaf_size(max_leaf_size), _max_subtrees(100)
108+
{
109+
}
110+
111+
template <typename T>
112+
ValueCache<T>::ValueCache(const std::string & file_name,
113+
std::size_t in_dim,
114+
std::size_t max_leaf_size)
115+
: ValueCache(in_dim, max_leaf_size)
79116
{
117+
_persistent_storage_file = file_name;
118+
119+
// if the persistent storage file exists and is readable, load it
120+
if (MooseUtils::checkFileReadable(*_persistent_storage_file,
121+
/*check_line_endings =*/false,
122+
/*throw_on_unreadable =*/false))
123+
{
124+
std::ifstream in_file(_persistent_storage_file->c_str());
125+
if (!in_file)
126+
mooseError("Failed to open '", *_persistent_storage_file, "' for reading.");
127+
dataLoad(in_file, *this, nullptr);
128+
}
129+
}
130+
131+
template <typename T>
132+
ValueCache<T>::~ValueCache()
133+
{
134+
// if a persistent storage file was specified, write results back to it
135+
if (_persistent_storage_file.has_value())
136+
{
137+
std::ofstream out_file(_persistent_storage_file->c_str());
138+
if (!out_file)
139+
mooseWarning("Failed to open '", *_persistent_storage_file, "' for writing.");
140+
dataStore(out_file, *this, nullptr);
141+
}
80142
}
81143

82144
template <typename T>
@@ -85,11 +147,20 @@ ValueCache<T>::insert(const std::vector<Real> & in_val, const T & out_val)
85147
{
86148
mooseAssert(in_val.size() == _in_dim, "Key dimensions do not match cache dimensions");
87149

88-
auto id = _point_cloud._pts.size();
89-
mooseAssert(size() == id, "Inconsistent cache data size.");
150+
auto id = size();
151+
_location_data.emplace_back(in_val, out_val);
90152

91-
_point_cloud._pts.push_back(in_val);
92-
_data.push_back(out_val);
153+
// update bounding box
154+
if (id == 0)
155+
{
156+
// first item is inserted
157+
_bbox.resize(_in_dim);
158+
for (const auto i : make_range(_in_dim))
159+
_bbox[i] = {in_val[i], in_val[i]};
160+
}
161+
else
162+
for (const auto i : make_range(_in_dim))
163+
_bbox[i] = {std::min(_bbox[i].first, in_val[i]), std::max(_bbox[i].second, in_val[i])};
93164

94165
// do we have too many subtrees?
95166
if (_kd_tree && _kd_tree->getAllIndices().size() > _max_subtrees)
@@ -98,65 +169,147 @@ ValueCache<T>::insert(const std::vector<Real> & in_val, const T & out_val)
98169
// rebuild tree or add point
99170
if (!_kd_tree)
100171
{
101-
_kd_tree = std::make_unique<KdTreeT>(_in_dim, _point_cloud, _max_leaf_size);
172+
_kd_tree = std::make_unique<KdTreeT>(_in_dim, *this, _max_leaf_size);
102173
mooseAssert(_kd_tree != nullptr, "KDTree was not properly initialized.");
103174
}
104175
else
105176
_kd_tree->addPoints(id, id);
106177
}
107178

179+
/**
180+
* Retrieve a single closest neighbor to in_val. Throws an exception if no values are stored.
181+
*/
108182
template <typename T>
109-
std::size_t
110-
ValueCache<T>::size()
111-
{
112-
return _data.size();
113-
}
114-
115-
template <typename T>
116-
bool
117-
ValueCache<T>::guess(const std::vector<Real> & in_val, T & out_val, Real & distance_sqr)
183+
std::tuple<const std::vector<Real> &, const T &, Real>
184+
ValueCache<T>::getNeighbor(const std::vector<Real> & in_val)
118185
{
119-
// cache is empty
120-
if (_data.empty())
121-
return false;
186+
// throw an exception if this is called on an empty cache
187+
if (_location_data.empty())
188+
throw std::runtime_error("Attempting to retrieve a neighbor from an empty ValueCache.");
122189

190+
// buffers for the kNN search
123191
nanoflann::KNNResultSet<Real> result_set(1);
124192
std::size_t return_index;
125-
result_set.init(&return_index, &distance_sqr);
193+
Real distance;
126194

127-
// perform search
195+
// kNN search
196+
result_set.init(&return_index, &distance);
128197
_kd_tree->findNeighbors(result_set, in_val.data());
129198

130-
// no result found
131-
if (result_set.size() != 1)
132-
return false;
133-
134-
out_val = _data[return_index];
135-
return true;
199+
const auto & [location, data] = _location_data[return_index];
200+
return {std::cref(location), std::cref(data), distance};
136201
}
137202

138203
/**
139204
* This function performs a search on the value cache and returns either the k-nearest neighbors or
140205
* the neighbors available if the cache size is less than k.
141206
*/
142207
template <typename T>
143-
std::vector<std::tuple<std::vector<Real> &, T &, Real>>
144-
ValueCache<T>::kNearestNeighbors(const std::vector<Real> & in_val, const std::size_t k)
208+
std::vector<std::tuple<const std::vector<Real> &, const T &, Real>>
209+
ValueCache<T>::getNeighbors(const std::vector<Real> & in_val, const std::size_t k)
145210
{
146-
std::vector<std::tuple<std::vector<Real> &, T &, Real>> nearest_neighbors;
211+
// return early if no points are stored
212+
if (_location_data.empty())
213+
return {};
147214

215+
// buffers for the kNN search
148216
nanoflann::KNNResultSet<Real> result_set(std::min(k, size()));
149217
std::vector<std::size_t> return_indices(std::min(k, size()));
150218
std::vector<Real> distances(std::min(k, size()));
151219

152-
result_set.init(return_indices.data(), distances.data());
153-
154220
// kNN search
221+
result_set.init(return_indices.data(), distances.data());
155222
_kd_tree->findNeighbors(result_set, in_val.data());
156223

157-
for (std::size_t i = 0; i < result_set.size(); ++i)
158-
nearest_neighbors.push_back(
159-
std::tie((_point_cloud._pts[return_indices[i]]), (_data[return_indices[i]]), distances[i]));
160-
224+
// prepare results to be returned
225+
std::vector<std::tuple<const std::vector<Real> &, const T &, Real>> nearest_neighbors;
226+
for (const auto i : index_range(result_set))
227+
{
228+
const auto & [location, data] = _location_data[return_indices[i]];
229+
nearest_neighbors.emplace_back(std::cref(location), std::cref(data), distances[i]);
230+
}
161231
return nearest_neighbors;
162232
}
233+
234+
template <typename T>
235+
std::size_t
236+
ValueCache<T>::size()
237+
{
238+
return kdtree_get_point_count();
239+
}
240+
241+
template <typename T>
242+
void
243+
ValueCache<T>::clear()
244+
{
245+
_location_data.clear();
246+
_kd_tree = nullptr;
247+
}
248+
249+
template <typename T>
250+
void
251+
ValueCache<T>::rebuildTree()
252+
{
253+
if (_location_data.empty())
254+
return;
255+
256+
// reset bounding box (must be done before the tree is built)
257+
_bbox.resize(_in_dim);
258+
const auto & location0 = _location_data[0].first;
259+
for (const auto i : make_range(_in_dim))
260+
_bbox[i] = {location0[i], location0[i]};
261+
262+
for (const auto & pair : _location_data)
263+
{
264+
const auto & location = pair.first;
265+
for (const auto i : make_range(_in_dim))
266+
_bbox[i] = {std::min(_bbox[i].first, location[i]), std::max(_bbox[i].second, location[i])};
267+
}
268+
269+
// build kd-tree
270+
_kd_tree = std::make_unique<KdTreeT>(_in_dim, *this, _max_leaf_size);
271+
mooseAssert(_kd_tree != nullptr, "KDTree was not properly initialized.");
272+
}
273+
274+
template <typename T>
275+
std::size_t
276+
ValueCache<T>::kdtree_get_point_count() const
277+
{
278+
return _location_data.size();
279+
}
280+
281+
template <typename T>
282+
Real
283+
ValueCache<T>::kdtree_get_pt(const std::size_t idx, const std::size_t dim) const
284+
{
285+
return _location_data[idx].first[dim];
286+
}
287+
288+
template <typename T>
289+
template <class BBOX>
290+
bool
291+
ValueCache<T>::kdtree_get_bbox(BBOX & bb) const
292+
{
293+
if (_location_data.empty())
294+
return false;
295+
296+
// return the bounding box incrementally built upon insertion
297+
for (const auto i : make_range(_in_dim))
298+
bb[i] = {_bbox[i].first, _bbox[i].second};
299+
return true;
300+
}
301+
302+
template <typename T>
303+
inline void
304+
dataStore(std::ostream & stream, ValueCache<T> & c, void * context)
305+
{
306+
storeHelper(stream, c._location_data, context);
307+
}
308+
309+
template <typename T>
310+
inline void
311+
dataLoad(std::istream & stream, ValueCache<T> & c, void * context)
312+
{
313+
loadHelper(stream, c._location_data, context);
314+
c.rebuildTree();
315+
}

0 commit comments

Comments
 (0)