Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On-manifold noise for Concentric Spheres (Inferred) #3

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions src/datagen/synthetic/multiple/concentricspheres.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, N=1000, num_neg=None, n=100, k=2, D=2.0, max_norm=5.0, bp=1.8
sigma=5, seed=42, r=10.0, g=10.0, x_ck=None, rotation=None, translation=None,\
normalize=True, norm_factor=None, gamma=0.5, anchor=None, online=False,\
off_online=False, augment=False, inferred=False, nn=None, buffer_nbhrs=2,\
max_t_delta=1e-3, recomp_tn=False, use_new_knn=False, cache_dir="/tmp", **kwargs):
max_t_delta=1e-3, recomp_tn=False, use_new_knn=False, cache_dir="/tmp", on_mfld_noise=0, **kwargs):
"""
:param N: number of samples in the dataset
:type N: int
Expand Down Expand Up @@ -103,6 +103,8 @@ def __init__(self, N=1000, num_neg=None, n=100, k=2, D=2.0, max_norm=5.0, bp=1.8
:type use_new_knn: bool
:param cache_dir: directory to cache auxiliary attributes in order to free RAM
:type cache_dir: str
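:param on_mfld_noise: upper bound on the L2 norm of the random noise vector added to each on-manifold point when `inferred == True` (per-point magnitude is drawn uniformly from [0, on_mfld_noise]); 0 adds no noise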
:type on_mfld_noise: float
"""

if seed is not None: seed_everything(seed)
Expand Down Expand Up @@ -174,6 +176,9 @@ def __init__(self, N=1000, num_neg=None, n=100, k=2, D=2.0, max_norm=5.0, bp=1.8
### only relevant when `self.inferred == True`###

self.avoid_io = True # generate points without writing intermediate steps to disk

self.on_mfld_noise = on_mfld_noise
self.on_mfld_noise_mat = None

self.all_points_trivial_ = None
self.all_points_tr_ = None
Expand Down Expand Up @@ -271,6 +276,13 @@ def _collect_on_mfld_k(self):
if self.N < 1e+7:
self.on_mfld_pts_trivial_ = np.zeros((self.num_pos, self.n))
self.on_mfld_pts_trivial_[:, :self.k] = self.on_mfld_pts_k_
if self.on_mfld_noise == 0:
self.on_mfld_noise_mat = 0
else:
self.on_mfld_noise_mat = np.random.normal(self.mu, self.sigma, size=self.on_mfld_pts_trivial_.shape)
self.on_mfld_noise_mat /= np.linalg.norm(self.on_mfld_noise_mat, axis=1, ord=2).reshape(-1, 1)
self.on_mfld_noise_mat *= np.random.uniform(0, self.on_mfld_noise, size=self.on_mfld_noise_mat.shape[0]).reshape(-1, 1)
self.on_mfld_pts_trivial_ += self.on_mfld_noise_mat

def _inf_setup(self):
"""setting up data for off manifold samples when computing inferred manifold"""
Expand All @@ -297,6 +309,7 @@ def find_knn(self, X, use_new=False):
if self.on_mfld_pts_trivial_ is None:
to_fit = np.zeros((self.N, self.n))
to_fit[:, self.k] = self.on_mfld_pts_k_
to_fit += self.on_mfld_noise_mat
logger.info("[ConcentricSpheres]: fitting knn...")
self.knn.fit(to_fit)
logger.info("[ConcentricSpheres]: knn fit done")
Expand Down Expand Up @@ -332,6 +345,7 @@ def make_inferred_off_mfld2(self, pp_chunk_size=50000):
if self.on_mfld_pts_trivial_ is None:
X = np.zeros((self.N, self.n))
X[:, self.k] = self.on_mfld_pts_k_
X += self.on_mfld_noise_mat
self.nn_distances, self.nn_indices = self.find_knn(X, use_new=False)
else:
self.nn_distances, self.nn_indices = self.find_knn(self.on_mfld_pts_trivial_, use_new=False)
Expand Down Expand Up @@ -364,9 +378,10 @@ def make_inferred_off_mfld2(self, pp_chunk_size=50000):
if self.on_mfld_pts_trivial_ is None:
on_mfld_pts = np.zeros((pp_chunk_size, self.n))
on_mfld_pts[:, :self.k] = self.on_mfld_pts_k_[i:i+pp_chunk_size]
on_mfld_pts += self.on_mfld_noise_mat[i:i+pp_chunk_size]

nbhrs = np.zeros((pp_chunk_size, nbhr_indices.shape[1], self.n))
nbhrs[:, :, :self.k] = self.on_mfld_pts_k_[nbhr_indices]
nbhrs[:, :, :self.k] = self.on_mfld_pts_k_[nbhr_indices] + self.on_mfld_noise_mat[nbhr_indices]
else:
on_mfld_pts = self.on_mfld_pts_trivial_[i:i+pp_chunk_size]
nbhrs = self.on_mfld_pts_trivial_[nbhr_indices]
Expand Down
3 changes: 2 additions & 1 deletion src/pipeline/data_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def inf_conc_spheres_cfg():
"max_t_delta": 1e-3,
"recomp_tn": False,
"gamma": 0,
"cache_dir": "../../data_cache/train/"
"cache_dir": "../../data_cache/train/",
"on_mfld_noise": 0.0
}

val_cfg_dict = copy.deepcopy(train_cfg_dict)
Expand Down