Skip to content

Commit 717c85b

Browse files
committed
Fix inverseLocate so that it hopefully works now. In directInverseLocate I was advancing the wrong members of the sample pair (the text location and the SA index were swapped), and my upper bound on permissible inputs in inverseLocate was badly wrong, because there is a lot of unused space between texts in string coordinate space.
1 parent 4553f30 commit 717c85b

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

rlcsa.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,8 @@ RLCSA::locate(usint index, bool steps) const
375375
usint
376376
RLCSA::inverseLocate(usint location) const
377377
{
378-
if(!(this->support_locate) || location >= this->data_size) { return this->data_size; }
378+
if(!(this->support_locate)) { return this->data_size; }
379+
// TODO: Check for out-of-bounds locations somehow.
379380

380381
// Inverse-locate the given location in BWT space, and convert back to SA
381382
// space before returning.
@@ -434,23 +435,43 @@ RLCSA::directLocate(usint index, bool steps) const
434435
usint
435436
RLCSA::directInverseLocate(usint location) const
436437
{
437-
// Get the SA value and text location (in that order) of the last SA sample
438+
439+
std::cout << "Dumping SA" << std::endl;
440+
441+
for(usint i = 0; i <= this->data_size; i++)
442+
{
443+
if(this->sa_samples->isSampled(i))
444+
{
445+
std::cout << "SA[" << i << "] = " << this->sa_samples->getSampleAt(i) << std::endl;
446+
}
447+
}
448+
449+
std::cout << "Un-locating " << location << std::endl;
450+
// Get the SA value and SA index (in that order) of the last SA sample
438451
// before the given text location.
439452
pair_type last_sample = this->sa_samples->inverseSA(location);
440453

441454
// TODO: catch the (size, size) sentinel.
442-
443-
while(last_sample.second != location) {
455+
std::cout << "SA[" << last_sample.second << "] = " << last_sample.first << std::endl;
456+
while(last_sample.first < location) {
444457
// We're not at the desired text location, so we must be before it.
445458

446-
// Advance the text location by 1
447-
last_sample.second += 1;
459+
// Advance the text location (SA value) by 1
460+
last_sample.first += 1;
448461

449-
// Advance the SA position to that corresponding to the next character. Note
462+
// Advance the SA index to that corresponding to the next character. Note
450463
// that psi returns BWT coordinates, so we have to convert back to SA
451464
// coordinates.
452-
last_sample.first = (this->psi(last_sample.first) -
465+
last_sample.second = (this->psi(last_sample.second) -
453466
this->number_of_sequences);
467+
468+
std::cout << "SA[" << last_sample.second << "] = " << last_sample.first << std::endl;
469+
}
470+
471+
if(last_sample.first != location) {
472+
// We managed to start on the wrong side of what we're trying to locate;
473+
// inverseSA lied to us.
474+
throw "Somehow skipped desired inverse locate position";
454475
}
455476

456477
// Return the answer in BWT coordinates. It will probably be immediately

sasamples.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class SASamples
5656
void writeTo(FILE* sample_file) const;
5757

5858
// Returns (i, inverseSA(i)) such that i is the last sampled position up to value.
59+
// The return value can also be thought of as (SA[j], j).
5960
// Value is actual 0-based suffix array value.
6061
// Returns (size, size) if value is too large.
6162
pair_type inverseSA(usint value) const;

0 commit comments

Comments
 (0)