Skip to content

Commit a26dc45

Browse files
committed
Probabilistic properties fixed
Some probabilistic properties were refined. Added a-priori-fpp (for both the entire filter and each subset), added a-priori-isep (inter-set errors probability). Flotation feature was removed.
1 parent 6708637 commit a26dc45

File tree

4 files changed

+160
-35
lines changed

4 files changed

+160
-35
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Spatial Bloom Filters have been first proposed for use in location-privacy appli
99

1010
The libSBF-cpp repository contains the C++ implementation of the SBF data structure. The SBF class is provided, as well as various methods for managing the filter:
1111
- once the filter is constructed, the user can insert elements into it through the `Insert` method. The `Check` method, on the other hand, is used to verify whether an element belongs to one of the mapped sets.
12-
- methods `SetAreaFpp`, `GetFilterSparsity`, `GetFilterFpp`, `GetAreaEmersion` and `GetAreaFlotation` allow to compute and return several probabilistic properties of the constructed filter.
12+
- methods `SetAreaFpp`, `GetFilterSparsity`, `GetFilterFpp` and `GetAreaEmersion` compute and return several probabilistic properties of the constructed filter.
1313
- finally, two methods are provided to print out the filter: `PrintFilter` prints the filter and related statistics to the standard output whereas `SaveToDisk` writes the filter onto a CSV file.
1414

1515
For more details on the implementation, and how to use the library please refer to the [homepage](http://sbf.csr.unibo.it/ "SBF project homepage") of the project.

sbf.cpp

Lines changed: 88 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ void SBF::PrintFilter(int mode)
265265
printf("Number of cells: %d\n",this->cells);
266266
printf("Size in Bytes: %d\n",this->size);
267267
printf("Filter sparsity: %.5f\n",this->GetFilterSparsity());
268+
printf("Filter a-priori fpp: %.5f\n", this->GetFilterAPrioriFpp());
268269
printf("Filter fpp: %.5f\n",this->GetFilterFpp());
269270
printf("Number of mapped elements: %d\n",this->members);
270271
printf("Number of hash collisions: %d\n",this->collisions);
@@ -299,10 +300,9 @@ void SBF::PrintFilter(int mode)
299300
printf("\n");
300301
}
301302

302-
printf("\nEmersion and Fpp:\n");
303+
printf("\nEmersion, Fpp, Isep:\n");
303304
for(int j = 1; j < this->AREA_number+1; j++){
304-
if(this->GetAreaFlotation(j)) printf("Area %d: emersion %.5f, flotation safe, fpp %.5f",j,this->GetAreaEmersion(j),this->AREA_fpp[j]);
305-
else printf("Area %d: emersion %.5f, flotation unsafe, fpp %.5f",j,this->GetAreaEmersion(j),this->AREA_fpp[j]);
305+
printf("Area %d: emersion %.5f, a-priori fpp %.5f, fpp %.5f, a-priori isep %.5f",j,this->GetAreaEmersion(j),this->AREA_a_priori_fpp[j],this->AREA_fpp[j],this->AREA_a_priori_isep[j]);
306306
printf("\n");
307307
}
308308
printf("\n");
@@ -333,11 +333,13 @@ void SBF::SaveToDisk(std::string path, int mode)
333333
myfile << "members" << ";" << this->members << std::endl;
334334
myfile << "collisions" << ";" << this->collisions << std::endl;
335335
myfile << "sparsity" << ";" << this->GetFilterSparsity() << std::endl;
336+
myfile << "a-priori fpp" << ";" << this->GetFilterAPrioriFpp() << std::endl;
336337
myfile << "fpp" << ";" << this->GetFilterFpp() << std::endl;
337338
// area-related parameters:
338-
// area,members,self-collisions,cells,emersion,flotation,fpp
339+
// area,members,self-collisions,cells,emersion,a-priori fpp,fpp,a-priori isep
340+
myfile << "area" << ";" << "members" << ";" << "self-collisions" << ";" << "cells" << ";" << "emersion" << ";" << "a-priori fpp" << ";" << "fpp" << ";" << "a-priori isep" << std::endl;
339341
for(int j = 1; j < this->AREA_number+1; j++){
340-
myfile << j << ";" << this->AREA_members[j] << ";" << this->AREA_self_collisions[j] << ";" << this->AREA_cells[j] << ";" << this->GetAreaEmersion(j) << ";" << this->GetAreaFlotation(j) << ";" << this->AREA_fpp[j] << std::endl;
342+
myfile << j << ";" << this->AREA_members[j] << ";" << this->AREA_self_collisions[j] << ";" << this->AREA_cells[j] << ";" << this->GetAreaEmersion(j) << ";" << this->AREA_a_priori_fpp[j] << ";" << this->AREA_fpp[j] << ";" << this->AREA_a_priori_isep[j] << std::endl;
341343
}
342344

343345
}
@@ -486,10 +488,61 @@ int SBF::Check(char *string, int size)
486488
}
487489

488490

491+
// Computes a-priori area-specific inter-set error probability (a_priori_isep)
492+
void SBF::SetAPrioriAreaIsep()
493+
{
494+
double p1;
495+
int nfill;
496+
497+
498+
for (int i = this->AREA_number; i>0; i--) {
499+
nfill = 0;
500+
501+
for (int j = i+1; j <= this->AREA_number; j++) {
502+
nfill += this->AREA_members[j];
503+
}
504+
505+
p1 = (double)(1 - 1 / (double)this->cells);
506+
p1 = (double)(1 - (double)pow(p1, this->HASH_number*nfill));
507+
p1 = (double)pow(p1, this->HASH_number);
508+
509+
this->AREA_a_priori_isep[i] = (float)p1;
510+
511+
}
512+
}
513+
514+
515+
// Computes a-priori area-specific false positives probability (a_priori_fpp)
516+
void SBF::SetAPrioriAreaFpp()
517+
{
518+
double p;
519+
int c;
520+
521+
for (int i = this->AREA_number; i>0; i--) {
522+
c = 0;
523+
524+
for (int j = i; j <= this->AREA_number; j++) {
525+
c += this->AREA_members[j];
526+
}
527+
528+
p = (double)(1 - 1 / (double)this->cells);
529+
p = (double)(1 - (double)pow(p, this->HASH_number*c));
530+
p = (double)pow(p, this->HASH_number);
531+
532+
this->AREA_a_priori_fpp[i] = (float)p;
533+
534+
for (int j = i; j<this->AREA_number; j++) {
535+
this->AREA_a_priori_fpp[i] -= this->AREA_a_priori_fpp[j + 1];
536+
}
537+
if (AREA_a_priori_fpp[i]<0) AREA_a_priori_fpp[i] = 0;
538+
}
539+
}
540+
541+
489542
// Computes a-posteriori area-specific false positives probability (fpp)
490543
void SBF::SetAreaFpp()
491544
{
492-
float p;
545+
double p;
493546
int c;
494547

495548
for(int i = this->AREA_number; i>0; i--){
@@ -499,8 +552,10 @@ void SBF::SetAreaFpp()
499552
c += this->AREA_cells[j];
500553
}
501554

502-
p = (float)c/(float)this->cells;
503-
this->AREA_fpp[i] = (float)pow(p,this->HASH_number);
555+
p = (double)c/(double)this->cells;
556+
p = (double)pow(p,this->HASH_number);
557+
558+
this->AREA_fpp[i] = (float)p;
504559

505560
for(int j=i; j<this->AREA_number; j++){
506561
this->AREA_fpp[i] -= this->AREA_fpp[j+1];
@@ -510,6 +565,13 @@ void SBF::SetAreaFpp()
510565
}
511566

512567

568+
// Returns the number of inserted elements for the input area
569+
int SBF::GetAreaMembers(int area)
570+
{
571+
return this->AREA_members[area];
572+
}
573+
574+
513575
// Returns the sparsity of the entire SBF
514576
float SBF::GetFilterSparsity()
515577
{
@@ -524,21 +586,35 @@ float SBF::GetFilterSparsity()
524586
}
525587

526588

589+
// Returns the a-priori false positive probability over the entire filter
590+
// (i.e. not area-specific)
591+
float SBF::GetFilterAPrioriFpp()
592+
{
593+
double p;
594+
595+
p = (double)(1 - 1 / (double)this->cells);
596+
p = (double)(1 - (double)pow(p, this->HASH_number*this->members));
597+
p = (double)pow(p, this->HASH_number);
598+
599+
return (float)p;
600+
}
601+
602+
527603
// Returns the a-posteriori false positive probability over the entire filter
528604
// (i.e. not area-specific)
529605
float SBF::GetFilterFpp()
530606
{
531-
float p,fpp;
607+
double p;
532608
int c = 0;
533609
// Counts non-zero cells
534610
for(int i = 1; i < this->AREA_number+1; i++){
535611
c += this->AREA_cells[i];
536612
}
537-
p = (float)c/(float)this->cells;
613+
p = (double)c/(double)this->cells;
538614

539-
fpp = (float)(pow(p,this->HASH_number));
615+
p = (double)(pow(p,this->HASH_number));
540616

541-
return fpp;
617+
return (float)p;
542618
}
543619

544620
// Returns the emersion value for the input area
@@ -555,17 +631,6 @@ float SBF::GetAreaEmersion(int area)
555631
}
556632

557633

558-
// Returns the flotation value for the input area. TRUE if it is not possible
559-
// for an element belonging to the input area to be recognized as belonging to a
560-
// different area, FALSE if collisions may cause this to happen
561-
bool SBF::GetAreaFlotation(int area)
562-
{
563-
564-
if((this->AREA_members[area]==0) || (this->HASH_number==0)) return true;
565-
else{
566-
return (this->AREA_members[area]*this->HASH_number) - this->AREA_self_collisions[area] - this->AREA_cells[area] < this->HASH_number;
567-
}
568-
}
569634

570635

571636
} //namespace sbf

sbf.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ namespace sbf {
7070
int *AREA_members;
7171
int *AREA_cells;
7272
int *AREA_self_collisions;
73+
float *AREA_a_priori_fpp;
7374
float *AREA_fpp;
75+
float *AREA_a_priori_isep;
7476
int BIG_end;
7577

7678
// Private methods (commented in the sbf.cpp)
@@ -176,6 +178,8 @@ namespace sbf {
176178
this->AREA_cells = new int[this->AREA_number + 1];
177179
this->AREA_self_collisions = new int[this->AREA_number + 1];
178180
this->AREA_fpp = new float[this->AREA_number + 1];
181+
this->AREA_a_priori_fpp = new float[this->AREA_number + 1];
182+
this->AREA_a_priori_isep = new float[this->AREA_number + 1];
179183

180184
// Parameter initializations
181185
this->members = 0;
@@ -185,6 +189,8 @@ namespace sbf {
185189
this->AREA_cells[a] = 0;
186190
this->AREA_self_collisions[a] = 0;
187191
this->AREA_fpp[a] = -1;
192+
this->AREA_a_priori_fpp[a] = -1;
193+
this->AREA_a_priori_isep[a] = -1;
188194
}
189195
}
190196

@@ -197,6 +203,8 @@ namespace sbf {
197203
delete[] AREA_cells;
198204
delete[] AREA_self_collisions;
199205
delete[] AREA_fpp;
206+
delete[] AREA_a_priori_fpp;
207+
delete[] AREA_a_priori_isep;
200208
for (int j = 0; j<this->HASH_number; j++) {
201209
delete[] HASH_salt[j];
202210
}
@@ -209,11 +217,14 @@ namespace sbf {
209217
void SaveToDisk(std::string path, int mode);
210218
void Insert(char *string, int size, int area);
211219
int Check(char *string, int size);
220+
int GetAreaMembers(int area);
212221
float GetFilterSparsity();
213222
float GetFilterFpp();
223+
float GetFilterAPrioriFpp();
214224
void SetAreaFpp();
225+
void SetAPrioriAreaFpp();
226+
void SetAPrioriAreaIsep();
215227
float GetAreaEmersion(int area);
216-
bool GetAreaFlotation(int area);
217228
};
218229

219230
} //namespace sbf

test-app/test-app.cpp

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
3737
int main() {
3838

3939
std::ifstream myfile;
40-
std::string line, a, member;
40+
std::string line, a, member, path;
41+
std::ofstream rate_file;
4142
int len, line_count, area, area_check, n, narea, nver;
42-
int well_recognised, false_positives, exchanged_elements;
43+
int well_recognised, false_positives, iser;
44+
int* area_iser;
45+
int* area_fp;
4346
char* element;
4447
sbf::SBF* myFilter = NULL;
4548

@@ -221,7 +224,9 @@ int main() {
221224
}
222225

223226
//calculates filter's probabilistic properties
227+
myFilter->SetAPrioriAreaFpp();
224228
myFilter->SetAreaFpp();
229+
myFilter->SetAPrioriAreaIsep();
225230

226231
//prints filter to the standard output or saves it to disk
227232
if (print_mode == 1) myFilter->PrintFilter(0);
@@ -235,7 +240,11 @@ int main() {
235240

236241
//operates a self check upon the filter (i.e. runs the Check method for each
237242
//of the already mapped elements)
238-
well_recognised = 0, exchanged_elements = 0;
243+
well_recognised = 0, iser = 0;
244+
area_iser = new int[narea+1];
245+
for (int a = 0; a < narea + 1; a++) {
246+
area_iser[a] = 0;
247+
}
239248
myfile.open(construction_dataset.c_str());
240249

241250
if (myfile.is_open()) {
@@ -254,13 +263,30 @@ int main() {
254263

255264
if (area == area_check) well_recognised++;
256265
else {
257-
exchanged_elements++;
266+
iser++;
267+
area_iser[area]++;
258268
}
259269

260270
}
261-
printf("Well recognised: %d\n", well_recognised);
262-
printf("Elements assigned to a wrong set: %d\n", exchanged_elements);
263-
printf("Exchange rate: %.5f\n", (float)exchanged_elements / (float)n);
271+
printf("Elements assigned to the correct set: %d\n", well_recognised);
272+
printf("Inter-set errors: %d\n", iser);
273+
printf("Inter-set errors rate: %.5f\n", (float)iser / (float)n);
274+
275+
276+
if (print_mode == 3 || print_mode == 4) {
277+
path = "ise" + buf + ".csv";
278+
rate_file.open(path.c_str());
279+
rate_file.setf(std::ios_base::fixed, std::ios_base::floatfield);
280+
rate_file.precision(5);
281+
// area-related parameters:
282+
// area,inter-set errors,inter-set error rate
283+
rate_file << "area" << ";" << "errors" << ";" << "rate" << std::endl;
284+
for (int j = 1; j < narea + 1; j++) {
285+
rate_file << j << ";" << area_iser[j] << ";" << (float)area_iser[j] / (float)myFilter->GetAreaMembers(j) << std::endl;
286+
}
287+
rate_file.close();
288+
}
289+
264290
myfile.close();
265291
}
266292
else {
@@ -288,6 +314,10 @@ int main() {
288314

289315
//operates a verification using non members dataset
290316
well_recognised = 0, false_positives = 0;
317+
area_fp = new int[narea + 1];
318+
for (int a = 0; a < narea + 1; a++) {
319+
area_fp[a] = 0;
320+
}
291321
myfile.open(verification_dataset.c_str());
292322

293323
if (myfile.is_open()) {
@@ -301,12 +331,31 @@ int main() {
301331
memcpy(element, line.c_str(), len);
302332
area = myFilter->Check(element, len);
303333

304-
if (area == 0)well_recognised++;
305-
else false_positives++;
334+
if (area == 0) well_recognised++;
335+
else
336+
{
337+
false_positives++;
338+
area_fp[area]++;
339+
}
306340
}
307-
printf("Well recognised: %d\n", well_recognised);
341+
printf("True negatives: %d\n", well_recognised);
308342
printf("False positives: %d\n", false_positives);
309343
printf("False positives rate: %.5f\n", (float)false_positives / (float)nver);
344+
345+
if (print_mode == 3 || print_mode == 4) {
346+
path = "fp" + buf + ".csv";
347+
rate_file.open(path.c_str());
348+
rate_file.setf(std::ios_base::fixed, std::ios_base::floatfield);
349+
rate_file.precision(5);
350+
// area-related parameters:
351+
// area,false positives,false positives rate
352+
rate_file << "area" << ";" << "false positives" << ";" << "rate" << std::endl;
353+
for (int j = 1; j < narea + 1; j++) {
354+
rate_file << j << ";" << area_fp[j] << ";" << (float)area_fp[j] / (float)nver << std::endl;
355+
}
356+
rate_file.close();
357+
}
358+
310359
myfile.close();
311360
}
312361
else {

0 commit comments

Comments
 (0)