Skip to content

Commit 92b5ad3

Browse files
Hook RAPPOR up to Metis
1 parent 241cfe3 commit 92b5ad3

File tree

3 files changed

+77
-30
lines changed

3 files changed

+77
-30
lines changed

proxy.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ type Website struct {
3333
Domain string `json:"domain,omitempty"`
3434
}
3535

36+
type SvrMsg struct {
37+
Cohort int `json:"cohort,omitempty"`
38+
Reports []int `json:"reports,omitempty"`
39+
}
40+
3641
var client = &http.Client{
3742
Transport: &http.Transport{
3843
Dial: (&net.Dialer{
@@ -51,6 +56,7 @@ var client = &http.Client{
5156

5257
var transport string
5358
var hmacSecret string
59+
var cohort int
5460

5561
/*
5662
Domains Metis is reasonably certain are censored are stored here.
@@ -70,7 +76,7 @@ func contains(slice []string, s string) bool {
7076
}
7177

7278
func isBlocked(url *url.URL) (bool) {
73-
return contains(blockedDomains, url.Hostname()) || contains(tempBlockedDomains, url.Hostname())
79+
return contains(blockedDomains, url.Hostname()) || contains(tempBlockedDomains, url.Hostname())
7480
}
7581

7682
func remove(s []string, e string) []string {
@@ -155,14 +161,15 @@ func updateMasterList() error {
155161
s.init(p)
156162
var e Encoder
157163

158-
e.init(p, 1, hmacSecret, &s)
164+
e.init(p, cohort, hmacSecret, &s)
159165
var rappor []int
160166
for i := 0; i < len(blockedDomains); i++ {
161-
fmt.Println("Blocked domains: ", blockedDomains)
162-
fmt.Println(e.Encode([]byte(blockedDomains[i])))
167+
//fmt.Println("Blocked domains[i]: ", blockedDomains[i])
168+
//fmt.Println(e.Encode([]byte(blockedDomains[i])))
163169
rappor = append(rappor, e.Encode([]byte(blockedDomains[i])))
164170
}
165-
err := json.NewEncoder(&buf).Encode(rappor)
171+
svrMsg := SvrMsg{cohort, rappor}
172+
err := json.NewEncoder(&buf).Encode(svrMsg)
166173
if err != nil {
167174
return err
168175
}
@@ -524,6 +531,8 @@ func main() {
524531
log.Println("Starting Metis proxy....")
525532
//Set the HMAC secret for generating the PRRs in RAPPOR
526533
generateSecret()
534+
//TODO: Have the server tell the client what cohort it belongs to?
535+
cohort=1
527536
//Ask the master server for the blocked list
528537
if getBlockedList() != nil {
529538
log.Println("Error getting blocked list, starting with empty blocked list!")

server/rappor_analysis.py

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import sys
3636
import numpy as np
3737
import time
38+
import statsmodels.api as sm
3839

3940
from random import SystemRandom
4041
from sklearn import linear_model
@@ -401,9 +402,10 @@ def getDomains(numDomains):
401402
domains.append(line.rstrip())
402403
return domains[0:numDomains]
403404

404-
def getBloomFilters(domains, numCohorts, numHashes, numBloomBits):
405+
def getBloomFilters(domains, cohorts, numHashes, numBloomBits):
406+
"""cohorts is a list of the cohorts that were reported this time"""
405407
blooms = {}
406-
for m in range(0, numCohorts):
408+
for m in cohorts:
407409
bloomsForCohort = []
408410
for d in domains:
409411
bits = get_bloom_bits(d, m, numHashes, numBloomBits)
@@ -414,43 +416,51 @@ def getBloomFilters(domains, numCohorts, numHashes, numBloomBits):
414416
#print(blooms)
415417
return blooms
416418

417-
def makeDesignMatrix(params, numDomains):
419+
def makeDesignMatrix(params, cohorts, numDomains):
    """Build the 0/1 design matrix relating candidate domains to Bloom bits.

    Args:
        params: RAPPOR parameters; ``num_bloombits`` and ``num_hashes`` are read.
        cohorts: the cohorts that were reported this time.
        numDomains: number of candidate domains, i.e. the number of columns.

    Returns:
        A tuple ``(X, domains)``. ``X`` has ``num_bloombits * len(cohorts)``
        rows and ``numDomains`` columns; ``domains`` is the candidate list
        returned by ``getDomains``.
    """
    k = params.num_bloombits  # number of bits in Bloom filter
    m = len(cohorts)
    domains = getDomains(numDomains)
    blooms = getBloomFilters(domains, cohorts, params.num_hashes, k)

    X = np.zeros([k * m, numDomains])

    # Each cohort owns its own contiguous chunk of k rows. The chunk index
    # must advance across cohorts; resetting it inside the cohort loop would
    # make every cohort overwrite chunk 0.
    for rowChunk, cohort in enumerate(blooms):
        for domainColumn, bitsToSet in enumerate(blooms[cohort]):
            # bitsToSet is a list of the bits that need to be set
            for bitToSet in bitsToSet:
                X[rowChunk * k + bitToSet, domainColumn] = 1
    return X, domains
437442

438443
def doLassoRegression(params, numDomains, reports):
    """Fit a cross-validated LASSO of estimated set bits on the design matrix.

    Args:
        params: RAPPOR parameters, forwarded to the helpers.
        numDomains: number of candidate domains.
        reports: supposed to be a dictionary whose keys are the cohort number
            and whose values are the list of reports from that cohort.

    Returns:
        ``(X, Y, linreg, domains)``: the design matrix, the flattened target
        vector, the fitted ``LassoCV`` model and the candidate domains.
    """
    # TODO: this needs to be rethought, because every run currently only uses
    # one cohort. The client must send its cohort number, so the server needs
    # to hand out a cohort number when the client first requests the blocked
    # list.
    X, domains = makeDesignMatrix(params, reports.keys(), numDomains)
    perCohortEstimates = [
        estimateSetBits(reports[cohort], params) for cohort in reports
    ]
    Y = np.array(perCohortEstimates).flatten()

    print("********X_shape: ", X.shape, ", y-shape: ", Y.shape)
    linreg = linear_model.LassoCV(n_alphas=10, cv=10)
    linreg.fit(X, Y)
    print("Coefficients: ", linreg.coef_)
    print("Alpha: ", linreg.alpha_)
    return X, Y, linreg, domains
452462

453-
def doLinearRegression(X, Y):
463+
def doLinearRegression(X, Y, M):
454464
X = sm.add_constant(X)
455465
ols = sm.OLS(Y,X)
456466
results = ols.fit()
@@ -482,19 +492,24 @@ def readRapporReports(filename):
482492
reportFile.close()
483493
return reps
484494

485-
def analyzeReports(filename, params, numDomains):
486-
reps = readRapporReports()
495+
def analyzeReports(reps, params, numDomains):
    """Select the candidate domains that the reports support.

    Runs the LASSO regression, keeps the domains with a non-zero coefficient,
    then passes those columns to ``doLinearRegression`` (presumably a
    confirmation step that may prune further -- verify against that helper).

    Returns:
        The selected domains indexed by the linear-regression result, or an
        empty list when the LASSO selected nothing.
    """
    designMatrix, observed, lassoReg, domains = doLassoRegression(params, numDomains, reps)
    relevantDomains = np.where(lassoReg.coef_ != 0)[0]
    print("domains: ", domains, ", relevantDomains: ", relevantDomains)

    # Nothing survived the LASSO, so there is nothing to confirm.
    if len(relevantDomains) == 0:
        return []

    # Keep track of the domains (and their columns) the LASSO selected
    selectedColumns = designMatrix[:, relevantDomains]
    selectedDomains = np.array(domains)[relevantDomains]
    finalRelevantIdxs = doLinearRegression(selectedColumns, observed, numDomains)
    return selectedDomains[finalRelevantIdxs]
496511

497-
def main():
512+
"""def main():
498513
rapporRepsFile = "rappor_reports.txt"
499514
numDomains = 10
500515
params = Params(prob_f=0.2)
@@ -517,7 +532,7 @@ def main():
517532
mbl.write(d+"\r\n")
518533
519534
520-
535+
"""
521536

522537

523538

server/server.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
from flask import Flask, request
55
from flask_restful import Resource, Api, reqparse
6-
from json import dumps
76

7+
import json
88
import sqlite3
99

10+
import rappor_analysis as rappor
11+
1012
class Sql():
1113
def __init__(self, dbName):
1214
self.conn = sqlite3.connect(dbName)
@@ -18,33 +20,54 @@ def __init__(self, dbName):
1820
self.conn.commit()
1921

2022
def populateDummyData(self):
21-
# SQL command to insert the data in the table
2223
sql_command = """INSERT OR IGNORE INTO domains VALUES ("google.com");"""
2324
self.crsr.execute(sql_command)
25+
sql_command = """INSERT OR IGNORE INTO domains VALUES ("facebook.com");"""
26+
self.crsr.execute(sql_command)
27+
self.conn.commit()
28+
29+
def addToDB(self, domain):
30+
cmd = "INSERT OR IGNORE INTO domains VALUES (\""+domain+"\");"
31+
self.crsr.execute(cmd)
2432
self.conn.commit()
2533

2634
def getBlockedList(self):
2735
cmd = "SELECT * FROM domains;"
2836
self.crsr.execute(cmd)
2937
return self.crsr.fetchall()
38+
39+
def clearDB(self):
40+
cmd = "DELETE FROM domains;"
41+
self.crsr.execute(cmd)
42+
self.conn.commit()
3043

3144

3245
class BlockedList(Resource):
    """REST resource serving the master blocked list and accepting RAPPOR reports."""

    def __init__(self):
        self.sql = Sql("master_blocked_list.db")
        #self.sql.clearDB()
        self.sql.populateDummyData()

    def get(self):
        """Return the blocked list as a list of ``{'Domain': name}`` objects."""
        print("Request for blocked list made by client: ")
        return [{'Domain': row[0]} for row in self.sql.getBlockedList()]

    def post(self):
        """Analyze a client's RAPPOR reports and store the domains they support.

        The request body is expected to be a JSON object with a ``cohort``
        key and a ``reports`` key. A body that is not valid JSON, is not an
        object, or lacks either key is rejected with HTTP 400 instead of
        raising inside the handler.
        """
        print("Client sent RAPPOR things to us: ")
        data = request.data
        print(data)
        try:
            cliMsg = json.loads(data)
            # Reports are keyed by cohort because the analysis expects a
            # mapping of cohort -> reports.
            reps = {cliMsg["cohort"]: cliMsg["reports"]}
        except (ValueError, KeyError, TypeError):
            # ValueError: not JSON; KeyError: missing field;
            # TypeError: body is not an object or cohort is unhashable.
            return 'Malformed RAPPOR report', 400
        print("Client message is ", cliMsg)
        numDomains = 10
        params = rappor.Params(prob_f=0.2)
        doms = rappor.analyzeReports(reps, params, numDomains)
        for d in doms:
            self.sql.addToDB(d)
        return '', 200
4972

5073
def main():

0 commit comments

Comments
 (0)