Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deep JWT obfuscation 2 #157

Merged
merged 26 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install --requirement requirements.txt
python download_data.py --data_dir data
python download_data.py --data_dir data --jobs $(nproc)
python review_data.py &>review_base.txt
ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_base.txt >review_base.html

Expand Down
8 changes: 5 additions & 3 deletions benchmark/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,17 +284,17 @@ def check_line_from_meta(self,
print(f"WARNING: check meta value start-end {row}", flush=True)
continue

code = (project_id, file_id, row.LineStart, row.LineEnd, row.ValueStart, row.ValueEnd, rule)
code = (data_path, row.LineStart, row.LineEnd, row.ValueStart, row.ValueEnd, rule)
if code in self.line_checker:
self.result_cnt -= 1
if 'T' == row.GroundTruth:
print(f"WARNING: Already checked True! Duplicate? {code}", flush=True)
return LineStatus.CHECKED, project_id, file_id
return LineStatus.CHECKED, project_id, file_name
else:
self.line_checker.add(code)

for meta_rule in row.Category.split(':'):
# increase the counter only for corresponded rule metioned in markup
# increase the counter only for corresponded rule mentioned in markup
if meta_rule == rule:
if 'T' == row.GroundTruth:
self._increase_result_dict_cnt(meta_rule, True)
Expand Down Expand Up @@ -340,6 +340,8 @@ def analyze_result(self) -> None:
false_cnt = value.false_cnt
total_true_cnt, total_false_cnt = self._get_total_true_false_count(rule)
result = Result(true_cnt, false_cnt, total_true_cnt, total_false_cnt)
if rule not in self.rules_markup_counters:
self.rules_markup_counters[rule] = (0, 0, 0)
rows.append([
rule,
self.rules_markup_counters[rule][0],
Expand Down
42 changes: 34 additions & 8 deletions download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,10 @@ def move_files(temp_dir, dataset_dir):
return missing_repos


CHARS4RAND = (string.digits + string.ascii_lowercase + string.ascii_uppercase).encode("ascii")
CHARS4RAND = (string.ascii_lowercase + string.ascii_uppercase).encode("ascii")
DIGITS = string.digits.encode("ascii")
# a leading 0 may break JSON, e.g. "id":123 -> "qa":038, which is invalid JSON
DIGITS4RAND = DIGITS[1:]
CHARS4OBF = {ord(x) for x in string.ascii_lowercase + string.ascii_uppercase if
x not in "falsetrun"}


def obfuscate_jwt(value: str) -> str:
Expand All @@ -234,20 +232,48 @@ def obfuscate_jwt(value: str) -> str:
decoded = base64.b64decode(value, validate=True)
new_json = bytearray(len(decoded))
backslash = False
for n, i in enumerate(decoded):
n = 0
while len(decoded) > n:
if backslash:
new_json[n] = 0x3F # ord('?')
backslash = False
n += 1
continue
if i in DIGITS:
if decoded[n] in b'nft"':
reserved_word_found = False
for wrd in [
# reserved words in JSON
b"null", b"false", b"true",
# trigger words from CredSweeper filter ValueJsonWebTokenCheck
b'"alg":', b'"apu":', b'"apv":', b'"aud":', b'"b64":', b'"crit":', b'"crv":', b'"cty":', b'"d":',
b'"dp":', b'"dq":', b'"e":', b'"enc":', b'"epk":', b'"exp":', b'"ext":', b'"iat":', b'"id":', b'"iss":',
b'"iv":', b'"jku":', b'"jti":', b'"jwk":', b'"k":', b'"key_ops":', b'"keys":', b'"kid":', b'"kty":',
b'"n":', b'"nbf":', b'"nonce":', b'"oth":', b'"p":', b'"p2c":', b'"p2s":', b'"password":', b'"ppt":',
b'"q":', b'"qi":', b'"role":', b'"secret":', b'"sub":', b'"svt":', b'"tag":', b'"token":', b'"typ":',
b'"url":', b'"use":', b'"x":', b'"x5c":', b'"x5t":', b'"x5t#S256":', b'"x5u":', b'"y":', b'"zip":'
]:
# safe words to keep JSON structure (false, true, null)
# and important JWT keys ("alg", "typ", ...)
if decoded[n:n + len(wrd)] == wrd:
end_pos = n + len(wrd)
while n < end_pos:
new_json[n] = decoded[n]
n += 1
reserved_word_found = True
break
if reserved_word_found:
continue
# any other data will be obfuscated
if decoded[n] in DIGITS:
new_json[n] = random.choice(DIGITS4RAND)
elif i in CHARS4OBF:
elif decoded[n] in CHARS4RAND:
new_json[n] = random.choice(CHARS4RAND)
elif '\\' == i:
elif '\\' == decoded[n]:
new_json[n] = 0x3F # ord('?')
backslash = True
else:
new_json[n] = i
new_json[n] = decoded[n]
n += 1

encoded = base64.b64encode(new_json, altchars=b"-_").decode("ascii")
while len(encoded) > len_value:
Expand Down
4 changes: 2 additions & 2 deletions meta/1ce69180.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
703,9f28279e,GitHub,1ce69180,data/1ce69180/src/9f28279e.rb,39,39,F,F,,,F,F,,,,,0,0,F,F,F,Bitbucket Client ID
850,9f28279e,GitHub,1ce69180,data/1ce69180/src/9f28279e.rb,164,164,F,F,,,F,F,,,,,0,0,F,F,F,Bitbucket Client ID
851,560ba91b,GitHub,1ce69180,data/1ce69180/src/560ba91b.rb,54,54,F,F,,,F,F,,,,,0,0,F,F,F,Auth
852,1e5305b3,GitHub,1ce69180,data/1ce69180/src/1e5305b3.rb,39,39,T,F,88,328,F,F,,,,,0,0,F,F,F,JSON Web Token
852,1e5305b3,GitHub,1ce69180,data/1ce69180/src/1e5305b3.rb,39,39,F,F,88,328,F,F,,,,,0,0,F,F,F,JSON Web Token
853,0de86720,GitHub,1ce69180,data/1ce69180/src/0de86720.rb,545,545,F,F,,,F,F,,,,,0,0,F,F,F,Secret
910,7cec5529,GitHub,1ce69180,data/1ce69180/src/7cec5529.rb,172,172,F,F,,,F,F,,,,,0,0,F,F,F,Bitbucket Client ID
1818,e7c55fa4,GitHub,1ce69180,data/1ce69180/src/e7c55fa4.rb,35,35,F,F,,,F,F,,,,,0,0,F,F,F,Secret
Expand Down Expand Up @@ -628,4 +628,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
114064,5e754a57,GitHub,1ce69180,data/1ce69180/src/5e754a57.rb,22,22,T,F,44,76,F,F,,,,,0.00,,F,F,F,Auth
114065,5e754a57,GitHub,1ce69180,data/1ce69180/src/5e754a57.rb,30,30,T,F,44,74,F,F,,,,,0.00,,F,F,F,Auth
114066,fcf0ceac,GitHub,1ce69180,data/1ce69180/src/fcf0ceac.txt,2,2,F,F,,,F,F,,,,,0.00,,F,F,F,Auth
131923,1e5305b3,GitHub,1ce69180,data/1ce69180/src/1e5305b3.rb,36,36,T,F,84,324,F,F,,,,,0.00,,F,F,F,JSON Web Token
131923,1e5305b3,GitHub,1ce69180,data/1ce69180/src/1e5305b3.rb,36,36,F,F,84,324,F,F,,,,,0.00,,F,F,F,JSON Web Token
Loading
Loading