Skip to content

Commit c078530

Browse files
committed
clean up comments
1 parent 87bdeab commit c078530

File tree

2 files changed

+10
-47
lines changed

2 files changed

+10
-47
lines changed

lightbug_http/strings.mojo

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@ struct BytesConstant:
3333
alias DOUBLE_CRLF = bytes(lineBreak + lineBreak)
3434

3535

36-
# RFC 9112 Section 2.2-2: US-ASCII character bounds
3736
alias US_ASCII_MAX = 0x7F
3837
alias ISO_8859_1_MAX = 0xFF
3938

@@ -79,20 +78,15 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise
7978
Error: If the data contains invalid multi-byte sequences that could
8079
create security vulnerabilities.
8180
"""
82-
# Check each byte to ensure it's in a safe encoding superset of US-ASCII
8381
for i in range(len(data)):
8482
var b = data[i]
8583

86-
# Allow US-ASCII range (most common case)
8784
if is_us_ascii_octet(b):
8885
continue
8986

90-
# Allow ISO-8859-1 extended range (superset of US-ASCII)
9187
if is_iso_8859_1_octet(b):
9288
continue
9389

94-
# If we get here, we have a byte outside ISO-8859-1 range
95-
# This could be part of a multi-byte UTF-8 sequence which is unsafe
9690
raise Error(
9791
"RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) +
9892
" at position " + String(i) +
@@ -118,11 +112,8 @@ fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> Stri
118112
Raises:
119113
Error: If the bytes contain invalid sequences for HTTP parsing.
120114
"""
121-
# Validate the octets first
122115
var validated_span = validate_http_message_octets(b)
123116

124-
# Create string treating bytes as ISO-8859-1 (safe superset of US-ASCII)
125-
# Note: We use unsafe_from_utf8 here but we've validated the input is safe
126117
return String(StringSlice(unsafe_from_utf8=validated_span))
127118

128119

@@ -143,11 +134,9 @@ fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> S
143134
for i in range(len(data)):
144135
var b = data[i]
145136

146-
# Safe US-ASCII characters can be added directly
147137
if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except %
148138
result += chr(Int(b))
149139
else:
150-
# Percent-encode unsafe or non-printable octets
151140
result += "%" + hex(Int(b)).upper().rjust(2, "0")
152141

153142
return result

tests/rfc/test_rfc9112_simple.mojo

Lines changed: 10 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -6,59 +6,44 @@ def test_rfc9112_parse_as_octets():
66
"""RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets."""
77
print("Testing: Parse HTTP message as sequence of octets...")
88

9-
# Test that we parse HTTP messages as individual octets, not Unicode characters
109
var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n"
1110
var octets = http_message.as_bytes()
1211

13-
# Verify we access individual octets (bytes), not Unicode code points
14-
testing.assert_equal(octets[0], ord('G')) # First octet is 'G'
15-
testing.assert_equal(octets[4], ord('/')) # Fifth octet is '/'
12+
testing.assert_equal(octets[0], ord('G'))
13+
testing.assert_equal(octets[4], ord('/'))
1614

17-
# Find the first CR and LF octets in the message
1815
var found_cr = False
1916
var found_lf = False
2017
for i in range(len(octets)):
21-
if octets[i] == 0x0D and not found_cr: # First CR
18+
if octets[i] == 0x0D and not found_cr:
2219
found_cr = True
23-
if octets[i] == 0x0A and not found_lf: # First LF
20+
if octets[i] == 0x0A and not found_lf:
2421
found_lf = True
2522
if found_cr and found_lf:
2623
break
2724

28-
testing.assert_true(found_cr) # CR found as octet
29-
testing.assert_true(found_lf) # LF found as octet
30-
31-
print("✓ HTTP message parsed as sequence of octets")
25+
testing.assert_true(found_cr)
26+
testing.assert_true(found_lf)
3227

3328

3429
def test_rfc9112_us_ascii_superset_encoding():
3530
"""RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII."""
3631
print("Testing: Encoding is superset of US-ASCII...")
3732

38-
# US-ASCII range is 0x00-0x7F
39-
# ISO-8859-1 (0x00-0xFF) is a valid superset
40-
41-
# Test US-ASCII characters are valid
4233
testing.assert_true(ord('G') <= 0x7F) # US-ASCII
4334
testing.assert_true(ord(' ') <= 0x7F) # US-ASCII
4435
testing.assert_true(0x0A <= 0x7F) # LF in US-ASCII
4536
testing.assert_true(0x0D <= 0x7F) # CR in US-ASCII
46-
47-
# Test that superset (ISO-8859-1) includes extended range
4837
testing.assert_true(0x80 <= 0xFF) # Extended range valid
4938
testing.assert_true(0xFF <= 0xFF) # Maximum byte valid
50-
51-
print("✓ Encoding is superset of US-ASCII (ISO-8859-1)")
5239

5340

5441
def test_rfc9112_lf_security_vulnerability():
5542
"""RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities."""
5643
print("Testing: LF (%x0A) security vulnerability prevention...")
5744

58-
# The critical security issue: LF (%x0A) in multibyte sequences
5945
var lf_octet: UInt8 = 0x0A
6046

61-
# When parsed as octets (safe), LF is clearly identifiable
6247
var test_data = "GET /\r\nHost: test\r\n\r\n"
6348
var data_octets = test_data.as_bytes()
6449

@@ -67,22 +52,16 @@ def test_rfc9112_lf_security_vulnerability():
6752
if data_octets[i] == lf_octet:
6853
lf_positions.append(i)
6954

70-
# Should find LF octets at specific positions
7155
testing.assert_true(len(lf_positions) > 0)
72-
print("✓ LF (%x0A) handled safely as octet")
73-
print(" - Found " + String(len(lf_positions)) + " LF octets in message")
74-
print(" - No multibyte character sequence confusion")
7556

7657

7758
def test_rfc9112_string_parser_safety():
7859
"""RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction."""
7960
print("Testing: String parsers used only after safe extraction...")
8061

81-
# Demonstrate the RFC requirement: protocol elements extracted as octets first
8262
var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n"
8363
var request_octets = http_request.as_bytes()
8464

85-
# Step 1: Extract protocol elements as octets (safe)
8665
var method_end = -1
8766
for i in range(len(request_octets)):
8867
if request_octets[i] == ord(' '):
@@ -91,15 +70,10 @@ def test_rfc9112_string_parser_safety():
9170

9271
testing.assert_true(method_end > 0)
9372

94-
# Step 2: Verify the extracted octets match expected method
95-
testing.assert_equal(request_octets[0], ord('G')) # First octet
96-
testing.assert_equal(request_octets[1], ord('E')) # Second octet
97-
testing.assert_equal(request_octets[2], ord('T')) # Third octet
98-
testing.assert_equal(method_end, 3) # Method is 3 octets
99-
100-
print("✓ String parsing only after protocol element extraction")
101-
print(" - Protocol elements extracted as octets first")
102-
print(" - String conversion only after safe extraction")
73+
testing.assert_equal(request_octets[0], ord('G'))
74+
testing.assert_equal(request_octets[1], ord('E'))
75+
testing.assert_equal(request_octets[2], ord('T'))
76+
testing.assert_equal(method_end, 3)
10377

10478

10579
def main():

0 commit comments

Comments
 (0)