3131#endif
3232using System . Threading ;
3333using System . Diagnostics ;
34+ using System . Text . Unicode ;
3435using System . Threading . Tasks ;
3536using System . Collections . Generic ;
3637using System . Diagnostics . CodeAnalysis ;
@@ -1777,7 +1778,7 @@ void StepHeaderField (int headerFieldLength)
17771778 inputIndex += headerIndex ;
17781779 }
17791780
1780- unsafe bool StepHeaderValue ( byte * inbuf , ref ByteDetectionOptions options , ref bool midline )
1781+ unsafe bool StepHeaderValue ( byte * inbuf , ref ByteDetectionOptions options , ref bool midline , ref bool ascii )
17811782 {
17821783 byte * start = inbuf + inputIndex ;
17831784 byte * inend = inbuf + inputEnd ;
@@ -1792,8 +1793,11 @@ unsafe bool StepHeaderValue (byte* inbuf, ref ByteDetectionOptions options, ref
17921793 inptr = ParseUtils . EndOfLine ( inptr , inend + 1 , options , out var detected ) ;
17931794
17941795 if ( ( detected & ByteDetectionResults . Detected8Bit ) != 0 ) {
1795- OnMimeComplianceViolation ( MimeComplianceViolation . Unexpected8BitBytesInHeaders , lineBeginOffset , lineNumber ) ;
1796+ // Note: we don't emit Unexpected8BitBytesInHeader here because 8-bit might only indicate UTF-8 which is valid in headers
1797+ // according to RFC 6532. Instead, we'll just track that this header value contains non-ASCII text and check for valid UTF-8
1798+ // once we've got the full value.
17961799 options &= ~ ByteDetectionOptions . Detect8Bit ;
1800+ ascii = false ;
17971801 }
17981802
17991803 if ( ( detected & ByteDetectionResults . DetectedNulls ) != 0 ) {
@@ -1922,7 +1926,7 @@ unsafe bool TryCheckMboxMarkerWithinHeaderBlock (byte* inbuf)
19221926 return true ;
19231927 }
19241928
1925- Header CreateHeader ( long beginOffset , int beginLineNumber , int fieldNameLength , int headerFieldLength , bool invalid )
1929+ Header CreateHeader ( long beginOffset , int beginLineNumber , int fieldNameLength , int headerFieldLength , bool invalid , bool ascii )
19261930 {
19271931 byte [ ] field , value ;
19281932
@@ -1942,6 +1946,23 @@ Header CreateHeader (long beginOffset, int beginLineNumber, int fieldNameLength,
19421946 Offset = beginOffset
19431947 } ;
19441948
1949+ if ( DetectMimeComplianceViolations ) {
1950+ if ( invalid ) {
1951+ // This means that the field name itself contains all of the data and is invalid. Check for null bytes *and* non-UTF-8 text.
1952+ var fieldSpan = field . AsSpan ( ) ;
1953+ int index = fieldSpan . IndexOf ( ( byte ) '\0 ' ) ;
1954+
1955+ if ( index != - 1 )
1956+ OnMimeComplianceViolation ( MimeComplianceViolation . UnexpectedNullBytesInHeader , beginOffset + index , beginLineNumber ) ;
1957+
1958+ if ( ! Utf8 . IsValid ( fieldSpan ) )
1959+ OnMimeComplianceViolation ( MimeComplianceViolation . Unexpected8BitBytesInHeader , beginOffset , beginLineNumber ) ;
1960+ } else if ( ! ascii ) {
1961+ if ( ! Utf8 . IsValid ( value ) )
1962+ OnMimeComplianceViolation ( MimeComplianceViolation . Unexpected8BitBytesInHeader , beginOffset , beginLineNumber ) ;
1963+ }
1964+ }
1965+
19451966 UpdateHeaderState ( header , beginOffset , beginLineNumber ) ;
19461967 headerCount ++ ;
19471968
@@ -1950,7 +1971,6 @@ Header CreateHeader (long beginOffset, int beginLineNumber, int fieldNameLength,
19501971
19511972 unsafe void StepHeaders ( byte * inbuf , CancellationToken cancellationToken )
19521973 {
1953- var options = DetectMimeComplianceViolations ? ByteDetectionOptions . Detect8Bit | ByteDetectionOptions . DetectNulls : ByteDetectionOptions . None ;
19541974 int headersBeginLineNumber = lineNumber ;
19551975 var eof = false ;
19561976
@@ -1974,6 +1994,7 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
19741994 ReadAhead ( ReadAheadSize , 0 , cancellationToken ) ;
19751995
19761996 do {
1997+ var options = DetectMimeComplianceViolations ? ByteDetectionOptions . Detect8Bit | ByteDetectionOptions . DetectNulls : ByteDetectionOptions . None ;
19771998 var beginOffset = GetOffset ( inputIndex ) ;
19781999 var beginLineNumber = lineNumber ;
19792000 int left = inputEnd - inputIndex ;
@@ -2080,9 +2101,10 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
20802101 }
20812102
20822103 bool midline = true ;
2104+ bool ascii = true ;
20832105
20842106 // Consume the header value.
2085- while ( ! StepHeaderValue ( inbuf , ref options , ref midline ) ) {
2107+ while ( ! StepHeaderValue ( inbuf , ref options , ref midline , ref ascii ) ) {
20862108 if ( ReadAhead ( 1 , 0 , cancellationToken ) == 0 ) {
20872109 if ( DetectMimeComplianceViolations ) {
20882110 if ( midline )
@@ -2101,7 +2123,7 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
21012123 return ;
21022124 }
21032125
2104- var header = CreateHeader ( beginOffset , beginLineNumber , fieldNameLength , headerFieldLength , invalid ) ;
2126+ var header = CreateHeader ( beginOffset , beginLineNumber , fieldNameLength , headerFieldLength , invalid , ascii ) ;
21052127
21062128 OnHeaderRead ( header , beginLineNumber , cancellationToken ) ;
21072129 } while ( ! eof ) ;
0 commit comments