Skip to content

Commit 865c0d5

Browse files
committed
Merge pull request #317 from jekh/remove-htpmime-dependency
Remove Apache httpmime dependency from LP module
2 parents 61b6e24 + e4e7b23 commit 865c0d5

File tree

9 files changed

+250
-66
lines changed

9 files changed

+250
-66
lines changed

browsermob-core-littleproxy/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@
6868
<groupId>javax.servlet</groupId>
6969
<artifactId>servlet-api</artifactId>
7070
</exclusion>
71+
<exclusion>
72+
<groupId>org.apache.httpcomponents</groupId>
73+
<artifactId>httpmime</artifactId>
74+
</exclusion>
7175
<!-- Due to usage in LegacyProxyServer and BrowserMobProxyServer, this dependency needs to be given "provided" scope.
7276
It is not used by the BMP LittleProxy implementation itself. -->
7377
<exclusion>

browsermob-core-littleproxy/src/main/java/net/lightbody/bmp/filters/HarCaptureFilter.java

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import net.lightbody.bmp.core.har.HarPostDataParam;
2222
import net.lightbody.bmp.core.har.HarRequest;
2323
import net.lightbody.bmp.core.har.HarResponse;
24+
import net.lightbody.bmp.exception.UnsupportedCharsetException;
2425
import net.lightbody.bmp.filters.support.HttpConnectTiming;
2526
import net.lightbody.bmp.filters.util.HarCaptureUtil;
2627
import net.lightbody.bmp.proxy.CaptureType;
@@ -421,9 +422,22 @@ protected void captureRequestContent(HttpRequest httpRequest, byte[] fullMessage
421422
urlEncoded = false;
422423
}
423424

425+
Charset charset;
426+
try {
427+
charset = BrowserMobHttpUtil.readCharsetInContentTypeHeader(contentType);
428+
} catch (UnsupportedCharsetException e) {
429+
log.warn("Found unsupported character set in Content-Type header '{}' in HTTP request to {}. Content will not be captured in HAR.", contentType, httpRequest.getUri(), e);
430+
return;
431+
}
432+
433+
if (charset == null) {
434+
// no charset specified, so use the default -- but log a message since this might not decode the data correctly
435+
charset = BrowserMobHttpUtil.DEFAULT_HTTP_CHARSET;
436+
log.debug("No charset specified; using charset {} to decode contents to {}", charset, httpRequest.getUri());
437+
}
438+
424439
if (urlEncoded) {
425-
String textContents = BrowserMobHttpUtil.getContentAsString(fullMessage, contentType, originalRequest);
426-
Charset charset = BrowserMobHttpUtil.deriveCharsetFromContentTypeHeader(contentType);
440+
String textContents = BrowserMobHttpUtil.getContentAsString(fullMessage, charset);
427441

428442
QueryStringDecoder queryStringDecoder = new QueryStringDecoder(textContents, charset, false);
429443

@@ -440,7 +454,7 @@ protected void captureRequestContent(HttpRequest httpRequest, byte[] fullMessage
440454
//TODO: implement capture of files and multipart form data
441455

442456
// not URL encoded, so let's grab the body of the POST and capture that
443-
String postBody = BrowserMobHttpUtil.getContentAsString(fullMessage, contentType, originalRequest);
457+
String postBody = BrowserMobHttpUtil.getContentAsString(fullMessage, charset);
444458
harEntry.getRequest().getPostData().setText(postBody);
445459
}
446460
}
@@ -451,7 +465,7 @@ protected void captureResponseContent(HttpResponse httpResponse, byte[] fullMess
451465

452466
String contentType = HttpHeaders.getHeader(httpResponse, HttpHeaders.Names.CONTENT_TYPE);
453467
if (contentType == null) {
454-
log.warn("No content type specified in response. Content will be treated as {}", BrowserMobHttpUtil.UNKNOWN_CONTENT_TYPE);
468+
log.warn("No content type specified in response from {}. Content will be treated as {}", originalRequest.getUri(), BrowserMobHttpUtil.UNKNOWN_CONTENT_TYPE);
455469
contentType = BrowserMobHttpUtil.UNKNOWN_CONTENT_TYPE;
456470
}
457471

@@ -461,8 +475,22 @@ protected void captureResponseContent(HttpResponse httpResponse, byte[] fullMess
461475
forceBinary = true;
462476
}
463477

478+
Charset charset;
479+
try {
480+
charset = BrowserMobHttpUtil.readCharsetInContentTypeHeader(contentType);
481+
} catch (UnsupportedCharsetException e) {
482+
log.warn("Found unsupported character set in Content-Type header '{}' in HTTP response from {}. Content will not be captured in HAR.", contentType, originalRequest.getUri(), e);
483+
return;
484+
}
485+
486+
if (charset == null) {
487+
// no charset specified, so use the default -- but log a message since this might not decode the data correctly
488+
charset = BrowserMobHttpUtil.DEFAULT_HTTP_CHARSET;
489+
log.debug("No charset specified; using charset {} to decode contents from {}", charset, originalRequest.getUri());
490+
}
491+
464492
if (!forceBinary && BrowserMobHttpUtil.hasTextualContent(contentType)) {
465-
String text = BrowserMobHttpUtil.getContentAsString(fullMessage, contentType, originalRequest);
493+
String text = BrowserMobHttpUtil.getContentAsString(fullMessage, charset);
466494
harEntry.getResponse().getContent().setText(text);
467495
} else if (dataToCapture.contains(CaptureType.RESPONSE_BINARY_CONTENT)) {
468496
harEntry.getResponse().getContent().setText(DatatypeConverter.printBase64Binary(fullMessage));

browsermob-core-littleproxy/src/test/groovy/net/lightbody/bmp/util/BrowserMobHttpUtilTest.groovy

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ package net.lightbody.bmp.util
22

33
import org.junit.Test
44

5+
import java.nio.charset.Charset
6+
57
import static org.junit.Assert.assertEquals
8+
import static org.junit.Assert.assertFalse
9+
import static org.junit.Assert.assertNull
10+
import static org.junit.Assert.assertTrue
611

712
class BrowserMobHttpUtilTest {
813
@Test
@@ -22,7 +27,7 @@ class BrowserMobHttpUtilTest {
2227

2328
uriToResource.each {uri, expectedResource ->
2429
String parsedResource = BrowserMobHttpUtil.getPathFromUri(uri)
25-
assertEquals("Parsed resource from URL did not match expected resource", expectedResource, parsedResource)
30+
assertEquals("Parsed resource from URL did not match expected resource for URL: " + uri, expectedResource, parsedResource)
2631
}
2732
}
2833

@@ -41,7 +46,63 @@ class BrowserMobHttpUtilTest {
4146

4247
uriToHostAndPort.each {uri, expectedHostAndPort ->
4348
String parsedHostAndPort = BrowserMobHttpUtil.getHostAndPortFromUri(uri)
44-
assertEquals("Parsed host and port from URL did not match expected host and port", expectedHostAndPort, parsedHostAndPort)
49+
assertEquals("Parsed host and port from URL did not match expected host and port for URL: " + uri, expectedHostAndPort, parsedHostAndPort)
50+
}
51+
}
52+
53+
@Test
54+
void testReadCharsetInContentTypeHeader() {
    // Content-Type header value -> charset the parser is expected to return (null means "no charset returned")
    Map<String, Charset> contentTypeHeaderAndCharset = [
            'text/html; charset=UTF-8' : Charset.forName('UTF-8'),
            'text/html; charset=US-ASCII' : Charset.forName('US-ASCII'),
            'text/html' : null,
            'application/json;charset=utf-8' : Charset.forName('UTF-8'),
            'text/*; charset=US-ASCII' : Charset.forName('US-ASCII'),
            'unknown-type/something-incredible' : null,
            'unknown-type/something-incredible;charset=UTF-8' : Charset.forName('UTF-8'),
            '1234 & extremely malformed!' : null,
            '1234 & extremely malformed!;charset=UTF-8' : null, // malformed content-types result in unparseable charsets
            '' : null,
    ]

    contentTypeHeaderAndCharset.each {contentTypeHeader, expectedCharset ->
        Charset derivedCharset = BrowserMobHttpUtil.readCharsetInContentTypeHeader(contentTypeHeader)
        assertEquals("Charset derived from parsed content type header did not match expected charset for content type header: " + contentTypeHeader, expectedCharset, derivedCharset)
    }

    // a null header cannot be used as a map key above, so it is checked separately
    Charset derivedCharset = BrowserMobHttpUtil.readCharsetInContentTypeHeader(null)
    assertNull("Expected null Content-Type header to return a null charset", derivedCharset)

    boolean threwException = false
    try {
        BrowserMobHttpUtil.readCharsetInContentTypeHeader('text/html; charset=FUTURE_CHARSET')
    } catch (net.lightbody.bmp.exception.UnsupportedCharsetException e) {
        // The exception type is fully qualified and bound to a named variable on purpose: in Groovy,
        // "catch (UnsupportedCharsetException)" declares an untyped catch parameter with that *name*,
        // which would let any Exception satisfy this test.
        threwException = true
    }

    assertTrue('Expected an UnsupportedCharsetException to occur when parsing the content type header text/html; charset=FUTURE_CHARSET', threwException)
}
85+
86+
@Test
87+
void testHasTextualContent() {
    // Content-Type header value -> whether hasTextualContent is expected to report it as text
    Map<String, Boolean> expectedTextFlagByHeader = [
            'text/html' : true,
            'text/*' : true,
            'application/x-javascript' : true,
            'application/javascript' : true,
            'application/xml' : true,
            'application/xhtml+xml' : true,
            'application/xhtml+xml; charset=UTF-8' : true,
            'application/octet-stream' : false,
            '': false,
    ]

    for (Map.Entry<String, Boolean> expectation : expectedTextFlagByHeader.entrySet()) {
        String contentTypeHeader = expectation.key
        Boolean expectedIsText = expectation.value
        boolean actualIsText = BrowserMobHttpUtil.hasTextualContent(contentTypeHeader)
        assertEquals("hasTextualContent did not return expected value for content type header: " + contentTypeHeader, expectedIsText, actualIsText)
    }

    // a null header is checked on its own, outside the table above
    boolean nullHeaderIsText = BrowserMobHttpUtil.hasTextualContent(null)
    assertFalse("Expected hasTextualContent to return false for null content type", nullHeaderIsText)
}
47108
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package net.lightbody.bmp.exception;
2+
3+
/**
 * A checked exception wrapper for {@link java.nio.charset.UnsupportedCharsetException}. This exception is checked to prevent
 * situations where an unsupported character set in e.g. a Content-Type header causes the proxy to fail completely, rather
 * than fallback to some suitable default behavior, such as not parsing the text contents of a message.
 */
public class UnsupportedCharsetException extends Exception {
    private static final long serialVersionUID = 1L;

    /**
     * Wraps the specified unchecked exception as the cause of this checked exception.
     *
     * @param e the unchecked exception to wrap; must not be null
     * @throws IllegalArgumentException if e is null
     */
    public UnsupportedCharsetException(java.nio.charset.UnsupportedCharsetException e) {
        // validate inside the super(...) argument so the superclass is never initialized with a null cause
        super(requireCause(e));
    }

    /**
     * @return the underlying {@link java.nio.charset.UnsupportedCharsetException} that this exception wraps.
     */
    public java.nio.charset.UnsupportedCharsetException getUnsupportedCharsetExceptionCause() {
        // the only constructor accepts exactly this type and rejects null, so the cast cannot fail
        return (java.nio.charset.UnsupportedCharsetException) this.getCause();
    }

    /**
     * Returns the supplied cause unchanged, rejecting null.
     */
    private static java.nio.charset.UnsupportedCharsetException requireCause(java.nio.charset.UnsupportedCharsetException e) {
        if (e == null) {
            throw new IllegalArgumentException("net.lightbody.bmp.exception.UnsupportedCharsetException must be initialized with a non-null instance of java.nio.charset.UnsupportedCharsetException");
        }

        return e;
    }
}

browsermob-core/src/main/java/net/lightbody/bmp/proxy/CaptureType.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ public enum CaptureType {
1919

2020
/**
2121
* Non-binary HTTP request content, such as post data or other text-based request payload.
22-
* FIXME: link to binary content-types
23-
* See ${@link TBD} for a list of Content-Types that
22+
* See {@link net.lightbody.bmp.util.BrowserMobHttpUtil#hasTextualContent(String)} for a list of Content-Types that
2423
* are considered non-binary.
2524
*
2625
*/
@@ -43,8 +42,7 @@ public enum CaptureType {
4342

4443
/**
4544
* Non-binary HTTP response content (typically, HTTP body content).
46-
* FIXME: link to binary content-types
47-
* See ${@link TBD} for a list of Content-Types that
45+
* See {@link net.lightbody.bmp.util.BrowserMobHttpUtil#hasTextualContent(String)} for a list of Content-Types that
4846
* are considered non-binary.
4947
*/
5048
RESPONSE_CONTENT,

browsermob-core/src/main/java/net/lightbody/bmp/util/BrowserMobHttpUtil.java

Lines changed: 35 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
package net.lightbody.bmp.util;
22

33
import com.google.common.net.HostAndPort;
4+
import com.google.common.net.MediaType;
45
import io.netty.buffer.ByteBuf;
56
import io.netty.handler.codec.http.HttpHeaders;
67
import io.netty.handler.codec.http.HttpRequest;
78
import io.netty.handler.codec.http.HttpResponse;
89
import net.lightbody.bmp.exception.DecompressionException;
9-
import org.apache.http.entity.ContentType;
10+
import net.lightbody.bmp.exception.UnsupportedCharsetException;
1011
import org.slf4j.Logger;
1112
import org.slf4j.LoggerFactory;
1213

@@ -18,6 +19,7 @@
1819
import java.nio.charset.Charset;
1920
import java.nio.charset.StandardCharsets;
2021
import java.util.List;
22+
import java.util.Locale;
2123
import java.util.Map;
2224
import java.util.zip.GZIPInputStream;
2325
import java.util.zip.InflaterInputStream;
@@ -149,62 +151,47 @@ public static byte[] extractReadableBytes(ByteBuf content) {
149151
}
150152

151153
/**
152-
* Converts the byte array into a String based on the charset specified in the contentTypeHeader. If no
153-
* charset is specified in the contentTypeHeader, this method uses default (see {@link #DEFAULT_HTTP_CHARSET}). The httpRequest is used
154-
* only for logging purposes if the contentTypeHeader does not contain a charset.
154+
* Converts the byte array into a String based on the specified charset. The charset cannot be null.
155155
*
156156
* @param content bytes to convert to a String
157-
* @param contentTypeHeader request's content type header
158-
* @param httpRequest HTTP request responsible for this content (used for logging purposes only)
157+
* @param charset the character set of the content
159158
* @return String containing the converted content
159+
* @throws IllegalArgumentException if charset is null
160160
*/
161-
public static String getContentAsString(byte[] content, String contentTypeHeader, HttpRequest httpRequest) {
162-
Charset charset = readCharsetInContentTypeHeader(contentTypeHeader);
161+
public static String getContentAsString(byte[] content, Charset charset) {
163162
if (charset == null) {
164-
// no charset specified, so use the default -- but log a message since this might not encode the data correctly
165-
charset = DEFAULT_HTTP_CHARSET;
166-
if (httpRequest != null) {
167-
log.debug("No charset specified; using charset {} to decode contents to/from {}", charset, httpRequest.getUri());
168-
} else {
169-
log.debug("No charset specified; using charset {} to decode contents", charset);
170-
}
163+
throw new IllegalArgumentException("Charset cannot be null");
171164
}
172165

173166
return new String(content, charset);
174167
}
175168

176169
/**
177-
* Derives the charset from the Content-Type header. Unlike {@link #readCharsetInContentTypeHeader}, if contentTypeHeader is null or
178-
* does not specify a charset, this method will return the ISO-8859-1 charset.
179-
*
180-
* @param contentTypeHeader the Content-Type header string; can be null or empty
181-
* @return the character set indicated in the contentTypeHeader, or ISO-8859-1 if none is specified or no contentTypeHeader is specified
182-
*/
183-
public static Charset deriveCharsetFromContentTypeHeader(String contentTypeHeader) {
184-
Charset charset = readCharsetInContentTypeHeader(contentTypeHeader);
185-
if (charset == null) {
186-
return DEFAULT_HTTP_CHARSET;
187-
}
188-
189-
return charset;
190-
}
191-
192-
/**
193-
* Reads the charset directly from the Content-Type header string. If the Content-Type header does not contain a charset, or if the header
194-
* is null or empty, this method returns null. See also {@link #deriveCharsetFromContentTypeHeader(String)}.
170+
* Reads the charset directly from the Content-Type header string. If the Content-Type header does not contain a charset,
171+
* is malformed or unparsable, or if the header is null or empty, this method returns null.
195172
*
196173
* @param contentTypeHeader the Content-Type header string; can be null or empty
197-
* @return the character set indicated in the contentTypeHeader, or null if the charset is not present
174+
* @return the character set indicated in the contentTypeHeader, or null if the charset is not present or is not parsable
175+
* @throws UnsupportedCharsetException if there is a charset specified in the content-type header, but it is not supported on this platform
198176
*/
199-
public static Charset readCharsetInContentTypeHeader(String contentTypeHeader) {
177+
public static Charset readCharsetInContentTypeHeader(String contentTypeHeader) throws UnsupportedCharsetException {
200178
if (contentTypeHeader == null || contentTypeHeader.isEmpty()) {
201-
return DEFAULT_HTTP_CHARSET;
179+
return null;
202180
}
203181

204-
//FIXME: remove dependency on HttpCore's ContentType
205-
ContentType contentTypeCharset = ContentType.parse(contentTypeHeader);
182+
MediaType mediaType;
183+
try {
184+
mediaType = MediaType.parse(contentTypeHeader);
185+
} catch (IllegalArgumentException e) {
186+
log.info("Unable to parse Content-Type header: {}. Content-Type header will be ignored.", contentTypeHeader, e);
187+
return null;
188+
}
206189

207-
return contentTypeCharset.getCharset();
190+
try {
191+
return mediaType.charset().orNull();
192+
} catch (java.nio.charset.UnsupportedCharsetException e) {
193+
throw new UnsupportedCharsetException(e);
194+
}
208195
}
209196

210197
/**
@@ -284,11 +271,15 @@ public static boolean startsWithHttpOrHttps(String uri) {
284271
return false;
285272
}
286273

287-
if (uri.startsWith("http://") || uri.startsWith("https://")) {
288-
return true;
289-
} else {
290-
return false;
291-
}
274+
// the scheme is case insensitive, according to RFC 7230, section 2.7.3:
275+
/*
276+
The scheme and host
277+
are case-insensitive and normally provided in lowercase; all other
278+
components are compared in a case-sensitive manner.
279+
*/
280+
String lowercaseUri = uri.toLowerCase(Locale.US);
281+
282+
return lowercaseUri.startsWith("http://") || lowercaseUri.startsWith("https://");
292283
}
293284

294285
/**

0 commit comments

Comments
 (0)