@@ -95,7 +95,8 @@ def initialize(base_url, options={})
95
95
follow_redirects = 5 if follow_redirects == true
96
96
97
97
if follow_redirects
98
- require 'faraday_middleware'
98
+ require 'faraday/follow_redirects'
99
+ builder . use Faraday ::FollowRedirects ::Middleware
99
100
builder . response :follow_redirects , :limit => follow_redirects . to_i
100
101
end
101
102
builder . adapter :net_http
@@ -330,14 +331,27 @@ def parse_date(value)
330
331
# Regex is from WebCollab:
331
332
# http://webcollab.sourceforge.net/unicode.html
332
333
# Replaces byte sequences that are not acceptable UTF-8 text with '?'.
#
# The patterns are taken from WebCollab
# (http://webcollab.sourceforge.net/unicode.html) and are deliberately
# kept byte-for-byte. Note two consequences of those patterns:
#   * every sequence led by a byte in F0..FF is replaced, which includes
#     well-formed 4-byte UTF-8 sequences;
#   * an ASCII byte directly followed by stray continuation bytes is
#     replaced together with them by a single '?'.
#
# @param xml [String, nil] the text to clean; it is never modified
# @return [String, nil] a new string tagged with the same encoding as
#   +xml+, or nil when +xml+ is nil/false
def strip_invalid_utf_8_chars(xml)
  return nil unless xml

  orig_encoding = xml.encoding

  # Matching raw bytes against a string tagged with a real encoding can
  # raise, so the work is done on a BINARY-tagged *copy*. Copying (instead
  # of calling force_encoding on the argument) leaves the caller's string
  # untouched and also makes frozen input acceptable.
  bytes = xml.dup.force_encoding(Encoding::BINARY)

  # The +n+ flag pins both literals to a binary regexp so the \x80..\xFF
  # escapes compile no matter what the source file's encoding is.
  cleaned = bytes.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
                        | [\x00-\x7F][\x80-\xBF]+
                        | ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
                        | [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
                        | [\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))
                        | (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/xn, '?')

  # Overlong 3-byte forms and UTF-16 surrogates. This literal spans two
  # lines, so it needs +x+ as well; without it the line break and the
  # indentation are matched literally and the pattern never fires.
  cleaned = cleaned.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
                          | \xED[\xA0-\xBF][\x80-\xBF]/xn, '?')

  # Hand the result back under the encoding the input was tagged with.
  cleaned.force_encoding(orig_encoding)
end
342
356
343
357
end
0 commit comments