diff --git a/lib/nori/parser/nokogiri.rb b/lib/nori/parser/nokogiri.rb
index 533de70..6db8ebc 100644
--- a/lib/nori/parser/nokogiri.rb
+++ b/lib/nori/parser/nokogiri.rb
@@ -46,7 +46,11 @@ def characters(string)
alias cdata_block characters
def error(message)
- @last_error = message
+ if (invalid_chr = message[/PCDATA invalid Char value (\d+)/, 1]) # invalid character: keep it as text instead of recording an error
+ characters(invalid_chr.to_i.chr(Encoding::UTF_8)) # explicit UTF-8: bare Integer#chr raises RangeError above 255 and yields a binary byte for 128..255
+ else
+ @last_error = message
+ end
end
end
@@ -54,7 +58,9 @@ def self.parse(xml, options)
document = Document.new
document.options = options
parser = ::Nokogiri::XML::SAX::Parser.new document
- parser.parse xml
+ parser.parse xml do |ctx|
+ ctx.recovery = true # enable parser recovery; any message left in last_error still raises ParseError below
+ end
raise ParseError, document.last_error if document.last_error
document.stack.length > 0 ? document.stack.pop.to_hash : {}
end
diff --git a/spec/nori/nori_spec.rb b/spec/nori/nori_spec.rb
index d11a6f4..dd21e6c 100644
--- a/spec/nori/nori_spec.rb
+++ b/spec/nori/nori_spec.rb
@@ -644,6 +644,10 @@
expect { parse('foo bar') }.to raise_error(Nori::ParseError)
end
+ it "should preserve control characters" do
+ xml = "<tag>a\u0002c</tag>".force_encoding('UTF-8') # U+0002 is not a legal XML character; it must survive inside <tag>
+ expect(parse(xml)["tag"]).to eq("a\u0002c")
+ end
end
end