Skip to content

Commit 4e0749a

Browse files
andybalholm authored and nigeltao committed
exp/html: Convert \r and \r\n to \n when tokenizing
Also escape "\r" as "&#13;" when rendering HTML. Pass 2 additional tests. R=nigeltao CC=golang-dev https://golang.org/cl/6260046
1 parent afe0e97 commit 4e0749a

File tree

4 files changed

+66
-4
lines changed

4 files changed

+66
-4
lines changed

src/pkg/exp/html/escape.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func lower(b []byte) []byte {
192192
return b
193193
}
194194

195-
const escapedChars = `&'<>"`
195+
const escapedChars = "&'<>\"\r"
196196

197197
func escape(w writer, s string) error {
198198
i := strings.IndexAny(s, escapedChars)
@@ -214,6 +214,8 @@ func escape(w writer, s string) error {
214214
case '"':
215215
// "&#34;" is shorter than "&quot;".
216216
esc = "&#34;"
217+
case '\r':
218+
esc = "&#13;"
217219
default:
218220
panic("unrecognized escape character")
219221
}

src/pkg/exp/html/testlogs/plain-text-unsafe.dat.log

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ PASS "<svg>\x00 </svg><frameset>"
2121
FAIL "<svg>\x00a</svg><frameset>"
2222
PASS "<svg><path></path></svg><frameset>"
2323
PASS "<svg><p><frameset>"
24-
FAIL "<!DOCTYPE html><pre>\r\n\r\nA</pre>"
25-
FAIL "<!DOCTYPE html><pre>\r\rA</pre>"
24+
PASS "<!DOCTYPE html><pre>\r\n\r\nA</pre>"
25+
PASS "<!DOCTYPE html><pre>\r\rA</pre>"
2626
PASS "<!DOCTYPE html><pre>\rA</pre>"
2727
PASS "<!DOCTYPE html><table><tr><td><math><mtext>\x00a"
2828
PASS "<!DOCTYPE html><table><tr><td><svg><foreignObject>\x00a"

src/pkg/exp/html/token.go

+34-1
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,38 @@ func (z *Tokenizer) Raw() []byte {
696696
return z.buf[z.raw.start:z.raw.end]
697697
}
698698

699+
// convertNewlines converts "\r" and "\r\n" in s to "\n".
700+
// The conversion happens in place, but the resulting slice may be shorter.
//
// A lone "\r" is overwritten with "\n" without changing the length of s.
// The first "\r\n" pair found switches to a compacting copy that processes
// the rest of s in a single pass and returns the shortened slice.
701+
func convertNewlines(s []byte) []byte {
702+
for i, c := range s {
703+
if c != '\r' {
704+
continue
705+
}
706+
707+
// src indexes the byte that follows the "\r" at i.
src := i + 1
708+
if src >= len(s) || s[src] != '\n' {
709+
// Lone "\r" (last byte, or not followed by "\n"): same-length replacement.
s[i] = '\n'
710+
continue
711+
}
712+
713+
// Found "\r\n". From here on the output is shorter than the input, so
// copy bytes down from src to dst. src starts on the "\n" of the pair,
// which means the first copy overwrites the "\r" at i.
dst := i
714+
for src < len(s) {
715+
if s[src] == '\r' {
716+
if src+1 < len(s) && s[src+1] == '\n' {
717+
// Skip the "\n" of a "\r\n" pair so the pair yields one "\n".
src++
718+
}
719+
s[dst] = '\n'
720+
} else {
721+
s[dst] = s[src]
722+
}
723+
src++
724+
dst++
725+
}
726+
return s[:dst]
727+
}
728+
// No "\r\n" pair was seen; any lone "\r" bytes were replaced above and the
// length is unchanged.
return s
729+
}
730+
699731
// Text returns the unescaped text of a text, comment or doctype token. The
700732
// contents of the returned slice may change on the next call to Next.
701733
func (z *Tokenizer) Text() []byte {
@@ -704,6 +736,7 @@ func (z *Tokenizer) Text() []byte {
704736
s := z.buf[z.data.start:z.data.end]
705737
z.data.start = z.raw.end
706738
z.data.end = z.raw.end
739+
s = convertNewlines(s)
707740
if !z.textIsRaw {
708741
s = unescape(s)
709742
}
@@ -739,7 +772,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
739772
z.nAttrReturned++
740773
key = z.buf[x[0].start:x[0].end]
741774
val = z.buf[x[1].start:x[1].end]
742-
return lower(key), unescape(val), z.nAttrReturned < len(z.attr)
775+
return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr)
743776
}
744777
}
745778
return nil, nil, false

src/pkg/exp/html/token_test.go

+27
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,33 @@ loop:
592592
}
593593
}
594594

595+
// TestConvertNewlines checks convertNewlines against a table of inputs that
// mix "\r", "\r\n" and "\n" line endings, along with the empty string and a
// string containing no newlines. Keys are inputs; values are expected outputs.
func TestConvertNewlines(t *testing.T) {
596+
testCases := map[string]string{
597+
"Mac\rDOS\r\nUnix\n": "Mac\nDOS\nUnix\n",
598+
"Unix\nMac\rDOS\r\n": "Unix\nMac\nDOS\n",
599+
"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
600+
"": "",
601+
"\n": "\n",
602+
"\n\r": "\n\n",
603+
"\r": "\n",
604+
"\r\n": "\n",
605+
"\r\n\n": "\n\n",
606+
"\r\n\r": "\n\n",
607+
"\r\n\r\n": "\n\n",
608+
"\r\r": "\n\n",
609+
"\r\r\n": "\n\n",
610+
"\r\r\n\n": "\n\n\n",
611+
"\r\r\r\n": "\n\n\n",
612+
"\r \n": "\n \n",
613+
"xyz": "xyz",
614+
}
615+
// Map iteration order is unspecified, so the cases run in arbitrary order.
for in, want := range testCases {
616+
// convertNewlines works in place, so each case is given a fresh
// []byte copy of the input string.
if got := string(convertNewlines([]byte(in))); got != want {
617+
t.Errorf("input %q: got %q, want %q", in, got, want)
618+
}
619+
}
620+
}
621+
595622
const (
596623
rawLevel = iota
597624
lowLevel

0 commit comments

Comments
 (0)