Skip to content

Commit 8a20fba

Browse files
authored
Refactor markup render system (#32533)
Remove unmaintainable sanitizer rules. No need to add special "class" regexp rules anymore, use RenderInternal.SafeAttr instead, more details (and examples) are in the tests
1 parent 4f879a0 commit 8a20fba

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+567
-507
lines changed

modules/html/html.go

-25
This file was deleted.

modules/htmlutil/html.go

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2022 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package htmlutil
5+
6+
import (
7+
"fmt"
8+
"html/template"
9+
"slices"
10+
)
11+
12+
// ParseSizeAndClass get size and class from string with default values
13+
// If present, "others" expects the new size first and then the classes to use
14+
func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) {
15+
size := defaultSize
16+
if len(others) >= 1 {
17+
if v, ok := others[0].(int); ok && v != 0 {
18+
size = v
19+
}
20+
}
21+
class := defaultClass
22+
if len(others) >= 2 {
23+
if v, ok := others[1].(string); ok && v != "" {
24+
if class != "" {
25+
class += " "
26+
}
27+
class += v
28+
}
29+
}
30+
return size, class
31+
}
32+
33+
func HTMLFormat(s string, rawArgs ...any) template.HTML {
34+
args := slices.Clone(rawArgs)
35+
for i, v := range args {
36+
switch v := v.(type) {
37+
case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML:
38+
// for most basic types (including template.HTML which is safe), just do nothing and use it
39+
case string:
40+
args[i] = template.HTMLEscapeString(v)
41+
case fmt.Stringer:
42+
args[i] = template.HTMLEscapeString(v.String())
43+
default:
44+
args[i] = template.HTMLEscapeString(fmt.Sprint(v))
45+
}
46+
}
47+
return template.HTML(fmt.Sprintf(s, args...))
48+
}

modules/htmlutil/html_test.go

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright 2024 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package htmlutil
5+
6+
import (
7+
"html/template"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func TestHTMLFormat(t *testing.T) {
14+
assert.Equal(t, template.HTML("<a>&lt; < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1))
15+
}

modules/markup/asciicast/asciicast.go

+2-13
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"fmt"
88
"io"
99
"net/url"
10-
"regexp"
1110

1211
"code.gitea.io/gitea/modules/markup"
1312
"code.gitea.io/gitea/modules/setting"
@@ -38,10 +37,7 @@ const (
3837

3938
// SanitizerRules implements markup.Renderer
4039
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
41-
return []setting.MarkupSanitizerRule{
42-
{Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(playerClassName)},
43-
{Element: "div", AllowAttr: playerSrcAttr},
44-
}
40+
return []setting.MarkupSanitizerRule{{Element: "div", AllowAttr: playerSrcAttr}}
4541
}
4642

4743
// Render implements markup.Renderer
@@ -53,12 +49,5 @@ func (Renderer) Render(ctx *markup.RenderContext, _ io.Reader, output io.Writer)
5349
ctx.Metas["BranchNameSubURL"],
5450
url.PathEscape(ctx.RelativePath),
5551
)
56-
57-
_, err := io.WriteString(output, fmt.Sprintf(
58-
`<div class="%s" %s="%s"></div>`,
59-
playerClassName,
60-
playerSrcAttr,
61-
rawURL,
62-
))
63-
return err
52+
return ctx.RenderInternal.FormatWithSafeAttrs(output, `<div class="%s" %s="%s"></div>`, playerClassName, playerSrcAttr, rawURL)
6453
}

modules/markup/common/html.go

-16
This file was deleted.

modules/markup/common/linkify.go

+15-3
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,27 @@ package common
99
import (
1010
"bytes"
1111
"regexp"
12+
"sync"
1213

1314
"github.com/yuin/goldmark"
1415
"github.com/yuin/goldmark/ast"
1516
"github.com/yuin/goldmark/parser"
1617
"github.com/yuin/goldmark/text"
1718
"github.com/yuin/goldmark/util"
19+
"mvdan.cc/xurls/v2"
1820
)
1921

20-
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
22+
type GlobalVarsType struct {
23+
wwwURLRegxp *regexp.Regexp
24+
LinkRegex *regexp.Regexp // fast matching a URL link, no any extra validation.
25+
}
26+
27+
var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType {
28+
v := &GlobalVarsType{}
29+
v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
30+
v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://")
31+
return v
32+
})
2133

2234
type linkifyParser struct{}
2335

@@ -60,10 +72,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
6072
var protocol []byte
6173
typ := ast.AutoLinkURL
6274
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
63-
m = LinkRegex.FindSubmatchIndex(line)
75+
m = GlobalVars().LinkRegex.FindSubmatchIndex(line)
6476
}
6577
if m == nil && bytes.HasPrefix(line, domainWWW) {
66-
m = wwwURLRegxp.FindSubmatchIndex(line)
78+
m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line)
6779
protocol = []byte("http")
6880
}
6981
if m != nil {

modules/markup/console/console.go

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ package console
66
import (
77
"bytes"
88
"io"
9-
"path/filepath"
10-
"regexp"
9+
"path"
1110

1211
"code.gitea.io/gitea/modules/markup"
1312
"code.gitea.io/gitea/modules/setting"
@@ -36,7 +35,7 @@ func (Renderer) Extensions() []string {
3635
// SanitizerRules implements markup.Renderer
3736
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
3837
return []setting.MarkupSanitizerRule{
39-
{Element: "span", AllowAttr: "class", Regexp: regexp.MustCompile(`^term-((fg[ix]?|bg)\d+|container)$`)},
38+
{Element: "span", AllowAttr: "class", Regexp: `^term-((fg[ix]?|bg)\d+|container)$`},
4039
}
4140
}
4241

@@ -46,7 +45,7 @@ func (Renderer) CanRender(filename string, input io.Reader) bool {
4645
if err != nil {
4746
return false
4847
}
49-
if enry.GetLanguage(filepath.Base(filename), buf) != enry.OtherLanguage {
48+
if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage {
5049
return false
5150
}
5251
return bytes.ContainsRune(buf, '\x1b')

modules/markup/csv/csv.go

+5-6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"bufio"
88
"html"
99
"io"
10-
"regexp"
1110
"strconv"
1211

1312
"code.gitea.io/gitea/modules/csv"
@@ -37,9 +36,9 @@ func (Renderer) Extensions() []string {
3736
// SanitizerRules implements markup.Renderer
3837
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
3938
return []setting.MarkupSanitizerRule{
40-
{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
41-
{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
42-
{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
39+
{Element: "table", AllowAttr: "class", Regexp: `^data-table$`},
40+
{Element: "th", AllowAttr: "class", Regexp: `^line-num$`},
41+
{Element: "td", AllowAttr: "class", Regexp: `^line-num$`},
4342
}
4443
}
4544

@@ -51,13 +50,13 @@ func writeField(w io.Writer, element, class, field string) error {
5150
return err
5251
}
5352
if len(class) > 0 {
54-
if _, err := io.WriteString(w, " class=\""); err != nil {
53+
if _, err := io.WriteString(w, ` class="`); err != nil {
5554
return err
5655
}
5756
if _, err := io.WriteString(w, class); err != nil {
5857
return err
5958
}
60-
if _, err := io.WriteString(w, "\""); err != nil {
59+
if _, err := io.WriteString(w, `"`); err != nil {
6160
return err
6261
}
6362
}

modules/markup/external/external.go

+4-5
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.
102102

103103
_, err = io.Copy(f, input)
104104
if err != nil {
105-
f.Close()
105+
_ = f.Close()
106106
return fmt.Errorf("%s write data to temp file when rendering %s failed: %w", p.Name(), p.Command, err)
107107
}
108108

@@ -113,10 +113,9 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.
113113
args = append(args, f.Name())
114114
}
115115

116-
if ctx == nil || ctx.Ctx == nil {
117-
if ctx == nil {
118-
log.Warn("RenderContext not provided defaulting to empty ctx")
119-
ctx = &markup.RenderContext{}
116+
if ctx.Ctx == nil {
117+
if !setting.IsProd || setting.IsInTesting {
118+
panic("RenderContext did not provide context")
120119
}
121120
log.Warn("RenderContext did not provide context, defaulting to Shutdown context")
122121
ctx.Ctx = graceful.GetManager().ShutdownContext()

0 commit comments

Comments
 (0)