From b24e8d38af21fc1857b6aa66351627e3b1761608 Mon Sep 17 00:00:00 2001
From: Lunny Xiao <xiaolunwen@gmail.com>
Date: Sun, 6 Mar 2022 16:41:54 +0800
Subject: [PATCH] Support ignoring all sanitization for external renderer (#18984)

* Support ignoring all sanitization for external renderer

* Update docs

* Apply suggestions from code review

Co-authored-by: silverwind <me@silverwind.io>

* Fix doc

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: 6543 <6543@obermui.de>
---
 custom/conf/app.example.ini                   |  2 ++
 .../doc/advanced/config-cheat-sheet.en-us.md  |  4 +--
 .../doc/advanced/config-cheat-sheet.zh-cn.md  | 27 ++++++++++++++
 modules/markup/csv/csv.go                     |  5 +++
 modules/markup/external/external.go           |  5 +++
 modules/markup/markdown/markdown.go           |  5 +++
 modules/markup/orgmode/orgmode.go             |  5 +++
 modules/markup/renderer.go                    | 36 +++++++++++++------
 modules/setting/markup.go                     | 14 ++++----
 9 files changed, 84 insertions(+), 19 deletions(-)

diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini
index 42d1756715..ad58e6bda3 100644
--- a/custom/conf/app.example.ini
+++ b/custom/conf/app.example.ini
@@ -2125,6 +2125,8 @@ PATH =
 ;RENDER_COMMAND = "asciidoc --out-file=- -"
 ;; Don't pass the file on STDIN, pass the filename as argument instead.
 ;IS_INPUT_FILE = false
+;; Don't filter HTML tags and attributes if true
+;DISABLE_SANITIZER = false
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index 59b8fc31f0..70bc2ee829 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -1003,13 +1003,13 @@ IS_INPUT_FILE = false
    command. Multiple extensions needs a comma as splitter.
 - RENDER\_COMMAND: External command to render all matching extensions.
 - IS\_INPUT\_FILE: **false** Input is not a standard input but a file param followed `RENDER_COMMAND`.
+- DISABLE\_SANITIZER: **false** Don't filter HTML tags and attributes if true. Don't change this to true unless you know what that means.
 
 Two special environment variables are passed to the render command:
 - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
 - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
 
-
-Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc.
+If `DISABLE_SANITIZER` is false, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc.
 
 ```ini
 [markup.sanitizer.TeX]
diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md
index 7db7fe705a..600e54a85e 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md
@@ -318,6 +318,33 @@ IS_INPUT_FILE = false
 - FILE_EXTENSIONS: 关联的文档的扩展名,多个扩展名用都好分隔。
 - RENDER_COMMAND: 工具的命令行命令及参数。
 - IS_INPUT_FILE: 输入方式是最后一个参数为文件路径还是从标准输入读取。
+- DISABLE_SANITIZER: **false** 如果为 true 则不过滤 HTML 标签和属性。除非你知道这意味着什么,否则不要设置为 true。
+
+以下两个环境变量将会被传递给渲染命令:
+
+- `GITEA_PREFIX_SRC`:包含当前的`src`路径的URL前缀,可以被用于链接的前缀。
+- `GITEA_PREFIX_RAW`:包含当前的`raw`路径的URL前缀,可以被用于图片的前缀。
+
+如果 `DISABLE_SANITIZER` 为 false,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。
+
+```ini
+[markup.sanitizer.TeX]
+; Pandoc renders TeX segments as <span>s with the "math" class, optionally
+; with "inline" or "display" classes depending on context.
+ELEMENT = span
+ALLOW_ATTR = class
+REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
+ALLOW_DATA_URI_IMAGES = true
+```
+
+- `ELEMENT`: 将要被应用到该策略的 HTML 元素,不能为空。
+- `ALLOW_ATTR`: 将要被应用到该策略的属性,不能为空。
+- `REGEXP`: 正则表达式,用来匹配属性的内容。如果为空,则跟属性内容无关。
+- `ALLOW_DATA_URI_IMAGES`: **false** 允许 data uri 图片 (`<img src="data:image/png;base64,..."/>`)。
+
+多个净化规则可以被同时定义,只要section名称最后一位不重复即可。如: `[markup.sanitizer.TeX-2]`。
+为了针对一种渲染类型进行一个特殊的净化策略,必须使用形如 `[markup.sanitizer.asciidoc.rule-1]` 的方式来命名 section。
+如果此规则没有匹配到任何渲染类型,它将会被应用到所有的渲染类型。
 
 ## Time (`time`)
 
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go
index de32c57a64..17c3fe6f4f 100644
--- a/modules/markup/csv/csv.go
+++ b/modules/markup/csv/csv.go
@@ -46,6 +46,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 	}
 }
 
+// SanitizerDisabled reports whether output sanitization is skipped; this renderer never disables it.
+func (Renderer) SanitizerDisabled() bool {
+	return false
+}
+
 func writeField(w io.Writer, element, class, field string) error {
 	if _, err := io.WriteString(w, "<"); err != nil {
 		return err
diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go
index 3acb601067..4fdd4315bc 100644
--- a/modules/markup/external/external.go
+++ b/modules/markup/external/external.go
@@ -54,6 +54,11 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 	return p.MarkupSanitizerRules
 }
 
+// SanitizerDisabled reports whether output sanitization is skipped, as set by the renderer's DisableSanitizer field.
+func (p *Renderer) SanitizerDisabled() bool {
+	return p.DisableSanitizer
+}
+
 func envMark(envName string) string {
 	if runtime.GOOS == "windows" {
 		return "%" + envName + "%"
diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go
index b45b9c8b8a..320c2f7f82 100644
--- a/modules/markup/markdown/markdown.go
+++ b/modules/markup/markdown/markdown.go
@@ -221,6 +221,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 	return []setting.MarkupSanitizerRule{}
 }
 
+// SanitizerDisabled reports whether output sanitization is skipped; this renderer never disables it.
+func (Renderer) SanitizerDisabled() bool {
+	return false
+}
+
 // Render implements markup.Renderer
 func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
 	return render(ctx, input, output)
diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go
index 8aa5f45ee2..2f394b992b 100644
--- a/modules/markup/orgmode/orgmode.go
+++ b/modules/markup/orgmode/orgmode.go
@@ -47,6 +47,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 	return []setting.MarkupSanitizerRule{}
 }
 
+// SanitizerDisabled reports whether output sanitization is skipped; this renderer never disables it.
+func (Renderer) SanitizerDisabled() bool {
+	return false
+}
+
 // Render renders orgmode rawbytes to HTML
 func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
 	htmlWriter := org.NewHTMLWriter()
diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go
index 0ac0daaea9..cf8b9bace7 100644
--- a/modules/markup/renderer.go
+++ b/modules/markup/renderer.go
@@ -81,6 +81,7 @@ type Renderer interface {
 	Extensions() []string
 	NeedPostProcess() bool
 	SanitizerRules() []setting.MarkupSanitizerRule
+	SanitizerDisabled() bool
 	Render(ctx *RenderContext, input io.Reader, output io.Writer) error
 }
 
@@ -127,6 +128,12 @@ func RenderString(ctx *RenderContext, content string) (string, error) {
 	return buf.String(), nil
 }
 
+type nopCloser struct {
+	io.Writer
+}
+
+func (nopCloser) Close() error { return nil }
+
 func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error {
 	var wg sync.WaitGroup
 	var err error
@@ -136,18 +143,25 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr
 		_ = pw.Close()
 	}()
 
-	pr2, pw2 := io.Pipe()
-	defer func() {
-		_ = pr2.Close()
-		_ = pw2.Close()
-	}()
+	var pr2 io.ReadCloser
+	var pw2 io.WriteCloser
 
-	wg.Add(1)
-	go func() {
-		err = SanitizeReader(pr2, renderer.Name(), output)
-		_ = pr2.Close()
-		wg.Done()
-	}()
+	if !renderer.SanitizerDisabled() {
+		pr2, pw2 = io.Pipe()
+		defer func() {
+			_ = pr2.Close()
+			_ = pw2.Close()
+		}()
+
+		wg.Add(1)
+		go func() {
+			err = SanitizeReader(pr2, renderer.Name(), output)
+			_ = pr2.Close()
+			wg.Done()
+		}()
+	} else {
+		pw2 = nopCloser{output}
+	}
 
 	wg.Add(1)
 	go func() {
diff --git a/modules/setting/markup.go b/modules/setting/markup.go
index 09b86b9b1a..5fb6af6838 100644
--- a/modules/setting/markup.go
+++ b/modules/setting/markup.go
@@ -29,6 +29,7 @@ type MarkupRenderer struct {
 	IsInputFile          bool
 	NeedPostProcess      bool
 	MarkupSanitizerRules []MarkupSanitizerRule
+	DisableSanitizer     bool
 }
 
 // MarkupSanitizerRule defines the policy for whitelisting attributes on
@@ -144,11 +145,12 @@ func newMarkupRenderer(name string, sec *ini.Section) {
 	}
 
 	ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{
-		Enabled:         sec.Key("ENABLED").MustBool(false),
-		MarkupName:      name,
-		FileExtensions:  exts,
-		Command:         command,
-		IsInputFile:     sec.Key("IS_INPUT_FILE").MustBool(false),
-		NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true),
+		Enabled:          sec.Key("ENABLED").MustBool(false),
+		MarkupName:       name,
+		FileExtensions:   exts,
+		Command:          command,
+		IsInputFile:      sec.Key("IS_INPUT_FILE").MustBool(false),
+		NeedPostProcess:  sec.Key("NEED_POSTPROCESS").MustBool(true),
+		DisableSanitizer: sec.Key("DISABLE_SANITIZER").MustBool(false),
 	})
 }