From 87141b908d4a03ce27af3ce042dc417da925b84f Mon Sep 17 00:00:00 2001
From: wxiaoguang <wxiaoguang@gmail.com>
Date: Sun, 23 Jan 2022 20:19:49 +0800
Subject: [PATCH] Fix mime-type detection for HTTP server (#18370)

Bypass the unstable behavior of Golang's mime.TypeByExtension
---
 modules/public/mime_types.go                  | 41 +++++++++++++++++++
 modules/public/public.go                      | 11 +++++
 .../public/{dynamic.go => serve_dynamic.go}   |  0
 modules/public/{static.go => serve_static.go} | 29 ++++---------
 4 files changed, 61 insertions(+), 20 deletions(-)
 create mode 100644 modules/public/mime_types.go
 rename modules/public/{dynamic.go => serve_dynamic.go} (100%)
 rename modules/public/{static.go => serve_static.go} (68%)

diff --git a/modules/public/mime_types.go b/modules/public/mime_types.go
new file mode 100644
index 0000000000..f8c92e824f
--- /dev/null
+++ b/modules/public/mime_types.go
@@ -0,0 +1,41 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package public
+
+import "strings"
+
+// wellKnownMimeTypesLower maps lower-case file ext names (with the leading dot) to mime-types. It comes from Golang's builtin mime package: `builtinTypesLower`, see the comment of detectWellKnownMimeType
+var wellKnownMimeTypesLower = map[string]string{
+	".avif": "image/avif",
+	".css":  "text/css; charset=utf-8",
+	".gif":  "image/gif",
+	".htm":  "text/html; charset=utf-8",
+	".html": "text/html; charset=utf-8",
+	".jpeg": "image/jpeg",
+	".jpg":  "image/jpeg",
+	".js":   "text/javascript; charset=utf-8",
+	".json": "application/json",
+	".mjs":  "text/javascript; charset=utf-8",
+	".pdf":  "application/pdf",
+	".png":  "image/png",
+	".svg":  "image/svg+xml",
+	".wasm": "application/wasm",
+	".webp": "image/webp",
+	".xml":  "text/xml; charset=utf-8",
+
+	// types that are missing from Golang's builtin list
+	".txt": "text/plain; charset=utf-8",
+}
+
+// detectWellKnownMimeType returns the mime-type for a well-known file ext name, or "" if the ext name is not in wellKnownMimeTypesLower.
+// The purpose of this function is to bypass the unstable behavior of Golang's mime.TypeByExtension:
+// mime.TypeByExtension would use the OS's mime-type config to overwrite the well-known types (see its documentation).
+// If the user's OS has an incorrect mime-type config, Gitea could not respond with a correct Content-Type to browsers.
+// For example, if Gitea returned `text/plain` for a `.js` file, the browser would not run the JS for security reasons.
+// The ext name is lower-cased before the lookup, so detectWellKnownMimeType keeps the Content-Type for well-known files stable.
+func detectWellKnownMimeType(ext string) string {
+	ext = strings.ToLower(ext)
+	return wellKnownMimeTypesLower[ext]
+}
diff --git a/modules/public/public.go b/modules/public/public.go
index 91ecf42a3c..7804e945e7 100644
--- a/modules/public/public.go
+++ b/modules/public/public.go
@@ -92,6 +92,15 @@ func parseAcceptEncoding(val string) map[string]bool {
 	return types
 }
 
+// setWellKnownContentType sets the Content-Type header if the ext name of file is a well-known type; otherwise the header is left untouched.
+// See the comments of detectWellKnownMimeType for why mime.TypeByExtension is not used here.
+func setWellKnownContentType(w http.ResponseWriter, file string) {
+	mimeType := detectWellKnownMimeType(filepath.Ext(file))
+	if mimeType != "" {
+		w.Header().Set("Content-Type", mimeType)
+	}
+}
+
 func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.FileSystem, file string) bool {
 	// use clean to keep the file is a valid path with no . or ..
 	f, err := fs.Open(path.Clean(file))
@@ -122,6 +131,8 @@ func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.Fi
 		return true
 	}
 
+	setWellKnownContentType(w, file)
+
 	serveContent(w, req, fi, fi.ModTime(), f)
 	return true
 }
diff --git a/modules/public/dynamic.go b/modules/public/serve_dynamic.go
similarity index 100%
rename from modules/public/dynamic.go
rename to modules/public/serve_dynamic.go
diff --git a/modules/public/static.go b/modules/public/serve_static.go
similarity index 68%
rename from modules/public/static.go
rename to modules/public/serve_static.go
index d373c712ee..8e82175e39 100644
--- a/modules/public/static.go
+++ b/modules/public/serve_static.go
@@ -9,15 +9,12 @@ package public
 
 import (
 	"bytes"
-	"compress/gzip"
 	"io"
-	"mime"
 	"net/http"
 	"os"
 	"path/filepath"
 	"time"
 
-	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/timeutil"
 )
 
@@ -66,24 +63,16 @@ func serveContent(w http.ResponseWriter, req *http.Request, fi os.FileInfo, modt
 	encodings := parseAcceptEncoding(req.Header.Get("Accept-Encoding"))
 	if encodings["gzip"] {
 		if cf, ok := fi.(*vfsgen۰CompressedFileInfo); ok {
-			rd := bytes.NewReader(cf.GzipBytes())
-			w.Header().Set("Content-Encoding", "gzip")
-			ctype := mime.TypeByExtension(filepath.Ext(fi.Name()))
-			if ctype == "" {
-				// read a chunk to decide between utf-8 text and binary
-				var buf [512]byte
-				grd, _ := gzip.NewReader(rd)
-				n, _ := io.ReadFull(grd, buf[:])
-				ctype = http.DetectContentType(buf[:n])
-				_, err := rd.Seek(0, io.SeekStart) // rewind to output whole file
-				if err != nil {
-					log.Error("rd.Seek error: %v", err)
-					http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
-					return
-				}
+			rdGzip := bytes.NewReader(cf.GzipBytes())
+			// all static files are managed by Gitea, so we can make sure every file has the correct ext name,
+			// and therefore we get the correct Content-Type without running http.DetectContentType on the decompressed data
+			mimeType := detectWellKnownMimeType(filepath.Ext(fi.Name()))
+			if mimeType == "" {
+				mimeType = "application/octet-stream"
 			}
-			w.Header().Set("Content-Type", ctype)
-			http.ServeContent(w, req, fi.Name(), modtime, rd)
+			w.Header().Set("Content-Type", mimeType)
+			w.Header().Set("Content-Encoding", "gzip")
+			http.ServeContent(w, req, fi.Name(), modtime, rdGzip)
 			return
 		}
 	}