From df9612bb535c0f2b4540641ce2fba2c3d65de284 Mon Sep 17 00:00:00 2001
From: qwerty287 <80460567+qwerty287@users.noreply.github.com>
Date: Sat, 4 Jun 2022 15:17:53 +0200
Subject: [PATCH] Add API to serve blob or LFS file content (#19689)

* Add LFS API

* Update routers/api/v1/repo/file.go

Co-authored-by: Gusted <williamzijl7@hotmail.com>

* Apply suggestions

* Apply suggestions

* Update routers/api/v1/repo/file.go

Co-authored-by: Gusted <williamzijl7@hotmail.com>

* Report errors

* Add test

* Use own repo for test

* Use different repo name

* Improve handling

* Slight restructures

1. Avoid reading the blob data multiple times
2. Ensure that caching is only checked when about to serve the blob/lfs
3. Avoid nesting by returning early
4. Make log message a bit more clear
5. Ensure that the dataRc is closed by defer when passed to ServeData

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: Gusted <williamzijl7@hotmail.com>
Co-authored-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
---
 integrations/api_repo_file_get_test.go |  56 ++++++++++
 routers/api/v1/api.go                  |   1 +
 routers/api/v1/repo/file.go            | 142 +++++++++++++++++++++++++
 templates/swagger/v1_json.tmpl         |  46 ++++++++
 4 files changed, 245 insertions(+)
 create mode 100644 integrations/api_repo_file_get_test.go

diff --git a/integrations/api_repo_file_get_test.go b/integrations/api_repo_file_get_test.go
new file mode 100644
index 0000000000..8d1c4c4bcf
--- /dev/null
+++ b/integrations/api_repo_file_get_test.go
@@ -0,0 +1,56 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package integrations
+
+import (
+	"net/http"
+	"net/url"
+	"os"
+	"testing"
+
+	api "code.gitea.io/gitea/modules/structs"
+	"code.gitea.io/gitea/modules/util"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestAPIGetRawFileOrLFS(t *testing.T) {
+	defer prepareTestEnv(t)()
+
+	// Test with a raw (non-LFS) file: the media endpoint is expected to return the blob content as-is
+	req := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/README.md")
+	resp := MakeRequest(t, req, http.StatusOK)
+	assert.Equal(t, "# repo1\n\nDescription for repo1", resp.Body.String())
+
+	// Test with LFS. NOTE(review): the LFS file is pushed from a clone of httpContext's repo (created as user2/repo-lfs-test) but requested from user2/repo1 below, and the return values of doAPICreateRepository/doAPIDeleteRepository are discarded - TODO confirm the callback really runs and the right repo is queried
+	onGiteaRun(t, func(t *testing.T, u *url.URL) {
+		httpContext := NewAPITestContext(t, "user2", "repo-lfs-test")
+		doAPICreateRepository(httpContext, false, func(t *testing.T, repository api.Repository) {
+			u.Path = httpContext.GitPath()
+			dstPath, err := os.MkdirTemp("", httpContext.Reponame)
+			assert.NoError(t, err)
+			defer util.RemoveAll(dstPath)
+
+			u.Path = httpContext.GitPath()
+			u.User = url.UserPassword("user2", userPassword)
+
+			t.Run("Clone", doGitClone(dstPath, u))
+
+			dstPath2, err := os.MkdirTemp("", httpContext.Reponame)
+			assert.NoError(t, err)
+			defer util.RemoveAll(dstPath2)
+
+			t.Run("Partial Clone", doPartialGitClone(dstPath2, u))
+
+			lfs, _ := lfsCommitAndPushTest(t, dstPath)
+
+			reqLFS := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/"+lfs)
+			respLFS := MakeRequestNilResponseRecorder(t, reqLFS, http.StatusOK)
+			assert.Equal(t, littleSize, respLFS.Length)
+
+			doAPIDeleteRepository(httpContext)
+		})
+	})
+}
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 62c4a8934c..1492ef07a7 100644
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -826,6 +826,7 @@ func Routes() *web.Route {
 						Delete(reqAdmin(), repo.DeleteTeam)
 				}, reqToken())
 				m.Get("/raw/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFile)
+				m.Get("/media/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFileOrLFS)
 				m.Get("/archive/*", reqRepoReader(unit.TypeCode), repo.GetArchive)
 				m.Combo("/forks").Get(repo.ListForks).
 					Post(reqToken(), reqRepoReader(unit.TypeCode), bind(api.CreateForkOption{}), repo.CreateFork)
diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go
index 1fdf70c13a..ab337e66e3 100644
--- a/routers/api/v1/repo/file.go
+++ b/routers/api/v1/repo/file.go
@@ -6,8 +6,10 @@
 package repo
 
 import (
+	"bytes"
 	"encoding/base64"
 	"fmt"
+	"io"
 	"net/http"
 	"path"
 	"time"
@@ -18,7 +20,11 @@ import (
 	"code.gitea.io/gitea/modules/cache"
 	"code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/httpcache"
+	"code.gitea.io/gitea/modules/lfs"
+	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/storage"
 	api "code.gitea.io/gitea/modules/structs"
 	"code.gitea.io/gitea/modules/web"
 	"code.gitea.io/gitea/routers/common"
@@ -75,6 +81,142 @@ func GetRawFile(ctx *context.APIContext) {
 	}
 }
 
+// GetRawFileOrLFS gets a file by its path in the repo, serving the LFS object instead when the blob is an LFS pointer with a stored meta object.
+func GetRawFileOrLFS(ctx *context.APIContext) {
+	// swagger:operation GET /repos/{owner}/{repo}/media/{filepath} repository repoGetRawFileOrLFS
+	// ---
+	// summary: Get a file or it's LFS object from a repository
+	// parameters:
+	// - name: owner
+	//   in: path
+	//   description: owner of the repo
+	//   type: string
+	//   required: true
+	// - name: repo
+	//   in: path
+	//   description: name of the repo
+	//   type: string
+	//   required: true
+	// - name: filepath
+	//   in: path
+	//   description: filepath of the file to get
+	//   type: string
+	//   required: true
+	// - name: ref
+	//   in: query
+	//   description: "The name of the commit/branch/tag. Default the repository’s default branch (usually master)"
+	//   type: string
+	//   required: false
+	// responses:
+	//   200:
+	//     description: Returns raw file content.
+	//   "404":
+	//     "$ref": "#/responses/notFound"
+
+	if ctx.Repo.Repository.IsEmpty {
+		ctx.NotFound()
+		return
+	}
+
+	blob, lastModified := getBlobForEntry(ctx)
+	if ctx.Written() {
+		return
+	}
+
+	// LFS pointer files are at most 1024 bytes - so any blob greater than 1024 bytes cannot be an LFS pointer and is served as a plain blob
+	if blob.Size() > 1024 {
+		// First handle caching for the blob (ETag is the quoted blob ID)
+		if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
+			return
+		}
+
+		// OK not cached - serve!
+		if err := common.ServeBlob(ctx.Context, blob, lastModified); err != nil {
+			ctx.ServerError("ServeBlob", err)
+		}
+		return
+	}
+
+	// OK, now the blob is known to have at most 1024 bytes, so we can simply read it into memory in one go (this saves reading it twice)
+	dataRc, err := blob.DataAsync()
+	if err != nil {
+		ctx.ServerError("DataAsync", err)
+		return
+	}
+
+	buf, err := io.ReadAll(dataRc)
+	if err != nil {
+		_ = dataRc.Close()
+		ctx.ServerError("DataAsync", err)
+		return
+	}
+
+	if err := dataRc.Close(); err != nil {
+		log.Error("Error whilst closing blob %s reader in %-v. Error: %v", blob.ID, ctx.Context.Repo.Repository, err)
+	}
+
+	// Check if the blob represents an LFS pointer (the read error is ignored; validity is checked below)
+	pointer, _ := lfs.ReadPointer(bytes.NewReader(buf))
+
+	// If it's not a valid pointer, just serve the buffered data directly
+	if !pointer.IsValid() {
+		// First handle caching for the blob
+		if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
+			return
+		}
+
+		// OK not cached - serve!
+		if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
+			ctx.ServerError("ServeBlob", err)
+		}
+		return
+	}
+
+	// Now check if there is a meta object for this pointer in this repository
+	meta, err := models.GetLFSMetaObjectByOid(ctx.Repo.Repository.ID, pointer.Oid)
+
+	// If there isn't one, just serve the buffered pointer data directly
+	if err == models.ErrLFSObjectNotExist {
+		// Handle caching for the blob SHA (not the LFS object OID)
+		if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
+			return
+		}
+
+		if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
+			ctx.ServerError("ServeBlob", err)
+		}
+		return
+	} else if err != nil {
+		ctx.ServerError("GetLFSMetaObjectByOid", err)
+		return
+	}
+
+	// Handle caching for the LFS object OID (ETag only, no last-modified time)
+	if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) {
+		return
+	}
+
+	if setting.LFS.ServeDirect {
+		// If we have a signed url (S3, object storage), redirect to this directly; if none is available (nil URL or error) fall through and stream the object below.
+		u, err := storage.LFS.URL(pointer.RelativePath(), blob.Name())
+		if u != nil && err == nil {
+			ctx.Redirect(u.String())
+			return
+		}
+	}
+
+	lfsDataRc, err := lfs.ReadMetaObject(meta.Pointer)
+	if err != nil {
+		ctx.ServerError("ReadMetaObject", err)
+		return
+	}
+	defer lfsDataRc.Close()
+
+	if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, meta.Size, lfsDataRc); err != nil {
+		ctx.ServerError("ServeData", err)
+	}
+}
+
 func getBlobForEntry(ctx *context.APIContext) (blob *git.Blob, lastModified time.Time) {
 	entry, err := ctx.Repo.Commit.GetTreeEntryByPath(ctx.Repo.TreePath)
 	if err != nil {
diff --git a/templates/swagger/v1_json.tmpl b/templates/swagger/v1_json.tmpl
index d63cde60ec..c23bcb2e9a 100644
--- a/templates/swagger/v1_json.tmpl
+++ b/templates/swagger/v1_json.tmpl
@@ -7150,6 +7150,52 @@
         }
       }
     },
+    "/repos/{owner}/{repo}/media/{filepath}": {
+      "get": {
+        "tags": [
+          "repository"
+        ],
+        "summary": "Get a file or it's LFS object from a repository",
+        "operationId": "repoGetRawFileOrLFS",
+        "parameters": [
+          {
+            "type": "string",
+            "description": "owner of the repo",
+            "name": "owner",
+            "in": "path",
+            "required": true
+          },
+          {
+            "type": "string",
+            "description": "name of the repo",
+            "name": "repo",
+            "in": "path",
+            "required": true
+          },
+          {
+            "type": "string",
+            "description": "filepath of the file to get",
+            "name": "filepath",
+            "in": "path",
+            "required": true
+          },
+          {
+            "type": "string",
+            "description": "The name of the commit/branch/tag. Default the repository’s default branch (usually master)",
+            "name": "ref",
+            "in": "query"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Returns raw file content."
+          },
+          "404": {
+            "$ref": "#/responses/notFound"
+          }
+        }
+      }
+    },
     "/repos/{owner}/{repo}/milestones": {
       "get": {
         "produces": [