From 6203ae764a15b6d23125d616c9b72d9c940a301c Mon Sep 17 00:00:00 2001
From: Giteabot <teabot@gitea.io>
Date: Wed, 31 Jul 2024 23:06:37 +0800
Subject: [PATCH] Distinguish LFS object errors to ignore missing objects
 during migration (#31702) (#31745)

Backport #31702 by @wolfogre

Fix #31137.

Replace #31623 #31697.

When migrating LFS objects, if any object fails (for example, some
objects have been lost, which is not really critical), Gitea will stop
migrating LFS immediately but treat the migration as successful.

This PR checks the error according to the [LFS API
doc](https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md#successful-responses).

> LFS object error codes should match HTTP status codes where possible:
>
> - 404 - The object does not exist on the server.
> - 409 - The specified hash algorithm disagrees with the server's
>   acceptable options.
> - 410 - The object was removed by the owner.
> - 422 - Validation error.

If the error is `404`, it's safe to ignore it and continue migration.
Otherwise, stop the migration and mark it as failed to ensure data
integrity of LFS objects.

Maybe we should also ignore other errors (maybe `410`? I'm not sure
what the difference is between "does not exist" and "removed by the
owner"). We can add them later when some users report that they have
failed to migrate LFS because of an error which should be ignored.

Co-authored-by: Jason Song <i@wolfogre.com>
---
 modules/lfs/http_client.go     |  7 +++----
 modules/lfs/shared.go          | 37 ++++++++++++++++++++++++++++++++++
 modules/repository/repo.go     |  5 +++++
 services/repository/migrate.go |  1 +
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/modules/lfs/http_client.go b/modules/lfs/http_client.go
index f5ddd38b09..427f57df2d 100644
--- a/modules/lfs/http_client.go
+++ b/modules/lfs/http_client.go
@@ -136,14 +136,13 @@ func (c *HTTPClient) performOperation(ctx context.Context, objects []Pointer, dc
 
 	for _, object := range result.Objects {
 		if object.Error != nil {
-			objectError := errors.New(object.Error.Message)
-			log.Trace("Error on object %v: %v", object.Pointer, objectError)
+			log.Trace("Error on object %v: %v", object.Pointer, object.Error)
 			if uc != nil {
-				if _, err := uc(object.Pointer, objectError); err != nil {
+				if _, err := uc(object.Pointer, object.Error); err != nil {
 					return err
 				}
 			} else {
-				if err := dc(object.Pointer, nil, objectError); err != nil {
+				if err := dc(object.Pointer, nil, object.Error); err != nil {
 					return err
 				}
 			}
diff --git a/modules/lfs/shared.go b/modules/lfs/shared.go
index 80f4fed00d..aef9e456fc 100644
--- a/modules/lfs/shared.go
+++ b/modules/lfs/shared.go
@@ -4,7 +4,11 @@
 package lfs
 
 import (
+	"errors"
+	"fmt"
 	"time"
+
+	"code.gitea.io/gitea/modules/util"
 )
 
 const (
@@ -63,6 +67,39 @@ type ObjectError struct {
 	Message string `json:"message"`
 }
 
+var (
+	// See https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md#successful-responses
+	// LFS object error codes should match HTTP status codes where possible:
+	//   404 - The object does not exist on the server.
+	//   409 - The specified hash algorithm disagrees with the server's acceptable options.
+	//   410 - The object was removed by the owner.
+	//   422 - Validation error.
+
+	ErrObjectNotExist     = util.ErrNotExist // the object does not exist on the server
+	ErrObjectHashMismatch = errors.New("the specified hash algorithm disagrees with the server's acceptable options")
+	ErrObjectRemoved      = errors.New("the object was removed by the owner")
+	ErrObjectValidation   = errors.New("validation error")
+)
+
+func (e *ObjectError) Error() string {
+	return fmt.Sprintf("[%d] %s", e.Code, e.Message)
+}
+
+func (e *ObjectError) Unwrap() error {
+	switch e.Code {
+	case 404:
+		return ErrObjectNotExist
+	case 409:
+		return ErrObjectHashMismatch
+	case 410:
+		return ErrObjectRemoved
+	case 422:
+		return ErrObjectValidation
+	default:
+		return errors.New(e.Message)
+	}
+}
+
 // PointerBlob associates a Git blob with a Pointer.
 type PointerBlob struct {
 	Hash string
diff --git a/modules/repository/repo.go b/modules/repository/repo.go
index cb926084ba..3d1899b2fe 100644
--- a/modules/repository/repo.go
+++ b/modules/repository/repo.go
@@ -5,6 +5,7 @@ package repository
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"strings"
@@ -181,6 +182,10 @@ func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Re
 	downloadObjects := func(pointers []lfs.Pointer) error {
 		err := lfsClient.Download(ctx, pointers, func(p lfs.Pointer, content io.ReadCloser, objectError error) error {
 			if objectError != nil {
+				if errors.Is(objectError, lfs.ErrObjectNotExist) {
+					log.Warn("Repo[%-v]: Ignore missing LFS object %-v: %v", repo, p, objectError)
+					return nil
+				}
 				return objectError
 			}
 
diff --git a/services/repository/migrate.go b/services/repository/migrate.go
index df5cc67ae1..2e901791b4 100644
--- a/services/repository/migrate.go
+++ b/services/repository/migrate.go
@@ -169,6 +169,7 @@ func MigrateRepositoryGitData(ctx context.Context, u *user_model.User,
 			lfsClient := lfs.NewClient(endpoint, httpTransport)
 			if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, repo, gitRepo, lfsClient); err != nil {
 				log.Error("Failed to store missing LFS objects for repository: %v", err)
+				return repo, fmt.Errorf("StoreMissingLfsObjectsInRepository: %w", err)
 			}
 		}
 	}