// Package gitaly provides a client for interacting with GitLab's Gitaly service.
// It implements functionality for retrieving repository data, tracking file changes,
// and efficiently processing Git blobs for indexing purposes. The package handles
// both SHA1 and SHA256 hash formats, manages repository connections via gRPC,
// and provides optimized methods for batch processing of file changes between
// different Git revisions.
package gitaly

import (
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"time"

	gitalyauth "gitlab.com/gitlab-org/gitaly/v16/auth"
	gitalyclient "gitlab.com/gitlab-org/gitaly/v16/client"
	pb "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
	grpccorrelation "gitlab.com/gitlab-org/labkit/correlation/grpc"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
)

// Well-known object IDs, client identification, and indexing defaults.
//
// NullTreeSHA and NullTreeSHA256 are the object IDs of "tree 0\0" hashed with
// SHA1 and SHA256 respectively; ZeroSHA and ZeroSHA256 are the all-zero object
// IDs of the same lengths. IsBlankSHA treats all four as blank.
//
// FormatSha256 is the string compared against the object format reported by
// the ObjectFormat RPC. ClientName is the client name given to the gRPC
// correlation interceptors. SubmoduleFileMode is the file mode used to detect
// (and skip) submodule entries in changed paths. DefaultIndexBatchSize is the
// batch size EachFileChangeBatched uses when it is called with a size of 0.
const (
	NullTreeSHA           = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" // SHA1("tree 0\0")
	ZeroSHA               = "0000000000000000000000000000000000000000"
	NullTreeSHA256        = "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321" // SHA256("tree 0\0")
	ZeroSHA256            = "0000000000000000000000000000000000000000000000000000000000000000"
	FormatSha256          = "OBJECT_FORMAT_SHA256"
	ClientName            = "gitlab-elasticsearch-indexer"
	SubmoduleFileMode     = 0160000
	DefaultIndexBatchSize = 128 // Batch sizes beyond ~10k can fail with: rpc error: code = Internal desc = processing blobs: rev-list: starting process argument list too long
)

// IsBlankSHA reports whether fromSHA stands for "no starting revision": the
// empty string, an all-zero object ID, or the empty-tree object ID, in either
// the SHA1 or the SHA256 format.
func IsBlankSHA(fromSHA string) bool {
	switch fromSHA {
	case "", ZeroSHA, ZeroSHA256, NullTreeSHA, NullTreeSHA256:
		return true
	default:
		return false
	}
}

// StorageConfig holds the JSON-decodable settings NewGitalyClient needs to
// reach a repository on a Gitaly server.
//
// Address is the address that is dialed and Token is the secret used to build
// the per-RPC credentials. StorageName, RelativePath and ProjectPath are copied
// into the repository message sent with every request. TokenVersion selects
// the credential scheme; NewGitalyClient accepts only 0 and 2, and rejects any
// other value.
type StorageConfig struct {
	Address      string `json:"address"`
	Token        string `json:"token"`
	StorageName  string `json:"storage"`
	RelativePath string `json:"relative_path"`
	ProjectPath  string `json:"project_path"`
	TokenVersion int    `json:"token_version"`
}

// GitalyReader is the subset of client behavior needed to walk the file
// changes of a repository and release the client afterwards. *GitalyClient
// provides both methods.
type GitalyReader interface {
	// EachFileChangeBatched reports deleted paths through bulkDel and changed
	// files through bulkPut, the latter in batches of indexBatchSize blobs.
	EachFileChangeBatched(ctx context.Context, bulkPut BulkPutFunc, bulkDel BulkDelFunc, indexBatchSize int) error
	// Close releases the resources held by the reader.
	Close() error
}

// GitalyClient talks to a single repository on a Gitaly server over one gRPC
// connection.
//
// conn is the shared connection closed by Close, and repository is the
// repository message attached to every request. The *ServiceClient fields are
// the per-service gRPC stubs created on conn. FromHash and ToHash delimit the
// revision range the client operates on; they are resolved by NewGitalyClient.
// limitFileSize is sent as the byte limit when listing blobs and is the
// threshold above which a File is flagged as TooLarge.
type GitalyClient struct {
	conn                    *grpc.ClientConn
	repository              *pb.Repository
	blobServiceClient       pb.BlobServiceClient
	diffServiceClient       pb.DiffServiceClient
	repositoryServiceClient pb.RepositoryServiceClient
	refServiceClient        pb.RefServiceClient
	commitServiceClient     pb.CommitServiceClient
	FromHash                string
	ToHash                  string
	limitFileSize           int64
}

// File is one changed file handed to a BulkPutFunc.
//
// Path is the file's path in the repository and Oid the ID of its blob.
// Content holds the blob bytes received from the ListBlobs stream, which is
// requested with the client's file size limit as its byte limit. Size is the
// blob size reported by Gitaly, and TooLarge is set when Size exceeds that
// limit.
type File struct {
	Path     string
	Content  []byte
	Oid      string
	Size     int64
	TooLarge bool
}

// BulkPutFunc is the callback EachFileChangeBatched invokes with each batch of
// added or changed files. A non-nil error aborts the walk.
type BulkPutFunc func(ctx context.Context, files []File) error
// BulkDelFunc is the callback EachFileChangeBatched invokes once with all
// paths that must be removed. A non-nil error aborts the walk.
type BulkDelFunc func(ctx context.Context, paths []string) error

// Signature identifies the author or committer of a commit: a name, an email
// address, and the time of the action (built from the commit's date in whole
// seconds).
type Signature struct {
	Name  string
	Email string
	When  time.Time
}

// Commit is the commit representation passed to a CommitFunc. Message is
// filled from the commit body and Hash from the commit ID returned by Gitaly.
type Commit struct {
	Author    Signature
	Committer Signature
	Message   string
	Hash      string
}

// CommitFunc is the callback EachCommit invokes for every commit it receives.
// Returning a non-nil error stops the iteration and is returned by EachCommit.
type CommitFunc func(commit *Commit) error

// NewGitalyClient dials the Gitaly server described by config and returns a
// client bound to one repository and one revision range.
//
// Parameters:
//   - ctx: context for the RPCs issued while resolving default revisions.
//   - config: connection and repository settings; must not be nil. Only token
//     versions 0 and 2 are accepted, and both use v2 RPC credentials.
//   - projectID: stored, formatted as a decimal string, in the repository's
//     GlRepository field.
//   - fromSHA: start of the range. An empty value is replaced by the
//     empty-tree ID matching the repository's object format, and an all-zero
//     ID by the empty-tree ID of the same hash length.
//   - toSHA: end of the range. When empty, the commit of the default branch is
//     looked up and used instead.
//   - limitFileSize: byte limit used when fetching blobs and when flagging
//     files as too large.
//
// It returns an error when config is nil, the token version is unknown, the
// connection cannot be established, or a default revision cannot be resolved.
// On error no client is returned and an already opened connection is closed.
func NewGitalyClient(ctx context.Context, config *StorageConfig, projectID uint64, fromSHA string, toSHA string, limitFileSize int64) (*GitalyClient, error) {
	if config == nil {
		return nil, errors.New("storage config is nil")
	}

	var rpcCred credentials.PerRPCCredentials
	switch config.TokenVersion {
	case 0, 2:
		rpcCred = gitalyauth.RPCCredentialsV2(config.Token)
	default:
		return nil, errors.New("unknown token version")
	}

	// Build the options in a fresh slice: appending directly to the
	// package-level defaults could write into their backing array and leak
	// this client's options into other users of DefaultDialOpts.
	connOpts := make([]grpc.DialOption, 0, len(gitalyclient.DefaultDialOpts)+3)
	connOpts = append(connOpts, gitalyclient.DefaultDialOpts...)
	connOpts = append(connOpts,
		grpc.WithPerRPCCredentials(rpcCred),
		grpc.WithStreamInterceptor(
			grpccorrelation.StreamClientCorrelationInterceptor(
				grpccorrelation.WithClientName(ClientName),
			),
		),
		grpc.WithUnaryInterceptor(
			grpccorrelation.UnaryClientCorrelationInterceptor(
				grpccorrelation.WithClientName(ClientName),
			),
		),
	)

	conn, err := gitalyclient.Dial(config.Address, connOpts)
	if err != nil {
		return nil, fmt.Errorf("did not connect: %w", err)
	}

	client := &GitalyClient{
		conn: conn,
		repository: &pb.Repository{
			StorageName:   config.StorageName,
			RelativePath:  config.RelativePath,
			GlProjectPath: config.ProjectPath,
			GlRepository:  fmt.Sprint(projectID),
		},
		blobServiceClient:       pb.NewBlobServiceClient(conn),
		diffServiceClient:       pb.NewDiffServiceClient(conn),
		repositoryServiceClient: pb.NewRepositoryServiceClient(conn),
		refServiceClient:        pb.NewRefServiceClient(conn),
		commitServiceClient:     pb.NewCommitServiceClient(conn),
		limitFileSize:           limitFileSize,
	}

	if err := client.setFromHash(ctx, fromSHA); err != nil {
		// The caller never receives the client, so nobody else can close the
		// connection. The close error is dropped on purpose: the setup error
		// is the one worth reporting.
		_ = conn.Close()
		return nil, fmt.Errorf("setFromHash %w", err)
	}
	if err := client.setToHash(ctx, toSHA); err != nil {
		// Same reasoning as above: release the connection before bailing out.
		_ = conn.Close()
		return nil, fmt.Errorf("setToHash %w", err)
	}

	return client, nil
}

// setFromHash resolves fromSHA into the start revision and stores it in
// gc.FromHash.
//
// An empty fromSHA is replaced by the value returned by getDefaultSHAFromHash;
// an all-zero SHA1 or SHA256 ID is replaced by the empty-tree ID of the same
// format; anything else is stored unchanged. An error is returned only when
// the default cannot be determined, in which case gc.FromHash is left as is.
func (gc *GitalyClient) setFromHash(ctx context.Context, fromSHA string) error {
	if fromSHA == "" {
		emptyTree, err := gc.getDefaultSHAFromHash(ctx)
		if err != nil {
			return fmt.Errorf("getDefaultSHAFromHash: %w", err)
		}
		gc.FromHash = emptyTree
		return nil
	}

	resolved := fromSHA
	if fromSHA == ZeroSHA {
		resolved = NullTreeSHA
	} else if fromSHA == ZeroSHA256 {
		resolved = NullTreeSHA256
	}
	gc.FromHash = resolved

	return nil
}

// setToHash stores the end revision in gc.ToHash. A non-empty toSHA is used
// verbatim; an empty one is replaced by the result of getLatestSHA, whose
// failure is returned wrapped and leaves gc.ToHash untouched.
func (gc *GitalyClient) setToHash(ctx context.Context, toSHA string) error {
	if toSHA != "" {
		gc.ToHash = toSHA
		return nil
	}

	head, err := gc.getLatestSHA(ctx)
	if err != nil {
		return fmt.Errorf("getLatestSHA: %w", err)
	}
	gc.ToHash = head

	return nil
}

// getDefaultSHAFromHash asks Gitaly for the repository's object format and
// returns the matching empty-tree ID: NullTreeSHA256 when the format's string
// form equals FormatSha256, NullTreeSHA for every other format. A failed RPC
// is returned wrapped, together with an empty string.
func (gc *GitalyClient) getDefaultSHAFromHash(ctx context.Context) (string, error) {
	resp, err := gc.repositoryServiceClient.ObjectFormat(ctx, &pb.ObjectFormatRequest{
		Repository: gc.repository,
	})
	if err != nil {
		return "", fmt.Errorf("could not call rpc.ObjectFormat: %w", err)
	}

	switch resp.Format.String() {
	case FormatSha256:
		return NullTreeSHA256, nil
	default:
		return NullTreeSHA, nil
	}
}

// getLatestSHA returns the ID of the commit the repository's default branch
// points to.
//
// It first resolves the default branch name and then looks that revision up
// with the FindCommit RPC. An error is returned when either RPC fails, or when
// FindCommit succeeds but carries no commit; without that check the missing
// commit would be dereferenced and crash the process.
func (gc *GitalyClient) getLatestSHA(ctx context.Context) (string, error) {
	defaultBranchName, err := gc.findDefaultBranchName(ctx)
	if err != nil {
		return "", err
	}

	request := &pb.FindCommitRequest{
		Repository: gc.repository,
		Revision:   defaultBranchName,
	}

	response, err := gc.commitServiceClient.FindCommit(ctx, request)
	if err != nil {
		return "", fmt.Errorf("cannot look up HEAD: %w", err)
	}

	// A successful response may still have no commit set (the revision did
	// not resolve), so use the nil-safe getter and report that case as an
	// error.
	commit := response.GetCommit()
	if commit == nil {
		return "", fmt.Errorf("cannot look up HEAD: no commit found for revision %q", defaultBranchName)
	}

	return commit.GetId(), nil
}

// findDefaultBranchName returns the name reported by the FindDefaultBranchName
// RPC for the client's repository, as raw bytes. A failed RPC is returned
// wrapped, together with a nil name.
func (gc *GitalyClient) findDefaultBranchName(ctx context.Context) ([]byte, error) {
	resp, err := gc.refServiceClient.FindDefaultBranchName(ctx, &pb.FindDefaultBranchNameRequest{
		Repository: gc.repository,
	})
	if err != nil {
		return nil, fmt.Errorf("cannot find a default branch: %w", err)
	}

	return resp.Name, nil
}

// Close closes the client's underlying connection and returns the wrapped
// error of that operation, or nil when it succeeds.
func (gc *GitalyClient) Close() error {
	if closeErr := gc.conn.Close(); closeErr != nil {
		return fmt.Errorf("error closing gitaly client: %w", closeErr)
	}

	return nil
}

// EachFileChangeBatched reports every file change between FromHash and ToHash
// through the given callbacks.
//
// bulkDel is called exactly once, first, with all paths to remove (possibly
// none). bulkPut is then called once per batch of changed blobs, each batch
// holding the files of at most indexBatchSize distinct blob IDs; the order in
// which blobs are batched is unspecified. A non-positive indexBatchSize selects
// DefaultIndexBatchSize. The first error from Gitaly or from a callback stops
// the walk and is returned wrapped.
//
// The Zoekt indexer originally had an EachFileChange function taking `put` and
// `del` callbacks for a single file or path. This variant takes bulk callbacks
// instead; if a public non-bulk EachFileChange is needed later, consider
// refactoring this code.
func (gc *GitalyClient) EachFileChangeBatched(ctx context.Context, bulkPut BulkPutFunc, bulkDel BulkDelFunc, indexBatchSize int) error {
	deletedPaths, updatedPathsByBlobID, err := gc.getChangedPaths(ctx)
	if err != nil {
		return fmt.Errorf("getChangedPaths %w", err)
	}

	if err := bulkDel(ctx, deletedPaths); err != nil {
		return fmt.Errorf("bulkDel %w", err)
	}

	// A negative size would panic as a slice capacity and could never be
	// reached by len(), so treat it like 0 and fall back to the default.
	if indexBatchSize <= 0 {
		indexBatchSize = DefaultIndexBatchSize
	}

	// putBatch fetches the blobs of one batch and hands the files to bulkPut.
	putBatch := func(blobIDs []string) error {
		files, err := gc.getFilesByPathAndBlobId(ctx, blobIDs, updatedPathsByBlobID)
		if err != nil {
			return fmt.Errorf("getFilesByPathAndBlobId: %w", err)
		}
		if err := bulkPut(ctx, files); err != nil {
			return fmt.Errorf("bulkPut: %w", err)
		}
		return nil
	}

	// Never reserve more room than there are blobs to process.
	batchCap := indexBatchSize
	if n := len(updatedPathsByBlobID); n < batchCap {
		batchCap = n
	}

	batch := make([]string, 0, batchCap)
	for blobID := range updatedPathsByBlobID {
		batch = append(batch, blobID)
		if len(batch) < indexBatchSize {
			continue
		}
		if err := putBatch(batch); err != nil {
			return err
		}
		// Reuse the backing array for the next batch.
		batch = batch[:0]
	}

	// Flush whatever is left over after the last full batch.
	if len(batch) > 0 {
		return putBatch(batch)
	}

	return nil
}

// getChangedPaths streams the FindChangedPaths RPC for the tree pair
// (FromHash, ToHash) and sorts the reported changes into two collections.
//
// The first result lists the paths to remove: the path of every deleted entry
// and the old path of every renamed entry. The second result maps the new blob
// ID of every added, modified, copied or renamed entry to the paths that now
// carry that blob. Entries whose old or new mode is SubmoduleFileMode are
// skipped, and entries with any other status (including type changes) are only
// logged. Both collections are non-nil on success; on error both are nil.
func (gc *GitalyClient) getChangedPaths(ctx context.Context) ([]string, map[string][]string, error) {
	// The stream's context is cancelled on return so that an early exit does
	// not leave the stream open.
	streamCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	stream, err := gc.diffServiceClient.FindChangedPaths(streamCtx, &pb.FindChangedPathsRequest{
		Repository: gc.repository,
		Requests: []*pb.FindChangedPathsRequest_Request{{
			Type: &pb.FindChangedPathsRequest_Request_TreeRequest_{
				TreeRequest: &pb.FindChangedPathsRequest_Request_TreeRequest{
					LeftTreeRevision:  gc.FromHash,
					RightTreeRevision: gc.ToHash,
				},
			},
		}},
	})
	if err != nil {
		return nil, nil, fmt.Errorf("find changed paths: %w", err)
	}

	removed := []string{}
	pathsByBlobID := make(map[string][]string)

	for {
		resp, recvErr := stream.Recv()
		if recvErr == io.EOF { //nolint:errorlint
			return removed, pathsByBlobID, nil
		}
		if recvErr != nil {
			return nil, nil, fmt.Errorf("recv: %w", recvErr)
		}

		for _, change := range resp.Paths {
			// Submodules are skipped to mirror the go-git implementation.
			// Supporting them through Gitaly may be cheap, but that still
			// needs investigation.
			if change.OldMode == SubmoduleFileMode || change.NewMode == SubmoduleFileMode {
				continue
			}

			status := change.GetStatus()
			switch status {
			case pb.ChangedPaths_DELETED:
				removed = append(removed, string(change.Path))
			case pb.ChangedPaths_ADDED, pb.ChangedPaths_MODIFIED, pb.ChangedPaths_COPIED, pb.ChangedPaths_RENAMED:
				// A rename removes the old path and indexes the blob at its
				// new path.
				if status == pb.ChangedPaths_RENAMED {
					removed = append(removed, string(change.OldPath))
				}
				pathsByBlobID[change.NewBlobId] = append(pathsByBlobID[change.NewBlobId], string(change.Path))
			default:
				// Covers type changes as well as any status not listed above.
				slog.Warn("status is not supported to perform indexing", "status", status, "repoId", gc.repository.GlRepository)
			}
		}
	}
}

// getFilesByPathAndBlobId fetches the blobs named by revisions with the
// ListBlobs RPC and returns one File per path registered for each blob in
// pathsByBlobID.
//
// The request's byte limit is the client's file size limit. The stream is
// read as a sequence of chunks: a chunk with a non-empty Oid starts a new blob
// and carries its size, and chunks with an empty Oid continue the data of the
// current blob. A blob is emitted when the next one starts or when the stream
// ends. The returned slice is non-nil on success; on error it is nil.
func (gc *GitalyClient) getFilesByPathAndBlobId(ctx context.Context, revisions []string, pathsByBlobID map[string][]string) ([]File, error) {
	// Cancel the stream's context on return so an early exit closes it.
	streamCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	stream, err := gc.blobServiceClient.ListBlobs(streamCtx, &pb.ListBlobsRequest{
		Repository: gc.repository,
		Revisions:  revisions,
		BytesLimit: gc.limitFileSize,
	})
	if err != nil {
		return nil, fmt.Errorf("ListBlobs: %w", err)
	}

	var (
		currentOid  string
		currentSize int64
		currentData []byte
		seenChunk   bool
	)
	files := []File{}

	// emit turns the blob accumulated so far into files, one per path.
	emit := func() {
		files = append(files, gc.buildFilesForOid(pathsByBlobID[currentOid], currentOid, currentData, currentSize)...)
	}

	for {
		resp, recvErr := stream.Recv()
		if recvErr == io.EOF { //nolint:errorlint
			// The last blob has no successor to trigger its emission.
			emit()
			return files, nil
		}
		if recvErr != nil {
			return nil, fmt.Errorf("blobsStream.Recv: %w", recvErr)
		}

		for _, chunk := range resp.GetBlobs() {
			startsBlob := chunk.Oid != ""

			// A new blob begins: the previous one is complete, unless this is
			// the very first chunk of the stream.
			if startsBlob && seenChunk {
				emit()
				currentData = nil
			}
			seenChunk = true

			currentData = append(currentData, chunk.Data...)
			if startsBlob {
				currentOid, currentSize = chunk.Oid, chunk.Size
			}
		}
	}
}

// buildFilesForOid returns one File per entry of paths, all describing the
// same blob: they share oid, size and the data slice as Content, and are
// marked TooLarge when size exceeds the client's file size limit. The result
// is a non-nil, possibly empty, slice.
func (gc *GitalyClient) buildFilesForOid(paths []string, oid string, data []byte, size int64) []File {
	tooLarge := size > gc.limitFileSize

	result := make([]File, 0, len(paths))
	for i := range paths {
		result = append(result, File{
			Path:     paths[i],
			Content:  data,
			Oid:      oid,
			Size:     size,
			TooLarge: tooLarge,
		})
	}

	return result
}

// EachCommit streams the ListCommits RPC for the revisions "^FromHash" and
// ToHash, with the Reverse option set, and calls f for every commit received.
//
// The RPC runs on a background context that is cancelled when EachCommit
// returns. Iteration stops at the first error: a failure of the RPC is
// returned wrapped, an error from f is returned as is.
func (gc *GitalyClient) EachCommit(f CommitFunc) error {
	streamCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	stream, err := gc.commitServiceClient.ListCommits(streamCtx, &pb.ListCommitsRequest{
		Repository: gc.repository,
		Revisions:  []string{"^" + gc.FromHash, gc.ToHash},
		Reverse:    true,
	})
	if err != nil {
		return fmt.Errorf("could not call rpc.ListCommits: %w", err)
	}

	for {
		resp, recvErr := stream.Recv()
		switch {
		case errors.Is(recvErr, io.EOF):
			return nil
		case recvErr != nil:
			return fmt.Errorf("error calling rpc.ListCommits: %w", recvErr)
		}

		for _, cmt := range resp.Commits {
			converted := Commit{
				Author:    buildSignature(cmt.Author),
				Committer: buildSignature(cmt.Committer),
				Message:   string(cmt.Body),
				Hash:      cmt.Id,
			}

			slog.Debug("Indexing commit", "commitID", cmt.Id)

			if cbErr := f(&converted); cbErr != nil {
				return cbErr
			}
		}
	}
}

// GetLimitFileSize returns the file size limit the client was created with.
func (gc *GitalyClient) GetLimitFileSize() int64 {
	return gc.limitFileSize
}

// GetFromHash returns the current value of the FromHash field, the start of
// the client's revision range.
func (gc *GitalyClient) GetFromHash() string {
	return gc.FromHash
}

// GetToHash returns the current value of the ToHash field, the end of the
// client's revision range.
func (gc *GitalyClient) GetToHash() string {
	return gc.ToHash
}

// buildSignature converts a Gitaly commit author into a Signature.
//
// Name and Email are copied as strings, and When is built from the whole
// seconds of the author's date. The generated getters are used throughout so
// that a nil ca (a commit without author or committer information) yields an
// empty name and email and the Unix epoch instead of a nil-pointer panic.
func buildSignature(ca *pb.CommitAuthor) Signature {
	return Signature{
		Name:  string(ca.GetName()),
		Email: string(ca.GetEmail()),
		When:  time.Unix(ca.GetDate().GetSeconds(), 0),
	}
}
