package chunk

import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/chunker"
	codeChunker "gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/chunker/code"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/client/elasticsearch"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/indexer"
	elasticsearchIndexer "gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/indexer/elasticsearch"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/streamer"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/types"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/shared"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/shared/binary"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/shared/gitaly"
)

// OperationType names the action that Run performs for a project.
type OperationType string

const (
	// OperationTypeIndex selects the indexing path in Run. Run also falls back
	// to this value when Options.Operation is empty.
	OperationTypeIndex  OperationType = "index"
	// OperationTypeDelete selects the project deletion path in Run.
	OperationTypeDelete OperationType = "delete"
)

// Options holds the settings Run decodes from the options JSON document.
// Decoding is strict: Run rejects documents that contain unknown fields.
type Options struct {
	// ProjectID is passed to the gitaly client and to every vector store call.
	ProjectID       uint64                `json:"project_id"`
	// Operation selects index or delete; Run treats an empty value as index.
	Operation       OperationType         `json:"operation,omitempty"`
	// FromSHA and ToSHA are handed to gitaly.NewGitalyClient. FromSHA is also
	// checked with gitaly.IsBlankSHA after indexing (see PerformIndexing).
	FromSHA         string                `json:"from_sha"`
	ToSHA           string                `json:"to_sha"`
	// CorrelationID and SchemaVersion are decoded but not read by the code in
	// this file.
	CorrelationID   string                `json:"correlation_id"`
	SchemaVersion   uint16                `json:"schema_version"`
	// ForceReindex is forwarded to the Elasticsearch indexer constructor and
	// gates the ResolveReindexing call in PerformIndexing.
	ForceReindex    bool                  `json:"force_reindex"`
	// ChunkSize and ChunkOverlap are copied into chunker.ChunkOptions.
	ChunkSize       uint16                `json:"chunk_size"`
	ChunkOverlap    uint16                `json:"chunk_overlap"`
	// ChunkStrategy picks the chunker implementation; empty selects the same
	// chunker as chunker.ChunkStrategyCodeBytes.
	ChunkStrategy   chunker.ChunkStrategy `json:"chunk_strategy"`
	// GitalyConfig is passed by pointer to gitaly.NewGitalyClient.
	GitalyConfig    gitaly.StorageConfig  `json:"gitaly_config"`
	// Timeout must be parseable by time.ParseDuration; it bounds the context
	// used for the whole run.
	Timeout         string                `json:"timeout"`
	// GitalyBatchSize is the batch size given to EachFileChangeBatched.
	GitalyBatchSize uint16                `json:"gitaly_batch_size"`
	// PartitionName and PartitionNumber are joined as "<name>_<number>" to
	// form the Elasticsearch index name.
	PartitionName   string                `json:"partition_name"`
	PartitionNumber uint16                `json:"partition_number"`
	// ElasticBulkSize is forwarded to the Elasticsearch indexer constructor.
	ElasticBulkSize uint16                `json:"elastic_bulk_size"`
}

// createChunker builds the chunker selected by options.ChunkStrategy.
//
// It returns the chunker together with a cleanup function that calls Close on
// it; the caller is expected to invoke the cleanup once chunking is finished.
// An empty strategy is treated like chunker.ChunkStrategyCodeBytes. Any other
// unrecognised strategy yields an error and a nil chunker and cleanup.
func createChunker(options Options) (chunker.Chunker, func(), error) {
	opts := chunker.ChunkOptions{
		ChunkSize:    options.ChunkSize,
		ChunkOverlap: options.ChunkOverlap,
	}

	strategy := options.ChunkStrategy

	if strategy == "" || strategy == chunker.ChunkStrategyCodeBytes {
		bytesChunker, err := codeChunker.New(opts)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to create CodeChunker: %w", err)
		}
		release := func() {
			bytesChunker.Close()
		}
		return bytesChunker, release, nil
	}

	if strategy == chunker.ChunkStrategyCodePreBert {
		preBertChunker, err := codeChunker.NewPreBert(opts)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to create code pre-bert chunker: %w", err)
		}
		release := func() {
			preBertChunker.Close()
		}
		return preBertChunker, release, nil
	}

	return nil, nil, fmt.Errorf("unknown chunking strategy: %q", options.ChunkStrategy)
}

// createVectorStoreIndexer builds the indexing strategy that matches the
// adapter type reported by conn.
//
// Only the Elasticsearch adapter is implemented: the function connects an
// Elasticsearch client using ctx and creates an indexer that targets the index
// "<PartitionName>_<PartitionNumber>". The PostgreSQL and OpenSearch adapters,
// as well as unknown adapters, produce an error.
//
// An error is also returned (instead of panicking) when conn reports the
// Elasticsearch adapter but is not a types.ElasticsearchConnection.
func createVectorStoreIndexer(ctx context.Context, conn types.Connection, options Options) (indexer.IndexingStrategy, error) {
	var indexingStrategy indexer.IndexingStrategy
	var err error

	adapterType := conn.GetAdapterType()

	switch adapterType {
	case types.ElasticsearchAdapter:
		// Use the comma-ok form so a mismatch between the adapter type and the
		// concrete connection type surfaces as an error rather than a panic.
		esConn, ok := conn.(types.ElasticsearchConnection)
		if !ok {
			return nil, fmt.Errorf("unexpected connection type %T for adapter: %s", conn, adapterType)
		}
		esClient := elasticsearch.New(esConn)

		if err := esClient.Connect(ctx); err != nil {
			return nil, fmt.Errorf("failed to connect to elasticsearch: %w", err)
		}

		indexName := fmt.Sprintf("%s_%d", options.PartitionName, options.PartitionNumber)

		indexingStrategy, err = elasticsearchIndexer.New(esClient, indexName, int(options.ElasticBulkSize), options.ForceReindex)
		if err != nil {
			return nil, fmt.Errorf("failed to create elasticsearch indexer: %w", err)
		}

	case types.PostgreSQLAdapter:
		// TODO: Implement PostgreSQL indexing strategy https://gitlab.com/gitlab-org/gitlab/-/issues/545483
		return nil, fmt.Errorf("indexing not implemented for adapter: %s", adapterType)

	case types.OpenSearchAdapter:
		// TODO: Implement OpenSearch indexing strategy https://gitlab.com/gitlab-org/gitlab/-/issues/545483
		return nil, fmt.Errorf("indexing not implemented for adapter: %s", adapterType)

	default:
		return nil, fmt.Errorf("unknown adapter: %s", adapterType)
	}

	return indexingStrategy, nil
}

// Run executes chunk mode: it parses the command-line flags and the options
// JSON, streams the indexer version info to stdout, creates the vector store
// indexer for the configured adapter, and then performs the requested
// operation (delete or index) for options.ProjectID.
//
// The whole run is bounded by a context whose timeout is parsed from
// options.Timeout. An empty options.Operation defaults to OperationTypeIndex.
//
// Run returns an error when flags, options, the timeout or the connection
// cannot be parsed, when the version info cannot be streamed, when a resource
// cannot be created, when the operation itself fails, or when the operation is
// unknown. The streamer and the vector store indexer are closed on every
// return path once created; failures while closing are logged and do not
// change the returned error.
func Run(buildOpts shared.BuildOpts) error {
	cmdOpts, err := parseFlags()
	if err != nil {
		return err
	}

	slog.Debug("parsing options", "optionsJSON", cmdOpts.OptionsJSON)
	options := Options{}
	decoder := json.NewDecoder(strings.NewReader(cmdOpts.OptionsJSON))
	// Unknown keys in the options document are treated as an error.
	decoder.DisallowUnknownFields()
	if err := decoder.Decode(&options); err != nil {
		return fmt.Errorf("failed to parse options: %w", err)
	}

	// Default to index operation if not specified
	if options.Operation == "" {
		options.Operation = OperationTypeIndex
	}

	timeout, err := time.ParseDuration(options.Timeout)
	if err != nil {
		return fmt.Errorf("failed to parse Timeout: %v with error %w", options.Timeout, err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	slog.Debug("created ctx", "ctx", ctx)

	// Parse connection parameters
	conn, err := parseConnection(cmdOpts.AdapterType, cmdOpts.ConnectionJSON)
	if err != nil {
		return err
	}

	// Create the stdout streamer and make sure it is closed on every return
	// path. Deferred calls run in reverse order, so this one runs after the
	// vector store indexer has been closed.
	stdoutStreamer := streamer.NewStdout()
	defer func() {
		if closeErr := stdoutStreamer.Close(); closeErr != nil {
			slog.Error("failed to close streamer", "error", closeErr)
		}
	}()

	// Stream version info. A failure here aborts the run, so it must be
	// reported to the caller instead of being turned into a success.
	if err = stdoutStreamer.StreamSingle(&streamer.IndexerVersionInfo{
		Version:   buildOpts.Version,
		BuildTime: buildOpts.BuildTime,
	}); err != nil {
		return fmt.Errorf("failed to stream indexer version info: %w", err)
	}

	// Create the vector store indexer based on adapter type
	vectorStoreIndexer, err := createVectorStoreIndexer(ctx, conn, options)
	if err != nil {
		return err
	}
	// Close the indexer on error paths as well; ctx is still usable here
	// because cancel was deferred earlier and therefore runs later.
	defer func() {
		if closeErr := vectorStoreIndexer.Close(ctx); closeErr != nil {
			slog.Error("failed to close vector store indexer", "error", closeErr)
		}
	}()

	switch options.Operation {
	case OperationTypeDelete:
		slog.Info("deleting project", "project_id", options.ProjectID)

		// Delete project
		if err := vectorStoreIndexer.Delete(ctx, options.ProjectID); err != nil {
			return fmt.Errorf("failed to delete project: %w", err)
		}

	case OperationTypeIndex:
		// Create code chunker
		fileChunker, cleanupChunker, err := createChunker(options)
		if err != nil {
			return err
		}
		defer cleanupChunker()

		// Initiate classes for reading git files. The same size limit is given
		// to the byte converter and to the gitaly client.
		limitFileSize := int64(1024 * 1024)
		byteConverter, err := types.NewByteConverter(limitFileSize)
		if err != nil {
			return fmt.Errorf("NewByteConverter %w", err)
		}
		var gitalyClient gitaly.GitalyReader
		gitalyClient, err = gitaly.NewGitalyClient(ctx, &options.GitalyConfig, options.ProjectID, options.FromSHA, options.ToSHA, limitFileSize)
		if err != nil {
			return fmt.Errorf("NewGitalyClient %w", err)
		}
		// Ensure gitaly client is closed
		defer func() {
			if closeErr := gitalyClient.Close(); closeErr != nil {
				slog.Error("failed to close gitaly client", "error", closeErr)
			}
		}()

		// Create the chunk mode indexer
		chunkModeIndexer := NewChunkIndexer(options, stdoutStreamer, fileChunker, byteConverter, gitalyClient, vectorStoreIndexer)

		// Start indexing
		if err = chunkModeIndexer.PerformIndexing(ctx); err != nil {
			return err
		}

	default:
		return fmt.Errorf("unknown operation: %s", options.Operation)
	}

	return nil
}

// ChunkIndexer is the chunk mode indexer. It bundles the collaborators that
// its methods use to read changed files, chunk them, write the chunks to the
// vector store and stream the resulting chunk IDs.
type ChunkIndexer struct {
	// options supplies the project ID, batch size and reindex settings read
	// by the methods below.
	options            Options
	// chunker turns text files into chunks (see BulkIndex).
	chunker            chunker.Chunker
	// stdoutStreamer receives the IDs of indexed chunks (see BulkIndex).
	stdoutStreamer     *streamer.Streamer
	// byteConverter converts raw file content before chunking (see BulkIndex).
	byteConverter      *types.ByteConverter
	// gitalyClient drives the batched iteration over file changes.
	gitalyClient       gitaly.GitalyReader
	// vectorStoreIndexer performs index, delete and reindex-resolution calls.
	vectorStoreIndexer indexer.IndexingStrategy
}

// NewChunkIndexer returns a ChunkIndexer wired with the given options and
// collaborators. The arguments are stored as-is; nothing is validated or
// started here.
func NewChunkIndexer(options Options, s *streamer.Streamer, chunker chunker.Chunker, byteConverter *types.ByteConverter, gitalyClient gitaly.GitalyReader, vectorStoreIndexer indexer.IndexingStrategy) *ChunkIndexer {
	ci := new(ChunkIndexer)

	ci.options = options
	ci.stdoutStreamer = s
	ci.chunker = chunker
	ci.byteConverter = byteConverter
	ci.gitalyClient = gitalyClient
	ci.vectorStoreIndexer = vectorStoreIndexer

	return ci
}

// PerformIndexing runs the batched file-change iteration on the gitaly client,
// handing it BulkIndex and BulkDelete as callbacks and options.GitalyBatchSize
// as the batch size.
//
// When that succeeds, ForceReindex is set and FromSHA is blank according to
// gitaly.IsBlankSHA, it additionally asks the vector store indexer to resolve
// reindexing for the project. Errors from either step are wrapped and returned.
func (chunkIndexer *ChunkIndexer) PerformIndexing(ctx context.Context) error {
	opts := &chunkIndexer.options
	batchSize := int(opts.GitalyBatchSize)

	if err := chunkIndexer.gitalyClient.EachFileChangeBatched(ctx, chunkIndexer.BulkIndex, chunkIndexer.BulkDelete, batchSize); err != nil {
		return fmt.Errorf("chunkIndexer.gitalyClient.EachFileChangeBatched: %w", err)
	}

	// Reindex resolution only applies to a forced reindex from a blank SHA.
	if !opts.ForceReindex || !gitaly.IsBlankSHA(opts.FromSHA) {
		return nil
	}

	if err := chunkIndexer.vectorStoreIndexer.ResolveReindexing(ctx, opts.ProjectID); err != nil {
		return fmt.Errorf("chunkIndexer.vectorStoreIndexer.ResolveReindexing: %w", err)
	}

	return nil
}

// BulkDelete asks the vector store indexer to delete the given paths for the
// configured project. A failure is wrapped and returned.
func (chunkIndexer *ChunkIndexer) BulkDelete(ctx context.Context, paths []string) error {
	projectID := chunkIndexer.options.ProjectID

	if err := chunkIndexer.vectorStoreIndexer.DeletePaths(ctx, projectID, paths); err != nil {
		return fmt.Errorf("chunkIndexer.vectorStoreIndexer.DeletePaths: %w", err)
	}

	return nil
}

// BulkIndex chunks one batch of changed files and writes the chunks to the
// vector store for the configured project.
//
// Files that binary.DetectBinary reports as binary are skipped. The content of
// the remaining files is passed through the byte converter, chunked, and
// indexed. The IDs returned by the indexer are then streamed to stdout.
//
// Chunking and indexing errors are wrapped and returned. A failure to stream
// the chunk IDs is only logged; it does not fail the batch.
func (chunkIndexer *ChunkIndexer) BulkIndex(ctx context.Context, gitFiles []gitaly.File) error {
	slog.Debug("Updated files", "count", len(gitFiles))

	// build files for chunking, filtering out binary files as we only want to chunk text content.
	// The batch size is an upper bound for the result, so reserve it up front
	// instead of growing the slice one append at a time.
	files := make([]types.File, 0, len(gitFiles))
	for _, file := range gitFiles {
		if binary.DetectBinary(file.Path, file.Content) {
			slog.Debug("Skipping binary file", "path", file.Path)
			continue
		}

		files = append(files, types.File{
			Path:    file.Path,
			Content: chunkIndexer.byteConverter.TryConvertBytesToString(file.Content),
			OID:     file.Oid,
		})
	}

	// chunk files
	chunks, err := chunkIndexer.chunker.ChunkFiles(ctx, files)
	if err != nil {
		return fmt.Errorf("failed to chunk files: %w", err)
	}
	slog.Debug("chunked files", "count", len(chunks))

	// Index chunks and get the IDs
	chunkIDs, err := chunkIndexer.vectorStoreIndexer.Index(ctx, chunkIndexer.options.ProjectID, chunks)
	if err != nil {
		return fmt.Errorf("failed to index chunks: %w", err)
	}

	// Stream indexed chunk IDs for this batch
	if len(chunkIDs) > 0 {
		records := make([]streamer.Record, 0, len(chunkIDs))
		for _, id := range chunkIDs {
			records = append(records, &streamer.IndexedChunkInfo{
				ID: id,
			})
		}

		// Best effort: the chunks are already indexed at this point, so a
		// streaming failure is logged rather than returned.
		if err := chunkIndexer.stdoutStreamer.Stream(records); err != nil {
			slog.Error("failed to stream indexed chunk info", "error", err)
		}
	}

	return nil
}
