package elasticsearch

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/olivere/elastic/v7"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/chunker"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/indexer"
)

// DefaultBulkSize is the bulk size New falls back to when it is given a
// bulkSize of zero or less.
const DefaultBulkSize = 1000

// Client abstracts Elasticsearch and OpenSearch clients behind a single
// accessor for the underlying *elastic.Client.
type Client interface {
	// GetClient returns the underlying client. New treats a nil result as
	// "client not connected".
	GetClient() *elastic.Client
}

// OrphanData holds the data needed for orphan cleanup: documents whose path is
// listed in Paths but whose ID is not listed in ChunkIDs are deleted.
type OrphanData struct {
	ChunkIDs []string // IDs of the documents built for the chunks being indexed
	Paths    []string // path of each of those chunks; one entry per chunk, so duplicates are possible
}

// Indexer writes chunk documents to a single index through buffered bulk
// requests and deletes documents by project, by path, or by reindexing state.
//
// Fields:
//   - client: underlying client used for every request.
//   - indexName: index that all operations target.
//   - bulkSize: number of queued bulk actions at which Index flushes.
//   - bulk: buffer of pending bulk actions; Flush replaces it with a fresh
//     one after a successful request.
//   - reindexing: passed to indexer.BuildChunkDocument for every chunk, and
//     required to be true for ResolveReindexing to do any work.
type Indexer struct {
	client     *elastic.Client
	indexName  string
	bulkSize   int
	bulk       *elastic.BulkService
	reindexing bool
}

// New builds an Indexer that writes to indexName through the given client.
//
// A bulkSize of zero or less selects DefaultBulkSize. The reindexing flag is
// stored on the Indexer as is. An error is returned when client.GetClient()
// yields nil.
func New(client Client, indexName string, bulkSize int, reindexing bool) (*Indexer, error) {
	es := client.GetClient()
	if es == nil {
		return nil, fmt.Errorf("client not connected")
	}

	idx := &Indexer{
		client:     es,
		indexName:  indexName,
		bulkSize:   DefaultBulkSize,
		bulk:       es.Bulk().Index(indexName),
		reindexing: reindexing,
	}
	// Only a positive caller-supplied size overrides the default.
	if bulkSize > 0 {
		idx.bulkSize = bulkSize
	}

	return idx, nil
}

// Index upserts the given chunks for a project and then removes stale chunks
// of the same files.
//
// Every chunk is converted with indexer.BuildChunkDocument (receiving the
// indexer's reindexing flag, so in reindexing mode the documents' reindexing
// fields are set to `true`) and queued as a doc-as-upsert bulk update routed by
// projectID. The queue is flushed whenever it reaches i.bulkSize and once more
// after the last chunk. Finally, documents that share a path with the given
// chunks but whose ID was not built in this call are deleted.
//
// It returns the IDs the flushes reported as succeeded. If a flush or the
// orphan cleanup fails, it returns a nil slice and a wrapped error.
//
// NOTE(review): the cleanup spares only IDs built in this call, so all chunks
// of a path appear to be expected within a single call — confirm with callers.
func (i *Indexer) Index(ctx context.Context, projectID uint64, chunks []chunker.Chunk) ([]string, error) {
	slog.Debug("indexing chunks", "count", len(chunks), "projectID", projectID)

	// Every document of the project is routed by the same key.
	routing := fmt.Sprintf("%d", projectID)

	// What was seen in this call; drives the orphan cleanup at the end.
	current := OrphanData{
		ChunkIDs: make([]string, 0, len(chunks)),
		Paths:    make([]string, 0, len(chunks)),
	}

	var indexed []string

	for _, c := range chunks {
		doc := indexer.BuildChunkDocument(c, projectID, i.reindexing)

		// Paths gets one entry per chunk, so it may contain duplicates.
		current.Paths = append(current.Paths, c.Path)
		current.ChunkIDs = append(current.ChunkIDs, doc.ID)

		i.bulk.Add(
			elastic.NewBulkUpdateRequest().
				Id(doc.ID).
				Doc(doc).
				DocAsUpsert(true).
				DetectNoop(true).
				Routing(routing),
		)

		// Keep queueing until the batch is full.
		if i.bulk.NumberOfActions() < i.bulkSize {
			continue
		}

		flushed, err := i.Flush(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to flush bulk operations: %w", err)
		}
		indexed = append(indexed, flushed...)
	}

	// Send whatever is left in the last, partially filled batch.
	flushed, err := i.Flush(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to flush chunks: %w", err)
	}
	indexed = append(indexed, flushed...)

	// For new files this finds nothing to delete; for modified files it drops
	// the chunks that no longer exist.
	if err := i.deleteOrphanedChunks(ctx, projectID, current); err != nil {
		return nil, fmt.Errorf("failed to delete orphaned chunks: %w", err)
	}

	return indexed, nil
}

// deleteOrphanedChunks removes documents of the project that belong to one of
// data.Paths but whose ID is not in data.ChunkIDs, i.e. chunks left over from an
// earlier version of a file that still exists.
//
// It is a no-op when data.ChunkIDs is empty. Paths are de-duplicated before
// being sent: data.Paths carries one entry per chunk, and Elasticsearch caps the
// number of values in a terms query (index.max_terms_count), so repeating a
// path for every chunk could push a large batch over that limit for no benefit.
// The request is routed by project ID, matching the routing used when the
// documents are written in Index and when they are removed in Delete.
//
// It returns a wrapped error if the delete-by-query request fails.
func (i *Indexer) deleteOrphanedChunks(ctx context.Context, projectID uint64, data OrphanData) error {
	if len(data.ChunkIDs) == 0 {
		return nil
	}

	// Collapse repeated paths while converting to the []any form the terms
	// query expects; first-seen order is preserved.
	seen := make(map[string]struct{}, len(data.Paths))
	uniquePaths := make([]any, 0, len(data.Paths))
	for _, path := range data.Paths {
		if _, dup := seen[path]; dup {
			continue
		}
		seen[path] = struct{}{}
		uniquePaths = append(uniquePaths, path)
	}

	// Find orphaned chunks. We want: project = X, path IN (paths) AND id NOT IN (current IDs)
	query := elastic.NewBoolQuery().
		Filter(
			elastic.NewTermQuery("project_id", projectID),
			elastic.NewTermsQuery("path", uniquePaths...),
		).
		MustNot(
			elastic.NewIdsQuery().Ids(data.ChunkIDs...),
		)

	result, err := i.client.DeleteByQuery().
		Index(i.indexName).
		Query(query).
		Routing(fmt.Sprintf("%d", projectID)).
		Do(ctx)
	if err != nil {
		return fmt.Errorf("failed to delete orphaned chunks: %w", err)
	}

	slog.Debug("deleted orphaned chunks", "deleted", result.Deleted)
	return nil
}

// DeletePaths handles complete file deletions: it removes every document of the
// project whose path is one of paths.
//
// It is a no-op for an empty paths slice. The delete-by-query is routed by
// project ID, like the writes in Index and the project-wide Delete, so it only
// touches the shard(s) holding the project's documents instead of fanning out
// to the whole index.
//
// It returns a wrapped error if the request fails.
func (i *Indexer) DeletePaths(ctx context.Context, projectID uint64, paths []string) error {
	if len(paths) == 0 {
		return nil
	}

	// Convert []string to []any for the terms query
	pathValues := make([]any, 0, len(paths))
	for _, path := range paths {
		pathValues = append(pathValues, path)
	}

	query := elastic.NewBoolQuery().
		Must(
			elastic.NewTermQuery("project_id", projectID),
			elastic.NewTermsQuery("path", pathValues...),
		)

	result, err := i.client.DeleteByQuery().
		Index(i.indexName).
		Query(query).
		Routing(fmt.Sprintf("%d", projectID)).
		Do(ctx)
	if err != nil {
		return fmt.Errorf("failed to delete paths: %w", err)
	}

	slog.Debug("deleted paths", "paths", paths, "deleted", result.Deleted)
	return nil
}

// Delete removes every document of the given project from the index with a
// delete-by-query routed by the project ID. It returns a wrapped error if the
// request fails.
func (i *Indexer) Delete(ctx context.Context, projectID uint64) error {
	request := i.client.DeleteByQuery().
		Index(i.indexName).
		Query(elastic.NewTermQuery("project_id", projectID)).
		Routing(fmt.Sprintf("%d", projectID))

	resp, err := request.Do(ctx)
	if err != nil {
		return fmt.Errorf("failed to delete project: %w", err)
	}

	slog.Info("deleted project", "project_id", projectID, "deleted", resp.Deleted)
	return nil
}

// ResolveReindexing finishes a reindexing run for a project. It does nothing
// unless the indexer is in reindexing mode (i.reindexing=true).
//
// During reindexing, documents for file paths that were found are written with
// doc.reindexing=true. This method then:
//  1. refreshes the index so the following queries see the latest values,
//  2. deletes the project's documents that still have doc.reindexing=false,
//     i.e. those whose file paths were not found during reindexing,
//  3. sets doc.reindexing=false on all remaining documents of the project to
//     put them back into "normal" mode.
//
// Both by-query requests are routed by project ID, wait for completion and use
// a "5m" timeout. The first failing step aborts the method with a wrapped error.
func (i *Indexer) ResolveReindexing(ctx context.Context, projectID uint64) error {
	if !i.reindexing {
		// Nothing to resolve outside reindexing mode.
		return nil
	}

	routing := fmt.Sprintf("%d", projectID)

	// Step 1: refresh so the queries below work on the latest values.
	slog.Debug("resolve_reindexing refreshing index before delete")

	if _, err := i.client.Refresh(i.indexName).Do(ctx); err != nil {
		return fmt.Errorf("failed to refresh index: %w", err)
	}

	// Step 2: delete files that were not reindexed.
	slog.Debug("resolve_reindexing purging files not in reindex")

	notReindexed := elastic.NewBoolQuery().Must(
		elastic.NewTermQuery("project_id", projectID),
		elastic.NewTermsQuery("reindexing", false),
	)
	purge := i.client.DeleteByQuery().
		Index(i.indexName).
		Query(notReindexed).
		Routing(routing).
		WaitForCompletion(true).
		Refresh("true").
		Timeout("5m").
		Conflicts("proceed")

	purged, err := purge.Do(ctx)
	if err != nil {
		return fmt.Errorf("failed to purge files: %w", err)
	}

	slog.Info(
		"resolve_reindexing deleted documents",
		"batches", purged.Batches,
		"total", purged.Total,
		"updated", purged.Updated,
		"created", purged.Created,
		"deleted", purged.Deleted,
		"noops", purged.Noops,
		"took", purged.Took,
		"timed_out", purged.TimedOut,
	)

	// Step 3: set `reindexing` back to false for all documents of the project.
	slog.Debug("resolve_reindexing set all documents back to reindexing=false")

	reset := i.client.UpdateByQuery().
		Index(i.indexName).
		Query(elastic.NewTermQuery("project_id", projectID)).
		Routing(routing).
		Script(elastic.NewScript("ctx._source.reindexing = false;").Lang("painless")).
		WaitForCompletion(true).
		Timeout("5m")

	updated, err := reset.Do(ctx)
	if err != nil {
		return fmt.Errorf("failed to update reindexing to false: %w", err)
	}

	slog.Info(
		"resolve_reindexing updated documents",
		"batches", updated.Batches,
		"total", updated.Total,
		"updated", updated.Updated,
		"created", updated.Created,
		"deleted", updated.Deleted,
		"noops", updated.Noops,
		"took", updated.Took,
		"timed_out", updated.TimedOut,
	)

	return nil
}

// Flush sends the queued bulk actions and returns the IDs of the items the
// response reports as succeeded.
//
// With nothing queued it returns (nil, nil) without contacting the server. If
// the bulk request itself fails, a wrapped error is returned and i.bulk is not
// replaced. Item-level failures do not produce an error; they are only logged.
func (i *Indexer) Flush(ctx context.Context) ([]string, error) {
	pending := i.bulk.NumberOfActions()
	if pending == 0 {
		return nil, nil
	}

	slog.Debug("flushing bulk operations", "count", pending, "index", i.indexName)

	resp, err := i.bulk.Do(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to execute bulk operation: %w", err)
	}

	// Collect the IDs of the items that went through.
	succeeded := resp.Succeeded()
	ids := make([]string, len(succeeded))
	for n, item := range succeeded {
		ids[n] = item.Id
	}

	// Manage failures. TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/548766
	if resp.Errors {
		slog.Error("bulk operation completed with errors", "failed", len(resp.Failed()))
	}

	// Start the next batch from a fresh bulk service.
	i.bulk = i.client.Bulk().Index(i.indexName)

	return ids, nil
}

// Close flushes any bulk actions still queued on the indexer. The flushed IDs
// are discarded; a flush failure is returned as a wrapped error.
func (i *Indexer) Close(ctx context.Context) error {
	if _, err := i.Flush(ctx); err != nil {
		return fmt.Errorf("failed to flush on close: %w", err)
	}

	return nil
}
