package elastic

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"sync"

	"net/http"
	"os"
	"strings"
	"time"

	logkit "gitlab.com/gitlab-org/labkit/log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/credentials"
	"github.com/aws/aws-sdk-go/aws/defaults"
	v4 "github.com/aws/aws-sdk-go/aws/signer/v4"
	"github.com/deoxxa/aws_signing_client"
	"github.com/olivere/elastic/v7"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/advanced/indexer"
)

var (
	// errTimeout is a package-level error value whose message is "timeout".
	// NOTE(review): it is not referenced in the part of the file visible here;
	// presumably it is used elsewhere in the package — confirm before removing.
	errTimeout = fmt.Errorf("timeout")
)

const (
	// bulkOperationOverhead is a conservative estimate of the additional bytes required for
	// Elasticsearch bulk API formatting (JSON structure, field names, separators) beyond the raw data.
	// calculateMetadataSize adds it once per operation on top of the lengths of the
	// index name, routing value and document ID.
	bulkOperationOverhead = 200
)

// Client wraps the Elasticsearch client with additional functionality for GitLab indexing.
// It includes proactive bulk size tracking to prevent requests from exceeding size limits,
// which is particularly important for AWS OpenSearch deployments with strict size constraints.
//
// Field notes (as used in this file):
//   - IndexNameDefault, IndexNameCommits, IndexNameWikis: target index names. indexNameFor
//     returns IndexNameCommits for "commit" documents and IndexNameWikis for "wiki_blob"
//     documents when those names are non-empty, and IndexNameDefault otherwise.
//   - ProjectID, GroupID: IDs copied from the Config; ProjectID is used to build the
//     "project_<id>" routing value and both are used to build document IDs in the Get* helpers.
//   - traversalIDs: string exposed by TraversalIDs; its part before the first "-" is used
//     for "wiki_blob" routing.
//   - Permissions, PermissionsWiki, hashedRootNamespaceId, archived, schemaVersionBlob,
//     schemaVersionCommit, schemaVersionWiki: values copied from the Config and exposed
//     unchanged through accessor methods.
//   - maxBulkSize: byte threshold; flushIfNeeded flushes before an operation would push
//     currentBatchSize above it. The same value is passed to the bulk processor as BulkSize.
//   - Client: the underlying olivere/elastic client.
//   - bulk: the bulk processor that Index, Remove and Delete add requests to.
//   - bulkFailed: set to true by afterCallback when a bulk request or any of its items
//     fails; it is never reset in the code visible here.
//   - currentBatchSize: estimated size in bytes of the operations added since the last
//     successful flush.
//   - mu: held by Index, Remove, Delete, Flush and CurrentBatchSize; it guards
//     currentBatchSize. afterCallback writes bulkFailed without taking it.
type Client struct {
	IndexNameDefault      string
	traversalIDs          string
	IndexNameCommits      string
	IndexNameWikis        string
	ProjectID             int64
	GroupID               int64
	hashedRootNamespaceId int16
	Permissions           *indexer.ProjectPermissions
	PermissionsWiki       *indexer.WikiPermissions
	maxBulkSize           int
	Client                *elastic.Client
	bulk                  *elastic.BulkProcessor
	bulkFailed            bool
	archived              string
	schemaVersionBlob     uint16
	schemaVersionCommit   uint16
	schemaVersionWiki     uint16
	currentBatchSize      int
	mu                    sync.Mutex
}

// BuildIndexName returns the base index name: "gitlab" when the RAILS_ENV
// environment variable is unset or empty, otherwise "gitlab-<RAILS_ENV>".
func BuildIndexName() string {
	const base = "gitlab"

	if env := os.Getenv("RAILS_ENV"); env != "" {
		return base + "-" + env
	}
	return base
}

// ConfigFromEnv parses the contents of the `ELASTIC_CONNECTION_INFO`
// environment variable into a Config via ReadConfig.
//
// When the parsed config leaves IndexNameDefault or IndexNameCommits empty,
// they are filled in from BuildIndexName (the commits index gets a "-commits"
// suffix). IndexNameWikis is not defaulted here.
//
// It returns a wrapped error when the variable's contents cannot be parsed.
func ConfigFromEnv() (*Config, error) {
	raw := os.Getenv("ELASTIC_CONNECTION_INFO")

	cfg, err := ReadConfig(strings.NewReader(raw))
	if err != nil {
		return nil, fmt.Errorf("couldn't parse ELASTIC_CONNECTION_INFO: %w", err)
	}

	if cfg.IndexNameDefault == "" {
		cfg.IndexNameDefault = BuildIndexName()
	}

	if cfg.IndexNameCommits == "" {
		cfg.IndexNameCommits = BuildIndexName() + "-commits"
	}

	return cfg, nil
}

// afterCallback is registered in NewClient as the bulk processor's After hook
// and inspects the outcome of one bulk request.
//
// Parameters:
//   - executionId: identifier of the bulk request; used only in log fields.
//   - requests: the operations that made up the request (not used here).
//   - response: the bulk response; it may be nil.
//   - err: the request-level error, if any.
//
// A request-level error, or at least one failed item in the response, sets
// c.bulkFailed so that a later flush reports the failure. Every failed item is
// logged individually.
//
// NOTE(review): c.bulkFailed is written here without holding c.mu, while
// flushInternal reads it with c.mu held — confirm the intended synchronization.
func (c *Client) afterCallback(executionId int64, requests []elastic.BulkableRequest, response *elastic.BulkResponse, err error) {
	if err != nil {
		c.bulkFailed = true

		if elastic.IsStatusCode(err, http.StatusRequestEntityTooLarge) {
			logkit.WithFields(
				logkit.Fields{
					"bulkRequestId":      executionId,
					"maxBulkSizeSetting": c.maxBulkSize,
				},
			).WithError(err).Error("Consider lowering maximum bulk request size or/and increasing http.max_content_length")
		} else {
			logkit.WithFields(
				logkit.Fields{
					"bulkRequestId": executionId,
				},
			).WithError(err).Error("Bulk request failed")
		}
	}

	// bulk response can be nil in some cases, we must check first
	if response == nil || !response.Errors {
		return
	}

	failedBulkResponseItems := response.Failed()
	numFailed := len(failedBulkResponseItems)
	if numFailed > 0 {
		c.bulkFailed = true
		total := numFailed + len(response.Succeeded())

		logkit.WithField("bulkRequestId", executionId).Errorf("Bulk request failed to insert %d/%d documents", numFailed, total)
	}

	for i, v := range failedBulkResponseItems {
		// Failed() selects items by a non-2xx status, and the Error field is an
		// optional pointer, so an item can be listed without error details
		// (e.g. a delete of a document that does not exist). Dereferencing a
		// nil Error would panic inside the callback, so log the status instead.
		if v.Error == nil {
			logkit.WithField("item", i).Errorf("failed with status %d", v.Status)
			continue
		}

		logkit.WithField("item", i).Errorf("failed with error %s", v.Error.Reason)
	}
}

// NewClient builds a Client from config and starts its bulk processor.
//
// Parameters:
//   - config: connection and indexing settings; its fields are copied into the
//     returned Client.
//   - correlationID: sent with every request as the X-Opaque-Id header.
//
// HTTP client selection:
//   - config.AWS set: requests go through an AWS v4 signing client for the "es"
//     service in config.Region, wrapping an http.Client that carries the
//     request timeout (if any).
//   - otherwise: a custom http.Client is installed only when
//     config.RequestTimeout is non-zero.
//
// Sniffing and health checks are disabled. Setting the ELASTIC_DEBUG
// environment variable (to any value) enables trace logging to stderr.
//
// It returns an error when the signing client, the Elasticsearch client or the
// bulk processor cannot be created.
func NewClient(config *Config, correlationID string) (*Client, error) {
	baseHTTPClient := &http.Client{}
	if config.RequestTimeout != 0 {
		baseHTTPClient.Timeout = time.Duration(config.RequestTimeout) * time.Second
	}

	var options []elastic.ClientOptionFunc

	// AWS settings have to come first or they override custom URL, etc
	if config.AWS {
		awsConfig := defaults.Config().WithRegion(config.Region)
		signer := v4.NewSigner(ResolveAWSCredentials(config, awsConfig))

		signingClient, err := aws_signing_client.New(signer, baseHTTPClient, "es", config.Region)
		if err != nil {
			return nil, err
		}

		options = append(options, elastic.SetHttpClient(signingClient))
	} else if config.RequestTimeout != 0 {
		options = append(options, elastic.SetHttpClient(baseHTTPClient))
	}

	// Sniffer should look for HTTPS URLs if at-least-one initial URL is HTTPS
	for _, endpoint := range config.URL {
		if strings.HasPrefix(endpoint, "https:") {
			options = append(options, elastic.SetScheme("https"))
			break
		}
	}

	headers := http.Header{}
	headers.Add("X-Opaque-Id", correlationID)
	options = append(options,
		elastic.SetHeaders(headers),
		elastic.SetURL(config.URL...),
		elastic.SetSniff(false),
	)

	if _, debug := os.LookupEnv("ELASTIC_DEBUG"); debug {
		traceLogger := log.New(os.Stderr, "ELASTIC-DEBUG:", log.LstdFlags)
		options = append(options, elastic.SetTraceLog(traceLogger))
	}

	options = append(options, elastic.SetHealthcheck(false))

	esClient, err := elastic.NewClient(options...)
	if err != nil {
		return nil, err
	}

	wrapper := &Client{
		IndexNameDefault:      config.IndexNameDefault,
		IndexNameCommits:      config.IndexNameCommits,
		IndexNameWikis:        config.IndexNameWikis,
		ProjectID:             config.ProjectID,
		GroupID:               config.GroupID,
		Permissions:           config.Permissions,
		PermissionsWiki:       config.PermissionsWiki,
		maxBulkSize:           config.MaxBulkSize,
		traversalIDs:          config.TraversalIDs,
		Client:                esClient,
		hashedRootNamespaceId: config.HashedRootNamespaceId,
		archived:              config.Archived,
		schemaVersionBlob:     config.SchemaVersionBlob,
		schemaVersionCommit:   config.SchemaVersionCommit,
		schemaVersionWiki:     config.SchemaVersionWiki,
	}

	// The wrapper must exist before the processor is built because its
	// afterCallback method is registered as the After hook.
	processor, err := esClient.BulkProcessor().
		Workers(config.BulkWorkers).
		BulkSize(config.MaxBulkSize).
		After(wrapper.afterCallback).
		Do(context.Background())
	if err != nil {
		return nil, err
	}

	wrapper.bulk = processor

	return wrapper, nil
}

// ResolveAWSCredentials builds a chained Credentials object for signing
// AWS requests.
//
// Providers are tried in this order:
//  1. Static credentials - config.AccessKey / config.SecretKey from the
//     indexer config
//  2. The providers returned by defaults.CredProviders for awsConfig:
//     2a.  Credentials via env variables
//     2b.  Credentials via config files
//     2c.  ECS Role Credentials
//     2d.  EC2 Instance Role Credentials
func ResolveAWSCredentials(config *Config, awsConfig *aws.Config) *credentials.Credentials {
	static := &credentials.StaticProvider{
		Value: credentials.Value{
			AccessKeyID:     config.AccessKey,
			SecretAccessKey: config.SecretKey,
		},
	}

	chain := append(
		[]credentials.Provider{static},
		defaults.CredProviders(awsConfig, defaults.Handlers())...,
	)

	return credentials.NewChainCredentials(chain)
}

// ParentID returns the client's ProjectID.
func (c *Client) ParentID() int64 {
	return c.ProjectID
}

// ParentGroupID returns the client's GroupID.
func (c *Client) ParentGroupID() int64 {
	return c.GroupID
}

// ProjectPermissions returns the project permissions the client was
// configured with (the Permissions field); the result may be nil if none were set.
func (c *Client) ProjectPermissions() *indexer.ProjectPermissions {
	return c.Permissions
}

// WikiPermissions returns the wiki permissions the client was configured
// with (the PermissionsWiki field); the result may be nil if none were set.
func (c *Client) WikiPermissions() *indexer.WikiPermissions {
	return c.PermissionsWiki
}

// Flush forces the bulk processor to flush all pending operations and, on
// success, resets the batch size counter. It takes the client mutex for the
// duration of the flush, so it can be called concurrently with Index, Remove
// and Delete.
//
// It returns the processor's flush error, or a generic error when the flush
// itself succeeded but a bulk failure has been recorded on the client.
func (c *Client) Flush() error {
	c.mu.Lock()
	defer c.mu.Unlock()

	return c.flushInternal()
}

// flushInternal flushes the bulk processor without taking c.mu; every caller
// in this file already holds the mutex.
//
// The batch size counter is reset only when the flush succeeds and no bulk
// failure has been recorded. Because bulkFailed is never cleared in the code
// visible here, once a failure is recorded every later call returns an error.
func (c *Client) flushInternal() error {
	if flushErr := c.bulk.Flush(); flushErr != nil {
		return flushErr
	}

	if c.bulkFailed {
		return fmt.Errorf("failed to perform all operations")
	}

	c.currentBatchSize = 0
	return nil
}

// flushIfNeeded flushes the bulk processor when adding an operation of
// operationSize bytes would push the tracked batch size above maxBulkSize.
// fields are attached to the debug log entry emitted before the flush.
//
// A flush error is logged with Fatalln (which presumably terminates the
// process — confirm against the logger in use). The caller must hold c.mu.
func (c *Client) flushIfNeeded(operationSize int, fields logkit.Fields) {
	projected := c.currentBatchSize + operationSize
	if projected <= c.maxBulkSize {
		return
	}

	logkit.WithFields(fields).Debug("Flushing bulk processor - would exceed max size")

	err := c.flushInternal()
	if err != nil {
		logkit.WithError(err).Fatalln("Flushing error")
	}
}

// Close stops the underlying Elasticsearch client.
// NOTE(review): the bulk processor is neither flushed nor closed here;
// presumably callers invoke Flush before Close — confirm.
func (c *Client) Close() {
	c.Client.Stop()
}

// indexNameFor returns the index that documents of documentType are stored in:
// the commits index for "commit", the wikis index for "wiki_blob" (each only
// when configured with a non-empty name), and the default index otherwise.
func (c *Client) indexNameFor(documentType string) string {
	switch {
	case documentType == "commit" && c.IndexNameCommits != "":
		return c.IndexNameCommits
	case documentType == "wiki_blob" && c.IndexNameWikis != "":
		return c.IndexNameWikis
	default:
		return c.IndexNameDefault
	}
}

// routingFor returns the routing value for documents of documentType.
// "wiki_blob" documents are routed by "n_" plus the part of the traversal IDs
// before the first "-"; every other type is routed by "project_<ProjectID>".
func (c *Client) routingFor(documentType string) string {
	if documentType != "wiki_blob" {
		return fmt.Sprintf("project_%v", c.ProjectID)
	}

	// Using short string like n will help to keep the length of the url short
	firstSegment := strings.Split(c.TraversalIDs(), "-")[0]
	return fmt.Sprintf("n_%v", firstSegment)
}

// calculateMetadataSize estimates the bytes one bulk operation contributes
// besides its document body: the lengths of the index name, routing value and
// document ID plus the fixed bulkOperationOverhead.
func (c *Client) calculateMetadataSize(index, routing, docId string) int {
	identifierBytes := len(index) + len(routing) + len(docId)
	return identifierBytes + bulkOperationOverhead
}

// calculateDocumentSize estimates the bytes an index operation for document
// adds to a bulk request: the length of its JSON encoding plus the metadata
// size for the index and routing derived from documentType and the given id.
//
// If the document cannot be marshalled the failure is logged and the body is
// counted as "{}" (two bytes), so the estimate is still returned.
func (c *Client) calculateDocumentSize(documentType, id string, document interface{}) int {
	encoded, err := json.Marshal(document)
	if err != nil {
		logkit.WithError(err).WithFields(logkit.Fields{
			"documentType": documentType,
			"id":           id,
		}).Error("Unexpected JSON marshaling failure for internal document structure")

		encoded = []byte("{}")
	}

	metadataSize := c.calculateMetadataSize(
		c.indexNameFor(documentType),
		c.routingFor(documentType),
		id,
	)

	return len(encoded) + metadataSize
}

// Index queues thing for indexing under id in the index and routing derived
// from documentType.
//
// Before queuing, the estimated size of the operation is compared with the
// remaining room in the current batch; if it would exceed maxBulkSize the
// pending operations are flushed first. This proactive tracking helps avoid
// "Request Entity Too Large" errors, particularly common with AWS OpenSearch
// deployments. The client mutex is held for the whole call.
func (c *Client) Index(documentType, id string, thing interface{}) {
	c.mu.Lock()
	defer c.mu.Unlock()

	size := c.calculateDocumentSize(documentType, id, thing)

	// The log fields capture the batch size as it was before any flush.
	logFields := logkit.Fields{
		"currentBatchSize": c.currentBatchSize,
		"docSize":          size,
		"maxBulkSize":      c.maxBulkSize,
		"documentType":     documentType,
	}
	c.flushIfNeeded(size, logFields)

	// Add to batch and track size
	c.currentBatchSize += size

	request := elastic.NewBulkIndexRequest().
		Index(c.indexNameFor(documentType)).
		Routing(c.routingFor(documentType)).
		Id(id).
		Doc(thing)

	c.bulk.Add(request)
}

// Get fetches the document with the given id from the index and routing
// derived from documentType. Only used in tests.
func (c *Client) Get(documentType, id string) (*elastic.GetResult, error) {
	request := c.Client.Get().
		Index(c.indexNameFor(documentType)).
		Routing(c.routingFor(documentType)).
		Id(id)

	return request.Do(context.TODO())
}

// GetCommit fetches the "commit" document whose ID is "<ProjectID>_<id>".
func (c *Client) GetCommit(id string) (*elastic.GetResult, error) {
	docID := fmt.Sprintf("%v_%v", c.ProjectID, id)
	return c.Get("commit", docID)
}

// GetBlob fetches the "blob" document whose ID is "<ProjectID>_<path>".
func (c *Client) GetBlob(path string) (*elastic.GetResult, error) {
	docID := fmt.Sprintf("%v_%v", c.ProjectID, path)
	return c.Get("blob", docID)
}

// GetWikiBlob fetches the "wiki_blob" document for path. The document ID is
// "g_<GroupID>_<path>" when the client indexes a group document and
// "p_<ProjectID>_<path>" otherwise.
func (c *Client) GetWikiBlob(path string) (*elastic.GetResult, error) {
	if c.IsGroupDocument() {
		return c.Get("wiki_blob", fmt.Sprintf("g_%v_%v", c.GroupID, path))
	}

	return c.Get("wiki_blob", fmt.Sprintf("p_%v_%v", c.ProjectID, path))
}

// Remove queues a delete of the document with the given id, using the index
// and routing derived from documentType.
//
// As with Index, the estimated size of the operation is tracked, and pending
// operations are flushed first when adding it would exceed maxBulkSize.
// The client mutex is held for the whole call.
func (c *Client) Remove(documentType, id string) {
	c.mu.Lock()
	defer c.mu.Unlock()

	targetIndex := c.indexNameFor(documentType)
	route := c.routingFor(documentType)
	size := c.calculateMetadataSize(targetIndex, route, id)

	// The log fields capture the batch size as it was before any flush.
	logFields := logkit.Fields{
		"currentBatchSize": c.currentBatchSize,
		"deleteSize":       size,
		"maxBulkSize":      c.maxBulkSize,
		"documentType":     documentType,
	}
	c.flushIfNeeded(size, logFields)

	c.currentBatchSize += size

	request := elastic.NewBulkDeleteRequest().
		Index(targetIndex).
		Routing(route).
		Id(id)

	c.bulk.Add(request)
}

// DeleteParams identifies a single document to delete through Client.Delete.
//   - Index: name of the index passed to the bulk delete request.
//   - Routing: routing value passed to the bulk delete request.
//   - DocId: ID of the document to delete.
type DeleteParams struct {
	Index   string
	Routing string
	DocId   string
}

// Delete queues a delete operation described by params. It behaves like
// Remove but takes the index, routing and document ID from the caller
// instead of deriving them from a document type.
//
// The estimated size of the operation is tracked, and pending operations are
// flushed first when adding it would exceed maxBulkSize. params must not be
// nil. The client mutex is held for the whole call.
func (c *Client) Delete(params *DeleteParams) {
	c.mu.Lock()
	defer c.mu.Unlock()

	size := c.calculateMetadataSize(params.Index, params.Routing, params.DocId)

	// The log fields capture the batch size as it was before any flush.
	logFields := logkit.Fields{
		"currentBatchSize": c.currentBatchSize,
		"docId":            params.DocId,
		"index":            params.Index,
		"deleteSize":       size,
		"maxBulkSize":      c.maxBulkSize,
	}
	c.flushIfNeeded(size, logFields)

	c.currentBatchSize += size

	request := elastic.NewBulkDeleteRequest().
		Index(params.Index).
		Routing(params.Routing).
		Id(params.DocId)

	c.bulk.Add(request)
}

// TraversalIDs returns the traversal IDs string the client was configured
// with. routingFor uses the part before the first "-" for "wiki_blob" routing.
func (c *Client) TraversalIDs() string {
	return c.traversalIDs
}

// Archived returns the archived value the client was configured with, as a string.
func (c *Client) Archived() string {
	return c.archived
}

// HashedRootNamespaceId returns the hashed root namespace ID the client was
// configured with.
func (c *Client) HashedRootNamespaceId() int16 {
	return c.hashedRootNamespaceId
}

// IsGroupDocument reports whether the client targets a group: it has no
// positive ProjectID but does have a positive GroupID. A project ID takes
// precedence when both are set.
func (c *Client) IsGroupDocument() bool {
	if c.IsProjectDocument() {
		return false
	}
	return c.GroupID > 0
}

// IsProjectDocument reports whether the client has a positive ProjectID.
func (c *Client) IsProjectDocument() bool {
	return c.ProjectID > 0
}

// SchemaVersionBlob returns the blob schema version the client was configured with.
func (c *Client) SchemaVersionBlob() uint16 {
	return c.schemaVersionBlob
}

// SchemaVersionCommit returns the commit schema version the client was configured with.
func (c *Client) SchemaVersionCommit() uint16 {
	return c.schemaVersionCommit
}

// SchemaVersionWiki returns the wiki schema version the client was configured with.
func (c *Client) SchemaVersionWiki() uint16 {
	return c.schemaVersionWiki
}

// CurrentBatchSize reports the tracked batch size in bytes, i.e. the estimated
// size of the operations added since the last successful flush. It exists for
// testing purposes and reads the counter while holding the client mutex.
func (c *Client) CurrentBatchSize() int {
	c.mu.Lock()
	size := c.currentBatchSize
	c.mu.Unlock()

	return size
}
