package acquisition

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"maps"
	"os"
	"time"
	"slices"
	"strconv"
	"strings"

	"github.com/cenkalti/backoff/v5"
	"github.com/expr-lang/expr"
	"github.com/expr-lang/expr/vm"
	"github.com/goccy/go-yaml"
	"github.com/google/uuid"
	"github.com/prometheus/client_golang/prometheus"
	log "github.com/sirupsen/logrus"
	tomb "gopkg.in/tomb.v2"

	"github.com/crowdsecurity/go-cs-lib/csstring"
	"github.com/crowdsecurity/go-cs-lib/csyaml"
	"github.com/crowdsecurity/go-cs-lib/trace"

	"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
	"github.com/crowdsecurity/crowdsec/pkg/acquisition/registry"
	"github.com/crowdsecurity/crowdsec/pkg/acquisition/types"
	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
	"github.com/crowdsecurity/crowdsec/pkg/cwhub"
	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
	"github.com/crowdsecurity/crowdsec/pkg/logging"
	"github.com/crowdsecurity/crowdsec/pkg/metrics"
	"github.com/crowdsecurity/crowdsec/pkg/pipeline"
)

// DataSourceUnavailableError reports that a datasource cannot be run.
// DataSourceConfigure returns it when the datasource's CanRun check fails.
type DataSourceUnavailableError struct {
	// Name is the datasource type, as given in the "source" field.
	Name string
	// Err is the reason reported by the failed check.
	Err error
}

// Error implements the error interface.
func (e *DataSourceUnavailableError) Error() string {
	const format = "datasource '%s' is not available: %v"

	return fmt.Sprintf(format, e.Name, e.Err)
}

// Unwrap exposes the underlying error to errors.Is and errors.As.
func (e *DataSourceUnavailableError) Unwrap() error {
	return e.Err
}

// transformRuntimes holds the compiled transform expression of each datasource
// that has one, keyed by the datasource unique ID.
// It is filled while the acquisition configuration is loaded and looked up by
// StartAcquisition; no lock protects it in this file.
var transformRuntimes = make(map[string]*vm.Program)

// DataSourceConfigure instantiates the datasource named by commonConfig.Source
// and configures it from yamlConfig.
//
// An invalid configuration is reported as an error. A datasource that can't be
// run (eg. journalctl not available) is reported as a *DataSourceUnavailableError,
// which callers can detect with errors.As to take the appropriate action.
func DataSourceConfigure(
	ctx context.Context,
	commonConfig configuration.DataSourceCommonCfg,
	yamlConfig []byte,
	metricsLevel metrics.AcquisitionMetricsLevel,
	hub *cwhub.Hub,
) (types.DataSource, error) {
	newSource, err := registry.LookupFactory(commonConfig.Source)
	if err != nil {
		return nil, err
	}

	ds := newSource()

	// check eventual dependencies are satisfied (ie. journald will check journalctl availability)
	if runErr := ds.CanRun(); runErr != nil {
		return nil, &DataSourceUnavailableError{Name: commonConfig.Source, Err: runErr}
	}

	// per-datasource logger, tagged with the type and (when set) the name
	baseLogger := logging.SubLogger(log.StandardLogger(), "acquisition."+commonConfig.Source, commonConfig.LogLevel)

	entry := baseLogger.WithField("type", commonConfig.Source)
	if commonConfig.Name != "" {
		entry = entry.WithField("name", commonConfig.Name)
	}

	entry.Info("Configuring datasource")

	// optional capabilities: hand over the hub and the LAPI client config
	// to the datasources that ask for them
	if withHub, ok := ds.(types.HubAware); ok {
		withHub.SetHub(hub)
	}

	if withLAPI, ok := ds.(types.LAPIClientAware); ok {
		globalCfg := csconfig.GetConfig()
		if globalCfg.API == nil {
			return nil, errors.New("crowdsec configuration not loaded while initializing appsec - this is a bug, plese report")
		}

		withLAPI.SetClientConfig(globalCfg.API.Client)
	}

	// configure the actual datasource
	if cfgErr := ds.Configure(ctx, yamlConfig, entry, metricsLevel); cfgErr != nil {
		return nil, cfgErr
	}

	return ds, nil
}

// LoadAcquisitionFromDSN creates and configures a datasource from a
// command-line DSN of the form "<protocol>:<rest>".
//
// The protocol (text before the first ':') selects the datasource type.
// labels are passed to the datasource, and transformExpr, when not empty, is
// compiled and registered as the transform expression of the new datasource.
//
// An error is returned if the DSN has no protocol, the protocol is unknown,
// the transform expression does not compile, the datasource does not support
// DSN configuration, or the datasource rejects the DSN.
func LoadAcquisitionFromDSN(
	ctx context.Context,
	dsn string,
	labels map[string]string,
	transformExpr string,
	hub *cwhub.Hub,
) (types.DataSource, error) {
	proto, _, found := strings.Cut(dsn, ":")
	if !found {
		return nil, fmt.Errorf("%s is not a valid dsn (no protocol)", dsn)
	}

	factory, err := registry.LookupFactory(proto)
	if err != nil {
		return nil, fmt.Errorf("no acquisition for protocol %s:// - %w", proto, err)
	}

	dataSrc := factory()
	uniqueID := uuid.NewString()

	// compile now to fail early, but register the program only once the
	// datasource is successfully configured
	var program *vm.Program

	if transformExpr != "" {
		program, err = expr.Compile(transformExpr, exprhelpers.GetExprOptions(map[string]any{"evt": &pipeline.Event{}})...)
		if err != nil {
			return nil, fmt.Errorf("while compiling transform expression '%s': %w", transformExpr, err)
		}
	}

	if hubAware, ok := dataSrc.(types.HubAware); ok {
		hubAware.SetHub(hub)
	}

	if lapiClientAware, ok := dataSrc.(types.LAPIClientAware); ok {
		cConfig := csconfig.GetConfig()
		// same guard as DataSourceConfigure: avoid a nil dereference when
		// the API section has not been loaded
		if cConfig.API == nil {
			return nil, errors.New("crowdsec configuration not loaded while initializing datasource - this is a bug, please report")
		}

		lapiClientAware.SetClientConfig(cConfig.API.Client)
	}

	dsnConf, ok := dataSrc.(types.DSNConfigurer)
	if !ok {
		return nil, fmt.Errorf("%s datasource does not support command-line acquisition", proto)
	}

	subLogger := log.StandardLogger().WithField("type", labels["type"])

	if err = dsnConf.ConfigureByDSN(ctx, dsn, labels, subLogger, uniqueID); err != nil {
		return nil, fmt.Errorf("datasource for %q: %w", dsn, err)
	}

	if program != nil {
		transformRuntimes[uniqueID] = program
	}

	return dataSrc, nil
}

// GetMetricsLevelFromPromCfg maps the prometheus configuration to the metrics
// level used by the datasources.
//
// A nil configuration or an unrecognized level yields the full level;
// a disabled prometheus section yields no metrics at all.
func GetMetricsLevelFromPromCfg(prom *csconfig.PrometheusCfg) metrics.AcquisitionMetricsLevel {
	switch {
	case prom == nil:
		return metrics.AcquisitionMetricsLevelFull
	case !prom.Enabled, prom.Level == metrics.MetricsLevelNone:
		return metrics.AcquisitionMetricsLevelNone
	case prom.Level == metrics.MetricsLevelAggregated:
		return metrics.AcquisitionMetricsLevelAggregated
	default:
		// explicit "full" and anything unknown
		return metrics.AcquisitionMetricsLevelFull
	}
}

// detectType guesses the datasource type from the top-level keys of the first
// YAML document read from r.
//
// It returns "" when the document declares a "source" key, when no known key
// is present, or when there is no document at all.
func detectType(r io.Reader) (string, error) {
	docKeys, err := csyaml.GetDocumentKeys(r)
	if err != nil {
		return "", err
	}

	if len(docKeys) == 0 {
		return "", nil
	}

	first := docKeys[0]

	// an explicit source always wins, no detection needed
	if slices.Contains(first, "source") {
		return "", nil
	}

	if slices.Contains(first, "filename") || slices.Contains(first, "filenames") {
		return "file", nil
	}

	if slices.Contains(first, "journalctl_filter") {
		return "journalctl", nil
	}

	return "", nil
}

// ParsedSourceConfig is the result of ParseSourceConfig for one YAML document.
type ParsedSourceConfig struct {
	// Common is the common datasource configuration decoded from the document.
	Common configuration.DataSourceCommonCfg
	// Source is the configured datasource.
	Source types.DataSource
	// Transform is the compiled transform expression, nil if none was configured.
	Transform *vm.Program
	// SourceMissing is true when the "source" field was missing from the document.
	SourceMissing bool
	// SourceOverridden is the "source" value given by the user when it was not
	// missing, but didn't match the detected one.
	SourceOverridden string
}

// ErrEmptyYAMLDocument is the sentinel error returned by ParseSourceConfig for
// a document that csyaml.IsEmptyYAML reports as empty; check it with errors.Is.
var ErrEmptyYAMLDocument = errors.New("empty yaml document")

// ParseSourceConfig validates and configures one YAML document.
//
// It does not expand env variables, they must already be expanded.
//
// - return sentinel error for empty/comment-only documents
// - backward-compat source auto-detection (filename/filenames/journalctl_filter)
// - validate common fields
// - delegate per-source config validation to the appropriate module
// - compile transform expression
//
// When an error occurs after the common fields have been decoded, the
// partially filled ParsedSourceConfig is returned along with the error, so
// that the caller can still report the result of the source detection
// (Common.Source, SourceMissing, SourceOverridden). Its Source and Transform
// fields must not be relied upon in that case. Errors that occur earlier
// return a nil result.
func ParseSourceConfig(ctx context.Context, yamlDoc []byte, metricsLevel metrics.AcquisitionMetricsLevel, hub *cwhub.Hub) (*ParsedSourceConfig, error) {
	detectedType, err := detectType(bytes.NewReader(yamlDoc))
	if err != nil {
		return nil, err
	}

	// if there are not keys or only comments, the document will be skipped
	empty, err := csyaml.IsEmptyYAML(bytes.NewReader(yamlDoc))
	if err != nil {
		return nil, err
	}

	if empty {
		return nil, ErrEmptyYAMLDocument
	}

	var sub configuration.DataSourceCommonCfg

	// can't be strict here, the doc contains specific datasource config too but we won't collect them now.
	if err = yaml.UnmarshalWithOptions(yamlDoc, &sub); err != nil {
		return nil, fmt.Errorf("failed to parse: %w", errors.New(yaml.FormatError(err, false, false)))
	}

	parsed := &ParsedSourceConfig{
		// report that the user did not specify a source
		SourceMissing: sub.Source == "",
	}

	// report that the user specified a source that doesn't match with one detected from the presence of other fields
	if detectedType != "" {
		if sub.Source != "" && sub.Source != detectedType {
			parsed.SourceOverridden = sub.Source
		}

		sub.Source = detectedType
	}

	parsed.Common = sub

	// could not detect, alas
	if sub.Source == "" {
		return parsed, errors.New("missing 'source' field")
	}

	// pre-check that the source is valid
	if _, err = registry.LookupFactory(sub.Source); err != nil {
		return parsed, err
	}

	// check for labels now, an error for missing labels has lower priority
	// than missing or unknown source type
	if len(sub.Labels) == 0 && sub.Source != "docker" {
		// docker is the only source that does not require labels
		return parsed, errors.New("missing labels")
	}

	// parsed.Common is a copy of sub: the generated ID must be stored in both,
	// otherwise callers reading parsed.Common.UniqueId would not see it.
	// NOTE(review): transform runtimes are looked up with the datasource's
	// GetUuid(); confirm the datasource ends up with this same ID.
	sub.UniqueId = uuid.NewString()
	parsed.Common.UniqueId = sub.UniqueId

	src, err := DataSourceConfigure(ctx, sub, yamlDoc, metricsLevel, hub)
	if err != nil {
		return parsed, fmt.Errorf("datasource of type %s: %w", sub.Source, err)
	}

	parsed.Source = src

	if sub.TransformExpr != "" {
		program, err := expr.Compile(sub.TransformExpr, exprhelpers.GetExprOptions(map[string]any{"evt": &pipeline.Event{}})...)
		if err != nil {
			return parsed, fmt.Errorf("while compiling transform expression '%s' for datasource %s: %w", sub.TransformExpr, sub.Source, err)
		}

		parsed.Transform = program
	}

	return parsed, nil
}

// formatConfigLocation returns the text used to locate a configuration in
// log and error messages: the file name, followed by the position of the
// document in the file when withPos is true.
func formatConfigLocation(acquisFile string, withPos bool, idx int) string {
	if !withPos {
		return acquisFile
	}

	return acquisFile + " (position " + strconv.Itoa(idx) + ")"
}

// sourcesFromFile reads one acquisition file, expands environment variables,
// splits it into YAML documents and configures a datasource for each of them.
//
// Empty documents are skipped, and so are datasources that are not available
// (after logging an error). Any other error aborts the whole file.
func sourcesFromFile(
	ctx context.Context,
	acquisFile string,
	metricsLevel metrics.AcquisitionMetricsLevel,
	hub *cwhub.Hub,
) ([]types.DataSource, error) {
	log.Infof("loading acquisition file : %s", acquisFile)

	fh, err := os.Open(acquisFile)
	if err != nil {
		return nil, err
	}

	defer fh.Close()

	raw, err := io.ReadAll(fh)
	if err != nil {
		return nil, fmt.Errorf("failed to read %s: %w", acquisFile, err)
	}

	expanded := csstring.StrictExpand(string(raw), os.LookupEnv)

	docs, err := csyaml.SplitDocuments(strings.NewReader(expanded))
	if err != nil {
		return nil, err
	}

	// the position is only worth mentioning when there are several documents
	multiDoc := len(docs) > 1

	var loaded []types.DataSource

	for pos, doc := range docs {
		loc := formatConfigLocation(acquisFile, multiDoc, pos)

		parsed, parseErr := ParseSourceConfig(ctx, doc, metricsLevel, hub)

		// report data source detection, it can be required to understand an error
		if parsed != nil {
			if parsed.SourceMissing {
				log.Debugf("%s: datasource type missing, detected 'source=%s'", loc, parsed.Common.Source)
			}

			if parsed.SourceOverridden != "" {
				log.Warnf("%s: datasource type mismatch: found '%s' but should probably be '%s'", loc, parsed.SourceOverridden, parsed.Common.Source)
			}
		}

		var unavailable *DataSourceUnavailableError

		switch {
		case parseErr == nil:
			// fall out of the switch and keep the datasource
		case errors.Is(parseErr, ErrEmptyYAMLDocument):
			continue
		case errors.As(parseErr, &unavailable):
			// an unavailable datasource is not fatal for the others
			log.Error(fmt.Errorf("%s: %w", loc, parseErr))
			continue
		default:
			return nil, fmt.Errorf("%s: %w", loc, parseErr)
		}

		if parsed.Transform != nil {
			transformRuntimes[parsed.Common.UniqueId] = parsed.Transform
		}

		loaded = append(loaded, parsed.Source)
	}

	return loaded, nil
}

// LoadAcquisitionFromFiles configures the datasources described in every file
// of config.AcquisitionFiles, in order, and returns them all.
// The metrics level is derived from prom. The first file that fails to load
// aborts the operation.
func LoadAcquisitionFromFiles(
	ctx context.Context,
	config *csconfig.CrowdsecServiceCfg,
	prom *csconfig.PrometheusCfg,
	hub *cwhub.Hub,
) ([]types.DataSource, error) {
	level := GetMetricsLevelFromPromCfg(prom)

	var collected []types.DataSource

	for _, path := range config.AcquisitionFiles {
		fileSources, err := sourcesFromFile(ctx, path, level, hub)
		if err != nil {
			return nil, err
		}

		collected = append(collected, fileSources...)
	}

	return collected, nil
}

// GetMetrics registers with prometheus the collectors of every datasource
// that implements types.MetricsProvider.
// Collectors that are already registered are silently accepted; any other
// registration error is returned.
func GetMetrics(sources []types.DataSource, aggregated bool) error {
	for _, src := range sources {
		provider, ok := src.(types.MetricsProvider)
		if !ok {
			// the source does not expose metrics
			continue
		}

		var collectors []prometheus.Collector

		// NOTE(review): GetMetrics() is used when aggregated is true and
		// GetAggregMetrics() when it is false, which looks inverted; confirm
		// that this mapping is intended.
		if aggregated {
			collectors = provider.GetMetrics()
		} else {
			collectors = provider.GetAggregMetrics()
		}

		for _, collector := range collectors {
			err := prometheus.Register(collector)
			if err == nil {
				continue
			}

			// a collector that was already registered is not an error
			var alreadyRegistered prometheus.AlreadyRegisteredError
			if errors.As(err, &alreadyRegistered) {
				continue
			}

			return fmt.Errorf("could not register metrics for datasource %s: %w", src.GetName(), err)
		}
	}

	return nil
}

// copyEvent returns a new event carrying the given raw line.
//
// This is not a deep copy, and does not need to be: the new event comes from
// pipeline.MakeEvent and only receives the Line of the original, with its own
// Labels map so that the two events don't share it.
func copyEvent(evt pipeline.Event, line string) pipeline.Event {
	timeMachine := evt.ExpectMode == pipeline.TIMEMACHINE
	dup := pipeline.MakeEvent(timeMachine, evt.Type, evt.Process)

	dup.Line = evt.Line
	dup.Line.Raw = line

	labels := make(map[string]string, len(evt.Line.Labels))
	maps.Copy(labels, evt.Line.Labels)
	dup.Line.Labels = labels

	return dup
}

// transform reads events from transformChan, runs the transform expression on
// each of them and writes the resulting event(s) to output, until acquisTomb
// is dying.
//
// The expression may return a string (one event), or a []any / []string (one
// event per element). Whenever the expression fails, or returns nil or a value
// of an unexpected type, the event is forwarded as-is.
func transform(
	transformChan chan pipeline.Event,
	output chan pipeline.Event,
	acquisTomb *tomb.Tomb,
	transformRuntime *vm.Program,
	logger *log.Entry,
) {
	defer trace.CatchPanic("crowdsec/acquis")

	logger.Info("transformer started")

	// process runs the expression on a single event and forwards the result
	process := func(evt pipeline.Event) {
		logger.Tracef("Received event %s", evt.Line.Raw)

		out, err := expr.Run(transformRuntime, map[string]any{"evt": &evt})
		if err != nil {
			logger.Errorf("while running transform expression: %s, sending event as-is", err)
			output <- evt

			return
		}

		if out == nil {
			logger.Errorf("transform expression returned nil, sending event as-is")
			output <- evt

			return
		}

		switch result := out.(type) {
		case string:
			logger.Tracef("transform expression returned %s", result)
			output <- copyEvent(evt, result)
		case []any:
			logger.Tracef("transform expression returned %v", result) // We actually want to log the slice content

			for _, item := range result {
				text, isString := item.(string)
				if !isString {
					// the original event is sent in place of each element that is not a string
					logger.Errorf("transform expression returned []interface{}, but cannot assert an element to string")
					output <- evt

					continue
				}

				output <- copyEvent(evt, text)
			}
		case []string:
			logger.Tracef("transform expression returned %v", result)

			for _, text := range result {
				output <- copyEvent(evt, text)
			}
		default:
			logger.Errorf("transform expression returned an invalid type %T, sending event as-is", out)
			output <- evt
		}
	}

	for {
		select {
		case <-acquisTomb.Dying():
			logger.Debugf("transformer is dying")
			return
		case evt := <-transformChan:
			process(evt)
		}
	}
}

// runBatchFetcher runs the one-shot acquisition of bf and returns its result.
// The context given to the fetcher is canceled when acquisTomb starts dying.
func runBatchFetcher(ctx context.Context, bf types.BatchFetcher, output chan pipeline.Event, acquisTomb *tomb.Tomb) error {
	// wrap tomb logic with context
	fetchCtx, stop := context.WithCancel(ctx)

	go func() {
		<-acquisTomb.Dying()
		stop()
	}()

	err := bf.OneShot(fetchCtx, output)

	return err
}

// runRestartableStream starts, in a goroutine owned by acquisTomb, a loop that
// calls rs.Stream and calls it again after a constant delay each time it
// returns, until the context is canceled or the tomb starts dying.
//
// It returns immediately and always returns nil: stream errors are logged,
// not propagated.
func runRestartableStream(
	ctx context.Context,
	rs types.RestartableStreamer,
	name string,
	output chan pipeline.Event,
	acquisTomb *tomb.Tomb,
) error {
	// wrap tomb logic with context
	streamCtx, stop := context.WithCancel(ctx)

	go func() {
		<-acquisTomb.Dying()
		stop()
	}()

	acquisTomb.Go(func() error {
		// TODO: check timing and exponential?
		bo := backoff.NewConstantBackOff(10 * time.Second)
		bo.Reset() // TODO: reset according to run time

		for streamCtx.Err() == nil {
			if err := rs.Stream(streamCtx, output); err != nil {
				log.Errorf("datasource %q: stream error: %v (retrying)", name, err)
			}

			// don't announce a restart if we are shutting down
			if streamCtx.Err() != nil {
				break
			}

			delay := bo.NextBackOff()
			log.Infof("datasource %q: restarting stream in %s", name, delay)

			select {
			case <-streamCtx.Done():
				return nil
			case <-time.After(delay):
			}
		}

		return nil
	})

	return nil
}


// acquireSource runs the acquisition of one datasource, writing events to output.
//
// In cat mode the source must implement types.BatchFetcher (preferred) or
// types.Fetcher. In any other mode it must implement types.Tailer (preferred)
// or types.RestartableStreamer. An error is returned if the source supports
// none of the interfaces required by its mode.
func acquireSource(
	ctx context.Context,
	source types.DataSource,
	name string,
	output chan pipeline.Event,
	acquisTomb *tomb.Tomb,
) error {
	if source.GetMode() == configuration.CAT_MODE {
		switch oneShot := source.(type) {
		case types.BatchFetcher:
			return runBatchFetcher(ctx, oneShot, output, acquisTomb)
		case types.Fetcher:
			return oneShot.OneShotAcquisition(ctx, output, acquisTomb)
		default:
			return fmt.Errorf("%s: cat mode is set but OneShotAcquisition is not supported", source.GetName())
		}
	}

	switch streamer := source.(type) {
	case types.Tailer:
		return streamer.StreamingAcquisition(ctx, output, acquisTomb)
	case types.RestartableStreamer:
		return runRestartableStream(ctx, streamer, name, output, acquisTomb)
	default:
		return fmt.Errorf("%s: tail mode is set but the datasource does not support streaming acquisition", source.GetName())
	}
}

// StartAcquisition runs every datasource in its own goroutine owned by
// acquisTomb, and blocks until the tomb is done.
//
// Events go to output. When a transform expression is registered for a
// datasource, its events are first sent to a dedicated channel consumed by a
// transformer goroutine, which writes the transformed events to output.
//
// If an acquisition returns an error, the tomb is killed with it, which shuts
// down the other datasources; that error is then returned.
// With no sources, the function returns nil immediately.
func StartAcquisition(
	ctx context.Context,
	sources []types.DataSource,
	output chan pipeline.Event,
	acquisTomb *tomb.Tomb,
) error {
	// Don't wait if we have no sources, as it will hang forever
	if len(sources) == 0 {
		return nil
	}

	for i := range sources {
		subsrc := sources[i] // ensure it's a copy
		log.Debugf("starting one source %d/%d ->> %T", i, len(sources), subsrc)

		acquisTomb.Go(func() error {
			defer trace.CatchPanic("crowdsec/acquis")

			// by default the datasource writes straight to the output
			outChan := output

			log.Debugf("datasource %s UUID: %s", subsrc.GetName(), subsrc.GetUuid())

			if transformRuntime, ok := transformRuntimes[subsrc.GetUuid()]; ok {
				log.Infof("transform expression found for datasource %s", subsrc.GetName())

				// the datasource feeds the transformer, the transformer feeds the output
				transformChan := make(chan pipeline.Event)
				outChan = transformChan
				transformLogger := log.WithFields(log.Fields{
					"component":  "transform",
					"datasource": subsrc.GetName(),
				})

				acquisTomb.Go(func() error {
					transform(transformChan, output, acquisTomb, transformRuntime, transformLogger)
					return nil
				})
			}

			// the source must write to outChan, not output, or the
			// transformer would never receive anything
			if err := acquireSource(ctx, subsrc, subsrc.GetName(), outChan, acquisTomb); err != nil {
				// if one of the acquisitions returns an error, we kill the others to properly shutdown
				acquisTomb.Kill(err)
			}

			return nil
		})
	}

	// return only when acquisition is over (cat) or never (tail)
	return acquisTomb.Wait()
}
