package types

import (
	"fmt"
	"strings"

	logkit "gitlab.com/gitlab-org/labkit/log"

	"gitlab.com/gitlab-org/go/icu"
)

// ByteConverter turns raw byte slices into strings by asking an ICU charset
// detector for candidate charsets and converting the bytes to UTF-8 with an
// ICU charset converter. Create instances with NewByteConverter.
type ByteConverter struct {
	detector  *icu.CharsetDetector  // guesses candidate charsets for a byte slice
	converter *icu.CharsetConverter // converts bytes from a named charset to UTF-8
}

// NewByteConverter builds a ByteConverter backed by a new ICU charset detector
// and a new ICU charset converter.
//
// limitFileSize is narrowed to int and handed to icu.NewCharsetConverter
// (presumably the maximum input size the converter accepts; confirm against
// the icu package). An error is returned only when the detector cannot be
// created; in that case the returned converter is nil.
func NewByteConverter(limitFileSize int64) (*ByteConverter, error) {
	charsetDetector, err := icu.NewCharsetDetector()
	if err != nil {
		return nil, fmt.Errorf("icu.NewCharsetDetector %w", err)
	}

	return &ByteConverter{
		detector:  charsetDetector,
		converter: icu.NewCharsetConverter(int(limitFileSize)),
	}, nil
}

// TryConvertBytesToString converts b to a string on a best-effort basis and
// never reports failure to the caller.
//
// It first attempts charset detection and conversion to UTF-8. If that fails,
// the error is logged as a warning and the raw bytes are returned as a string
// in which each run of invalid UTF-8 bytes is replaced by a single U+FFFD
// (the Unicode replacement character), so the fallback is valid UTF-8.
func (bc *ByteConverter) TryConvertBytesToString(b []byte) string {
	encoded, err := bc.convertBytesToString(b)
	if err != nil {
		logkit.WithError(err).Warn("Encode bytes failed")

		// The bytes could not be decoded, so they may not be valid UTF-8.
		// Substitute the replacement character for malformed sequences rather
		// than handing invalid text to callers.
		return strings.ToValidUTF8(string(b), "\uFFFD")
	}

	return encoded
}

// convertBytesToString decodes b into a UTF-8 string using the charsets
// proposed by the detector.
//
// Empty input yields "" and a nil error without consulting the detector.
// Otherwise every match returned by GuessCharset is tried in the order
// returned, and the first conversion that succeeds is the result.
//
// Errors:
//   - detection failure is wrapped as "couldn't guess charset";
//   - if the detector returns no matches, the error names the charset as
//     "unknown";
//   - if every conversion fails, the error names the first candidate charset
//     and wraps that candidate's conversion error so the cause is preserved.
func (bc *ByteConverter) convertBytesToString(b []byte) (string, error) {
	if len(b) == 0 {
		return "", nil
	}

	matches, err := bc.detector.GuessCharset(b)
	if err != nil {
		return "", fmt.Errorf("couldn't guess charset: %w", err)
	}

	// `detector.GuessCharset` may return err == nil && len(matches) == 0
	if len(matches) == 0 {
		return "", fmt.Errorf("failed to convert from unknown to UTF-8")
	}

	// Try each candidate, returning the first that converts cleanly. Only the
	// first failure is kept: it belongs to matches[0], the charset reported in
	// the final error message.
	var firstErr error
	for _, match := range matches {
		converted, convErr := bc.converter.ConvertToUtf8(b, match.Charset)
		if convErr == nil {
			return string(converted), nil
		}
		if firstErr == nil {
			firstErr = convErr
		}
	}

	return "", fmt.Errorf("failed to convert from %s to UTF-8: %w", matches[0].Charset, firstErr)
}
