package size

import (
	"context"
	"fmt"
	"strings"

	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/chunker"
	"gitlab.com/gitlab-org/gitlab-elasticsearch-indexer/internal/mode/chunk/types"
)

// Chunker implements the chunker.Chunker interface using size-based chunking.
// Use New to obtain one: New fills in a default chunk size and rejects an
// overlap that is not smaller than the chunk size.
type Chunker struct {
	// options holds the chunk size and chunk overlap applied when splitting content.
	options chunker.ChunkOptions
}

// New builds a size-based chunker from the supplied options.
//
// A ChunkSize of zero or less is replaced with chunker.DefaultChunkSize. An
// error is returned when ChunkOverlap is greater than or equal to the
// effective chunk size. Only ChunkSize and ChunkOverlap are carried over into
// the returned chunker.
func New(options chunker.ChunkOptions) (chunker.Chunker, error) {
	size := options.ChunkSize
	if size <= 0 {
		size = chunker.DefaultChunkSize
	}

	overlap := options.ChunkOverlap
	if overlap >= size {
		// The overlap must be strictly smaller than the chunk size.
		return nil, fmt.Errorf("chunk overlap (%d) must be less than chunk size (%d)", overlap, size)
	}

	effective := chunker.ChunkOptions{
		ChunkSize:    size,
		ChunkOverlap: overlap,
	}

	return &Chunker{options: effective}, nil
}

// ChunkFiles implements the chunker.Chunker interface. It splits every file in
// files into size-based chunks and returns them, in input order, as one flat
// slice.
//
// The context is polled once before each file is processed. If it has been
// cancelled or has timed out, the chunks gathered so far are discarded and the
// context's error is returned. An empty files slice yields (nil, nil).
func (c *Chunker) ChunkFiles(ctx context.Context, files []types.File) ([]chunker.Chunk, error) {
	if len(files) == 0 {
		return nil, nil
	}

	var all []chunker.Chunk
	for i := range files {
		// Stop early once the caller's context is no longer live.
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		all = append(all, c.chunkFile(files[i])...)
	}

	return all, nil
}

// chunkFile splits the content of a single file into chunks. Each chunk
// carries the file's path, OID and the name reported by file.GetFilename(),
// is typed as chunker.FileContentType, and has an empty Language.
func (c *Chunker) chunkFile(file types.File) []chunker.Chunk {
	var template chunker.Chunk
	template.Path = file.Path
	template.Type = chunker.FileContentType
	template.Name = file.GetFilename()
	template.Language = ""
	template.OID = file.OID

	return c.chunkContent(file.Content, template)
}

// chunkContent splits content into consecutive chunks of at most ChunkSize
// bytes. Each chunk ends just after the last newline found inside the size
// window when there is one, so chunks break on line boundaries where possible;
// otherwise the chunk is cut at exactly ChunkSize bytes. Every returned chunk
// takes its identifying fields from baseChunkInfo and records its byte offset
// into content (StartByte) and its byte length (Length).
//
// When ChunkOverlap is positive, the next chunk starts ChunkOverlap bytes
// before the end of the current one. If a newline break leaves the current
// chunk no longer than the overlap, stepping back would make no forward
// progress (or would land before the start of the remaining content), so the
// overlap is skipped for that step and the next chunk starts right after the
// current one.
//
// Empty content yields a nil slice. The method assumes ChunkSize is positive,
// which New guarantees.
func (c *Chunker) chunkContent(content string, baseChunkInfo chunker.Chunk) []chunker.Chunk {
	var chunks []chunker.Chunk

	chunkSize := int(c.options.ChunkSize)
	overlap := int(c.options.ChunkOverlap)

	remainingContent := content
	startByte := 0
	for len(remainingContent) > 0 {
		if len(remainingContent) <= chunkSize {
			// The rest fits in a single chunk: we've reached the end of the content.
			chunks = append(chunks, c.buildChunk(baseChunkInfo, remainingContent, startByte))
			break
		}

		// Prefer to end the chunk at the last newline inside the size window.
		// The +1 keeps the newline character in the current chunk.
		endIndex := chunkSize
		if lastNewlineIndex := strings.LastIndex(remainingContent[:endIndex], "\n"); lastNewlineIndex != -1 {
			endIndex = lastNewlineIndex + 1
		}

		chunks = append(chunks, c.buildChunk(baseChunkInfo, remainingContent[:endIndex], startByte))

		// Step back by the overlap only when that still moves forward. A chunk
		// shortened by a newline break can be no longer than the overlap; in
		// that case stepping back would loop forever (zero advance) or slice
		// with a negative index (panic), so advance by the full chunk instead.
		advance := endIndex
		if overlap > 0 && endIndex > overlap {
			advance = endIndex - overlap
		}

		startByte += advance
		remainingContent = remainingContent[advance:]
	}

	return chunks
}

// buildChunk assembles a single chunk. Path, Type, Name, Language and OID are
// taken from baseChunkInfo; Content is chunkText, StartByte is startByte, and
// Length is the byte length of chunkText.
func (c *Chunker) buildChunk(baseChunkInfo chunker.Chunk, chunkText string, startByte int) chunker.Chunk {
	var built chunker.Chunk

	// Identifying fields shared by every chunk of the same source.
	built.Path = baseChunkInfo.Path
	built.Type = baseChunkInfo.Type
	built.Name = baseChunkInfo.Name
	built.Language = baseChunkInfo.Language
	built.OID = baseChunkInfo.OID

	// Fields specific to this chunk.
	built.Content = chunkText
	built.StartByte = startByte
	built.Length = len(chunkText)

	return built
}
