package chunker

import (
	"slices"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestSizeChunker runs a chunker built with NewChunkerSize(200, 0) over a
// multi-line text fixture and expects exactly three chunks. For every chunk it
// checks the content, the starting byte offset, that Length agrees with both
// EndByte-StartByte and len(Content), and the starting line number. It then
// verifies that Clear leaves no chunks behind and that the same chunker can
// process the file a second time.
func TestSizeChunker(t *testing.T) {
	const fileContent = `This is a test content for chunking.
We will split this content into multiple chunks of a certain size.
We will use the simple size chunker.

This is not meant for testing with a parser,
since the parser is specifically for code content.

The simple chunker should split this content given a chunk size.
If the split does not occur at a new line, the simple chunker
will try to split at the last new line index.
`

	const firstChunk = `This is a test content for chunking.
We will split this content into multiple chunks of a certain size.
We will use the simple size chunker.

This is not meant for testing with a parser,
`

	const secondChunk = `since the parser is specifically for code content.

The simple chunker should split this content given a chunk size.
If the split does not occur at a new line, the simple chunker
`

	const thirdChunk = `will try to split at the last new line index.
`

	sc, err := NewChunkerSize(200, 0)
	require.NoError(t, err)
	t.Cleanup(func() {
		sc.Close()
	})

	sc.AddFile("test-file.txt", fileContent)
	require.NoError(t, sc.ChunkFiles())

	chunks := slices.Collect(sc.Chunks())
	require.Len(t, chunks, 3)

	// Each expected chunk starts where the previous one ended, so the byte
	// offsets are the running sum of the preceding chunk lengths.
	expected := []struct {
		content   string
		startByte int
		startLine int
	}{
		{content: firstChunk, startByte: 0, startLine: 1},
		{content: secondChunk, startByte: len(firstChunk), startLine: 6},
		{content: thirdChunk, startByte: len(firstChunk) + len(secondChunk), startLine: 10},
	}

	// Content, offsets and lengths.
	for i, want := range expected {
		got := chunks[i]
		require.Equal(t, want.content, got.Content)
		require.Equal(t, want.startByte, got.StartByte)
		require.Equal(t, got.EndByte-got.StartByte, got.Length)
		require.Equal(t, len(got.Content), got.Length)
	}

	// Line numbers are 1-based: the chunks start at lines 1, 6 and 10 of the
	// fixture.
	for i, want := range expected {
		require.Equal(t, want.startLine, chunks[i].StartLine)
	}

	// After Clear the chunker must not yield anything.
	sc.Clear()
	chunks = slices.Collect(sc.Chunks())
	require.Empty(t, chunks)

	// The cleared chunker must be reusable and produce the same chunk count.
	sc.AddFile("test-file.txt", fileContent)
	require.NoError(t, sc.ChunkFiles())

	chunks = slices.Collect(sc.Chunks())
	require.Len(t, chunks, 3)
}

// TestSplitCodeChunkerForPython runs a chunker built with
// NewChunkerSplitCode(200) over a small Python file named "typing.py" and
// expects the whole file to come back as a single chunk whose Language is
// "python". It also checks that Clear empties the chunker and that the file
// can be added and chunked again afterwards.
func TestSplitCodeChunkerForPython(t *testing.T) {
	const pythonSource = `from pydantic import BaseModel

__all__ = [
    "Token",
]


class Token(BaseModel):
    token: str
    expires_at: int

`

	cc, err := NewChunkerSplitCode(200)
	require.NoError(t, err)
	t.Cleanup(func() {
		cc.Close()
	})

	cc.AddFile("typing.py", pythonSource)
	require.NoError(t, cc.ChunkFiles())

	chunks := slices.Collect(cc.Chunks())
	require.Len(t, chunks, 1)

	// The single chunk must span the entire file.
	only := chunks[0]
	require.Equal(t, pythonSource, only.Content)
	require.Equal(t, 0, only.StartByte)
	require.Equal(t, only.EndByte-only.StartByte, only.Length)
	require.Equal(t, len(only.Content), only.Length)
	require.Equal(t, "python", only.Language)

	// After Clear the chunker must not yield anything.
	cc.Clear()
	chunks = slices.Collect(cc.Chunks())
	require.Empty(t, chunks)

	// The cleared chunker must be reusable.
	cc.AddFile("typing.py", pythonSource)
	require.NoError(t, cc.ChunkFiles())

	chunks = slices.Collect(cc.Chunks())
	require.Len(t, chunks, 1)
}

// TestSplitCodeChunker_PreBert mirrors the Python split-code test for a
// chunker built with NewChunkerSplitCodePreBert(200): the "typing.py" fixture
// must come back as one chunk covering the whole file with Language "python",
// Clear must empty the chunker, and the chunker must work again after being
// cleared.
func TestSplitCodeChunker_PreBert(t *testing.T) {
	const pythonSource = `from pydantic import BaseModel

__all__ = [
    "Token",
]


class Token(BaseModel):
    token: str
    expires_at: int

`

	pb, err := NewChunkerSplitCodePreBert(200)
	require.NoError(t, err)
	t.Cleanup(func() {
		pb.Close()
	})

	pb.AddFile("typing.py", pythonSource)
	require.NoError(t, pb.ChunkFiles())

	chunks := slices.Collect(pb.Chunks())
	require.Len(t, chunks, 1)

	// The single chunk must span the entire file.
	only := chunks[0]
	require.Equal(t, pythonSource, only.Content)
	require.Equal(t, 0, only.StartByte)
	require.Equal(t, only.EndByte-only.StartByte, only.Length)
	require.Equal(t, len(only.Content), only.Length)
	require.Equal(t, "python", only.Language)

	// After Clear the chunker must not yield anything.
	pb.Clear()
	chunks = slices.Collect(pb.Chunks())
	require.Empty(t, chunks)

	// The cleared chunker must be reusable.
	pb.AddFile("typing.py", pythonSource)
	require.NoError(t, pb.ChunkFiles())

	chunks = slices.Collect(pb.Chunks())
	require.Len(t, chunks, 1)
}
