use crate::types::Sizer;
use crate::{Chunk, ChunkError, File, FileChunker, byte_to_line_number, sizers::ByteSizer};
use ast_grep_core::{Node, tree_sitter::StrDoc};
use ast_grep_language::{Language, LanguageExt, SupportLang};
use std::path::Path;

/// A byte offset into the source text at which a chunk is allowed to start or end.
#[derive(Clone, Debug)]
struct SplitPoint {
    /// Offset, in bytes, measured from the beginning of the source text.
    position: usize,
}

/// Chunker that cuts source files at syntactic boundaries.
///
/// `SplitCodeChunker` analyzes source code to identify natural break points such as
/// function boundaries, class definitions, and other syntactic structures. This approach
/// ensures that chunks maintain semantic coherence and don't break in the middle of
/// logical code units.
///
/// When a stretch of source has no usable break point, or the file's language is not
/// recognized, the work is handed to the optional fallback chunker; without one the
/// operation fails with a [`ChunkError`].
pub struct SplitCodeChunker {
    /// Decides where a chunk starting at a given byte offset has to end at the latest.
    sizer: Box<dyn Sizer>,
    /// Chunker used for ranges that cannot be split syntactically, if configured.
    fallback: Option<Box<dyn FileChunker>>,
}

// Language name constants, one per `ast_grep_language::SupportLang` variant, listed
// alphabetically. They exist as a fallback because `parser_core::SupportedLanguage`
// does not yet cover every `ast_grep_language::SupportLang` variant.
const LANG_BASH: &str = "bash";
const LANG_C: &str = "c";
const LANG_CPP: &str = "cpp";
const LANG_CSHARP: &str = "csharp";
const LANG_CSS: &str = "css";
const LANG_ELIXIR: &str = "elixir";
const LANG_GO: &str = "go";
const LANG_HASKELL: &str = "haskell";
const LANG_HCL: &str = "hcl";
const LANG_HTML: &str = "html";
const LANG_JAVA: &str = "java";
const LANG_JAVASCRIPT: &str = "javascript";
const LANG_JSON: &str = "json";
const LANG_KOTLIN: &str = "kotlin";
const LANG_LUA: &str = "lua";
const LANG_NIX: &str = "nix";
const LANG_PHP: &str = "php";
const LANG_PYTHON: &str = "python";
const LANG_RUBY: &str = "ruby";
const LANG_RUST: &str = "rust";
const LANG_SCALA: &str = "scala";
const LANG_SOLIDITY: &str = "solidity";
const LANG_SWIFT: &str = "swift";
const LANG_TSX: &str = "tsx";
const LANG_TYPESCRIPT: &str = "typescript";
const LANG_YAML: &str = "yaml";

// Helper function to get the const string for a ast_grep_language::SupportLang
const fn support_lang_to_str(lang: SupportLang) -> &'static str {
    match lang {
        SupportLang::Bash => LANG_BASH,
        SupportLang::C => LANG_C,
        SupportLang::Cpp => LANG_CPP,
        SupportLang::CSharp => LANG_CSHARP,
        SupportLang::Css => LANG_CSS,
        SupportLang::Go => LANG_GO,
        SupportLang::Elixir => LANG_ELIXIR,
        SupportLang::Haskell => LANG_HASKELL,
        SupportLang::Html => LANG_HTML,
        SupportLang::Java => LANG_JAVA,
        SupportLang::JavaScript => LANG_JAVASCRIPT,
        SupportLang::Json => LANG_JSON,
        SupportLang::Kotlin => LANG_KOTLIN,
        SupportLang::Lua => LANG_LUA,
        SupportLang::Nix => LANG_NIX,
        SupportLang::Php => LANG_PHP,
        SupportLang::Python => LANG_PYTHON,
        SupportLang::Ruby => LANG_RUBY,
        SupportLang::Rust => LANG_RUST,
        SupportLang::Scala => LANG_SCALA,
        SupportLang::Solidity => LANG_SOLIDITY,
        SupportLang::Swift => LANG_SWIFT,
        SupportLang::Tsx => LANG_TSX,
        SupportLang::TypeScript => LANG_TYPESCRIPT,
        SupportLang::Yaml => LANG_YAML,
        SupportLang::Hcl => LANG_HCL,
    }
}

impl SplitCodeChunker {
    /// Creates a chunker whose chunk size is limited by a [`ByteSizer`] configured with
    /// `max_chunk_size`. No fallback chunker is configured.
    pub fn new(max_chunk_size: usize) -> Self {
        let byte_sizer = ByteSizer::new(max_chunk_size);
        Self::new_sizer(Box::new(byte_sizer))
    }

    /// Creates a chunker that uses the given `sizer` to bound chunk sizes.
    ///
    /// The result has no fallback chunker; attach one with `with_fallback`.
    pub fn new_sizer(sizer: Box<dyn Sizer>) -> Self {
        let fallback = None;
        Self { sizer, fallback }
    }

    pub fn with_fallback(mut self, fallback: Box<dyn FileChunker>) -> Self {
        self.fallback = Some(fallback);
        self
    }

    /// Splits `file` into chunks whose boundaries fall on syntactic split points.
    ///
    /// Starting at byte 0, each iteration asks the sizer how far a chunk beginning at the
    /// current offset may extend and then cuts at the furthest split point inside that
    /// window. When the sizer reports that nothing remains beyond the window, the rest
    /// of the file becomes the final chunk. When no split point lies strictly between
    /// the current offset and the window's end, the stretch up to the next split point
    /// (or the end of the file) is delegated to the fallback chunker.
    ///
    /// Relies on `find_split_points_ast_grep` returning points sorted by position.
    ///
    /// # Errors
    ///
    /// Propagates errors from the sizer and from the fallback chunker, and returns
    /// `ChunkError::NoSuitableSplitPoints` when a fallback is required but none is set.
    fn chunk_ast_grep<'a>(
        &self,
        target: &mut Vec<Chunk<'a>>,
        root: &Node<StrDoc<SupportLang>>,
        file: &File<'a>,
    ) -> Result<(), ChunkError> {
        let source = file.source_code;
        let total_length = source.len();
        let lang = *root.lang();
        let split_points = self.find_split_points_ast_grep(root);

        if split_points.is_empty() {
            return self.chunk_files_fallback(target, file, 0, total_length, lang);
        }

        let mut start = 0;

        while start < total_length {
            let (end_byte, more) = self.sizer.find_end_byte(source, start)?;

            if !more {
                // Everything that is left fits: emit it as the last chunk.
                target.push(Chunk {
                    file_path: file.file_path,
                    start_byte: start,
                    end_byte: total_length,
                    start_line: byte_to_line_number(source, start),
                    content: &source[start..total_length],
                    language: support_lang_to_str(lang),
                });
                break;
            }

            // Furthest split point inside the window that still makes progress. A split
            // point at or before `start` is useless, and one beyond `end_byte` must never
            // be chosen because the resulting chunk would exceed the size limit.
            let fitting = split_points.partition_point(|sp| sp.position <= end_byte);
            let chunk_end = split_points[..fitting]
                .last()
                .map(|sp| sp.position)
                .filter(|&position| position > start);

            match chunk_end {
                Some(end) => {
                    target.push(Chunk {
                        file_path: file.file_path,
                        start_byte: start,
                        end_byte: end,
                        start_line: byte_to_line_number(source, start),
                        content: &source[start..end],
                        language: support_lang_to_str(lang),
                    });
                    start = end;
                }
                None => {
                    // The stretch up to the next split point is too large for one chunk.
                    // Picking the first point strictly after `start` skips duplicates, so
                    // the fallback is never invoked on an empty range.
                    let next = split_points.partition_point(|sp| sp.position <= start);
                    let segment_end = split_points
                        .get(next)
                        .map_or(total_length, |sp| sp.position);

                    self.chunk_files_fallback(target, file, start, segment_end, lang)?;
                    start = segment_end;
                }
            }
        }

        Ok(())
    }

    /// Hands the byte range `start_byte..end_byte` of `file` to the fallback chunker.
    ///
    /// The fallback sees only that slice, so the chunks it appends carry offsets relative
    /// to the slice. They are shifted afterwards to be relative to the whole file, their
    /// start lines are recomputed against the full source, and their language is set to
    /// the name of `language`.
    ///
    /// Assumes that the fallback only appends to `target`.
    ///
    /// # Errors
    ///
    /// Returns `ChunkError::NoSuitableSplitPoints` when no fallback is configured, and
    /// propagates any error reported by the fallback itself.
    fn chunk_files_fallback<'a>(
        &self,
        target: &mut Vec<Chunk<'a>>,
        file: &File<'a>,
        start_byte: usize,
        end_byte: usize,
        language: SupportLang,
    ) -> Result<(), ChunkError> {
        let delegate = match self.fallback.as_ref() {
            Some(chunker) => chunker,
            None => return Err(ChunkError::NoSuitableSplitPoints),
        };

        // Remember where the fallback's output begins so only new chunks get adjusted.
        let first_new = target.len();
        let window = File {
            source_code: &file.source_code[start_byte..end_byte],
            file_path: file.file_path,
        };
        delegate.chunk_file(target, &window)?;

        for chunk in target.iter_mut().skip(first_new) {
            chunk.start_byte += start_byte;
            chunk.end_byte += start_byte;
            chunk.start_line = byte_to_line_number(file.source_code, chunk.start_byte);
            chunk.language = support_lang_to_str(language);
        }

        Ok(())
    }

    /// Collects the byte offsets at which the source may be cut.
    ///
    /// Every node of the tree is visited and classified by its kind:
    /// declarations contribute both their start and their end, comments contribute
    /// their start, and imports, variable declarations, control flow constructs and
    /// basic statements contribute their end.
    ///
    /// The returned points are sorted by position and contain no duplicates. Nested
    /// nodes frequently begin or end at the same byte; keeping such duplicates would
    /// make callers step over zero-length ranges.
    fn find_split_points_ast_grep(&self, root: &Node<StrDoc<SupportLang>>) -> Vec<SplitPoint> {
        let mut split_points = Vec::new();

        for node in root.dfs() {
            let node_kind = node.kind();

            // (split before the node, split after the node)
            let (before, after) = match &*node_kind {
                // Top-level declarations
                "function_declaration"
                | "method_declaration"
                | "class_declaration"
                | "module_declaration"
                | "struct_type_declaration"
                | "interface_declaration"
                | "function_item"
                | "struct_item"
                | "impl_item"
                | "trait_item"
                | "enum_item"
                | "mod_item"
                | "type_item"
                | "const_item"
                | "static_item"
                | "function_definition"
                | "class_definition"
                | "method_definition"
                | "function"
                | "class"
                | "method"
                | "interface"
                | "namespace"
                | "module" => (true, true),

                // Import/require statements: good to split after the imports section
                "import_declaration" | "import_statement" | "require_statement"
                | "use_declaration" | "extern_crate_item" | "import" | "include" => (false, true),

                // Variable and constant declarations
                "var_declaration"
                | "const_declaration"
                | "variable_declaration"
                | "let_declaration"
                | "variable_declarator" => (false, true),

                // Control flow statements
                "if_statement" | "for_statement" | "while_statement" | "switch_statement"
                | "case_statement" | "do_statement" | "begin_block" | "if_expression"
                | "while_expression" | "for_expression" | "loop_expression"
                | "match_expression" | "try_statement" | "with_statement"
                | "foreach_statement" => (false, true),

                // Comments often indicate logical breaks
                "comment" | "line_comment" | "block_comment" => (true, false),

                // Basic statements
                "expression_statement" | "assignment_expression" | "return_statement" => {
                    (false, true)
                }

                // Ignore anything else
                _ => continue,
            };

            let range = node.range();
            if before {
                split_points.push(SplitPoint {
                    position: range.start,
                });
            }
            if after {
                split_points.push(SplitPoint {
                    position: range.end,
                });
            }
        }

        split_points.sort_unstable_by_key(|sp| sp.position);
        // Equal positions are adjacent after sorting, so this removes every duplicate.
        split_points.dedup_by_key(|sp| sp.position);

        split_points
    }
}

impl FileChunker for SplitCodeChunker {
    /// Chunks `file` and appends the resulting chunks to `target`.
    ///
    /// Empty source produces no chunks. When the language can be determined from the
    /// file path, the source is parsed and split along syntactic boundaries. Otherwise
    /// the whole file goes to the fallback chunker, or, if none is configured,
    /// `ChunkError::UnsupportedLanguage` carrying the file path is returned.
    fn chunk_file<'a>(
        &self,
        target: &mut Vec<Chunk<'a>>,
        file: &File<'a>,
    ) -> Result<(), ChunkError> {
        if file.source_code.is_empty() {
            return Ok(());
        }

        match SupportLang::from_path(Path::new(file.file_path)) {
            Some(detected) => {
                let parsed = detected.ast_grep(file.source_code);
                let root = parsed.root();
                self.chunk_ast_grep(target, &root, file)
            }
            None => match &self.fallback {
                Some(fallback) => fallback.chunk_file(target, file),
                None => Err(ChunkError::UnsupportedLanguage(file.file_path.to_string())),
            },
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::size_chunker::SizeChunker;

    use super::*;

    // Chunk size limit shared by every chunker built in these tests and by the size
    // assertion in `assert_complete_chunks`.
    const TEST_CHUNK_SIZE: usize = 50;

    // Builds a `SplitCodeChunker` limited to `TEST_CHUNK_SIZE`, without a fallback.
    fn create_code_chunker() -> SplitCodeChunker {
        SplitCodeChunker::new(TEST_CHUNK_SIZE)
    }

    /// Asserts that `chunks` is non-empty, that each chunk spans at most
    /// `TEST_CHUNK_SIZE` bytes and is tagged with `language`, and that the chunk
    /// contents joined in order reproduce `source_code` exactly.
    fn assert_complete_chunks(chunks: Vec<Chunk>, source_code: &str, language: &str) {
        assert!(!chunks.is_empty());

        for chunk in &chunks {
            let span = chunk.end_byte - chunk.start_byte;
            assert!(
                span <= TEST_CHUNK_SIZE,
                "start_byte = {}, end_byte = {}, content = {}",
                chunk.start_byte,
                chunk.end_byte,
                chunk.content
            );
            assert_eq!(language, chunk.language);
        }

        let rebuilt: String = chunks.iter().map(|chunk| chunk.content).collect();
        assert_eq!(rebuilt, source_code);
    }

    /// A path with no recognizable language must be rejected when no fallback is set.
    #[test]
    fn test_chunk_file_unknown_language() {
        let file = File {
            source_code: "1234567890",
            file_path: "unknown_language",
        };
        let chunker = create_code_chunker();
        let mut chunks = Vec::new();

        if let Err(e) = chunker.chunk_file(&mut chunks, &file) {
            assert!(matches!(e, ChunkError::UnsupportedLanguage(..)));
        } else {
            panic!("expected an error");
        }
    }

    /// With a fallback configured, a file of unknown language is chunked by the fallback
    /// and the resulting chunks carry an empty language name.
    #[test]
    fn test_chunk_file_unknown_language_with_fallback() {
        let size_chunker = SizeChunker::new(TEST_CHUNK_SIZE, 0).unwrap();
        let chunker = create_code_chunker().with_fallback(Box::new(size_chunker));

        let source = "1234567890";
        let file = File {
            source_code: source,
            file_path: "unknown_language",
        };

        let mut chunks = Vec::new();
        assert!(chunker.chunk_file(&mut chunks, &file).is_ok());
        assert_complete_chunks(chunks, source, "");
    }

    /// A statement longer than the chunk limit cannot be split without a fallback.
    #[test]
    fn test_chunk_file_too_large_chunk() {
        let source =
            "fn main() {\n    println!(\"Hell".to_owned() + &"o".repeat(100) + "\");\n}";
        let file = File {
            source_code: &source,
            file_path: "large.rs",
        };
        let chunker = create_code_chunker();
        let mut chunks = Vec::new();

        if let Err(e) = chunker.chunk_file(&mut chunks, &file) {
            assert!(matches!(e, ChunkError::NoSuitableSplitPoints));
        } else {
            panic!("expected an error");
        }
    }

    /// A file made only of newlines and longer than the chunk limit fails without a
    /// fallback.
    #[test]
    fn test_chunk_file_no_splits() {
        let blank_lines = "\n".repeat(100);
        let file = File {
            source_code: &blank_lines,
            file_path: "large.rb",
        };
        let chunker = create_code_chunker();
        let mut chunks = Vec::new();

        if let Err(e) = chunker.chunk_file(&mut chunks, &file) {
            assert!(matches!(e, ChunkError::NoSuitableSplitPoints));
        } else {
            panic!("expected an error");
        }
    }

    /// With a size-based fallback, an oversized statement is still chunked completely
    /// and every chunk is tagged as Rust.
    #[test]
    fn test_chunk_file_too_large_chunk_with_fallback() {
        let size_chunker = SizeChunker::new(TEST_CHUNK_SIZE, 0).unwrap();
        let chunker = create_code_chunker().with_fallback(Box::new(size_chunker));

        let source =
            "fn main() {\n    println!(\"Hell".to_owned() + &"o".repeat(100) + "\");\n}";
        let file = File {
            source_code: &source,
            file_path: "large.rs",
        };

        let mut chunks = Vec::new();
        assert!(chunker.chunk_file(&mut chunks, &file).is_ok());
        assert_complete_chunks(chunks, &source, "rust");
    }

    /// A long run of spaces before the closing brace leaves an oversized tail that
    /// cannot be chunked without a fallback.
    #[test]
    fn test_chunk_file_too_large_last_chunk() {
        let source =
            "fn main() {\n    println!(\"Hello\");\n".to_owned() + &" ".repeat(100) + "}";
        let file = File {
            source_code: &source,
            file_path: "large.rs",
        };
        let chunker = create_code_chunker();
        let mut chunks = Vec::new();

        if let Err(e) = chunker.chunk_file(&mut chunks, &file) {
            assert!(matches!(e, ChunkError::NoSuitableSplitPoints));
        } else {
            panic!("expected an error");
        }
    }

    /// A long run of spaces after the last item cannot be chunked without a fallback.
    #[test]
    fn test_chunk_file_too_large_after_content() {
        let source =
            "fn main() {\n    println!(\"Hello\");\n}\n".to_owned() + &" ".repeat(100);
        let file = File {
            source_code: &source,
            file_path: "large.rs",
        };
        let chunker = create_code_chunker();
        let mut chunks = Vec::new();

        if let Err(e) = chunker.chunk_file(&mut chunks, &file) {
            assert!(matches!(e, ChunkError::NoSuitableSplitPoints));
        } else {
            panic!("expected an error");
        }
    }

    /// With a size-based fallback, an oversized tail before the closing brace is still
    /// chunked completely and every chunk is tagged as Rust.
    #[test]
    fn test_chunk_file_too_large_last_chunk_with_fallback() {
        let size_chunker = SizeChunker::new(TEST_CHUNK_SIZE, 0).unwrap();
        let chunker = create_code_chunker().with_fallback(Box::new(size_chunker));

        let source =
            "fn main() {\n    println!(\"Hello\");\n".to_owned() + &" ".repeat(100) + "}";
        let file = File {
            source_code: &source,
            file_path: "large.rs",
        };

        let mut chunks = Vec::new();
        assert!(chunker.chunk_file(&mut chunks, &file).is_ok());
        assert_complete_chunks(chunks, &source, "rust");
    }

    /// A small Rust file is chunked completely and tagged as Rust.
    #[test]
    fn test_chunk_file_rust() {
        let source = r#"
fn main() {
    println!("Hello, world!");
}
"#;
        let file = File {
            source_code: source,
            file_path: "main.rs",
        };
        let chunker = create_code_chunker();

        let mut chunks = Vec::new();
        assert!(chunker.chunk_file(&mut chunks, &file).is_ok());
        assert_complete_chunks(chunks, source, "rust");
    }

    /// A nested Ruby module/class/method is chunked completely and tagged as Ruby.
    #[test]
    fn test_chunk_file_ruby() {
        let source = r#"
module Thing
  class Foo
    def bar
      puts("Hello, world!")
    end
  end
end
"#;
        let file = File {
            source_code: source,
            file_path: "main.rb",
        };
        let chunker = create_code_chunker();

        let mut chunks = Vec::new();
        assert!(chunker.chunk_file(&mut chunks, &file).is_ok());
        assert_complete_chunks(chunks, source, "ruby");
    }

    /// A small Python script is chunked completely and tagged as Python.
    #[test]
    fn test_chunk_file_python() {
        let source = r#"
def main():
    print("Hello, World!")

if __name__ == "__main__":
    main()
"#;
        let file = File {
            source_code: source,
            file_path: "main.py",
        };
        let chunker = create_code_chunker();

        let mut chunks = Vec::new();
        assert!(chunker.chunk_file(&mut chunks, &file).is_ok());
        assert_complete_chunks(chunks, source, "python");
    }
}
