// Package binary provides utilities for detecting binary content in data.
package binary

import (
	"bytes"
	"path/filepath"
	"strings"
)

// binarySearchLimit caps how many leading bytes of a blob are inspected for a
// NUL byte: 8 KiB, modeled on git's text/binary check.
const binarySearchLimit = 8 << 10

// binaryExtensions is the set of file extensions that are treated as binary
// for indexing purposes. While some of these are technically text-based
// (like SVG), they contain data that creates noise in code search results.
//
// Every key MUST be lower-case and include the leading dot: isBinaryExtension
// lower-cases the extension before looking it up, so a key containing
// upper-case letters can never match.
var binaryExtensions = map[string]bool{
	// Images
	".jpg":  true,
	".jpeg": true,
	".png":  true,
	".gif":  true,
	".bmp":  true,
	".ico":  true,
	".svg":  true,
	".webp": true,

	// Media
	".mp3": true,
	".mp4": true,
	".mov": true,
	".avi": true,

	// Fonts
	".woff":  true,
	".woff2": true,
	".ttf":   true,
	".eot":   true,
	".otf":   true,

	// Documents
	".pdf":  true,
	".doc":  true,
	".docx": true,
	".xls":  true,
	".xlsx": true,
	".ppt":  true,
	".pptx": true,

	// Archives
	".zip": true,
	".tar": true,
	".gz":  true,
	".rar": true,
	".7z":  true,

	// Executables and Libraries
	".exe":   true,
	".dll":   true,
	".so":    true,
	".dylib": true,

	// Compiled bytecode and artifacts
	".class": true,
	".jar":   true,
	".pyc":   true,

	// System files
	// macOS ".DS_Store": filepath.Ext returns the whole name as the extension,
	// and it is lower-cased before lookup, hence the lower-case key.
	".ds_store": true,
}

// DetectBinary reports whether a file should be treated as binary (images,
// executables, etc.) and therefore left out of text indexing.
//
// A filename whose extension isBinaryExtension recognises is binary outright;
// data is not inspected in that case. Otherwise the result is true when a NUL
// byte occurs within the first binarySearchLimit bytes of data (or anywhere in
// data when it is shorter than that). Bytes beyond that window are never
// examined. The NUL-byte probe follows the heuristic git uses to tell text
// from binary.
func DetectBinary(filename string, data []byte) bool {
	if isBinaryExtension(filename) {
		return true
	}

	// Restrict the scan to the leading window so large blobs stay cheap.
	head := data
	if len(head) > binarySearchLimit {
		head = head[:binarySearchLimit]
	}

	return bytes.IndexByte(head, 0) >= 0
}

// isBinaryExtension reports whether filename's extension, as returned by
// filepath.Ext and then lower-cased, is registered in binaryExtensions.
// Because the lookup key is always lower-case, matching ignores the case of
// the extension in filename.
func isBinaryExtension(filename string) bool {
	suffix := filepath.Ext(filename)
	return binaryExtensions[strings.ToLower(suffix)]
}
