{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "BM25 Embedding Function Schema",
    "description": "Schema for the BM25 sparse embedding function configuration",
    "version": "1.0.0",
    "type": "object",
    "properties": {
        "task": {
            "type": "string",
            "enum": [
                "document",
                "query"
            ],
            "description": "Task to perform: either 'document' or 'query'"
        },
        "query_config": {
            "type": "object",
            "description": "Configuration applied when embedding queries",
            "properties": {
                "task": {
                    "type": "string",
                    "enum": [
                        "document",
                        "query"
                    ],
                    "description": "Task to perform for query embedding"
                }
            },
            "additionalProperties": false
        },
        "cache_dir": {
            "type": [
                "string",
                "null"
            ],
            "description": "The path to the cache directory"
        },
        "k": {
            "type": [
                "number",
                "null"
            ],
            "description": "The k parameter in the BM25 formula; controls the saturation of the term frequency"
        },
        "b": {
            "type": [
                "number",
                "null"
            ],
            "description": "The b parameter in the BM25 formula; controls how strongly document length affects the score"
        },
        "avg_len": {
            "type": [
                "number",
                "null"
            ],
            "description": "The average length of the documents in the corpus"
        },
        "language": {
            "type": [
                "string",
                "null"
            ],
            "description": "Specifies the language for the stemmer"
        },
        "token_max_length": {
            "type": [
                "integer",
                "null"
            ],
            "description": "The maximum length of the tokens"
        },
        "disable_stemmer": {
            "type": [
                "boolean",
                "null"
            ],
            "description": "Whether to disable the stemmer"
        },
        "specific_model_path": {
            "type": [
                "string",
                "null"
            ],
            "description": "The path to the specific model"
        },
        "kwargs": {
            "type": "object",
            "description": "Additional arguments to pass to the BM25 model",
            "additionalProperties": {
                "type": [
                    "string",
                    "integer",
                    "number",
                    "boolean",
                    "array",
                    "object"
                ]
            }
        }
    },
    "additionalProperties": false
}
