{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"servers":[{"url":"/vllm"}],"paths":{"/health":{"get":{"summary":"Health","description":"Health check.","operationId":"health_health_get","responses":{"200":{"description":"Successful Response"}}}},"/load":{"get":{"summary":"Get Server Load Metrics","operationId":"get_server_load_metrics_load_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/ping":{"get":{"summary":"Ping","description":"Ping check. Endpoint required for SageMaker","operationId":"ping_ping_get","responses":{"200":{"description":"Successful Response"}}},"post":{"summary":"Ping","description":"Ping check. Endpoint required for SageMaker","operationId":"ping_ping_post","responses":{"200":{"description":"Successful Response"}}}},"/tokenize":{"post":{"summary":"Tokenize","operationId":"tokenize_tokenize_post","requestBody":{"content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/TokenizeCompletionRequest"},{"$ref":"#/components/schemas/TokenizeChatRequest"}],"title":"Request"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"501":{"description":"Not Implemented","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/detokenize":{"post":{"summary":"Detokenize","operationId":"detokenize_detokenize_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/DetokenizeRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models":{"get":{"summary":"Show Available Models","operationId":"show_available_models_v1_models_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/version":{"get":{"summary":"Show Version","operationId":"show_version_version_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v1/chat/completions":{"post":{"summary":"Create Chat Completion","operationId":"create_chat_completion_v1_chat_completions_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}},"text/event-stream":{}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not 
Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/completions":{"post":{"summary":"Create Completion","operationId":"create_completion_v1_completions_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompletionRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}},"text/event-stream":{}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Not Found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/embeddings":{"post":{"summary":"Create Embedding","operationId":"create_embedding_v1_embeddings_post","requestBody":{"content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/EmbeddingCompletionRequest"},{"$ref":"#/components/schemas/EmbeddingChatRequest"}],"title":"Request"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/pooling":{"post":{"summary":"Create Pooling","operationId":"create_pooling_pooling_post","requestBody":{"content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/EmbeddingCompletionRequest"},{"$ref":"#/components/schemas/EmbeddingChatRequest"}],"title":"Request"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":"Create Classify","operationId":"create_classify_classify_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassificationRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/score":{"post":{"summary":"Create Score","operationId":"create_score_score_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ScoreRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/score":{"post":{"summary":"Create Score V1","operationId":"create_score_v1_v1_score_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ScoreRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/transcriptions":{"post":{"summary":"Create Transcriptions","operationId":"create_transcriptions_v1_audio_transcriptions_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/TranscriptionRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}},"text/event-stream":{}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Unprocessable Entity","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/v1/audio/translations":{"post":{"summary":"Create Translations","operationId":"create_translations_v1_audio_translations_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/TranslationRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}},"text/event-stream":{}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Unprocessable Entity","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/rerank":{"post":{"summary":"Do Rerank","operationId":"do_rerank_rerank_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/rerank":{"post":{"summary":"Do Rerank V1","operationId":"do_rerank_v1_v1_rerank_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/rerank":{"post":{"summary":"Do Rerank 
V2","operationId":"do_rerank_v2_v2_rerank_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/server_info":{"get":{"summary":"Show Server Info","operationId":"show_server_info_server_info_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/reset_prefix_cache":{"post":{"summary":"Reset Prefix Cache","description":"Reset the prefix cache. Note that we currently do not check if the\nprefix cache is successfully reset in the API server.","operationId":"reset_prefix_cache_reset_prefix_cache_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/sleep":{"post":{"summary":"Sleep","operationId":"sleep_sleep_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/wake_up":{"post":{"summary":"Wake Up","operationId":"wake_up_wake_up_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/is_sleeping":{"get":{"summary":"Is Sleeping","operationId":"is_sleeping_is_sleeping_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/invocations":{"post":{"summary":"Invocations","description":"For SageMaker, routes requests to other handlers based on model 
`task`.","operationId":"invocations_invocations_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"400":{"description":"Bad Request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"415":{"description":"Unsupported Media Type","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Internal Server Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"text/plain; version=0.0.4; charset=utf-8":{"schema":{"type":"string"}}}}}}}},"components":{"schemas":{"Audio":{"properties":{"id":{"type":"string","title":"Id"}},"type":"object","required":["id"],"title":"Audio"},"AudioURL":{"properties":{"url":{"type":"string","title":"Url"}},"type":"object","required":["url"],"title":"AudioURL"},"BaseModel":{"properties":{},"type":"object","title":"BaseModel"},"ChatCompletionAssistantMessageParam":{"properties":{"role":{"type":"string","const":"assistant","title":"Role"},"audio":{"anyOf":[{"$ref":"#/components/schemas/Audio"},{"type":"null"}]},"content":{"anyOf":[{"type":"string"},{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartRefusalParam"}]},"type":"array"},{"type":"null"}],"title":"Content"},"function_call":{"anyOf":[{"$ref":"#/components/schemas/FunctionCall"},{"type":"null"}]},"name":{"type":"string","title":"Name"},"refusal":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Refusal"},"tool_calls":{"items":{"$ref":"#/components/schemas/ChatCompletionMessageToolCallParam"},"type":"array","title":"Tool 
Calls"}},"type":"object","required":["role"],"title":"ChatCompletionAssistantMessageParam"},"ChatCompletionContentPartAudioParam":{"properties":{"audio_url":{"$ref":"#/components/schemas/AudioURL"},"type":{"type":"string","const":"audio_url","title":"Type"}},"type":"object","required":["audio_url","type"],"title":"ChatCompletionContentPartAudioParam"},"ChatCompletionContentPartImageEmbedsParam":{"properties":{"image_embeds":{"anyOf":[{"type":"string"},{"additionalProperties":{"type":"string"},"type":"object"}],"title":"Image Embeds"},"type":{"type":"string","const":"image_embeds","title":"Type"}},"type":"object","required":["image_embeds","type"],"title":"ChatCompletionContentPartImageEmbedsParam"},"ChatCompletionContentPartImageParam":{"properties":{"image_url":{"$ref":"#/components/schemas/ImageURL"},"type":{"type":"string","const":"image_url","title":"Type"}},"type":"object","required":["image_url","type"],"title":"ChatCompletionContentPartImageParam"},"ChatCompletionContentPartInputAudioParam":{"properties":{"input_audio":{"$ref":"#/components/schemas/InputAudio"},"type":{"type":"string","const":"input_audio","title":"Type"}},"type":"object","required":["input_audio","type"],"title":"ChatCompletionContentPartInputAudioParam"},"ChatCompletionContentPartRefusalParam":{"properties":{"refusal":{"type":"string","title":"Refusal"},"type":{"type":"string","const":"refusal","title":"Type"}},"type":"object","required":["refusal","type"],"title":"ChatCompletionContentPartRefusalParam"},"ChatCompletionContentPartTextParam":{"properties":{"text":{"type":"string","title":"Text"},"type":{"type":"string","const":"text","title":"Type"}},"type":"object","required":["text","type"],"title":"ChatCompletionContentPartTextParam"},"ChatCompletionContentPartVideoParam":{"properties":{"video_url":{"$ref":"#/components/schemas/VideoURL"},"type":{"type":"string","const":"video_url","title":"Type"}},"type":"object","required":["video_url","type"],"title":"ChatCompletionContentPartVideoPara
m"},"ChatCompletionDeveloperMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},"type":"array"}],"title":"Content"},"role":{"type":"string","const":"developer","title":"Role"},"name":{"type":"string","title":"Name"}},"type":"object","required":["content","role"],"title":"ChatCompletionDeveloperMessageParam"},"ChatCompletionFunctionMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"type":"string","title":"Name"},"role":{"type":"string","const":"function","title":"Role"}},"type":"object","required":["content","name","role"],"title":"ChatCompletionFunctionMessageParam"},"ChatCompletionMessageToolCallParam":{"properties":{"id":{"type":"string","title":"Id"},"function":{"$ref":"#/components/schemas/Function"},"type":{"type":"string","const":"function","title":"Type"}},"type":"object","required":["id","function","type"],"title":"ChatCompletionMessageToolCallParam"},"ChatCompletionNamedFunction":{"properties":{"name":{"type":"string","title":"Name"}},"additionalProperties":true,"type":"object","required":["name"],"title":"ChatCompletionNamedFunction"},"ChatCompletionNamedToolChoiceParam":{"properties":{"function":{"$ref":"#/components/schemas/ChatCompletionNamedFunction"},"type":{"type":"string","const":"function","title":"Type","default":"function"}},"additionalProperties":true,"type":"object","required":["function"],"title":"ChatCompletionNamedToolChoiceParam"},"ChatCompletionRequest":{"properties":{"messages":{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionDeveloperMessageParam"},{"$ref":"#/components/schemas/ChatCompletionSystemMessageParam"},{"$ref":"#/components/schemas/ChatCompletionUserMessageParam"},{"$ref":"#/components/schemas/ChatCompletionAssistantMessageParam"},{"$ref":"#/components/schemas/ChatCompletionToolMessageParam"},{"$ref":"#/components/schemas/ChatCompletionFunctionMessageParam"},{"$ref
":"#/components/schemas/CustomChatCompletionMessageParam"}]},"type":"array","title":"Messages"},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"frequency_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Frequency Penalty","default":0.0},"logit_bias":{"anyOf":[{"additionalProperties":{"type":"number"},"type":"object"},{"type":"null"}],"title":"Logit Bias"},"logprobs":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Logprobs","default":false},"top_logprobs":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Top Logprobs","default":0},"max_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Tokens","deprecated":true},"max_completion_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Completion Tokens"},"n":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"N","default":1},"presence_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Presence Penalty","default":0.0},"response_format":{"anyOf":[{"$ref":"#/components/schemas/ResponseFormat"},{"$ref":"#/components/schemas/StructuralTagResponseFormat"},{"type":"null"}],"title":"Response Format"},"seed":{"anyOf":[{"type":"integer","maximum":9.223372036854776e+18,"minimum":-9.223372036854776e+18},{"type":"null"}],"title":"Seed"},"stop":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Stop","default":[]},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"stream_options":{"anyOf":[{"$ref":"#/components/schemas/StreamOptions"},{"type":"null"}]},"temperature":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Temperature"},"top_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Top 
P"},"tools":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChatCompletionToolsParam"},"type":"array"},{"type":"null"}],"title":"Tools"},"tool_choice":{"anyOf":[{"type":"string","const":"none"},{"type":"string","const":"auto"},{"type":"string","const":"required"},{"$ref":"#/components/schemas/ChatCompletionNamedToolChoiceParam"},{"type":"null"}],"title":"Tool Choice","default":"none"},"parallel_tool_calls":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Parallel Tool Calls","default":false},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"best_of":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Best Of"},"use_beam_search":{"type":"boolean","title":"Use Beam Search","default":false},"top_k":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Top K"},"min_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Min P"},"repetition_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Repetition Penalty"},"length_penalty":{"type":"number","title":"Length Penalty","default":1.0},"stop_token_ids":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Stop Token Ids","default":[]},"include_stop_str_in_output":{"type":"boolean","title":"Include Stop Str In Output","default":false},"ignore_eos":{"type":"boolean","title":"Ignore Eos","default":false},"min_tokens":{"type":"integer","title":"Min Tokens","default":0},"skip_special_tokens":{"type":"boolean","title":"Skip Special Tokens","default":true},"spaces_between_special_tokens":{"type":"boolean","title":"Spaces Between Special Tokens","default":true},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"prompt_logprobs":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Prompt Logprobs"},"allowed_token_ids":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Allowed Token Ids"},"bad_words":{"items":{"type":"string"},"type":"array","title":"Bad 
Words"},"echo":{"type":"boolean","title":"Echo","description":"If true, the new message will be prepended with the last message if they belong to the same role.","default":false},"add_generation_prompt":{"type":"boolean","title":"Add Generation Prompt","description":"If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.","default":true},"continue_final_message":{"type":"boolean","title":"Continue Final Message","description":"If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to \"prefill\" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.","default":false},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).","default":false},"documents":{"anyOf":[{"items":{"additionalProperties":{"type":"string"},"type":"object"},"type":"array"},{"type":"null"}],"title":"Documents","description":"A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing \"title\" and \"text\" keys."},"chat_template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Chat Template","description":"A Jinja template to use for this conversion. 
As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one."},"chat_template_kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Chat Template Kwargs","description":"Additional keyword args to pass to the template renderer. Will be accessible by the chat template."},"mm_processor_kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Mm Processor Kwargs","description":"Additional kwargs to pass to the HF processor."},"guided_json":{"anyOf":[{"type":"string"},{"additionalProperties":true,"type":"object"},{"$ref":"#/components/schemas/BaseModel"},{"type":"null"}],"title":"Guided Json","description":"If specified, the output will follow the JSON schema."},"guided_regex":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Regex","description":"If specified, the output will follow the regex pattern."},"guided_choice":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Guided Choice","description":"If specified, the output will be exactly one of the choices."},"guided_grammar":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Grammar","description":"If specified, the output will follow the context free grammar."},"structural_tag":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Structural Tag","description":"If specified, the output will follow the structural tag schema."},"guided_decoding_backend":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Decoding Backend","description":"If specified, will override the default guided decoding backend of the server for this specific request. 
If set, must be either 'outlines' / 'lm-format-enforcer'"},"guided_whitespace_pattern":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Whitespace Pattern","description":"If specified, will override the default whitespace pattern for guided json decoding."},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0},"request_id":{"type":"string","title":"Request Id","description":"The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used throughout the inference process and returned in the response."},"logits_processors":{"anyOf":[{"items":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/LogitsProcessorConstructor"}]},"type":"array"},{"type":"null"}],"title":"Logits Processors","description":"A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}."},"return_tokens_as_token_ids":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Return Tokens As Token Ids","description":"If specified with 'logprobs', tokens are represented as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified."},"cache_salt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cache Salt","description":"If specified, the prefix cache will be salted with the provided string to prevent an attacker from guessing prompts in multi-user environments. 
The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit). Not supported by vLLM engine V0."},"kv_transfer_params":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Kv Transfer Params","description":"KVTransfer parameters used for disaggregated serving."},"vllm_xargs":{"anyOf":[{"additionalProperties":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"}]},"type":"object"},{"type":"null"}],"title":"Vllm Xargs","description":"Additional request parameters with string or numeric values, used by custom extensions."}},"additionalProperties":true,"type":"object","required":["messages"],"title":"ChatCompletionRequest"},"ChatCompletionSystemMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},"type":"array"}],"title":"Content"},"role":{"type":"string","const":"system","title":"Role"},"name":{"type":"string","title":"Name"}},"type":"object","required":["content","role"],"title":"ChatCompletionSystemMessageParam"},"ChatCompletionToolMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},"type":"array"}],"title":"Content"},"role":{"type":"string","const":"tool","title":"Role"},"tool_call_id":{"type":"string","title":"Tool Call 
Id"}},"type":"object","required":["content","role","tool_call_id"],"title":"ChatCompletionToolMessageParam"},"ChatCompletionToolsParam":{"properties":{"type":{"type":"string","const":"function","title":"Type","default":"function"},"function":{"$ref":"#/components/schemas/FunctionDefinition"}},"additionalProperties":true,"type":"object","required":["function"],"title":"ChatCompletionToolsParam"},"ChatCompletionUserMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartImageParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartInputAudioParam"},{"$ref":"#/components/schemas/File"}]},"type":"array"}],"title":"Content"},"role":{"type":"string","const":"user","title":"Role"},"name":{"type":"string","title":"Name"}},"type":"object","required":["content","role"],"title":"ChatCompletionUserMessageParam"},"ClassificationRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"input":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Input"},"truncate_prompt_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Truncate Prompt Tokens"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"additional_data":{"anyOf":[{},{"type":"null"}],"title":"Additional Data"},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). 
Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0}},"additionalProperties":true,"type":"object","required":["input"],"title":"ClassificationRequest"},"CompletionRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"prompt":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"items":{"items":{"type":"integer"},"type":"array"},"type":"array"},{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Prompt"},"prompt_embeds":{"anyOf":[{"type":"string","format":"binary"},{"items":{"type":"string","format":"binary"},"type":"array"},{"type":"null"}],"title":"Prompt Embeds"},"best_of":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Best Of"},"echo":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Echo","default":false},"frequency_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Frequency Penalty","default":0.0},"logit_bias":{"anyOf":[{"additionalProperties":{"type":"number"},"type":"object"},{"type":"null"}],"title":"Logit Bias"},"logprobs":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Logprobs"},"max_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Tokens","default":16},"n":{"type":"integer","title":"N","default":1},"presence_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Presence 
Penalty","default":0.0},"seed":{"anyOf":[{"type":"integer","maximum":9.223372036854776e+18,"minimum":-9.223372036854776e+18},{"type":"null"}],"title":"Seed"},"stop":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Stop","default":[]},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"stream_options":{"anyOf":[{"$ref":"#/components/schemas/StreamOptions"},{"type":"null"}]},"suffix":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Suffix"},"temperature":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Temperature"},"top_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Top P"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"use_beam_search":{"type":"boolean","title":"Use Beam Search","default":false},"top_k":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Top K"},"min_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Min P"},"repetition_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Repetition Penalty"},"length_penalty":{"type":"number","title":"Length Penalty","default":1.0},"stop_token_ids":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Stop Token Ids","default":[]},"include_stop_str_in_output":{"type":"boolean","title":"Include Stop Str In Output","default":false},"ignore_eos":{"type":"boolean","title":"Ignore Eos","default":false},"min_tokens":{"type":"integer","title":"Min Tokens","default":0},"skip_special_tokens":{"type":"boolean","title":"Skip Special Tokens","default":true},"spaces_between_special_tokens":{"type":"boolean","title":"Spaces Between Special Tokens","default":true},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"allowed_token_ids":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Allowed Token 
Ids"},"prompt_logprobs":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Prompt Logprobs"},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true (the default), special tokens (e.g. BOS) will be added to the prompt.","default":true},"response_format":{"anyOf":[{"$ref":"#/components/schemas/ResponseFormat"},{"$ref":"#/components/schemas/StructuralTagResponseFormat"},{"type":"null"}],"title":"Response Format","description":"Similar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'}, {'type': 'json_schema'}, {'type': 'structural_tag'}, or {'type': 'text'} is supported."},"guided_json":{"anyOf":[{"type":"string"},{"additionalProperties":true,"type":"object"},{"$ref":"#/components/schemas/BaseModel"},{"type":"null"}],"title":"Guided Json","description":"If specified, the output will follow the JSON schema."},"guided_regex":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Regex","description":"If specified, the output will follow the regex pattern."},"guided_choice":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Guided Choice","description":"If specified, the output will be exactly one of the choices."},"guided_grammar":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Grammar","description":"If specified, the output will follow the context free grammar."},"guided_decoding_backend":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Decoding Backend","description":"If specified, will override the default guided decoding backend of the server for this specific request. 
If set, must be one of 'outlines' / 'lm-format-enforcer'"},"guided_whitespace_pattern":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Whitespace Pattern","description":"If specified, will override the default whitespace pattern for guided json decoding."},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0},"logits_processors":{"anyOf":[{"items":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/LogitsProcessorConstructor"}]},"type":"array"},{"type":"null"}],"title":"Logits Processors","description":"A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. 
For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}."},"return_tokens_as_token_ids":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Return Tokens As Token Ids","description":"If specified with 'logprobs', tokens are represented as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified."},"kv_transfer_params":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Kv Transfer Params","description":"KVTransfer parameters used for disaggregated serving."},"vllm_xargs":{"anyOf":[{"additionalProperties":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"}]},"type":"object"},{"type":"null"}],"title":"Vllm Xargs","description":"Additional request parameters with string or numeric values, used by custom extensions."}},"additionalProperties":true,"type":"object","title":"CompletionRequest"},"CustomChatCompletionContentSimpleAudioParam":{"properties":{"audio_url":{"type":"string","title":"Audio Url"}},"type":"object","required":["audio_url"],"title":"CustomChatCompletionContentSimpleAudioParam","description":"A simpler version of the param that only accepts a plain audio_url.\n\nExample:\n{\n    \"audio_url\": \"https://example.com/audio.mp3\"\n}"},"CustomChatCompletionContentSimpleImageParam":{"properties":{"image_url":{"type":"string","title":"Image Url"}},"type":"object","required":["image_url"],"title":"CustomChatCompletionContentSimpleImageParam","description":"A simpler version of the param that only accepts a plain image_url.\nThis is supported by OpenAI API, although it is not documented.\n\nExample:\n{\n    \"image_url\": \"https://example.com/image.jpg\"\n}"},"CustomChatCompletionContentSimpleVideoParam":{"properties":{"video_url":{"type":"string","title":"Video Url"}},"type":"object","required":["video_url"],"title":"CustomChatCompletionContentSimpleVideoParam","description":"A simpler version of the param that only 
accepts a plain video_url.\n\nExample:\n{\n    \"video_url\": \"https://example.com/video.mp4\"\n}"},"CustomChatCompletionMessageParam":{"properties":{"role":{"type":"string","title":"Role"},"content":{"anyOf":[{"type":"string"},{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartImageParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartInputAudioParam"},{"$ref":"#/components/schemas/File"},{"$ref":"#/components/schemas/ChatCompletionContentPartAudioParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartVideoParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartRefusalParam"},{"$ref":"#/components/schemas/CustomChatCompletionContentSimpleImageParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartImageEmbedsParam"},{"$ref":"#/components/schemas/CustomChatCompletionContentSimpleAudioParam"},{"$ref":"#/components/schemas/CustomChatCompletionContentSimpleVideoParam"},{"type":"string"}]},"type":"array"}],"title":"Content"},"name":{"type":"string","title":"Name"},"tool_call_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Tool Call Id"},"tool_calls":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChatCompletionMessageToolCallParam"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"CustomChatCompletionMessageParam","description":"Enables custom roles in the Chat Completion 
API."},"DetokenizeRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"tokens":{"items":{"type":"integer"},"type":"array","title":"Tokens"}},"additionalProperties":true,"type":"object","required":["tokens"],"title":"DetokenizeRequest"},"EmbeddingChatRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"messages":{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionDeveloperMessageParam"},{"$ref":"#/components/schemas/ChatCompletionSystemMessageParam"},{"$ref":"#/components/schemas/ChatCompletionUserMessageParam"},{"$ref":"#/components/schemas/ChatCompletionAssistantMessageParam"},{"$ref":"#/components/schemas/ChatCompletionToolMessageParam"},{"$ref":"#/components/schemas/ChatCompletionFunctionMessageParam"},{"$ref":"#/components/schemas/CustomChatCompletionMessageParam"}]},"type":"array","title":"Messages"},"encoding_format":{"type":"string","enum":["float","base64"],"title":"Encoding Format","default":"float"},"dimensions":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Dimensions"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":-1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"additional_data":{"anyOf":[{},{"type":"null"}],"title":"Additional Data"},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).","default":false},"chat_template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Chat Template","description":"A Jinja template to use for this conversion. 
As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one."},"chat_template_kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Chat Template Kwargs","description":"Additional keyword args to pass to the template renderer. Will be accessible by the chat template."},"mm_processor_kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Mm Processor Kwargs","description":"Additional kwargs to pass to the HF processor."},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0}},"additionalProperties":true,"type":"object","required":["messages"],"title":"EmbeddingChatRequest"},"EmbeddingCompletionRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"input":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"items":{"items":{"type":"integer"},"type":"array"},"type":"array"},{"type":"string"},{"items":{"type":"string"},"type":"array"}],"title":"Input"},"encoding_format":{"type":"string","enum":["float","base64"],"title":"Encoding Format","default":"float"},"dimensions":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Dimensions"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":-1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"additional_data":{"anyOf":[{},{"type":"null"}],"title":"Additional Data"},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true (the default), special tokens (e.g. 
BOS) will be added to the prompt.","default":true},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0}},"additionalProperties":true,"type":"object","required":["input"],"title":"EmbeddingCompletionRequest"},"ErrorResponse":{"properties":{"object":{"type":"string","title":"Object","default":"error"},"message":{"type":"string","title":"Message"},"type":{"type":"string","title":"Type"},"param":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Param"},"code":{"type":"integer","title":"Code"}},"additionalProperties":true,"type":"object","required":["message","type","code"],"title":"ErrorResponse"},"File":{"properties":{"file":{"$ref":"#/components/schemas/FileFile"},"type":{"type":"string","const":"file","title":"Type"}},"type":"object","required":["file","type"],"title":"File"},"FileFile":{"properties":{"file_data":{"type":"string","title":"File Data"},"file_id":{"type":"string","title":"File 
Id"},"filename":{"type":"string","title":"Filename"}},"type":"object","title":"FileFile"},"Function":{"properties":{"arguments":{"type":"string","title":"Arguments"},"name":{"type":"string","title":"Name"}},"type":"object","required":["arguments","name"],"title":"Function"},"FunctionCall":{"properties":{"arguments":{"type":"string","title":"Arguments"},"name":{"type":"string","title":"Name"}},"type":"object","required":["arguments","name"],"title":"FunctionCall"},"FunctionDefinition":{"properties":{"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"parameters":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Parameters"}},"additionalProperties":true,"type":"object","required":["name"],"title":"FunctionDefinition"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageURL":{"properties":{"url":{"type":"string","title":"Url"},"detail":{"type":"string","enum":["auto","low","high"],"title":"Detail"}},"type":"object","required":["url"],"title":"ImageURL"},"InputAudio":{"properties":{"data":{"type":"string","title":"Data"},"format":{"type":"string","enum":["wav","mp3"],"title":"Format"}},"type":"object","required":["data","format"],"title":"InputAudio"},"JsonSchemaResponseFormat":{"properties":{"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"schema":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Schema"},"strict":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Strict"}},"additionalProperties":true,"type":"object","required":["name"],"title":"JsonSchemaResponseFormat"},"LogitsProcessorConstructor":{"properties":{"qualname":{"type":"string","title":"Qualname"},"args":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}],"
title":"Args"},"kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Kwargs"}},"additionalProperties":false,"type":"object","required":["qualname"],"title":"LogitsProcessorConstructor"},"RerankRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"query":{"type":"string","title":"Query"},"documents":{"items":{"type":"string"},"type":"array","title":"Documents"},"top_n":{"type":"integer","title":"Top N"},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":-1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"additional_data":{"anyOf":[{},{"type":"null"}],"title":"Additional Data"},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0}},"additionalProperties":true,"type":"object","required":["query","documents"],"title":"RerankRequest"},"ResponseFormat":{"properties":{"type":{"type":"string","enum":["text","json_object","json_schema"],"title":"Type"},"json_schema":{"anyOf":[{"$ref":"#/components/schemas/JsonSchemaResponseFormat"},{"type":"null"}]}},"additionalProperties":true,"type":"object","required":["type"],"title":"ResponseFormat"},"ScoreRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"text_1":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Text 1"},"text_2":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Text 2"},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":-1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"additional_data":{"anyOf":[{},{"type":"null"}],"title":"Additional Data"},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). 
Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0}},"additionalProperties":true,"type":"object","required":["text_1","text_2"],"title":"ScoreRequest"},"StreamOptions":{"properties":{"include_usage":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Include Usage","default":true},"continuous_usage_stats":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Continuous Usage Stats","default":false}},"additionalProperties":true,"type":"object","title":"StreamOptions"},"StructuralTag":{"properties":{"begin":{"type":"string","title":"Begin"},"schema":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Schema"},"end":{"type":"string","title":"End"}},"additionalProperties":true,"type":"object","required":["begin","end"],"title":"StructuralTag"},"StructuralTagResponseFormat":{"properties":{"type":{"type":"string","const":"structural_tag","title":"Type"},"structures":{"items":{"$ref":"#/components/schemas/StructuralTag"},"type":"array","title":"Structures"},"triggers":{"items":{"type":"string"},"type":"array","title":"Triggers"}},"additionalProperties":true,"type":"object","required":["type","structures","triggers"],"title":"StructuralTagResponseFormat"},"TokenizeChatRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"messages":{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionDeveloperMessageParam"},{"$ref":"#/components/schemas/ChatCompletionSystemMessageParam"},{"$ref":"#/components/schemas/ChatCompletionUserMessageParam"},{"$ref":"#/components/schemas/ChatCompletionAssistantMessageParam"},{"$ref":"#/components/schemas/ChatCompletionToolMessageParam"},{"$ref":"#/components/schemas/ChatCompletionFunctionMessageParam"},{"$ref":"#/components/schemas/CustomChatCompletionMessageParam"}]},"type":"array","title":"Messages"},"add_generation_prompt":{"type":"boolean","title":"Add Generation Prompt","description":"If true, the 
generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.","default":true},"return_token_strs":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Return Token Strs","description":"If true, also return the token strings corresponding to the token ids.","default":false},"continue_final_message":{"type":"boolean","title":"Continue Final Message","description":"If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to \"prefill\" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.","default":false},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).","default":false},"chat_template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Chat Template","description":"A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one."},"chat_template_kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Chat Template Kwargs","description":"Additional keyword args to pass to the template renderer. 
Will be accessible by the chat template."},"mm_processor_kwargs":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Mm Processor Kwargs","description":"Additional kwargs to pass to the HF processor."},"tools":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChatCompletionToolsParam"},"type":"array"},{"type":"null"}],"title":"Tools","description":"A list of tools the model may call."}},"additionalProperties":true,"type":"object","required":["messages"],"title":"TokenizeChatRequest"},"TokenizeCompletionRequest":{"properties":{"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"prompt":{"type":"string","title":"Prompt"},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true (the default), special tokens (e.g. BOS) will be added to the prompt.","default":true},"return_token_strs":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Return Token Strs","description":"If true, also return the token strings corresponding to the token ids.","default":false}},"additionalProperties":true,"type":"object","required":["prompt"],"title":"TokenizeCompletionRequest"},"TranscriptionRequest":{"properties":{"file":{"type":"string","format":"binary","title":"File"},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"language":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Language"},"prompt":{"type":"string","title":"Prompt","default":""},"response_format":{"type":"string","enum":["json","text","srt","verbose_json","vtt"],"title":"Response Format","default":"json"},"timestamp_granularities[]":{"items":{"type":"string","enum":["word","segment"]},"type":"array","title":"Timestamp Granularities[]","default":[]},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"stream_include_usage":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream Include 
Usage","default":false},"stream_continuous_usage_stats":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream Continuous Usage Stats","default":false},"vllm_xargs":{"anyOf":[{"additionalProperties":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"}]},"type":"object"},{"type":"null"}],"title":"Vllm Xargs","description":"Additional request parameters with string or numeric values, used by custom extensions."},"temperature":{"type":"number","title":"Temperature","default":0.0},"top_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Top P"},"top_k":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Top K"},"min_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Min P"},"seed":{"anyOf":[{"type":"integer","maximum":9.223372036854776e+18,"minimum":-9.223372036854776e+18},{"type":"null"}],"title":"Seed"},"frequency_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Frequency Penalty","default":0.0},"repetition_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Repetition Penalty"},"presence_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Presence Penalty","default":0.0}},"additionalProperties":true,"type":"object","required":["file"],"title":"TranscriptionRequest"},"TranslationRequest":{"properties":{"file":{"type":"string","format":"binary","title":"File"},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"prompt":{"type":"string","title":"Prompt","default":""},"response_format":{"type":"string","enum":["json","text","srt","verbose_json","vtt"],"title":"Response Format","default":"json"},"temperature":{"type":"number","title":"Temperature","default":0.0},"language":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Language"},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"stream_include_usage":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream Include 
Usage","default":false},"stream_continuous_usage_stats":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream Continuous Usage Stats","default":false}},"additionalProperties":true,"type":"object","required":["file"],"title":"TranslationRequest"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VideoURL":{"properties":{"url":{"type":"string","title":"Url"}},"type":"object","required":["url"],"title":"VideoURL"}}}}