Create Agent
Create an agent.
Parameters
body: AgentCreateParams { agent_type, base_template_id, block_ids, 44 more }
The type of agent.
Deprecated base_template_id?: string | null
Deprecated: No longer used. The base template id of the agent.
block_ids?: Array<string> | null
The ids of the blocks used by the agent.
Configuration for conversation compaction / summarization.
Per-model settings (temperature, max tokens, etc.) are derived from the default configuration for that handle.
clip_chars?: number | null
The maximum length of the summary in characters. If none, no clipping is performed.
mode?: "all" | "sliding_window" | "self_compact_all" | "self_compact_sliding_window"
The type of summarization technique to use.
model?: string | null
Model handle to use for sliding_window/all summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults.
model_settings?: OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more } | SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more } | AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more } | 14 more | null
Optional model settings used to override defaults for the summarizer model.
OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openai"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more }
SGLang model configuration (OpenAI-compatible runtime with SGLang-specific parsing).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "sglang"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
tool_call_parser?: string | null
SGLang tool call parser name (for example 'glm47', 'qwen25', or 'hermes').
AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "anthropic"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GoogleAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_ai"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
GoogleVertexModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_vertex"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
AzureModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Azure OpenAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "azure"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
XaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
xAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "xai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
MoonshotModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Moonshot/Kimi model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
ZaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Z.ai (ZhipuAI) model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "zai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking?: Thinking { clear_thinking, type }
The thinking configuration for GLM-4.5+ models.
clear_thinking?: boolean
If False, preserved thinking is used (recommended for agents).
type?: "enabled" | "disabled"
Whether thinking is enabled or disabled.
MoonshotCodingModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
Kimi Code model configuration (Anthropic-compatible).
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot_coding"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GroqModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Groq model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "groq"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
DeepseekModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Deepseek model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "deepseek"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
TogetherModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Together AI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "together"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BedrockModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
AWS Bedrock model configuration.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "bedrock"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BasetenModelSettings { max_output_tokens, parallel_tool_calls, provider_type, temperature }
Baseten model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "baseten"
The type of the provider.
temperature?: number
The temperature of the model.
OpenRouterModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
OpenRouter model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openrouter"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
ChatGptoAuthModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
ChatGPT OAuth model configuration (uses ChatGPT backend API).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "chatgpt_oauth"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "low" | "medium" | 2 more
The reasoning effort level for GPT-5.x and o-series models.
temperature?: number
The temperature of the model.
prompt?: string | null
The prompt to use for summarization. If None, uses mode-specific default.
prompt_acknowledgement?: boolean
Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).
sliding_window_percentage?: number
The percentage of the context window to keep post-summarization (only used in sliding window modes).
context_window_limit?: number | null
The context window limit used by the agent.
description?: string | null
The description of the agent.
embedding?: string | null
The embedding model handle used by the agent (format: provider/model-name).
Deprecated embedding_chunk_size?: number | null
Deprecated: No longer used. The embedding chunk size used by the agent.
Deprecated embedding_config?: EmbeddingConfig { embedding_dim, embedding_endpoint_type, embedding_model, 7 more } | null
Configuration for embedding model connection and processing parameters.
embedding_dim: number
The dimension of the embedding.
embedding_endpoint_type: "openai" | "anthropic" | "bedrock" | 16 more
The endpoint type for the model.
embedding_model: string
The model for the embedding.
azure_deployment?: string | null
The Azure deployment for the model.
azure_endpoint?: string | null
The Azure endpoint for the model.
azure_version?: string | null
The Azure version for the model.
batch_size?: number
The maximum batch size for processing embeddings.
embedding_chunk_size?: number | null
The chunk size of the embedding.
embedding_endpoint?: string | null
The endpoint for the model (None if local).
handle?: string | null
The handle for this config, in the format provider/model-name.
Deprecated enable_reasoner?: boolean | null
Deprecated: Use model field to configure reasoning instead. Whether to enable internal extended thinking step for a reasoner model.
enable_sleeptime?: boolean | null
If set to True, memory management will move to a background agent thread.
folder_ids?: Array<string> | null
The ids of the folders used by the agent.
Deprecated from_template?: string | null
Deprecated: please use the 'create agents from a template' endpoint instead.
Deprecated hidden?: boolean | null
Deprecated: No longer used. If set to True, the agent will be hidden.
identity_ids?: Array<string> | null
The ids of the identities associated with this agent.
include_base_tool_rules?: boolean | null
If true, attaches the Letta base tool rules (e.g. deny all tools not explicitly allowed).
include_base_tools?: boolean
If true, attaches the Letta core tools (e.g. core_memory related functions).
Deprecated include_default_source?: boolean
If true, automatically creates and attaches a default data source for this agent.
The initial set of messages to put in the agent's in-context memory.
The content of the message.
Array<LettaMessageContentUnion>
TextContent { text, signature, type }
text: string
The text content of the message.
signature?: string | null
Stores a unique identifier for any reasoning associated with this text content.
type?: "text"
The type of the message.
ImageContent { source, type }
source: URLImage { url, type } | Base64Image { data, media_type, detail, type } | LettaImage { file_id, data, detail, 2 more }
The source of the image.
URLImage { url, type }
url: string
The URL of the image.
type?: "url"
The source type for the image.
Base64Image { data, media_type, detail, type }
data: string
The base64 encoded image data.
media_type: string
The media type for the image.
detail?: string | null
What level of detail to use when processing and understanding the image (low, high, or auto to let the model decide)
type?: "base64"
The source type for the image.
LettaImage { file_id, data, detail, 2 more }
file_id: string
The unique identifier of the image file persisted in storage.
data?: string | null
The base64 encoded image data.
detail?: string | null
What level of detail to use when processing and understanding the image (low, high, or auto to let the model decide)
media_type?: string | null
The media type for the image.
type?: "letta"
The source type for the image.
type?: "image"
The type of the message.
ToolCallContent { id, input, name, 2 more }
id: string
A unique identifier for this specific tool call instance.
input: Record<string, unknown>
The parameters being passed to the tool, structured as a dictionary of parameter names to values.
name: string
The name of the tool being called.
signature?: string | null
Stores a unique identifier for any reasoning associated with this tool call.
type?: "tool_call"
Indicates this content represents a tool call event.
ToolReturnContent { content, is_error, tool_call_id, type }
content: string
The content returned by the tool execution.
is_error: boolean
Indicates whether the tool execution resulted in an error.
tool_call_id: string
References the ID of the ToolCallContent that initiated this tool call.
type?: "tool_return"
Indicates this content represents a tool return event.
ReasoningContent { is_native, reasoning, signature, type }
Sent via the Anthropic Messages API
is_native: boolean
Whether the reasoning content was generated by a reasoner model that processed this step.
reasoning: string
The intermediate reasoning or thought process content.
signature?: string | null
A unique identifier for this reasoning step.
type?: "reasoning"
Indicates this is a reasoning/intermediate step.
RedactedReasoningContent { data, type }
Sent via the Anthropic Messages API
data: string
The redacted or filtered intermediate reasoning content.
type?: "redacted_reasoning"
Indicates this is a redacted thinking step.
OmittedReasoningContent { signature, type }
A placeholder for reasoning content we know is present, but isn't returned by the provider (e.g. OpenAI GPT-5 on ChatCompletions)
signature?: string | null
A unique identifier for this reasoning step.
type?: "omitted_reasoning"
Indicates this is an omitted reasoning step.
role: "user" | "system" | "assistant"
The role of the participant.
batch_item_id?: string | null
The id of the LLMBatchItem that this message is associated with
group_id?: string | null
The multi-agent group that the message was sent in
name?: string | null
The name of the participant.
otid?: string | null
The offline threading id (OTID). Set by the client to deduplicate requests. Used for idempotency in background streaming mode — each message in a request must have a unique OTID. Retries of the same request should reuse the same OTIDs.
sender_id?: string | null
The id of the sender of the message, can be an identity id or agent id
type?: "message" | null
The message type to be created.
Configuration for Language Model (LLM) connection and generation parameters.
.. deprecated:: LLMConfig is deprecated and should not be used as an input or return type in API calls. Use the schemas in letta.schemas.model (ModelSettings, OpenAIModelSettings, etc.) instead. For conversion, use the _to_model() method or Model._from_llm_config() method.
context_window: number
The context window size for the model.
model: string
LLM model name.
model_endpoint_type: "openai" | "anthropic" | "google_ai" | 27 more
The endpoint type for the model.
compatibility_type?: "gguf" | "mlx" | null
The framework compatibility type for the model.
display_name?: string | null
A human-friendly display name for the model.
effort?: "low" | "medium" | "high" | 2 more | null
The effort level for Anthropic models that support it (Opus 4.5+). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.
enable_reasoner?: boolean
Whether or not the model should use extended thinking if it is a 'reasoning' style model
frequency_penalty?: number | null
Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.
handle?: string | null
The handle for this config, in the format provider/model-name.
max_reasoning_tokens?: number
Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.
max_tokens?: number | null
The maximum number of tokens to generate. If not set, the model will use its default value.
model_endpoint?: string | null
The endpoint for the model.
model_wrapper?: string | null
The wrapper for the model.
Deprecated parallel_tool_calls?: boolean | null
Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.
The provider category for the model.
provider_name?: string | null
The provider name for the model.
put_inner_thoughts_in_kwargs?: boolean | null
Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
reasoning_effort?: "none" | "minimal" | "low" | 3 more | null
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
return_logprobs?: boolean
Whether to return log probabilities of the output tokens. Useful for RL training.
return_token_ids?: boolean
Whether to return token IDs for all LLM generations via SGLang native endpoint. Required for multi-turn RL training with loss masking. Only works with SGLang provider.
strict?: boolean
Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.
temperature?: number
The temperature to use when generating text with the model. A higher temperature will result in more random text.
tier?: string | null
The cost tier for the model (cloud only).
tool_call_parser?: string | null
SGLang tool call parser name (e.g. 'glm47', 'qwen25', 'hermes'). Used by the SGLang native adapter to parse tool calls from raw model output.
top_logprobs?: number | null
Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
max_files_open?: number | null
Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent.
Deprecated max_reasoning_tokens?: number | null
Deprecated: Use model field to configure reasoning tokens instead. The maximum number of tokens to generate for reasoning step.
Deprecated max_tokens?: number | null
Deprecated: Use model field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.
The blocks to create in the agent's in-context memory.
label: string
Label of the block.
value: string
Value of the block.
base_template_id?: string | null
The base template id of the block.
deployment_id?: string | null
The id of the deployment.
description?: string | null
Description of the block.
entity_id?: string | null
The id of the entity within the template.
hidden?: boolean | null
If set to True, the block will be hidden.
limit?: number
Character limit of the block.
metadata?: Record<string, unknown> | null
Metadata of the block.
preserve_on_migration?: boolean | null
Preserve the block on template migration.
project_id?: string | null
The associated project id.
read_only?: boolean
Whether the agent has read-only access to the block.
tags?: Array<string> | null
The tags to associate with the block.
template_id?: string | null
The id of the template.
template_name?: string | null
Name of the block if it is a template.
Deprecatedmemory_variables?: Record<string, string> | null
Deprecated: Only relevant for creating agents from a template. Use the 'create agents from a template' endpoint instead.
message_buffer_autoclear?: boolean
If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.
metadata?: Record<string, unknown> | null
The metadata of the agent.
model?: string | null
The model handle for the agent to use (format: provider/model-name).
model_settings?: OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more } | SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more } | AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more } | 14 more | null
The model settings for the agent.
OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openai"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more }
SGLang model configuration (OpenAI-compatible runtime with SGLang-specific parsing).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "sglang"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
tool_call_parser?: string | null
SGLang tool call parser name (for example 'glm47', 'qwen25', or 'hermes').
AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "anthropic"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GoogleAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_ai"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
GoogleVertexModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_vertex"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
AzureModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Azure OpenAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "azure"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
XaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
xAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "xai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
MoonshotModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Moonshot/Kimi model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
ZaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Z.ai (ZhipuAI) model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "zai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking?: Thinking { clear_thinking, type }
The thinking configuration for GLM-4.5+ models.
clear_thinking?: boolean
If False, preserved thinking is used (recommended for agents).
type?: "enabled" | "disabled"
Whether thinking is enabled or disabled.
MoonshotCodingModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
Kimi Code model configuration (Anthropic-compatible).
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot_coding"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GroqModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Groq model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "groq"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
DeepseekModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Deepseek model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "deepseek"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
TogetherModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Together AI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "together"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BedrockModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
AWS Bedrock model configuration.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "bedrock"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BasetenModelSettings { max_output_tokens, parallel_tool_calls, provider_type, temperature }
Baseten model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "baseten"
The type of the provider.
temperature?: number
The temperature of the model.
OpenRouterModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
OpenRouter model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openrouter"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
ChatGptoAuthModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
ChatGPT OAuth model configuration (uses ChatGPT backend API).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "chatgpt_oauth"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "low" | "medium" | 2 more
The reasoning effort level for GPT-5.x and o-series models.
temperature?: number
The temperature of the model.
name?: string
The name of the agent.
Deprecatedparallel_tool_calls?: boolean | null
Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling.
per_file_view_window_char_limit?: number | null
The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent.
Deprecatedproject?: string | null
Deprecated: Project should now be passed via the X-Project header instead of in the request body. If using the SDK, this can be done via the x_project parameter.
Deprecatedproject_id?: string | null
Deprecated: No longer used. The id of the project the agent belongs to.
Deprecatedreasoning?: boolean | null
Deprecated: Use model field to configure reasoning instead. Whether to enable reasoning for this agent.
Deprecatedresponse_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
Deprecated: Use model_settings field to configure response format instead. The response format for the agent.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
secrets?: Record<string, string> | null
The environment variables for tool execution specific to this agent.
Deprecatedsource_ids?: Array<string> | null
Deprecated: Use folder_ids field instead. The ids of the sources used by the agent.
system?: string | null
The system prompt used by the agent.
tags?: Array<string> | null
The tags associated with the agent.
Deprecatedtemplate?: boolean
Deprecated: No longer used.
Deprecatedtemplate_id?: string | null
Deprecated: No longer used. The id of the template the agent belongs to.
timezone?: string | null
The timezone of the agent (IANA format).
Deprecatedtool_exec_environment_variables?: Record<string, string> | null
Deprecated: Use secrets field instead. Environment variables for tool execution.
tool_ids?: Array<string> | null
The ids of the tools used by the agent.
tool_rules?: Array<ChildToolRule { children, tool_name, child_arg_nodes, 2 more } | InitToolRule { tool_name, args, prompt_template, type } | TerminalToolRule { tool_name, prompt_template, type } | 6 more> | null
The tool rules governing the agent.
ChildToolRule { children, tool_name, child_arg_nodes, 2 more }
A ToolRule represents a tool that can be invoked by the agent.
children: Array<string>
The children tools that can be invoked.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
child_arg_nodes?: Array<ChildArgNode> | null
Optional list of typed child argument overrides. Each node must reference a child in 'children'.
name: string
The name of the child tool to invoke next.
args?: Record<string, unknown> | null
Optional prefilled arguments for this child tool. Keys must match the tool's parameter names and values must satisfy the tool's JSON schema. Supports partial prefill; non-overlapping parameters are left to the model.
prompt_template?: string | null
Optional template string (ignored).
InitToolRule { tool_name, args, prompt_template, type }
Represents the initial tool rule configuration.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
args?: Record<string, unknown> | null
Optional prefilled arguments for this tool. When present, these values will override any LLM-provided arguments with the same keys during invocation. Keys must match the tool's parameter names and values must satisfy the tool's JSON schema. Supports partial prefill; non-overlapping parameters are left to the model.
prompt_template?: string | null
Optional template string (ignored). Rendering uses fast built-in formatting for performance.
TerminalToolRule { tool_name, prompt_template, type }
Represents a terminal tool rule configuration where if this tool gets called, it must end the agent loop.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
ConditionalToolRule { child_output_mapping, tool_name, default_child, 3 more }
A ToolRule that conditionally maps to different child tools based on the output.
child_output_mapping: Record<string, string>
Mapping from tool output values to the name of the child tool to invoke for each case.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
default_child?: string | null
The default child tool to be called. If None, any tool can be called.
prompt_template?: string | null
Optional template string (ignored).
require_output_mapping?: boolean
Whether to throw an error when the output does not match any case.
ContinueToolRule { tool_name, prompt_template, type }
Represents a tool rule configuration where if this tool gets called, it must continue the agent loop.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
RequiredBeforeExitToolRule { tool_name, prompt_template, type }
Represents a tool rule configuration where this tool must be called before the agent loop can exit.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
MaxCountPerStepToolRule { max_count_limit, tool_name, prompt_template, type }
Represents a tool rule configuration which constrains the total number of times this tool can be invoked in a single step.
max_count_limit: number
The max limit for the total number of times this tool can be invoked in a single step.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
ParentToolRule { children, tool_name, prompt_template, type }
A ToolRule that only allows a child tool to be called if the parent has been called.
children: Array<string>
The children tools that can be invoked.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
RequiresApprovalToolRule { tool_name, prompt_template, type }
Represents a tool rule configuration which requires approval before the tool can be invoked.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored). Rendering uses fast built-in formatting for performance.
tools?: Array<string> | null
The tools used by the agent.
ReturnsExpand Collapse
AgentState { id, agent_type, blocks, 42 more }
Representation of an agent's state. This is the state of the agent at a given time, and is persisted in the DB backend. The state has all the information needed to recreate a persisted agent.
id: string
The id of the agent. Assigned by the database.
The type of agent.
The memory blocks used by the agent.
value: string
Value of the block.
id?: string
The human-friendly ID of the Block
base_template_id?: string | null
The base template id of the block.
created_by_id?: string | null
The id of the user that made this Block.
deployment_id?: string | null
The id of the deployment.
description?: string | null
Description of the block.
entity_id?: string | null
The id of the entity within the template.
hidden?: boolean | null
If set to True, the block will be hidden.
is_template?: boolean
Whether the block is a template (e.g. saved human/persona options).
label?: string | null
Label of the block (e.g. 'human', 'persona') in the context window.
last_updated_by_id?: string | null
The id of the user that last updated this Block.
limit?: number
Character limit of the block.
metadata?: Record<string, unknown> | null
Metadata of the block.
preserve_on_migration?: boolean | null
Preserve the block on template migration.
project_id?: string | null
The associated project id.
read_only?: boolean
Whether the agent has read-only access to the block.
tags?: Array<string> | null
The tags associated with the block.
template_id?: string | null
The id of the template.
template_name?: string | null
Name of the block if it is a template.
Deprecated: Use model field instead. The LLM configuration used by the agent.
context_window: number
The context window size for the model.
model: string
LLM model name.
model_endpoint_type: "openai" | "anthropic" | "google_ai" | 27 more
The endpoint type for the model.
compatibility_type?: "gguf" | "mlx" | null
The framework compatibility type for the model.
display_name?: string | null
A human-friendly display name for the model.
effort?: "low" | "medium" | "high" | 2 more | null
The effort level for Anthropic models that support it (Opus 4.5+). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.
enable_reasoner?: boolean
Whether or not the model should use extended thinking if it is a 'reasoning' style model
frequency_penalty?: number | null
Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.
handle?: string | null
The handle for this config, in the format provider/model-name.
max_reasoning_tokens?: number
Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.
max_tokens?: number | null
The maximum number of tokens to generate. If not set, the model will use its default value.
model_endpoint?: string | null
The endpoint for the model.
model_wrapper?: string | null
The wrapper for the model.
Deprecatedparallel_tool_calls?: boolean | null
Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.
The provider category for the model.
provider_name?: string | null
The provider name for the model.
put_inner_thoughts_in_kwargs?: boolean | null
Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
reasoning_effort?: "none" | "minimal" | "low" | 3 more | null
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
return_logprobs?: boolean
Whether to return log probabilities of the output tokens. Useful for RL training.
return_token_ids?: boolean
Whether to return token IDs for all LLM generations via SGLang native endpoint. Required for multi-turn RL training with loss masking. Only works with SGLang provider.
strict?: boolean
Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.
temperature?: number
The temperature to use when generating text with the model. A higher temperature will result in more random text.
tier?: string | null
The cost tier for the model (cloud only).
tool_call_parser?: string | null
SGLang tool call parser name (e.g. 'glm47', 'qwen25', 'hermes'). Used by the SGLang native adapter to parse tool calls from raw model output.
top_logprobs?: number | null
Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
Deprecatedmemory: Memory { blocks, agent_type, file_blocks, 2 more }
Deprecated: Use blocks field instead. The in-context memory of the agent.
Memory blocks contained in the agent's in-context memory
value: string
Value of the block.
id?: string
The human-friendly ID of the Block
base_template_id?: string | null
The base template id of the block.
created_by_id?: string | null
The id of the user that made this Block.
deployment_id?: string | null
The id of the deployment.
description?: string | null
Description of the block.
entity_id?: string | null
The id of the entity within the template.
hidden?: boolean | null
If set to True, the block will be hidden.
is_template?: boolean
Whether the block is a template (e.g. saved human/persona options).
label?: string | null
Label of the block (e.g. 'human', 'persona') in the context window.
last_updated_by_id?: string | null
The id of the user that last updated this Block.
limit?: number
Character limit of the block.
metadata?: Record<string, unknown> | null
Metadata of the block.
preserve_on_migration?: boolean | null
Preserve the block on template migration.
project_id?: string | null
The associated project id.
read_only?: boolean
Whether the agent has read-only access to the block.
tags?: Array<string> | null
The tags associated with the block.
template_id?: string | null
The id of the template.
template_name?: string | null
Name of the block if it is a template.
Agent type controlling prompt rendering.
AgentType = "memgpt_agent" | "memgpt_v2_agent" | "letta_v1_agent" | 6 more
Enum to represent the type of agent.
file_blocks?: Array<FileBlock>
Special blocks representing the agent's in-context memory of an attached file
file_id: string
Unique identifier of the file.
is_open: boolean
True if the agent currently has the file open.
Deprecatedsource_id: string
Deprecated: Use folder_id field instead. Unique identifier of the source.
value: string
Value of the block.
id?: string
The human-friendly ID of the Block
base_template_id?: string | null
The base template id of the block.
created_by_id?: string | null
The id of the user that made this Block.
deployment_id?: string | null
The id of the deployment.
description?: string | null
Description of the block.
entity_id?: string | null
The id of the entity within the template.
hidden?: boolean | null
If set to True, the block will be hidden.
is_template?: boolean
Whether the block is a template (e.g. saved human/persona options).
label?: string | null
Label of the block (e.g. 'human', 'persona') in the context window.
last_accessed_at?: string | null
UTC timestamp of the agent’s most recent access to this file. Any operations from the open, close, or search tools will update this field.
last_updated_by_id?: string | null
The id of the user that last updated this Block.
limit?: number
Character limit of the block.
metadata?: Record<string, unknown> | null
Metadata of the block.
preserve_on_migration?: boolean | null
Preserve the block on template migration.
project_id?: string | null
The associated project id.
read_only?: boolean
Whether the agent has read-only access to the block.
tags?: Array<string> | null
The tags associated with the block.
template_id?: string | null
The id of the template.
template_name?: string | null
Name of the block if it is a template.
git_enabled?: boolean
Whether this agent uses git-backed memory with structured labels.
prompt_template?: string
Deprecated. Ignored for performance.
name: string
The name of the agent.
Deprecatedsources: Array<Source>
Deprecated: Use folders field instead. The sources used by the agent.
id: string
The human-friendly ID of the Source
embedding_config: EmbeddingConfig { embedding_dim, embedding_endpoint_type, embedding_model, 7 more }
The embedding configuration used by the source.
embedding_dim: number
The dimension of the embedding.
embedding_endpoint_type: "openai" | "anthropic" | "bedrock" | 16 more
The endpoint type for the model.
embedding_model: string
The model for the embedding.
azure_deployment?: string | null
The Azure deployment for the model.
azure_endpoint?: string | null
The Azure endpoint for the model.
azure_version?: string | null
The Azure version for the model.
batch_size?: number
The maximum batch size for processing embeddings.
embedding_chunk_size?: number | null
The chunk size of the embedding.
embedding_endpoint?: string | null
The endpoint for the model (None if local).
handle?: string | null
The handle for this config, in the format provider/model-name.
name: string
The name of the source.
created_at?: string | null
The timestamp when the source was created.
created_by_id?: string | null
The id of the user that made this Source.
description?: string | null
The description of the source.
instructions?: string | null
Instructions for how to use the source.
last_updated_by_id?: string | null
The id of the user that last updated this Source.
metadata?: Record<string, unknown> | null
Metadata associated with the source.
updated_at?: string | null
The timestamp when the source was last updated.
The vector database provider used for this source's passages
system: string
The system prompt used by the agent.
tags: Array<string>
The tags associated with the agent.
The tools used by the agent.
id: string
The human-friendly ID of the Tool
args_json_schema?: Record<string, unknown> | null
The args JSON schema of the function.
created_by_id?: string | null
The id of the user that made this Tool.
default_requires_approval?: boolean | null
Default value for whether or not executing this tool requires approval.
description?: string | null
The description of the tool.
enable_parallel_execution?: boolean | null
If set to True, then this tool will potentially be executed concurrently with other tools. Default False.
json_schema?: Record<string, unknown> | null
The JSON schema of the function.
last_updated_by_id?: string | null
The id of the user that last updated this Tool.
metadata_?: Record<string, unknown> | null
A dictionary of additional metadata for the tool.
name?: string | null
The name of the function.
Optional list of npm packages required by this tool.
name: string
Name of the npm package.
version?: string | null
Optional version of the package, following semantic versioning.
Optional list of pip packages required by this tool.
name: string
Name of the pip package.
version?: string | null
Optional version of the package, following semantic versioning.
project_id?: string | null
The project id of the tool.
return_char_limit?: number
The maximum number of characters in the response.
source_code?: string | null
The source code of the function.
source_type?: string | null
The type of the source code.
tags?: Array<string>
Metadata tags.
The type of the tool.
base_template_id?: string | null
The base template id of the agent.
compaction_settings?: CompactionSettings | null
Configuration for conversation compaction / summarization.
Per-model settings (temperature, max tokens, etc.) are derived from the default configuration for that handle.
clip_chars?: number | null
The maximum length of the summary in characters. If none, no clipping is performed.
mode?: "all" | "sliding_window" | "self_compact_all" | "self_compact_sliding_window"
The type of summarization technique to use.
model?: string | null
Model handle to use for sliding_window/all summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults.
model_settings?: OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more } | SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more } | AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more } | 14 more | null
Optional model settings used to override defaults for the summarizer model.
OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openai"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more }
SGLang model configuration (OpenAI-compatible runtime with SGLang-specific parsing).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "sglang"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
tool_call_parser?: string | null
SGLang tool call parser name (for example 'glm47', 'qwen25', or 'hermes').
AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "anthropic"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GoogleAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_ai"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
GoogleVertexModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_vertex"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
AzureModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Azure OpenAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "azure"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
XaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
xAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "xai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
MoonshotModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Moonshot/Kimi model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
ZaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Z.ai (ZhipuAI) model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "zai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking?: Thinking { clear_thinking, type }
The thinking configuration for GLM-4.5+ models.
clear_thinking?: boolean
If False, preserved thinking is used (recommended for agents).
type?: "enabled" | "disabled"
Whether thinking is enabled or disabled.
MoonshotCodingModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
Kimi Code model configuration (Anthropic-compatible).
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot_coding"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GroqModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Groq model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "groq"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
DeepseekModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Deepseek model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "deepseek"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
TogetherModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Together AI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "together"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BedrockModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
AWS Bedrock model configuration.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "bedrock"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BasetenModelSettings { max_output_tokens, parallel_tool_calls, provider_type, temperature }
Baseten model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "baseten"
The type of the provider.
temperature?: number
The temperature of the model.
OpenRouterModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
OpenRouter model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openrouter"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
ChatGptoAuthModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
ChatGPT OAuth model configuration (uses ChatGPT backend API).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "chatgpt_oauth"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "low" | "medium" | 2 more
The reasoning effort level for GPT-5.x and o-series models.
temperature?: number
The temperature of the model.
prompt?: string | null
The prompt to use for summarization. If None, uses mode-specific default.
prompt_acknowledgement?: boolean
Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).
sliding_window_percentage?: number
The percentage of the context window to keep post-summarization (only used in sliding window modes).
created_at?: string | null
The timestamp when the object was created.
created_by_id?: string | null
The id of the user that made this object.
deployment_id?: string | null
The id of the deployment.
description?: string | null
The description of the agent.
embedding?: string | null
The embedding model handle used by the agent (format: provider/model-name).
Deprecatedembedding_config?: EmbeddingConfig { embedding_dim, embedding_endpoint_type, embedding_model, 7 more } | null
Configuration for embedding model connection and processing parameters.
embedding_dim: number
The dimension of the embedding.
embedding_endpoint_type: "openai" | "anthropic" | "bedrock" | 16 more
The endpoint type for the model.
embedding_model: string
The model for the embedding.
azure_deployment?: string | null
The Azure deployment for the model.
azure_endpoint?: string | null
The Azure endpoint for the model.
azure_version?: string | null
The Azure version for the model.
batch_size?: number
The maximum batch size for processing embeddings.
embedding_chunk_size?: number | null
The chunk size of the embedding.
embedding_endpoint?: string | null
The endpoint for the model (None if local).
handle?: string | null
The handle for this config, in the format provider/model-name.
enable_sleeptime?: boolean | null
If set to True, memory management will move to a background agent thread.
entity_id?: string | null
The id of the entity within the template.
hidden?: boolean | null
If set to True, the agent will be hidden.
identities?: Array<Identity>
The identities associated with this agent.
id: string
The human-friendly ID of the Identity
Deprecatedagent_ids: Array<string>
The IDs of the agents associated with the identity.
Deprecatedblock_ids: Array<string>
The IDs of the blocks associated with the identity.
identifier_key: string
External, user-generated identifier key of the identity.
identity_type: "org" | "user" | "other"
The type of the identity.
name: string
The name of the identity.
project_id?: string | null
The project id of the identity, if applicable.
properties?: Array<Property>
List of properties associated with the identity
key: string
The key of the property
type: "string" | "number" | "boolean" | "json"
The type of the property
value: string | number | boolean | Record<string, unknown>
The value of the property
Deprecatedidentity_ids?: Array<string>
Deprecated: Use identities field instead. The ids of the identities associated with this agent.
last_run_completion?: string | null
The timestamp when the agent last completed a run.
last_run_duration_ms?: number | null
The duration in milliseconds of the agent's last run.
The stop reason from the agent's last run.
last_updated_by_id?: string | null
The id of the user that last updated this object.
managed_group?: ManagedGroup | null
The multi-agent group that this agent manages
id: string
The id of the group. Assigned by the database.
manager_type: "round_robin" | "supervisor" | "dynamic" | 3 more
base_template_id?: string | null
The base template id.
deployment_id?: string | null
The id of the deployment.
hidden?: boolean | null
If set to True, the group will be hidden.
max_message_buffer_length?: number | null
The desired maximum length of messages in the context window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving.
min_message_buffer_length?: number | null
The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving.
project_id?: string | null
The associated project id.
template_id?: string | null
The id of the template.
max_files_open?: number | null
Maximum number of files that can be open at once for this agent. Setting this too high may exceed the context window, which will break the agent.
message_buffer_autoclear?: boolean
If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.
message_ids?: Array<string> | null
The ids of the messages in the agent's in-context memory.
metadata?: Record<string, unknown> | null
The metadata of the agent.
model?: string | null
The model handle used by the agent (format: provider/model-name).
model_settings?: OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more } | SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more } | AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more } | 14 more | null
The model settings used by the agent.
OpenAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 4 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openai"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
SgLangModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 5 more }
SGLang model configuration (OpenAI-compatible runtime with SGLang-specific parsing).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "sglang"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "minimal" | "low" | 3 more
The reasoning effort to use when generating text with reasoning models
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
tool_call_parser?: string | null
SGLang tool call parser name (for example 'glm47', 'qwen25', or 'hermes').
AnthropicModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "anthropic"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GoogleAIModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_ai"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
GoogleVertexModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "google_vertex"
The type of the provider.
response_schema?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response schema for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking_config?: ThinkingConfig { include_thoughts, thinking_budget }
The thinking configuration for the model.
include_thoughts?: boolean
Whether to include thoughts in the model's response.
thinking_budget?: number
The thinking budget for the model.
AzureModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Azure OpenAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "azure"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
XaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
xAI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "xai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
MoonshotModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Moonshot/Kimi model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
ZaiModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 3 more }
Z.ai (ZhipuAI) model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "zai"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
thinking?: Thinking { clear_thinking, type }
The thinking configuration for GLM-4.5+ models.
clear_thinking?: boolean
If False, preserved thinking is used (recommended for agents).
type?: "enabled" | "disabled"
Whether thinking is enabled or disabled.
MoonshotCodingModelSettings { effort, max_output_tokens, parallel_tool_calls, 6 more }
Kimi Code model configuration (Anthropic-compatible).
effort?: "low" | "medium" | "high" | 2 more | null
Effort level for supported Anthropic models (controls token spending). 'xhigh' and 'max' are available on Opus 4.6+. Not setting this gives similar performance to 'high'.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "moonshot_coding"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
strict?: boolean
Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.
temperature?: number
The temperature of the model.
thinking?: Thinking { budget_tokens, type }
The thinking configuration for the model.
budget_tokens?: number
The maximum number of tokens the model can use for extended thinking.
type?: "enabled" | "disabled"
The type of thinking to use.
verbosity?: "low" | "medium" | "high" | null
Soft control for how verbose model output should be, used for GPT-5 models.
GroqModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Groq model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "groq"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
DeepseekModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Deepseek model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "deepseek"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
TogetherModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
Together AI model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "together"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BedrockModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
AWS Bedrock model configuration.
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "bedrock"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
BasetenModelSettings { max_output_tokens, parallel_tool_calls, provider_type, temperature }
Baseten model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "baseten"
The type of the provider.
temperature?: number
The temperature of the model.
OpenRouterModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
OpenRouter model configuration (OpenAI-compatible).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "openrouter"
The type of the provider.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format for the model.
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
temperature?: number
The temperature of the model.
ChatGptoAuthModelSettings { max_output_tokens, parallel_tool_calls, provider_type, 2 more }
ChatGPT OAuth model configuration (uses ChatGPT backend API).
max_output_tokens?: number
The maximum number of tokens the model can generate.
parallel_tool_calls?: boolean
Whether to enable parallel tool calling.
provider_type?: "chatgpt_oauth"
The type of the provider.
reasoning?: Reasoning { reasoning_effort }
The reasoning configuration for the model.
reasoning_effort?: "none" | "low" | "medium" | 2 more
The reasoning effort level for GPT-5.x and o-series models.
temperature?: number
The temperature of the model.
Deprecatedmulti_agent_group?: MultiAgentGroup | null
Deprecated: Use managed_group field instead. The multi-agent group that this agent manages.
id: string
The id of the group. Assigned by the database.
manager_type: "round_robin" | "supervisor" | "dynamic" | 3 more
base_template_id?: string | null
The base template id.
deployment_id?: string | null
The id of the deployment.
hidden?: boolean | null
If set to True, the group will be hidden.
max_message_buffer_length?: number | null
The desired maximum length of messages in the context window of the convo agent. This is a best effort, and may be off slightly due to user/assistant interleaving.
min_message_buffer_length?: number | null
The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving.
project_id?: string | null
The associated project id.
template_id?: string | null
The id of the template.
A message representing a request for approval to call a tool (generated by the LLM to trigger tool execution).
Args: id (str): The ID of the message; date (datetime): The date the message was created in ISO format; name (Optional[str]): The name of the sender of the message; tool_call (ToolCall): The tool call
Deprecatedtool_call: ToolCall { arguments, name, tool_call_id } | ToolCallDelta { arguments, name, tool_call_id }
The tool call that has been requested by the LLM to run
ToolCall { arguments, name, tool_call_id }
ToolCallDelta { arguments, name, tool_call_id }
message_type?: "approval_request_message"
The type of the message.
otid?: string | null
The offline threading id (OTID). Set by the client to deduplicate requests. Used for idempotency in background streaming mode — each message in a request must have a unique OTID. Retries of the same request should reuse the same OTIDs.
tool_calls?: Array<ToolCall { arguments, name, tool_call_id } > | ToolCallDelta { arguments, name, tool_call_id } | null
The tool calls that have been requested by the LLM to run, which are pending approval
Array<ToolCall { arguments, name, tool_call_id } >
ToolCallDelta { arguments, name, tool_call_id }
per_file_view_window_char_limit?: number | null
The per-file view window character limit for this agent. Setting this too high may exceed the context window, which will break the agent.
project_id?: string | null
The id of the project the agent belongs to.
response_format?: TextResponseFormat { type } | JsonSchemaResponseFormat { json_schema, type } | JsonObjectResponseFormat { type } | null
The response format used by the agent
TextResponseFormat { type }
Response format for plain text responses.
type?: "text"
The type of the response format.
JsonSchemaResponseFormat { json_schema, type }
Response format for JSON schema-based responses.
json_schema: Record<string, unknown>
The JSON schema of the response.
type?: "json_schema"
The type of the response format.
JsonObjectResponseFormat { type }
Response format for JSON object responses.
type?: "json_object"
The type of the response format.
The environment variables for tool execution specific to this agent.
agent_id: string
The ID of the agent this environment variable belongs to.
key: string
The name of the environment variable.
value: string
The value of the environment variable.
id?: string
The human-friendly ID of the Agent-env
created_at?: string | null
The timestamp when the object was created.
created_by_id?: string | null
The id of the user that made this object.
description?: string | null
An optional description of the environment variable.
last_updated_by_id?: string | null
The id of the user that last updated this object.
updated_at?: string | null
The timestamp when the object was last updated.
value_enc?: string | null
Encrypted secret value (stored as encrypted string)
template_id?: string | null
The id of the template the agent belongs to.
timezone?: string | null
The timezone of the agent (IANA format).
Deprecatedtool_exec_environment_variables?: Array<AgentEnvironmentVariable { agent_id, key, value, 7 more } >
Deprecated: use secrets field instead.
agent_id: string
The ID of the agent this environment variable belongs to.
key: string
The name of the environment variable.
value: string
The value of the environment variable.
id?: string
The human-friendly ID of the Agent-env
created_at?: string | null
The timestamp when the object was created.
created_by_id?: string | null
The id of the user that made this object.
description?: string | null
An optional description of the environment variable.
last_updated_by_id?: string | null
The id of the user that last updated this object.
updated_at?: string | null
The timestamp when the object was last updated.
value_enc?: string | null
Encrypted secret value (stored as encrypted string)
tool_rules?: Array<ChildToolRule { children, tool_name, child_arg_nodes, 2 more } | InitToolRule { tool_name, args, prompt_template, type } | TerminalToolRule { tool_name, prompt_template, type } | 6 more> | null
The list of tool rules.
ChildToolRule { children, tool_name, child_arg_nodes, 2 more }
A ToolRule represents a tool that can be invoked by the agent.
children: Array<string>
The children tools that can be invoked.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
child_arg_nodes?: Array<ChildArgNode> | null
Optional list of typed child argument overrides. Each node must reference a child in 'children'.
name: string
The name of the child tool to invoke next.
args?: Record<string, unknown> | null
Optional prefilled arguments for this child tool. Keys must match the tool's parameter names and values must satisfy the tool's JSON schema. Supports partial prefill; non-overlapping parameters are left to the model.
prompt_template?: string | null
Optional template string (ignored).
InitToolRule { tool_name, args, prompt_template, type }
Represents the initial tool rule configuration.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
args?: Record<string, unknown> | null
Optional prefilled arguments for this tool. When present, these values will override any LLM-provided arguments with the same keys during invocation. Keys must match the tool's parameter names and values must satisfy the tool's JSON schema. Supports partial prefill; non-overlapping parameters are left to the model.
prompt_template?: string | null
Optional template string (ignored). Rendering uses fast built-in formatting for performance.
TerminalToolRule { tool_name, prompt_template, type }
Represents a terminal tool rule configuration where if this tool gets called, it must end the agent loop.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
ConditionalToolRule { child_output_mapping, tool_name, default_child, 3 more }
A ToolRule that conditionally maps to different child tools based on the output.
child_output_mapping: Record<string, string>
The mapping from tool output values to the child tool to invoke next
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
default_child?: string | null
The default child tool to be called. If None, any tool can be called.
prompt_template?: string | null
Optional template string (ignored).
require_output_mapping?: boolean
Whether to throw an error when output doesn't match any case
ContinueToolRule { tool_name, prompt_template, type }
Represents a tool rule configuration where if this tool gets called, it must continue the agent loop.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
RequiredBeforeExitToolRule { tool_name, prompt_template, type }
Represents a tool rule configuration where this tool must be called before the agent loop can exit.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
MaxCountPerStepToolRule { max_count_limit, tool_name, prompt_template, type }
Represents a tool rule configuration which constrains the total number of times this tool can be invoked in a single step.
max_count_limit: number
The max limit for the total number of times this tool can be invoked in a single step.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
ParentToolRule { children, tool_name, prompt_template, type }
A ToolRule that only allows a child tool to be called if the parent has been called.
children: Array<string>
The children tools that can be invoked.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored).
RequiresApprovalToolRule { tool_name, prompt_template, type }
Represents a tool rule configuration which requires approval before the tool can be invoked.
tool_name: string
The name of the tool. Must exist in the database for the user's organization.
prompt_template?: string | null
Optional template string (ignored). Rendering uses fast built-in formatting for performance.
updated_at?: string | null
The timestamp when the object was last updated.
Create Agent
import Letta from '@letta-ai/letta-client';

// Instantiate the SDK client. The apiKey option defaults to the
// LETTA_API_KEY environment variable, so passing it explicitly is optional.
const letta = new Letta({
  apiKey: process.env['LETTA_API_KEY'],
});

// Create an agent with default settings and print its server-assigned id.
const created = await letta.agents.create();
console.log(created.id);
{
"id": "id",
"agent_type": "memgpt_agent",
"blocks": [
{
"value": "value",
"id": "block-123e4567-e89b-12d3-a456-426614174000",
"base_template_id": "base_template_id",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"entity_id": "entity_id",
"hidden": true,
"is_template": true,
"label": "label",
"last_updated_by_id": "last_updated_by_id",
"limit": 0,
"metadata": {
"foo": "bar"
},
"preserve_on_migration": true,
"project_id": "project_id",
"read_only": true,
"tags": [
"string"
],
"template_id": "template_id",
"template_name": "template_name"
}
],
"llm_config": {
"context_window": 0,
"model": "model",
"model_endpoint_type": "openai",
"compatibility_type": "gguf",
"display_name": "display_name",
"effort": "low",
"enable_reasoner": true,
"frequency_penalty": 0,
"handle": "handle",
"max_reasoning_tokens": 0,
"max_tokens": 0,
"model_endpoint": "model_endpoint",
"model_wrapper": "model_wrapper",
"parallel_tool_calls": true,
"provider_category": "base",
"provider_name": "provider_name",
"put_inner_thoughts_in_kwargs": true,
"reasoning_effort": "none",
"response_format": {
"type": "text"
},
"return_logprobs": true,
"return_token_ids": true,
"strict": true,
"temperature": 0,
"tier": "tier",
"tool_call_parser": "tool_call_parser",
"top_logprobs": 0,
"verbosity": "low"
},
"memory": {
"blocks": [
{
"value": "value",
"id": "block-123e4567-e89b-12d3-a456-426614174000",
"base_template_id": "base_template_id",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"entity_id": "entity_id",
"hidden": true,
"is_template": true,
"label": "label",
"last_updated_by_id": "last_updated_by_id",
"limit": 0,
"metadata": {
"foo": "bar"
},
"preserve_on_migration": true,
"project_id": "project_id",
"read_only": true,
"tags": [
"string"
],
"template_id": "template_id",
"template_name": "template_name"
}
],
"agent_type": "memgpt_agent",
"file_blocks": [
{
"file_id": "file_id",
"is_open": true,
"source_id": "source_id",
"value": "value",
"id": "block-123e4567-e89b-12d3-a456-426614174000",
"base_template_id": "base_template_id",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"entity_id": "entity_id",
"hidden": true,
"is_template": true,
"label": "label",
"last_accessed_at": "2019-12-27T18:11:19.117Z",
"last_updated_by_id": "last_updated_by_id",
"limit": 0,
"metadata": {
"foo": "bar"
},
"preserve_on_migration": true,
"project_id": "project_id",
"read_only": true,
"tags": [
"string"
],
"template_id": "template_id",
"template_name": "template_name"
}
],
"git_enabled": true,
"prompt_template": "prompt_template"
},
"name": "name",
"sources": [
{
"id": "source-123e4567-e89b-12d3-a456-426614174000",
"embedding_config": {
"embedding_dim": 0,
"embedding_endpoint_type": "openai",
"embedding_model": "embedding_model",
"azure_deployment": "azure_deployment",
"azure_endpoint": "azure_endpoint",
"azure_version": "azure_version",
"batch_size": 0,
"embedding_chunk_size": 0,
"embedding_endpoint": "embedding_endpoint",
"handle": "handle"
},
"name": "name",
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"description": "description",
"instructions": "instructions",
"last_updated_by_id": "last_updated_by_id",
"metadata": {
"foo": "bar"
},
"updated_at": "2019-12-27T18:11:19.117Z",
"vector_db_provider": "native"
}
],
"system": "system",
"tags": [
"string"
],
"tools": [
{
"id": "tool-123e4567-e89b-12d3-a456-426614174000",
"args_json_schema": {
"foo": "bar"
},
"created_by_id": "created_by_id",
"default_requires_approval": true,
"description": "description",
"enable_parallel_execution": true,
"json_schema": {
"foo": "bar"
},
"last_updated_by_id": "last_updated_by_id",
"metadata_": {
"foo": "bar"
},
"name": "name",
"npm_requirements": [
{
"name": "x",
"version": "version"
}
],
"pip_requirements": [
{
"name": "x",
"version": "version"
}
],
"project_id": "project_id",
"return_char_limit": 1,
"source_code": "source_code",
"source_type": "source_type",
"tags": [
"string"
],
"tool_type": "custom"
}
],
"base_template_id": "base_template_id",
"compaction_settings": {
"clip_chars": 0,
"mode": "all",
"model": "model",
"model_settings": {
"max_output_tokens": 0,
"parallel_tool_calls": true,
"provider_type": "openai",
"reasoning": {
"reasoning_effort": "none"
},
"response_format": {
"type": "text"
},
"strict": true,
"temperature": 0
},
"prompt": "prompt",
"prompt_acknowledgement": true,
"sliding_window_percentage": 0
},
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"embedding": "embedding",
"embedding_config": {
"embedding_dim": 0,
"embedding_endpoint_type": "openai",
"embedding_model": "embedding_model",
"azure_deployment": "azure_deployment",
"azure_endpoint": "azure_endpoint",
"azure_version": "azure_version",
"batch_size": 0,
"embedding_chunk_size": 0,
"embedding_endpoint": "embedding_endpoint",
"handle": "handle"
},
"enable_sleeptime": true,
"entity_id": "entity_id",
"hidden": true,
"identities": [
{
"id": "identity-123e4567-e89b-12d3-a456-426614174000",
"agent_ids": [
"string"
],
"block_ids": [
"string"
],
"identifier_key": "identifier_key",
"identity_type": "org",
"name": "name",
"project_id": "project_id",
"properties": [
{
"key": "key",
"type": "string",
"value": "string"
}
]
}
],
"identity_ids": [
"string"
],
"last_run_completion": "2019-12-27T18:11:19.117Z",
"last_run_duration_ms": 0,
"last_stop_reason": "end_turn",
"last_updated_by_id": "last_updated_by_id",
"managed_group": {
"id": "id",
"agent_ids": [
"string"
],
"description": "description",
"manager_type": "round_robin",
"base_template_id": "base_template_id",
"deployment_id": "deployment_id",
"hidden": true,
"last_processed_message_id": "last_processed_message_id",
"manager_agent_id": "manager_agent_id",
"max_message_buffer_length": 0,
"max_turns": 0,
"min_message_buffer_length": 0,
"project_id": "project_id",
"shared_block_ids": [
"string"
],
"sleeptime_agent_frequency": 0,
"template_id": "template_id",
"termination_token": "termination_token",
"turns_counter": 0
},
"max_files_open": 0,
"message_buffer_autoclear": true,
"message_ids": [
"string"
],
"metadata": {
"foo": "bar"
},
"model": "model",
"model_settings": {
"max_output_tokens": 0,
"parallel_tool_calls": true,
"provider_type": "openai",
"reasoning": {
"reasoning_effort": "none"
},
"response_format": {
"type": "text"
},
"strict": true,
"temperature": 0
},
"multi_agent_group": {
"id": "id",
"agent_ids": [
"string"
],
"description": "description",
"manager_type": "round_robin",
"base_template_id": "base_template_id",
"deployment_id": "deployment_id",
"hidden": true,
"last_processed_message_id": "last_processed_message_id",
"manager_agent_id": "manager_agent_id",
"max_message_buffer_length": 0,
"max_turns": 0,
"min_message_buffer_length": 0,
"project_id": "project_id",
"shared_block_ids": [
"string"
],
"sleeptime_agent_frequency": 0,
"template_id": "template_id",
"termination_token": "termination_token",
"turns_counter": 0
},
"pending_approval": {
"id": "id",
"date": "2019-12-27T18:11:19.117Z",
"tool_call": {
"arguments": "arguments",
"name": "name",
"tool_call_id": "tool_call_id"
},
"is_err": true,
"message_type": "approval_request_message",
"name": "name",
"otid": "otid",
"run_id": "run_id",
"sender_id": "sender_id",
"seq_id": 0,
"step_id": "step_id",
"tool_calls": [
{
"arguments": "arguments",
"name": "name",
"tool_call_id": "tool_call_id"
}
]
},
"per_file_view_window_char_limit": 0,
"project_id": "project_id",
"response_format": {
"type": "text"
},
"secrets": [
{
"agent_id": "agent_id",
"key": "key",
"value": "value",
"id": "agent-env-123e4567-e89b-12d3-a456-426614174000",
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"description": "description",
"last_updated_by_id": "last_updated_by_id",
"updated_at": "2019-12-27T18:11:19.117Z",
"value_enc": "value_enc"
}
],
"template_id": "template_id",
"timezone": "timezone",
"tool_exec_environment_variables": [
{
"agent_id": "agent_id",
"key": "key",
"value": "value",
"id": "agent-env-123e4567-e89b-12d3-a456-426614174000",
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"description": "description",
"last_updated_by_id": "last_updated_by_id",
"updated_at": "2019-12-27T18:11:19.117Z",
"value_enc": "value_enc"
}
],
"tool_rules": [
{
"children": [
"string"
],
"tool_name": "tool_name",
"child_arg_nodes": [
{
"name": "name",
"args": {
"foo": "bar"
}
}
],
"prompt_template": "prompt_template",
"type": "constrain_child_tools"
}
],
"updated_at": "2019-12-27T18:11:19.117Z"
}
Returns Examples
{
"id": "id",
"agent_type": "memgpt_agent",
"blocks": [
{
"value": "value",
"id": "block-123e4567-e89b-12d3-a456-426614174000",
"base_template_id": "base_template_id",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"entity_id": "entity_id",
"hidden": true,
"is_template": true,
"label": "label",
"last_updated_by_id": "last_updated_by_id",
"limit": 0,
"metadata": {
"foo": "bar"
},
"preserve_on_migration": true,
"project_id": "project_id",
"read_only": true,
"tags": [
"string"
],
"template_id": "template_id",
"template_name": "template_name"
}
],
"llm_config": {
"context_window": 0,
"model": "model",
"model_endpoint_type": "openai",
"compatibility_type": "gguf",
"display_name": "display_name",
"effort": "low",
"enable_reasoner": true,
"frequency_penalty": 0,
"handle": "handle",
"max_reasoning_tokens": 0,
"max_tokens": 0,
"model_endpoint": "model_endpoint",
"model_wrapper": "model_wrapper",
"parallel_tool_calls": true,
"provider_category": "base",
"provider_name": "provider_name",
"put_inner_thoughts_in_kwargs": true,
"reasoning_effort": "none",
"response_format": {
"type": "text"
},
"return_logprobs": true,
"return_token_ids": true,
"strict": true,
"temperature": 0,
"tier": "tier",
"tool_call_parser": "tool_call_parser",
"top_logprobs": 0,
"verbosity": "low"
},
"memory": {
"blocks": [
{
"value": "value",
"id": "block-123e4567-e89b-12d3-a456-426614174000",
"base_template_id": "base_template_id",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"entity_id": "entity_id",
"hidden": true,
"is_template": true,
"label": "label",
"last_updated_by_id": "last_updated_by_id",
"limit": 0,
"metadata": {
"foo": "bar"
},
"preserve_on_migration": true,
"project_id": "project_id",
"read_only": true,
"tags": [
"string"
],
"template_id": "template_id",
"template_name": "template_name"
}
],
"agent_type": "memgpt_agent",
"file_blocks": [
{
"file_id": "file_id",
"is_open": true,
"source_id": "source_id",
"value": "value",
"id": "block-123e4567-e89b-12d3-a456-426614174000",
"base_template_id": "base_template_id",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"entity_id": "entity_id",
"hidden": true,
"is_template": true,
"label": "label",
"last_accessed_at": "2019-12-27T18:11:19.117Z",
"last_updated_by_id": "last_updated_by_id",
"limit": 0,
"metadata": {
"foo": "bar"
},
"preserve_on_migration": true,
"project_id": "project_id",
"read_only": true,
"tags": [
"string"
],
"template_id": "template_id",
"template_name": "template_name"
}
],
"git_enabled": true,
"prompt_template": "prompt_template"
},
"name": "name",
"sources": [
{
"id": "source-123e4567-e89b-12d3-a456-426614174000",
"embedding_config": {
"embedding_dim": 0,
"embedding_endpoint_type": "openai",
"embedding_model": "embedding_model",
"azure_deployment": "azure_deployment",
"azure_endpoint": "azure_endpoint",
"azure_version": "azure_version",
"batch_size": 0,
"embedding_chunk_size": 0,
"embedding_endpoint": "embedding_endpoint",
"handle": "handle"
},
"name": "name",
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"description": "description",
"instructions": "instructions",
"last_updated_by_id": "last_updated_by_id",
"metadata": {
"foo": "bar"
},
"updated_at": "2019-12-27T18:11:19.117Z",
"vector_db_provider": "native"
}
],
"system": "system",
"tags": [
"string"
],
"tools": [
{
"id": "tool-123e4567-e89b-12d3-a456-426614174000",
"args_json_schema": {
"foo": "bar"
},
"created_by_id": "created_by_id",
"default_requires_approval": true,
"description": "description",
"enable_parallel_execution": true,
"json_schema": {
"foo": "bar"
},
"last_updated_by_id": "last_updated_by_id",
"metadata_": {
"foo": "bar"
},
"name": "name",
"npm_requirements": [
{
"name": "x",
"version": "version"
}
],
"pip_requirements": [
{
"name": "x",
"version": "version"
}
],
"project_id": "project_id",
"return_char_limit": 1,
"source_code": "source_code",
"source_type": "source_type",
"tags": [
"string"
],
"tool_type": "custom"
}
],
"base_template_id": "base_template_id",
"compaction_settings": {
"clip_chars": 0,
"mode": "all",
"model": "model",
"model_settings": {
"max_output_tokens": 0,
"parallel_tool_calls": true,
"provider_type": "openai",
"reasoning": {
"reasoning_effort": "none"
},
"response_format": {
"type": "text"
},
"strict": true,
"temperature": 0
},
"prompt": "prompt",
"prompt_acknowledgement": true,
"sliding_window_percentage": 0
},
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"deployment_id": "deployment_id",
"description": "description",
"embedding": "embedding",
"embedding_config": {
"embedding_dim": 0,
"embedding_endpoint_type": "openai",
"embedding_model": "embedding_model",
"azure_deployment": "azure_deployment",
"azure_endpoint": "azure_endpoint",
"azure_version": "azure_version",
"batch_size": 0,
"embedding_chunk_size": 0,
"embedding_endpoint": "embedding_endpoint",
"handle": "handle"
},
"enable_sleeptime": true,
"entity_id": "entity_id",
"hidden": true,
"identities": [
{
"id": "identity-123e4567-e89b-12d3-a456-426614174000",
"agent_ids": [
"string"
],
"block_ids": [
"string"
],
"identifier_key": "identifier_key",
"identity_type": "org",
"name": "name",
"project_id": "project_id",
"properties": [
{
"key": "key",
"type": "string",
"value": "string"
}
]
}
],
"identity_ids": [
"string"
],
"last_run_completion": "2019-12-27T18:11:19.117Z",
"last_run_duration_ms": 0,
"last_stop_reason": "end_turn",
"last_updated_by_id": "last_updated_by_id",
"managed_group": {
"id": "id",
"agent_ids": [
"string"
],
"description": "description",
"manager_type": "round_robin",
"base_template_id": "base_template_id",
"deployment_id": "deployment_id",
"hidden": true,
"last_processed_message_id": "last_processed_message_id",
"manager_agent_id": "manager_agent_id",
"max_message_buffer_length": 0,
"max_turns": 0,
"min_message_buffer_length": 0,
"project_id": "project_id",
"shared_block_ids": [
"string"
],
"sleeptime_agent_frequency": 0,
"template_id": "template_id",
"termination_token": "termination_token",
"turns_counter": 0
},
"max_files_open": 0,
"message_buffer_autoclear": true,
"message_ids": [
"string"
],
"metadata": {
"foo": "bar"
},
"model": "model",
"model_settings": {
"max_output_tokens": 0,
"parallel_tool_calls": true,
"provider_type": "openai",
"reasoning": {
"reasoning_effort": "none"
},
"response_format": {
"type": "text"
},
"strict": true,
"temperature": 0
},
"multi_agent_group": {
"id": "id",
"agent_ids": [
"string"
],
"description": "description",
"manager_type": "round_robin",
"base_template_id": "base_template_id",
"deployment_id": "deployment_id",
"hidden": true,
"last_processed_message_id": "last_processed_message_id",
"manager_agent_id": "manager_agent_id",
"max_message_buffer_length": 0,
"max_turns": 0,
"min_message_buffer_length": 0,
"project_id": "project_id",
"shared_block_ids": [
"string"
],
"sleeptime_agent_frequency": 0,
"template_id": "template_id",
"termination_token": "termination_token",
"turns_counter": 0
},
"pending_approval": {
"id": "id",
"date": "2019-12-27T18:11:19.117Z",
"tool_call": {
"arguments": "arguments",
"name": "name",
"tool_call_id": "tool_call_id"
},
"is_err": true,
"message_type": "approval_request_message",
"name": "name",
"otid": "otid",
"run_id": "run_id",
"sender_id": "sender_id",
"seq_id": 0,
"step_id": "step_id",
"tool_calls": [
{
"arguments": "arguments",
"name": "name",
"tool_call_id": "tool_call_id"
}
]
},
"per_file_view_window_char_limit": 0,
"project_id": "project_id",
"response_format": {
"type": "text"
},
"secrets": [
{
"agent_id": "agent_id",
"key": "key",
"value": "value",
"id": "agent-env-123e4567-e89b-12d3-a456-426614174000",
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"description": "description",
"last_updated_by_id": "last_updated_by_id",
"updated_at": "2019-12-27T18:11:19.117Z",
"value_enc": "value_enc"
}
],
"template_id": "template_id",
"timezone": "timezone",
"tool_exec_environment_variables": [
{
"agent_id": "agent_id",
"key": "key",
"value": "value",
"id": "agent-env-123e4567-e89b-12d3-a456-426614174000",
"created_at": "2019-12-27T18:11:19.117Z",
"created_by_id": "created_by_id",
"description": "description",
"last_updated_by_id": "last_updated_by_id",
"updated_at": "2019-12-27T18:11:19.117Z",
"value_enc": "value_enc"
}
],
"tool_rules": [
{
"children": [
"string"
],
"tool_name": "tool_name",
"child_arg_nodes": [
{
"name": "name",
"args": {
"foo": "bar"
}
}
],
"prompt_template": "prompt_template",
"type": "constrain_child_tools"
}
],
"updated_at": "2019-12-27T18:11:19.117Z"
}