From 841dd88c386cc76d346784509e05e01a9c8b344d Mon Sep 17 00:00:00 2001
From: fern-api <115122769+fern-api[bot]@users.noreply.github.com>
Date: Wed, 18 Sep 2024 13:03:09 +0000
Subject: [PATCH] SDK regeneration

---
 poetry.lock | 233 +-
 pyproject.toml | 2 +-
 reference.md | 3646 +++-----
 src/cohere/__init__.py | 504 +-
 src/cohere/base_client.py | 6116 +------------
 src/cohere/connectors/__init__.py | 2 -
 src/cohere/core/client_wrapper.py | 2 +-
 src/cohere/datasets/__init__.py | 17 -
 src/cohere/datasets/client.py | 1846 ----
 src/cohere/datasets/types/__init__.py | 15 -
 ...sets_create_response_dataset_parts_item.py | 37 -
 src/cohere/embed_jobs/__init__.py | 5 -
 src/cohere/embed_jobs/client.py | 1421 ---
 src/cohere/embed_jobs/types/__init__.py | 5 -
 .../create_embed_job_request_truncate.py | 5 -
 src/cohere/finetuning/client.py | 1873 ----
 src/cohere/models/__init__.py | 2 -
 src/cohere/models/client.py | 729 --
 src/cohere/types/__init__.py | 276 +-
 src/cohere/types/chat_data_metrics.py | 32 -
 src/cohere/types/chat_stream_event.py | 17 -
 src/cohere/types/classify_data_metrics.py | 20 -
 .../types/components_schemas_text_content.py | 5 +
 src/cohere/types/dataset.py | 69 -
 src/cohere/types/dataset_part.py | 57 -
 src/cohere/types/dataset_type.py | 17 -
 src/cohere/types/dataset_validation_status.py | 7 -
 src/cohere/types/embed_job.py | 63 -
 src/cohere/types/embed_job_status.py | 5 -
 src/cohere/types/embed_response.py | 51 -
 src/cohere/types/finetune_dataset_metrics.py | 47 -
 src/cohere/types/generate_stream_event.py | 17 -
 .../types/generate_streamed_response.py | 82 -
 src/cohere/types/label_metric.py | 32 -
 src/cohere/types/list_connectors_response.py | 24 -
 src/cohere/types/list_embed_job_response.py | 20 -
 src/cohere/types/message.py | 75 -
 src/cohere/types/metrics.py | 22 -
 src/cohere/types/metrics_embed_data.py | 23 -
 .../types/metrics_embed_data_fields_item.py | 27 -
 src/cohere/types/parse_info.py | 20 -
 src/cohere/types/reranker_data_metrics.py | 47 -
 src/cohere/types/response_format.py | 66 -
 src/cohere/types/streamed_chat_response.py | 179 -
 src/cohere/v1/__init__.py | 236 +
 src/cohere/{connectors => v1}/client.py | 1160 ++-
 src/cohere/{ => v1}/errors/__init__.py | 0
 .../{ => v1}/errors/bad_request_error.py | 2 +-
 .../errors/client_closed_request_error.py | 2 +-
 src/cohere/{ => v1}/errors/forbidden_error.py | 2 +-
 .../{ => v1}/errors/gateway_timeout_error.py | 2 +-
 .../{ => v1}/errors/internal_server_error.py | 2 +-
 src/cohere/{ => v1}/errors/not_found_error.py | 2 +-
 .../{ => v1}/errors/not_implemented_error.py | 2 +-
 .../errors/service_unavailable_error.py | 2 +-
 .../errors/too_many_requests_error.py | 2 +-
 .../{ => v1}/errors/unauthorized_error.py | 2 +-
 .../errors/unprocessable_entity_error.py | 2 +-
 src/cohere/{ => v1}/finetuning/__init__.py | 0
 .../finetuning/finetuning/__init__.py | 0
 .../finetuning/finetuning/types/__init__.py | 0
 .../finetuning/finetuning/types/base_model.py | 4 +-
 .../finetuning/finetuning/types/base_type.py | 0
 .../types/create_finetuned_model_response.py | 4 +-
 .../types/delete_finetuned_model_response.py | 0
 .../finetuning/finetuning/types/event.py | 4 +-
 .../finetuning/types/finetuned_model.py | 4 +-
 .../types/get_finetuned_model_response.py | 4 +-
 .../finetuning/types/hyperparameters.py | 4 +-
 .../finetuning/types/list_events_response.py | 4 +-
 .../types/list_finetuned_models_response.py | 4 +-
 .../list_training_step_metrics_response.py | 4 +-
 .../finetuning/finetuning/types/settings.py | 4 +-
 .../finetuning/finetuning/types/status.py | 0
 .../finetuning/finetuning/types/strategy.py | 0
 .../finetuning/types/training_step_metrics.py | 4 +-
 .../types/update_finetuned_model_response.py | 4 +-
 .../finetuning/types/wandb_config.py | 4 +-
 src/cohere/v1/types/__init__.py | 145 +
 src/cohere/{ => v1}/types/api_meta.py | 4 +-
 .../{ => v1}/types/api_meta_api_version.py | 4 +-
 .../{ => v1}/types/api_meta_billed_units.py | 4 +-
 src/cohere/{ => v1}/types/api_meta_tokens.py | 4 +-
 src/cohere/{ => v1}/types/auth_token_type.py | 0
 src/cohere/{ => v1}/types/chat_citation.py | 4 +-
 .../types/chat_citation_generation_event.py | 2 +-
 src/cohere/{ => v1}/types/chat_connector.py | 4 +-
 src/cohere/{ => v1}/types/chat_message.py | 7 +-
 .../chat_search_queries_generation_event.py | 2 +-
 .../{ => v1}/types/chat_search_query.py | 4 +-
 .../{ => v1}/types/chat_search_result.py | 4 +-
 .../types/chat_search_result_connector.py | 4 +-
 .../types/chat_search_results_event.py | 4 +-
 .../{ => v1}/types/chat_stream_end_event.py | 2 +-
 .../chat_stream_end_event_finish_reason.py | 0
 .../types/chat_stream_event.py} | 10 +-
 .../v1/types/chat_stream_event_event_type.py | 15 +
 .../{ => v1}/types/chat_stream_start_event.py | 2 +-
 .../types/chat_text_generation_event.py | 2 +-
 .../types/chat_tool_calls_chunk_event.py | 2 +-
 .../types/chat_tool_calls_generation_event.py | 2 +-
 src/cohere/{ => v1}/types/classify_example.py | 4 +-
 .../client_closed_request_error_body.py} | 7 +-
 .../{ => v1}/types/compatible_endpoint.py | 0
 src/cohere/{ => v1}/types/connector.py | 4 +-
 .../{ => v1}/types/connector_auth_status.py | 0
 src/cohere/{ => v1}/types/connector_o_auth.py | 4 +-
 .../{ => v1}/types/create_connector_o_auth.py | 4 +-
 .../types/create_connector_response.py | 4 +-
 .../types/create_connector_service_auth.py | 4 +-
 .../types/datasets_get_usage_response.py | 0
 .../types/delete_connector_response.py | 0
 .../{ => v1}/types/embed_by_type_response.py | 7 +-
 .../embed_by_type_response_embeddings.py | 6 +-
 .../{ => v1}/types/embed_floats_response.py | 5 +-
 src/cohere/{ => v1}/types/embed_input_type.py | 0
 src/cohere/{ => v1}/types/embedding_type.py | 0
 src/cohere/{ => v1}/types/finish_reason.py | 0
 .../types/gateway_timeout_error_body.py} | 8 +-
 .../{ => v1}/types/generate_stream_end.py | 2 +-
 .../types/generate_stream_end_response.py | 4 +-
 .../{ => v1}/types/generate_stream_error.py | 2 +-
 .../types/generate_stream_event.py} | 8 +-
 .../types/generate_stream_event_event_type.py | 5 +
 .../{ => v1}/types/generate_stream_text.py | 2 +-
 .../v1/types/generate_streamed_response.py | 8 +
 src/cohere/{ => v1}/types/generation.py | 4 +-
 .../{ => v1}/types/get_model_response.py | 4 +-
 .../{ => v1}/types/json_response_format.py | 9 +-
 .../{ => v1}/types/list_models_response.py | 4 +-
 src/cohere/v1/types/message.py | 7 +
 .../types/non_streamed_chat_response.py | 6 +-
 .../types/not_implemented_error_body.py | 4 +-
 .../types/o_auth_authorize_response.py | 4 +-
 src/cohere/v1/types/response_format.py | 7 +
 .../{ => v1}/types/single_generation.py | 4 +-
 .../types/single_generation_in_stream.py | 4 +-
 ...ingle_generation_token_likelihoods_item.py | 4 +-
 src/cohere/v1/types/streamed_chat_response.py | 22 +
 .../types/text_response_format.py} | 5 +-
 .../types/too_many_requests_error_body.py | 4 +-
 src/cohere/{ => v1}/types/tool.py | 4 +-
 src/cohere/{ => v1}/types/tool_call.py | 4 +-
 src/cohere/{ => v1}/types/tool_call_delta.py | 4 +-
 src/cohere/{ => v1}/types/tool_message.py | 5 +-
 .../types/tool_parameter_definitions_value.py | 4 +-
 src/cohere/{ => v1}/types/tool_result.py | 4 +-
 .../types/unprocessable_entity_error_body.py | 4 +-
 .../types/update_connector_response.py | 4 +-
 src/cohere/v1/v1/__init__.py | 67 +
 src/cohere/v1/v1/client.py | 7870 +++++++++++++++++
 src/cohere/v1/v1/types/__init__.py | 67 +
 .../types/chat_request_citation_quality.py | 0
 .../chat_request_connectors_search_options.py | 4 +-
 .../types/chat_request_prompt_truncation.py | 0
 .../v1}/types/chat_request_safety_mode.py | 0
 .../chat_stream_request_citation_quality.py | 0
 ...tream_request_connectors_search_options.py | 4 +-
 .../chat_stream_request_prompt_truncation.py | 0
 .../types/chat_stream_request_safety_mode.py | 0
 .../v1}/types/check_api_key_response.py | 4 +-
 .../v1}/types/classify_request_truncate.py | 0
 .../{ => v1/v1}/types/classify_response.py | 6 +-
 .../classify_response_classifications_item.py | 4 +-
 ...lassifications_item_classification_type.py | 0
 ...ponse_classifications_item_labels_value.py | 4 +-
 .../{ => v1/v1}/types/detokenize_response.py | 6 +-
 .../v1}/types/embed_request_truncate.py | 0
 src/cohere/v1/v1/types/embed_response.py | 7 +
 .../generate_request_return_likelihoods.py | 0
 .../v1}/types/generate_request_truncate.py | 0
 ...erate_stream_request_return_likelihoods.py | 0
 .../types/generate_stream_request_truncate.py | 0
 .../types/rerank_request_documents_item.py | 2 +-
 .../{ => v1/v1}/types/rerank_response.py | 6 +-
 .../v1}/types/rerank_response_results_item.py | 4 +-
 .../rerank_response_results_item_document.py | 4 +-
 .../types/summarize_request_extractiveness.py | 0
 .../v1}/types/summarize_request_format.py | 0
 .../v1}/types/summarize_request_length.py | 0
 .../{ => v1/v1}/types/summarize_response.py | 6 +-
 .../{ => v1/v1}/types/tokenize_response.py | 6 +-
 src/cohere/v2/__init__.py | 174 +-
 src/cohere/v2/client.py | 361 +-
 src/cohere/v2/errors/__init__.py | 27 +
 src/cohere/v2/errors/bad_request_error.py | 9 +
 .../v2/errors/client_closed_request_error.py | 9 +
 src/cohere/v2/errors/forbidden_error.py | 9 +
 src/cohere/v2/errors/gateway_timeout_error.py | 9 +
 src/cohere/v2/errors/internal_server_error.py | 9 +
 src/cohere/v2/errors/not_found_error.py | 9 +
 src/cohere/v2/errors/not_implemented_error.py | 9 +
 .../v2/errors/service_unavailable_error.py | 9 +
 .../v2/errors/too_many_requests_error.py | 9 +
 src/cohere/v2/errors/unauthorized_error.py | 9 +
 .../v2/errors/unprocessable_entity_error.py | 9 +
 src/cohere/v2/types/__init__.py | 199 +-
 .../v2/types/assistant_message_content.py | 6 -
 .../types/assistant_message_content_item.py | 28 -
 ...assistant_message_response_content_item.py | 28 -
 src/cohere/v2/types/bad_request_error_body.py | 19 +
 src/cohere/v2/types/chat_message2.py | 98 -
 src/cohere/v2/types/chat_messages.py | 6 -
 .../types/client_closed_request_error_body.py | 4 +-
 src/cohere/v2/types/content.py | 30 -
 src/cohere/v2/types/forbidden_error_body.py | 19 +
 .../types/gateway_timeout_error_body.py | 4 +-
 .../v2/types/internal_server_error_body.py | 19 +
 ...nse_format2.py => not_found_error_body.py} | 6 +-
 .../v2/types/not_implemented_error_body.py | 19 +
 src/cohere/v2/types/response_format2.py | 63 -
 .../types/service_unavailable_error_body.py | 19 +
 src/cohere/v2/types/source.py | 50 -
 .../v2/types/streamed_chat_response2.py | 240 -
 src/cohere/v2/types/system_message_content.py | 6 -
 .../v2/types/system_message_content_item.py | 28 -
 .../v2/types/too_many_requests_error_body.py | 19 +
 .../v2/types/unauthorized_error_body.py | 19 +
 .../types/unprocessable_entity_error_body.py | 19 +
 src/cohere/v2/types/user_message_content.py | 6 -
 .../v2/types/v2chat_request_citation_mode.py | 5 -
 .../v2/types/v2chat_request_documents_item.py | 6 +
 .../v2chat_stream_request_citation_mode.py | 5 -
 .../v2chat_stream_request_documents_item.py | 6 +
 src/cohere/v2/v2/__init__.py | 137 +
 src/cohere/v2/v2/types/__init__.py | 135 +
 .../v2/{ => v2}/types/assistant_message.py | 9 +-
 .../v2/v2/types/assistant_message_content.py | 6 +
 .../types/assistant_message_response.py | 12 +-
 .../types/chat_content_delta_event.py | 2 +-
 .../types/chat_content_delta_event_delta.py | 4 +-
 .../chat_content_delta_event_delta_message.py | 4 +-
 ...ntent_delta_event_delta_message_content.py | 4 +-
 .../{ => v2}/types/chat_content_end_event.py | 2 +-
 .../types/chat_content_start_event.py | 2 +-
 .../types/chat_content_start_event_delta.py | 4 +-
 .../chat_content_start_event_delta_message.py | 4 +-
 ...ntent_start_event_delta_message_content.py | 4 +-
 .../v2/{ => v2}/types/chat_finish_reason.py | 0
 src/cohere/v2/v2/types/chat_message.py | 9 +
 .../{ => v2}/types/chat_message_end_event.py | 2 +-
 .../types/chat_message_end_event_delta.py | 4 +-
 .../types/chat_message_start_event.py | 2 +-
 .../types/chat_message_start_event_delta.py | 4 +-
 .../chat_message_start_event_delta_message.py | 4 +-
 .../v2/types/chat_messages.py} | 3 +-
 .../v2/types/chat_stream_event_type.py} | 13 +-
 .../v2/types/chat_stream_event_type_type.py | 20 +
 .../types/chat_tool_call_delta_event.py | 2 +-
 .../types/chat_tool_call_delta_event_delta.py | 4 +-
 ...t_tool_call_delta_event_delta_tool_call.py | 4 +-
 ...ll_delta_event_delta_tool_call_function.py | 4 +-
 .../types/chat_tool_call_end_event.py | 2 +-
 .../types/chat_tool_call_start_event.py | 2 +-
 .../types/chat_tool_call_start_event_delta.py | 4 +-
 ...t_tool_call_start_event_delta_tool_call.py | 4 +-
 ...ll_start_event_delta_tool_call_function.py | 4 +-
 .../types/chat_tool_plan_delta_event.py | 2 +-
 .../types/chat_tool_plan_delta_event_delta.py | 4 +-
 src/cohere/v2/{ => v2}/types/citation.py | 4 +-
 .../v2/{ => v2}/types/citation_end_event.py | 2 +-
 src/cohere/v2/v2/types/citation_options.py | 28 +
 .../v2/v2/types/citation_options_mode.py | 5 +
 .../v2/{ => v2}/types/citation_start_event.py | 2 +-
 .../types/citation_start_event_delta.py | 4 +-
 .../citation_start_event_delta_message.py | 4 +-
 src/cohere/v2/v2/types/document.py | 33 +
 src/cohere/v2/v2/types/document_content.py | 25 +
 .../v2/{ => v2}/types/document_source.py | 4 +-
 .../types/json_response_format.py} | 7 +-
 .../types/non_streamed_chat_response.py} | 6 +-
 src/cohere/v2/v2/types/response_format.py | 7 +
 src/cohere/v2/v2/types/source.py | 7 +
 .../v2/v2/types/streamed_chat_response.py | 28 +
 .../v2/{ => v2}/types/system_message.py | 7 +-
 .../v2/v2/types/system_message_content.py | 6 +
 src/cohere/v2/{ => v2}/types/text_content.py | 5 +-
 .../{ => v2/v2}/types/text_response_format.py | 6 +-
 .../v2/{types/tool2.py => v2/types/tool.py} | 10 +-
 .../tool_call2.py => v2/types/tool_call.py} | 10 +-
 .../v2/types/tool_call_function.py} | 10 +-
 src/cohere/v2/v2/types/tool_content.py | 7 +
 .../types/tool_function.py} | 6 +-
 .../types/tool_message.py} | 14 +-
 .../v2/v2/types/tool_message_tool_content.py | 6 +
 src/cohere/v2/{ => v2}/types/tool_source.py | 4 +-
 src/cohere/v2/{ => v2}/types/usage.py | 4 +-
 .../v2/{ => v2}/types/usage_billed_units.py | 4 +-
 src/cohere/v2/{ => v2}/types/usage_tokens.py | 4 +-
 src/cohere/v2/{ => v2}/types/user_message.py | 13 +-
 .../v2/v2/types/user_message_content.py | 6 +
 291 files changed, 12356 insertions(+), 17740 deletions(-)
 delete mode 100644 src/cohere/connectors/__init__.py
 delete mode 100644 src/cohere/datasets/__init__.py
 delete mode 100644 src/cohere/datasets/client.py
 delete mode 100644 src/cohere/datasets/types/__init__.py
 delete mode 100644 src/cohere/datasets/types/datasets_create_response_dataset_parts_item.py
 delete mode 100644 src/cohere/embed_jobs/__init__.py
 delete mode 100644 src/cohere/embed_jobs/client.py
 delete mode 100644 src/cohere/embed_jobs/types/__init__.py
 delete mode 100644 src/cohere/embed_jobs/types/create_embed_job_request_truncate.py
 delete mode 100644 src/cohere/finetuning/client.py
 delete mode 100644 src/cohere/models/__init__.py
 delete mode 100644 src/cohere/models/client.py
 delete mode 100644 src/cohere/types/chat_data_metrics.py
 delete mode 100644 src/cohere/types/chat_stream_event.py
 delete mode 100644 src/cohere/types/classify_data_metrics.py
 create mode 100644 src/cohere/types/components_schemas_text_content.py
 delete mode 100644 src/cohere/types/dataset.py
 delete mode 100644 src/cohere/types/dataset_part.py
 delete mode 100644 src/cohere/types/dataset_type.py
 delete mode 100644 src/cohere/types/dataset_validation_status.py
 delete mode 100644 src/cohere/types/embed_job.py
 delete mode 100644 src/cohere/types/embed_job_status.py
 delete mode 100644 src/cohere/types/embed_response.py
 delete mode 100644 src/cohere/types/finetune_dataset_metrics.py
 delete mode 100644 src/cohere/types/generate_stream_event.py
 delete mode 100644 src/cohere/types/generate_streamed_response.py
 delete mode 100644 src/cohere/types/label_metric.py
 delete mode 100644 src/cohere/types/list_connectors_response.py
 delete mode 100644 src/cohere/types/list_embed_job_response.py
 delete mode 100644 src/cohere/types/message.py
 delete mode 100644 src/cohere/types/metrics.py
 delete mode 100644 src/cohere/types/metrics_embed_data.py
 delete mode 100644 src/cohere/types/metrics_embed_data_fields_item.py
 delete mode 100644 src/cohere/types/parse_info.py
 delete mode 100644 src/cohere/types/reranker_data_metrics.py
 delete mode 100644 src/cohere/types/response_format.py
 delete mode 100644 src/cohere/types/streamed_chat_response.py
 create mode 100644 src/cohere/v1/__init__.py
 rename src/cohere/{connectors => v1}/client.py (73%)
 rename src/cohere/{ => v1}/errors/__init__.py (100%)
 rename src/cohere/{ => v1}/errors/bad_request_error.py (85%)
 rename src/cohere/{ => v1}/errors/client_closed_request_error.py (88%)
 rename src/cohere/{ => v1}/errors/forbidden_error.py (85%)
 rename src/cohere/{ => v1}/errors/gateway_timeout_error.py (87%)
 rename src/cohere/{ => v1}/errors/internal_server_error.py (85%)
 rename src/cohere/{ => v1}/errors/not_found_error.py (85%)
 rename src/cohere/{ => v1}/errors/not_implemented_error.py (87%)
 rename src/cohere/{ => v1}/errors/service_unavailable_error.py (85%)
 rename src/cohere/{ => v1}/errors/too_many_requests_error.py (88%)
 rename src/cohere/{ => v1}/errors/unauthorized_error.py (85%)
 rename src/cohere/{ => v1}/errors/unprocessable_entity_error.py (88%)
 rename src/cohere/{ => v1}/finetuning/__init__.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/__init__.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/__init__.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/base_model.py (89%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/base_type.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/create_finetuned_model_response.py (85%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/delete_finetuned_model_response.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/event.py (88%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/finetuned_model.py (94%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/get_finetuned_model_response.py (85%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/hyperparameters.py (91%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/list_events_response.py (89%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/list_finetuned_models_response.py (89%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/list_training_step_metrics_response.py (89%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/settings.py (91%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/status.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/strategy.py (100%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/training_step_metrics.py (88%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/update_finetuned_model_response.py (85%)
 rename src/cohere/{ => v1}/finetuning/finetuning/types/wandb_config.py (87%)
 create mode 100644 src/cohere/v1/types/__init__.py
 rename src/cohere/{ => v1}/types/api_meta.py (87%)
 rename src/cohere/{ => v1}/types/api_meta_api_version.py (82%)
 rename src/cohere/{ => v1}/types/api_meta_billed_units.py (89%)
 rename src/cohere/{ => v1}/types/api_meta_tokens.py (86%)
 rename src/cohere/{ => v1}/types/auth_token_type.py (100%)
 rename src/cohere/{ => v1}/types/chat_citation.py (92%)
 rename src/cohere/{ => v1}/types/chat_citation_generation_event.py (92%)
 rename src/cohere/{ => v1}/types/chat_connector.py (92%)
 rename src/cohere/{ => v1}/types/chat_message.py (87%)
 rename src/cohere/{ => v1}/types/chat_search_queries_generation_event.py (92%)
 rename src/cohere/{ => v1}/types/chat_search_query.py (87%)
 rename src/cohere/{ => v1}/types/chat_search_result.py (91%)
 rename src/cohere/{ => v1}/types/chat_search_result_connector.py (83%)
 rename src/cohere/{ => v1}/types/chat_search_results_event.py (89%)
 rename src/cohere/{ => v1}/types/chat_stream_end_event.py (96%)
 rename src/cohere/{ => v1}/types/chat_stream_end_event_finish_reason.py (100%)
 rename src/cohere/{datasets/types/datasets_create_response.py => v1/types/chat_stream_event.py} (76%)
 create mode 100644 src/cohere/v1/types/chat_stream_event_event_type.py
 rename src/cohere/{ => v1}/types/chat_stream_start_event.py (91%)
 rename src/cohere/{ => v1}/types/chat_text_generation_event.py (91%)
 rename src/cohere/{ => v1}/types/chat_tool_calls_chunk_event.py (91%)
 rename src/cohere/{ => v1}/types/chat_tool_calls_generation_event.py (92%)
 rename src/cohere/{ => v1}/types/classify_example.py (81%)
 rename src/cohere/{datasets/types/datasets_get_response.py => v1/types/client_closed_request_error_body.py} (82%)
 rename src/cohere/{ => v1}/types/compatible_endpoint.py (100%)
 rename src/cohere/{ => v1}/types/connector.py (96%)
 rename src/cohere/{ => v1}/types/connector_auth_status.py (100%)
 rename src/cohere/{ => v1}/types/connector_o_auth.py (91%)
 rename src/cohere/{ => v1}/types/create_connector_o_auth.py (91%)
 rename src/cohere/{ => v1}/types/create_connector_response.py (81%)
 rename src/cohere/{ => v1}/types/create_connector_service_auth.py (86%)
 rename src/cohere/{datasets => v1}/types/datasets_get_usage_response.py (100%)
 rename src/cohere/{ => v1}/types/delete_connector_response.py (100%)
 rename src/cohere/{ => v1}/types/embed_by_type_response.py (83%)
 rename src/cohere/{ => v1}/types/embed_by_type_response_embeddings.py (91%)
 rename src/cohere/{ => v1}/types/embed_floats_response.py (82%)
 rename src/cohere/{ => v1}/types/embed_input_type.py (100%)
 rename src/cohere/{ => v1}/types/embedding_type.py (100%)
 rename src/cohere/{ => v1}/types/finish_reason.py (100%)
 rename src/cohere/{v2/types/chat_stream_event_type.py => v1/types/gateway_timeout_error_body.py} (84%)
 rename src/cohere/{ => v1}/types/generate_stream_end.py (93%)
 rename src/cohere/{ => v1}/types/generate_stream_end_response.py (84%)
 rename src/cohere/{ => v1}/types/generate_stream_error.py (93%)
 rename src/cohere/{datasets/types/datasets_list_response.py => v1/types/generate_stream_event.py} (75%)
 create mode 100644 src/cohere/v1/types/generate_stream_event_event_type.py
 rename src/cohere/{ => v1}/types/generate_stream_text.py (93%)
 create mode 100644 src/cohere/v1/types/generate_streamed_response.py
 rename src/cohere/{ => v1}/types/generation.py (87%)
 rename src/cohere/{ => v1}/types/get_model_response.py (93%)
 rename src/cohere/{ => v1}/types/json_response_format.py (84%)
 rename src/cohere/{ => v1}/types/list_models_response.py (86%)
 create mode 100644 src/cohere/v1/types/message.py
 rename src/cohere/{ => v1}/types/non_streamed_chat_response.py (93%)
 rename src/cohere/{ => v1}/types/not_implemented_error_body.py (80%)
 rename src/cohere/{ => v1}/types/o_auth_authorize_response.py (84%)
 create mode 100644 src/cohere/v1/types/response_format.py
 rename src/cohere/{ => v1}/types/single_generation.py (92%)
 rename src/cohere/{ => v1}/types/single_generation_in_stream.py (87%)
 rename src/cohere/{ => v1}/types/single_generation_token_likelihoods_item.py (81%)
 create mode 100644 src/cohere/v1/types/streamed_chat_response.py
 rename src/cohere/{v2/types/tool_call2function.py => v1/types/text_response_format.py} (80%)
 rename src/cohere/{ => v1}/types/too_many_requests_error_body.py (80%)
 rename src/cohere/{ => v1}/types/tool.py (92%)
 rename src/cohere/{ => v1}/types/tool_call.py (87%)
 rename src/cohere/{ => v1}/types/tool_call_delta.py (89%)
 rename src/cohere/{ => v1}/types/tool_message.py (79%)
 rename src/cohere/{ => v1}/types/tool_parameter_definitions_value.py (88%)
 rename src/cohere/{ => v1}/types/tool_result.py (82%)
 rename src/cohere/{ => v1}/types/unprocessable_entity_error_body.py (80%)
 rename src/cohere/{ => v1}/types/update_connector_response.py (81%)
 create mode 100644 src/cohere/v1/v1/__init__.py
 create mode 100644 src/cohere/v1/v1/client.py
 create mode 100644 src/cohere/v1/v1/types/__init__.py
 rename src/cohere/{ => v1/v1}/types/chat_request_citation_quality.py (100%)
 rename src/cohere/{ => v1/v1}/types/chat_request_connectors_search_options.py (90%)
 rename src/cohere/{ => v1/v1}/types/chat_request_prompt_truncation.py (100%)
 rename src/cohere/{ => v1/v1}/types/chat_request_safety_mode.py (100%)
 rename src/cohere/{ => v1/v1}/types/chat_stream_request_citation_quality.py (100%)
 rename src/cohere/{ => v1/v1}/types/chat_stream_request_connectors_search_options.py (90%)
 rename src/cohere/{ => v1/v1}/types/chat_stream_request_prompt_truncation.py (100%)
 rename src/cohere/{ => v1/v1}/types/chat_stream_request_safety_mode.py (100%)
 rename src/cohere/{ => v1/v1}/types/check_api_key_response.py (82%)
 rename src/cohere/{ => v1/v1}/types/classify_request_truncate.py (100%)
 rename src/cohere/{ => v1/v1}/types/classify_response.py (80%)
 rename src/cohere/{ => v1/v1}/types/classify_response_classifications_item.py (94%)
 rename src/cohere/{ => v1/v1}/types/classify_response_classifications_item_classification_type.py (100%)
 rename src/cohere/{ => v1/v1}/types/classify_response_classifications_item_labels_value.py (81%)
 rename src/cohere/{ => v1/v1}/types/detokenize_response.py (78%)
 rename src/cohere/{ => v1/v1}/types/embed_request_truncate.py (100%)
 create mode 100644 src/cohere/v1/v1/types/embed_response.py
 rename src/cohere/{ => v1/v1}/types/generate_request_return_likelihoods.py (100%)
 rename src/cohere/{ => v1/v1}/types/generate_request_truncate.py (100%)
 rename src/cohere/{ => v1/v1}/types/generate_stream_request_return_likelihoods.py (100%)
 rename src/cohere/{ => v1/v1}/types/generate_stream_request_truncate.py (100%)
 rename src/cohere/{ => v1/v1}/types/rerank_request_documents_item.py (72%)
 rename src/cohere/{ => v1/v1}/types/rerank_response.py (82%)
 rename src/cohere/{ => v1/v1}/types/rerank_response_results_item.py (92%)
 rename src/cohere/{ => v1/v1}/types/rerank_response_results_item_document.py (85%)
 rename src/cohere/{ => v1/v1}/types/summarize_request_extractiveness.py (100%)
 rename src/cohere/{ => v1/v1}/types/summarize_request_format.py (100%)
 rename src/cohere/{ => v1/v1}/types/summarize_request_length.py (100%)
 rename src/cohere/{ => v1/v1}/types/summarize_response.py (82%)
 rename src/cohere/{ => v1/v1}/types/tokenize_response.py (80%)
 create mode 100644 src/cohere/v2/errors/__init__.py
 create mode 100644 src/cohere/v2/errors/bad_request_error.py
 create mode 100644 src/cohere/v2/errors/client_closed_request_error.py
 create mode 100644 src/cohere/v2/errors/forbidden_error.py
 create mode 100644 src/cohere/v2/errors/gateway_timeout_error.py
 create mode 100644 src/cohere/v2/errors/internal_server_error.py
 create mode 100644 src/cohere/v2/errors/not_found_error.py
 create mode 100644 src/cohere/v2/errors/not_implemented_error.py
 create mode 100644 src/cohere/v2/errors/service_unavailable_error.py
 create mode 100644 src/cohere/v2/errors/too_many_requests_error.py
 create mode 100644 src/cohere/v2/errors/unauthorized_error.py
 create mode 100644 src/cohere/v2/errors/unprocessable_entity_error.py
 delete mode 100644 src/cohere/v2/types/assistant_message_content.py
 delete mode 100644 src/cohere/v2/types/assistant_message_content_item.py
 delete mode 100644 src/cohere/v2/types/assistant_message_response_content_item.py
 create mode 100644 src/cohere/v2/types/bad_request_error_body.py
 delete mode 100644 src/cohere/v2/types/chat_message2.py
 delete mode 100644 src/cohere/v2/types/chat_messages.py
 rename src/cohere/{ => v2}/types/client_closed_request_error_body.py (80%)
 delete mode 100644 src/cohere/v2/types/content.py
 create mode 100644 src/cohere/v2/types/forbidden_error_body.py
 rename src/cohere/{ => v2}/types/gateway_timeout_error_body.py (80%)
 create mode 100644 src/cohere/v2/types/internal_server_error_body.py
 rename src/cohere/v2/types/{text_response_format2.py => not_found_error_body.py} (85%)
 create mode 100644 src/cohere/v2/types/not_implemented_error_body.py
 delete mode 100644 src/cohere/v2/types/response_format2.py
 create mode 100644 src/cohere/v2/types/service_unavailable_error_body.py
 delete mode 100644 src/cohere/v2/types/source.py
 delete mode 100644 src/cohere/v2/types/streamed_chat_response2.py
 delete mode 100644 src/cohere/v2/types/system_message_content.py
 delete mode 100644 src/cohere/v2/types/system_message_content_item.py
 create mode 100644 src/cohere/v2/types/too_many_requests_error_body.py
 create mode 100644 src/cohere/v2/types/unauthorized_error_body.py
 create mode 100644 src/cohere/v2/types/unprocessable_entity_error_body.py
 delete mode 100644 src/cohere/v2/types/user_message_content.py
 delete mode 100644 src/cohere/v2/types/v2chat_request_citation_mode.py
 create mode 100644 src/cohere/v2/types/v2chat_request_documents_item.py
 delete mode 100644 src/cohere/v2/types/v2chat_stream_request_citation_mode.py
 create mode 100644 src/cohere/v2/types/v2chat_stream_request_documents_item.py
 create mode 100644 src/cohere/v2/v2/__init__.py
 create mode 100644 src/cohere/v2/v2/types/__init__.py
 rename src/cohere/v2/{ => v2}/types/assistant_message.py (75%)
 create mode 100644 src/cohere/v2/v2/types/assistant_message_content.py
 rename src/cohere/v2/{ => v2}/types/assistant_message_response.py (65%)
 rename src/cohere/v2/{ => v2}/types/chat_content_delta_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/chat_content_delta_event_delta.py (83%)
 rename src/cohere/v2/{ => v2}/types/chat_content_delta_event_delta_message.py (84%)
 rename src/cohere/v2/{ => v2}/types/chat_content_delta_event_delta_message_content.py (81%)
 rename src/cohere/v2/{ => v2}/types/chat_content_end_event.py (91%)
 rename src/cohere/v2/{ => v2}/types/chat_content_start_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/chat_content_start_event_delta.py (83%)
 rename src/cohere/v2/{ => v2}/types/chat_content_start_event_delta_message.py (84%)
 rename src/cohere/v2/{ => v2}/types/chat_content_start_event_delta_message_content.py (82%)
 rename src/cohere/v2/{ => v2}/types/chat_finish_reason.py (100%)
 create mode 100644 src/cohere/v2/v2/types/chat_message.py
 rename src/cohere/v2/{ => v2}/types/chat_message_end_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/chat_message_end_event_delta.py (84%)
 rename src/cohere/v2/{ => v2}/types/chat_message_start_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/chat_message_start_event_delta.py (83%)
 rename src/cohere/v2/{ => v2}/types/chat_message_start_event_delta_message.py (83%)
 rename src/cohere/{types/embed_job_truncate.py => v2/v2/types/chat_messages.py} (50%)
 rename src/cohere/{types/create_embed_job_response.py => v2/v2/types/chat_stream_event_type.py} (58%)
 create mode 100644 src/cohere/v2/v2/types/chat_stream_event_type_type.py
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_delta_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_delta_event_delta.py (84%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_delta_event_delta_tool_call.py (84%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_delta_event_delta_tool_call_function.py (81%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_end_event.py (91%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_start_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_start_event_delta.py (84%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_start_event_delta_tool_call.py (86%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_call_start_event_delta_tool_call_function.py (82%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_plan_delta_event.py (92%)
 rename src/cohere/v2/{ => v2}/types/chat_tool_plan_delta_event_delta.py (80%)
 rename src/cohere/v2/{ => v2}/types/citation.py (85%)
 rename src/cohere/v2/{ => v2}/types/citation_end_event.py (91%)
 create mode 100644 src/cohere/v2/v2/types/citation_options.py
 create mode 100644 src/cohere/v2/v2/types/citation_options_mode.py
 rename src/cohere/v2/{ => v2}/types/citation_start_event.py (93%)
 rename src/cohere/v2/{ => v2}/types/citation_start_event_delta.py (83%)
 rename src/cohere/v2/{ => v2}/types/citation_start_event_delta_message.py (81%)
 create mode 100644 src/cohere/v2/v2/types/document.py
 create mode 100644 src/cohere/v2/v2/types/document_content.py
 rename src/cohere/v2/{ => v2}/types/document_source.py (86%)
 rename src/cohere/v2/{types/json_response_format2.py => v2/types/json_response_format.py} (82%)
 rename src/cohere/v2/{types/non_streamed_chat_response2.py => v2/types/non_streamed_chat_response.py} (85%)
 create mode 100644 src/cohere/v2/v2/types/response_format.py
 create mode 100644 src/cohere/v2/v2/types/source.py
 create mode 100644 src/cohere/v2/v2/types/streamed_chat_response.py
 rename src/cohere/v2/{ => v2}/types/system_message.py (77%)
 create mode 100644 src/cohere/v2/v2/types/system_message_content.py
 rename src/cohere/v2/{ => v2}/types/text_content.py (75%)
 rename src/cohere/{ => v2/v2}/types/text_response_format.py (73%)
 rename src/cohere/v2/{types/tool2.py => v2/types/tool.py} (65%)
 rename src/cohere/v2/{types/tool_call2.py => v2/types/tool_call.py} (67%)
 rename src/cohere/{types/get_connector_response.py => v2/v2/types/tool_call_function.py} (61%)
 create mode 100644 src/cohere/v2/v2/types/tool_content.py
 rename src/cohere/v2/{types/tool2function.py => v2/types/tool_function.py} (84%)
 rename src/cohere/v2/{types/tool_message2.py => v2/types/tool_message.py} (56%)
 create mode 100644 src/cohere/v2/v2/types/tool_message_tool_content.py
 rename src/cohere/v2/{ => v2}/types/tool_source.py (84%)
 rename src/cohere/v2/{ => v2}/types/usage.py (84%)
 rename src/cohere/v2/{ => v2}/types/usage_billed_units.py (89%)
 rename src/cohere/v2/{ => v2}/types/usage_tokens.py (85%)
 rename src/cohere/v2/{ => v2}/types/user_message.py (69%)
 create mode 100644 src/cohere/v2/v2/types/user_message_content.py

diff --git a/poetry.lock b/poetry.lock
index 500cafff0..cb6a4db62 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -38,17 +38,17 @@ trio = ["trio (>=0.23)"]
 
 [[package]]
 name = "boto3"
-version = "1.35.18"
+version = "1.35.21"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "boto3-1.35.18-py3-none-any.whl", hash = "sha256:71e237d3997cf93425947854d7b121c577944f391ba633afb0659e1015364704"},
-    {file = "boto3-1.35.18.tar.gz", hash = "sha256:fd130308f1f49d748a5fc63de92de79a995b51c79af3947ddde8815fcf0684fe"},
+    {file = "boto3-1.35.21-py3-none-any.whl", hash = "sha256:247f88eedce9ae4e014a8fc14a9473759bb8e391460d49396a3b600fb649f33b"},
+    {file = "boto3-1.35.21.tar.gz", hash = "sha256:db5fbbd10248db060f2ccce3ae17764f1641c99c8b9f51d422c26ebe25703a1e"},
 ]
 
 [package.dependencies]
-botocore = ">=1.35.18,<1.36.0"
+botocore = ">=1.35.21,<1.36.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.10.0,<0.11.0"
 
@@ -57,13 +57,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
 
 [[package]]
 name = "botocore"
-version = "1.35.18"
+version = "1.35.21"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "botocore-1.35.18-py3-none-any.whl", hash = "sha256:1027083aeb1fe74057273410fd768e018e22f85adfbd717b5a69f578f7812b80"},
-    {file = "botocore-1.35.18.tar.gz", hash = "sha256:e59da8b91ab06683d2725b6cbbb0383b30c68a241c3c63363f4c5bff59b3c0c0"},
+    {file = "botocore-1.35.21-py3-none-any.whl", hash = "sha256:3db9ddfe521edc0753fc8c68caef71c7806e1d2d21ce8cbabc2065b7d79192f2"},
+    {file = "botocore-1.35.21.tar.gz", hash = "sha256:db917e7d7b3a2eed1310c6496784bc813c91f020a021c2ab5f9df7d28cdb4f1d"},
 ]
 
 [package.dependencies]
@@ -260,18 +260,18 @@ zstandard = ["zstandard"]
 
 [[package]]
 name = "filelock"
-version = "3.16.0"
+version = "3.16.1"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "filelock-3.16.0-py3-none-any.whl", hash = "sha256:f6ed4c963184f4c84dd5557ce8fece759a3724b37b80c6c4f20a2f63a4dc6609"},
-    {file = "filelock-3.16.0.tar.gz", hash = "sha256:81de9eb8453c769b63369f87f11131a7ab04e367f8d97ad39dc230daa07e3bec"},
+    {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"},
+    {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"},
 ]
 
 [package.extras]
-docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
-testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.1.1)", "pytest (>=8.3.2)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.3)"]
+docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"]
 typing = ["typing-extensions (>=4.12.2)"]
 
 [[package]]
@@ -383,13 +383,13 @@ files = [
 
 [[package]]
 name = "huggingface-hub"
-version = "0.24.7"
+version = "0.25.0"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "huggingface_hub-0.24.7-py3-none-any.whl", hash = "sha256:a212c555324c8a7b1ffdd07266bb7e7d69ca71aa238d27b7842d65e9a26ac3e5"},
-    {file = "huggingface_hub-0.24.7.tar.gz", hash = "sha256:0ad8fb756e2831da0ac0491175b960f341fe06ebcf80ed6f8728313f95fc0207"},
+    {file = "huggingface_hub-0.25.0-py3-none-any.whl", hash = "sha256:e2f357b35d72d5012cfd127108c4e14abcd61ba4ebc90a5a374dc2456cb34e12"},
+    {file = "huggingface_hub-0.25.0.tar.gz", hash = "sha256:fb5fbe6c12fcd99d187ec7db95db9110fb1a20505f23040a5449a717c1a0db4d"},
 ]
 
 [package.dependencies]
@@ -417,15 +417,18 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t
 
 [[package]]
 name = "idna"
-version = "3.8"
+version = "3.10"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"},
-    {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"},
+    {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
+    {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
 ]
 
+[package.extras]
+all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+
 [[package]]
 name = "iniconfig"
 version = "2.0.0"
@@ -547,18 +550,18 @@ testing = ["pytest", "pytest-benchmark"]
 
 [[package]]
 name = "pydantic"
-version = "2.9.1"
+version = "2.9.2"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic-2.9.1-py3-none-any.whl", hash = "sha256:7aff4db5fdf3cf573d4b3c30926a510a10e19a0774d38fc4967f78beb6deb612"},
-    {file = "pydantic-2.9.1.tar.gz", hash = "sha256:1363c7d975c7036df0db2b4a61f2e062fbc0aa5ab5f2772e0ffc7191a4f4bce2"},
+    {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"},
+    {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"},
 ]
 
 [package.dependencies]
 annotated-types = ">=0.6.0"
-pydantic-core = "2.23.3"
+pydantic-core = "2.23.4"
 typing-extensions = [
     {version = ">=4.6.1", markers = "python_version < \"3.13\""},
     {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
@@ -570,100 +573,100 @@ timezone = ["tzdata"]
 
 [[package]]
 name = "pydantic-core"
-version = "2.23.3"
+version = "2.23.4"
 description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic_core-2.23.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7f10a5d1b9281392f1bf507d16ac720e78285dfd635b05737c3911637601bae6"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c09a7885dd33ee8c65266e5aa7fb7e2f23d49d8043f089989726391dd7350c5"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6470b5a1ec4d1c2e9afe928c6cb37eb33381cab99292a708b8cb9aa89e62429b"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9172d2088e27d9a185ea0a6c8cebe227a9139fd90295221d7d495944d2367700"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86fc6c762ca7ac8fbbdff80d61b2c59fb6b7d144aa46e2d54d9e1b7b0e780e01"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0cb80fd5c2df4898693aa841425ea1727b1b6d2167448253077d2a49003e0ed"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03667cec5daf43ac4995cefa8aaf58f99de036204a37b889c24a80927b629cec"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:047531242f8e9c2db733599f1c612925de095e93c9cc0e599e96cf536aaf56ba"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5499798317fff7f25dbef9347f4451b91ac2a4330c6669821c8202fd354c7bee"},
-    {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bbb5e45eab7624440516ee3722a3044b83fff4c0372efe183fd6ba678ff681fe"},
-    {file = "pydantic_core-2.23.3-cp310-none-win32.whl", hash = "sha256:8b5b3ed73abb147704a6e9f556d8c5cb078f8c095be4588e669d315e0d11893b"},
-    {file = "pydantic_core-2.23.3-cp310-none-win_amd64.whl", hash = "sha256:2b603cde285322758a0279995b5796d64b63060bfbe214b50a3ca23b5cee3e83"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c889fd87e1f1bbeb877c2ee56b63bb297de4636661cc9bbfcf4b34e5e925bc27"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea85bda3189fb27503af4c45273735bcde3dd31c1ab17d11f37b04877859ef45"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7f7f72f721223f33d3dc98a791666ebc6a91fa023ce63733709f4894a7dc611"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b2b55b0448e9da68f56b696f313949cda1039e8ec7b5d294285335b53104b61"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c24574c7e92e2c56379706b9a3f07c1e0c7f2f87a41b6ee86653100c4ce343e5"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2b05e6ccbee333a8f4b8f4d7c244fdb7a979e90977ad9c51ea31261e2085ce0"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2c409ce1c219c091e47cb03feb3c4ed8c2b8e004efc940da0166aaee8f9d6c8"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d965e8b325f443ed3196db890d85dfebbb09f7384486a77461347f4adb1fa7f8"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f56af3a420fb1ffaf43ece3ea09c2d27c444e7c40dcb7c6e7cf57aae764f2b48"},
-    {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b01a078dd4f9a52494370af21aa52964e0a96d4862ac64ff7cea06e0f12d2c5"},
-    {file = "pydantic_core-2.23.3-cp311-none-win32.whl", hash = "sha256:560e32f0df04ac69b3dd818f71339983f6d1f70eb99d4d1f8e9705fb6c34a5c1"},
-    {file = "pydantic_core-2.23.3-cp311-none-win_amd64.whl", hash = "sha256:c744fa100fdea0d000d8bcddee95213d2de2e95b9c12be083370b2072333a0fa"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e0ec50663feedf64d21bad0809f5857bac1ce91deded203efc4a84b31b2e4305"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db6e6afcb95edbe6b357786684b71008499836e91f2a4a1e55b840955b341dbb"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98ccd69edcf49f0875d86942f4418a4e83eb3047f20eb897bffa62a5d419c8fa"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a678c1ac5c5ec5685af0133262103defb427114e62eafeda12f1357a12140162"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01491d8b4d8db9f3391d93b0df60701e644ff0894352947f31fff3e52bd5c801"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fcf31facf2796a2d3b7fe338fe8640aa0166e4e55b4cb108dbfd1058049bf4cb"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7200fd561fb3be06827340da066df4311d0b6b8eb0c2116a110be5245dceb326"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc1636770a809dee2bd44dd74b89cc80eb41172bcad8af75dd0bc182c2666d4c"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:67a5def279309f2e23014b608c4150b0c2d323bd7bccd27ff07b001c12c2415c"},
-    {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:748bdf985014c6dd3e1e4cc3db90f1c3ecc7246ff5a3cd4ddab20c768b2f1dab"},
-    {file = "pydantic_core-2.23.3-cp312-none-win32.whl", hash = "sha256:255ec6dcb899c115f1e2a64bc9ebc24cc0e3ab097775755244f77360d1f3c06c"},
-    {file = "pydantic_core-2.23.3-cp312-none-win_amd64.whl", hash = "sha256:40b8441be16c1e940abebed83cd006ddb9e3737a279e339dbd6d31578b802f7b"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6daaf5b1ba1369a22c8b050b643250e3e5efc6a78366d323294aee54953a4d5f"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d015e63b985a78a3d4ccffd3bdf22b7c20b3bbd4b8227809b3e8e75bc37f9cb2"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3fc572d9b5b5cfe13f8e8a6e26271d5d13f80173724b738557a8c7f3a8a3791"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f6bd91345b5163ee7448bee201ed7dd601ca24f43f439109b0212e296eb5b423"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc379c73fd66606628b866f661e8785088afe2adaba78e6bbe80796baf708a63"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbdce4b47592f9e296e19ac31667daed8753c8367ebb34b9a9bd89dacaa299c9"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc3cf31edf405a161a0adad83246568647c54404739b614b1ff43dad2b02e6d5"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e22b477bf90db71c156f89a55bfe4d25177b81fce4aa09294d9e805eec13855"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0a0137ddf462575d9bce863c4c95bac3493ba8e22f8c28ca94634b4a1d3e2bb4"},
-    {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:203171e48946c3164fe7691fc349c79241ff8f28306abd4cad5f4f75ed80bc8d"},
-    {file = "pydantic_core-2.23.3-cp313-none-win32.whl", hash = "sha256:76bdab0de4acb3f119c2a4bff740e0c7dc2e6de7692774620f7452ce11ca76c8"},
-    {file = "pydantic_core-2.23.3-cp313-none-win_amd64.whl", hash = "sha256:37ba321ac2a46100c578a92e9a6aa33afe9ec99ffa084424291d84e456f490c1"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d063c6b9fed7d992bcbebfc9133f4c24b7a7f215d6b102f3e082b1117cddb72c"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6cb968da9a0746a0cf521b2b5ef25fc5a0bee9b9a1a8214e0a1cfaea5be7e8a4"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbefe079a520c5984e30e1f1f29325054b59534729c25b874a16a5048028d16"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbaaf2ef20d282659093913da9d402108203f7cb5955020bd8d1ae5a2325d1c4"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb539d7e5dc4aac345846f290cf504d2fd3c1be26ac4e8b5e4c2b688069ff4cf"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e6f33503c5495059148cc486867e1d24ca35df5fc064686e631e314d959ad5b"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04b07490bc2f6f2717b10c3969e1b830f5720b632f8ae2f3b8b1542394c47a8e"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:03795b9e8a5d7fda05f3873efc3f59105e2dcff14231680296b87b80bb327295"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c483dab0f14b8d3f0df0c6c18d70b21b086f74c87ab03c59250dbf6d3c89baba"},
-    {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b2682038e255e94baf2c473dca914a7460069171ff5cdd4080be18ab8a7fd6e"},
-    {file = "pydantic_core-2.23.3-cp38-none-win32.whl", hash = "sha256:f4a57db8966b3a1d1a350012839c6a0099f0898c56512dfade8a1fe5fb278710"},
-    {file = "pydantic_core-2.23.3-cp38-none-win_amd64.whl", hash = "sha256:13dd45ba2561603681a2676ca56006d6dee94493f03d5cadc055d2055615c3ea"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:82da2f4703894134a9f000e24965df73cc103e31e8c31906cc1ee89fde72cbd8"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dd9be0a42de08f4b58a3cc73a123f124f65c24698b95a54c1543065baca8cf0e"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89b731f25c80830c76fdb13705c68fef6a2b6dc494402987c7ea9584fe189f5d"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6de1ec30c4bb94f3a69c9f5f2182baeda5b809f806676675e9ef6b8dc936f28"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb68b41c3fa64587412b104294b9cbb027509dc2f6958446c502638d481525ef"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c3980f2843de5184656aab58698011b42763ccba11c4a8c35936c8dd6c7068c"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94f85614f2cba13f62c3c6481716e4adeae48e1eaa7e8bac379b9d177d93947a"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:510b7fb0a86dc8f10a8bb43bd2f97beb63cffad1203071dc434dac26453955cd"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1eba2f7ce3e30ee2170410e2171867ea73dbd692433b81a93758ab2de6c64835"},
-    {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b259fd8409ab84b4041b7b3f24dcc41e4696f180b775961ca8142b5b21d0e70"},
-    {file = "pydantic_core-2.23.3-cp39-none-win32.whl", hash = "sha256:40d9bd259538dba2f40963286009bf7caf18b5112b19d2b55b09c14dde6db6a7"},
-    {file = "pydantic_core-2.23.3-cp39-none-win_amd64.whl", hash = "sha256:5a8cd3074a98ee70173a8633ad3c10e00dcb991ecec57263aacb4095c5efb958"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f399e8657c67313476a121a6944311fab377085ca7f490648c9af97fc732732d"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6b5547d098c76e1694ba85f05b595720d7c60d342f24d5aad32c3049131fa5c4"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dda0290a6f608504882d9f7650975b4651ff91c85673341789a476b1159f211"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65b6e5da855e9c55a0c67f4db8a492bf13d8d3316a59999cfbaf98cc6e401961"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:09e926397f392059ce0afdcac920df29d9c833256354d0c55f1584b0b70cf07e"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:87cfa0ed6b8c5bd6ae8b66de941cece179281239d482f363814d2b986b79cedc"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e61328920154b6a44d98cabcb709f10e8b74276bc709c9a513a8c37a18786cc4"},
-    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce3317d155628301d649fe5e16a99528d5680af4ec7aa70b90b8dacd2d725c9b"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e89513f014c6be0d17b00a9a7c81b1c426f4eb9224b15433f3d98c1a071f8433"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4f62c1c953d7ee375df5eb2e44ad50ce2f5aff931723b398b8bc6f0ac159791a"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2718443bc671c7ac331de4eef9b673063b10af32a0bb385019ad61dcf2cc8f6c"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d90e08b2727c5d01af1b5ef4121d2f0c99fbee692c762f4d9d0409c9da6541"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2b676583fc459c64146debea14ba3af54e540b61762dfc0613dc4e98c3f66eeb"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:50e4661f3337977740fdbfbae084ae5693e505ca2b3130a6d4eb0f2281dc43b8"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:68f4cf373f0de6abfe599a38307f4417c1c867ca381c03df27c873a9069cda25"},
-    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:59d52cf01854cb26c46958552a21acb10dd78a52aa34c86f284e66b209db8cab"},
-    {file = "pydantic_core-2.23.3.tar.gz", hash = "sha256:3cb0f65d8b4121c1b015c60104a685feb929a29d7cf204387c7f2688c7974690"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"},
+    {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"},
+    {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"},
+    {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"},
+    {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"},
+    {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"},
+    {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"},
+    {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"},
+    {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"},
+    {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"},
+    {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"},
+    {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"},
+    {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"},
+    {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"},
+    {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"},
+    {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"},
+    {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"},
+    {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"},
+    {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"},
+    {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"},
+    {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"},
+    {file =
"pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"}, + {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"}, ] [package.dependencies] @@ -1047,13 +1050,13 @@ types-urllib3 = "*" [[package]] name = "types-requests" -version = "2.32.0.20240907" +version = "2.32.0.20240914" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" files = [ - {file = "types-requests-2.32.0.20240907.tar.gz", hash = "sha256:ff33935f061b5e81ec87997e91050f7b4af4f82027a7a7a9d9aaea04a963fdf8"}, - {file = "types_requests-2.32.0.20240907-py3-none-any.whl", hash = "sha256:1d1e79faeaf9d42def77f3c304893dea17a97cae98168ac69f3cb465516ee8da"}, + {file = "types-requests-2.32.0.20240914.tar.gz", hash = "sha256:2850e178db3919d9bf809e434eef65ba49d0e7e33ac92d588f4a5e295fffd405"}, + {file = "types_requests-2.32.0.20240914-py3-none-any.whl", hash = "sha256:59c2f673eb55f32a99b2894faf6020e1a9f4a402ad0f192bfee0b64469054310"}, ] [package.dependencies] diff --git a/pyproject.toml b/pyproject.toml index 4230436b8..fe2fc4191 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cohere" -version = "5.9.2" +version = "5.10.0" description = "" readme = "README.md" authors = [] diff --git a/reference.md b/reference.md index 9257bd953..45eb5984a 100644 --- a/reference.md +++ b/reference.md @@ -1,5 +1,6 @@ # Reference -
client.chat_stream(...) +## V1 +
client.v1.list(...)
@@ -11,985 +12,47 @@
-Generates a text response to a user message. -To learn how to use the Chat API with Streaming and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). -
-
-
-
- -#### 🔌 Usage - -
-
- -
-
- -```python -from cohere import ( - ChatConnector, - ChatStreamRequestConnectorsSearchOptions, - Client, - Message_Chatbot, - ResponseFormat_Text, - Tool, - ToolCall, - ToolParameterDefinitionsValue, - ToolResult, -) - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -response = client.chat_stream( - message="string", - model="string", - preamble="string", - chat_history=[ - Message_Chatbot( - message="string", - tool_calls=[ - ToolCall( - name="string", - parameters={"string": {"key": "value"}}, - ) - ], - ) - ], - conversation_id="string", - prompt_truncation="OFF", - connectors=[ - ChatConnector( - id="string", - user_access_token="string", - continue_on_failure=True, - options={"string": {"key": "value"}}, - ) - ], - search_queries_only=True, - documents=[{"string": {"key": "value"}}], - citation_quality="fast", - temperature=1.1, - max_tokens=1, - max_input_tokens=1, - k=1, - p=1.1, - seed=1, - stop_sequences=["string"], - connectors_search_options=ChatStreamRequestConnectorsSearchOptions( - seed=1, - ), - frequency_penalty=1.1, - presence_penalty=1.1, - raw_prompting=True, - return_prompt=True, - tools=[ - Tool( - name="string", - description="string", - parameter_definitions={ - "string": ToolParameterDefinitionsValue( - description="string", - type="string", - required=True, - ) - }, - ) - ], - tool_results=[ - ToolResult( - call=ToolCall( - name="string", - parameters={"string": {"key": "value"}}, - ), - outputs=[{"string": {"key": "value"}}], - ) - ], - force_single_step=True, - response_format=ResponseFormat_Text(), - safety_mode="CONTEXTUAL", -) -for chunk in response: - yield chunk - -``` -
-
-
-
- -#### ⚙️ Parameters - -
-
- -
-
- -**message:** `str` - -Text input for the model to respond to. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**accepts:** `typing.Optional[typing.Literal["text/event-stream"]]` — Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. - -
-
- -
-
- -**model:** `typing.Optional[str]` - -Defaults to `command-r-plus-08-2024`. - -The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - -Compatible Deployments: Cohere Platform, Private Deployments - - -
-
- -
-
- -**preamble:** `typing.Optional[str]` - -When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. - -The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**chat_history:** `typing.Optional[typing.Sequence[Message]]` - -A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. - -Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. - -The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**conversation_id:** `typing.Optional[str]` - -An alternative to `chat_history`. - -Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. - -Compatible Deployments: Cohere Platform - - -
-
- -
-
- -**prompt_truncation:** `typing.Optional[ChatStreamRequestPromptTruncation]` - -Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. - -Dictates how the prompt will be constructed. - -With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. - -With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. - -With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - -Compatible Deployments: - - AUTO: Cohere Platform Only - - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**connectors:** `typing.Optional[typing.Sequence[ChatConnector]]` - -Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. - -When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). - -Compatible Deployments: Cohere Platform - - -
-
- -
-
- -**search_queries_only:** `typing.Optional[bool]` - -Defaults to `false`. - -When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**documents:** `typing.Optional[typing.Sequence[ChatDocument]]` - -A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. - -Example: -``` -[ - { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, - { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, -] -``` - -Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. - -Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. - -An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. - -An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. - -See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**citation_quality:** `typing.Optional[ChatStreamRequestCitationQuality]` - -Defaults to `"accurate"`. - -Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**temperature:** `typing.Optional[float]` - -Defaults to `0.3`. - -A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - -Randomness can be further maximized by increasing the value of the `p` parameter. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**max_tokens:** `typing.Optional[int]` - -The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**max_input_tokens:** `typing.Optional[int]` - -The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. - -Input will be truncated according to the `prompt_truncation` parameter. - -Compatible Deployments: Cohere Platform - - -
-
- -
-
- -**k:** `typing.Optional[int]` - -Ensures only the top `k` most likely tokens are considered for generation at each step. -Defaults to `0`, min value of `0`, max value of `500`. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**p:** `typing.Optional[float]` - -Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. -Defaults to `0.75`. min value of `0.01`, max value of `0.99`. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**seed:** `typing.Optional[int]` - -If specified, the backend will make a best effort to sample tokens -deterministically, such that repeated requests with the same -seed and parameters should return the same result. However, -determinism cannot be totally guaranteed. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**stop_sequences:** `typing.Optional[typing.Sequence[str]]` - -A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**frequency_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - -Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**presence_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - -Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**raw_prompting:** `typing.Optional[bool]` - -When enabled, the user's prompt will be sent to the model without -any pre-processing. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**return_prompt:** `typing.Optional[bool]` — The prompt is returned in the `prompt` response field when this is enabled. - -
-
- -
-
- -**tools:** `typing.Optional[typing.Sequence[Tool]]` - -A list of available tools (functions) that the model may suggest invoking before producing a text response. - -When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**tool_results:** `typing.Optional[typing.Sequence[ToolResult]]` - -A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. -Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. - -**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. -``` -tool_results = [ - { - "call": { - "name": , - "parameters": { - : - } - }, - "outputs": [{ - : - }] - }, - ... -] -``` -**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**force_single_step:** `typing.Optional[bool]` — Forces the chat to be single step. Defaults to `false`. - -
-
- -
-
- -**response_format:** `typing.Optional[ResponseFormat]` - -
-
- -
-
- -**safety_mode:** `typing.Optional[ChatStreamRequestSafetyMode]` - -Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. -When `NONE` is specified, the safety instruction will be omitted. - -Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. - -**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
-
-
- - - - -
- -
client.chat(...) -
-
- -#### 📝 Description - -
-
- -
-
- -Generates a text response to a user message. -To learn how to use the Chat API with Streaming and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). -
-
-
-
- -#### 🔌 Usage - -
-
- -
-
- -```python -from cohere import Client, Message_Tool - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.chat( - message="Can you give me a global market overview of solar panels?", - chat_history=[Message_Tool(), Message_Tool()], - prompt_truncation="OFF", - temperature=0.3, -) - -``` -
-
-
-
- -#### ⚙️ Parameters - -
-
- -
-
- -**message:** `str` - -Text input for the model to respond to. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**accepts:** `typing.Optional[typing.Literal["text/event-stream"]]` — Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. - -
-
- -
-
- -**model:** `typing.Optional[str]` - -Defaults to `command-r-plus-08-2024`. - -The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - -Compatible Deployments: Cohere Platform, Private Deployments - - -
-
- -
-
- -**preamble:** `typing.Optional[str]` - -When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. - -The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**chat_history:** `typing.Optional[typing.Sequence[Message]]` - -A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. - -Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. - -The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**conversation_id:** `typing.Optional[str]` - -An alternative to `chat_history`. - -Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. - -Compatible Deployments: Cohere Platform - - -
-
- -
-
- -**prompt_truncation:** `typing.Optional[ChatRequestPromptTruncation]` - -Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. - -Dictates how the prompt will be constructed. - -With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. - -With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. - -With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - -Compatible Deployments: - - AUTO: Cohere Platform Only - - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**connectors:** `typing.Optional[typing.Sequence[ChatConnector]]` - -Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. - -When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). - -Compatible Deployments: Cohere Platform - - -
-
- -
-
- -**search_queries_only:** `typing.Optional[bool]` - -Defaults to `false`. - -When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**documents:** `typing.Optional[typing.Sequence[ChatDocument]]` - -A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. - -Example: -``` -[ - { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, - { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, -] -``` - -Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. - -Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. - -An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. - -An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. - -See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**citation_quality:** `typing.Optional[ChatRequestCitationQuality]` - -Defaults to `"accurate"`. - -Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**temperature:** `typing.Optional[float]` - -Defaults to `0.3`. - -A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - -Randomness can be further maximized by increasing the value of the `p` parameter. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**max_tokens:** `typing.Optional[int]` - -The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**max_input_tokens:** `typing.Optional[int]` - -The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. - -Input will be truncated according to the `prompt_truncation` parameter. - -Compatible Deployments: Cohere Platform - - -
-
- -
-
- -**k:** `typing.Optional[int]` - -Ensures only the top `k` most likely tokens are considered for generation at each step. -Defaults to `0`, min value of `0`, max value of `500`. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**p:** `typing.Optional[float]` - -Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. -Defaults to `0.75`. min value of `0.01`, max value of `0.99`. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**seed:** `typing.Optional[int]` - -If specified, the backend will make a best effort to sample tokens -deterministically, such that repeated requests with the same -seed and parameters should return the same result. However, -determinism cannot be totally guaranteed. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - +Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models.
- -
-
- -**stop_sequences:** `typing.Optional[typing.Sequence[str]]` - -A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - -
-
-
- -**frequency_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - -Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
+#### 🔌 Usage
-**presence_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - -Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
-
-**raw_prompting:** `typing.Optional[bool]` - -When enabled, the user's prompt will be sent to the model without -any pre-processing. +```python +from cohere import Client -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", +) +client.v1.list() - +```
- -
-
- -**return_prompt:** `typing.Optional[bool]` — The prompt is returned in the `prompt` response field when this is enabled. -
+#### ⚙️ Parameters +
-**tools:** `typing.Optional[typing.Sequence[Tool]]` - -A list of available tools (functions) that the model may suggest invoking before producing a text response. - -When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
-
-**tool_results:** `typing.Optional[typing.Sequence[ToolResult]]` - -A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. -Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. - -**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. -``` -tool_results = [ - { - "call": { - "name": , - "parameters": { - : - } - }, - "outputs": [{ - : - }] - }, - ... -] -``` -**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. - -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +**page_size:** `typing.Optional[float]` +Maximum number of models to include in a page +Defaults to `20`, min value of `1`, max value of `1000`.
@@ -997,7 +60,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**force_single_step:** `typing.Optional[bool]` — Forces the chat to be single step. Defaults to `false`. +**page_token:** `typing.Optional[str]` — Page token provided in the `next_page_token` field of a previous response.
@@ -1005,7 +68,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**response_format:** `typing.Optional[ResponseFormat]` +**endpoint:** `typing.Optional[CompatibleEndpoint]` — When provided, filters the list of models to only those that are compatible with the specified endpoint.
@@ -1013,17 +76,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**safety_mode:** `typing.Optional[ChatRequestSafetyMode]`
-
-Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
-When `NONE` is specified, the safety instruction will be omitted.
-
-Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.
-
-**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer.
-
-Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
-
+**default_only:** `typing.Optional[bool]` — When provided, filters the list of models to only the default model for the endpoint. This parameter is only valid when `endpoint` is provided.
@@ -1043,7 +96,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
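+Together, `page_size` and `page_token` support cursor-style pagination. A minimal sketch of walking every page (the `models` and `next_page_token` response attributes are assumed from the field descriptions above, not verified against the generated types):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+models = []
+page_token = None
+while True:
+    response = client.v1.list(page_size=50, page_token=page_token)
+    models.extend(response.models or [])
+    page_token = response.next_page_token  # assumed field name, per the `page_token` description
+    if not page_token:
+        break
+```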
-
client.generate_stream(...) +
client.v1.create(...)
@@ -1055,10 +108,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-
-This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
-
-Generates realistic text conditioned on a given input.
+Creates a new connector. The connector is tested during registration, and registration is cancelled if the test is unsuccessful. See ['Creating and Deploying a Connector'](https://docs.cohere.com/docs/creating-and-deploying-a-connector) for more information.
@@ -1079,26 +129,10 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -response = client.generate_stream( - prompt="string", - model="string", - num_generations=1, - max_tokens=1, - truncate="NONE", - temperature=1.1, - seed=1, - preset="string", - end_sequences=["string"], - stop_sequences=["string"], - k=1, - p=1.1, - frequency_penalty=1.1, - presence_penalty=1.1, - return_likelihoods="GENERATION", - raw_prompting=True, +client.v1.create( + name="name", + url="url", ) -for chunk in response: - yield chunk ``` @@ -1106,115 +140,15 @@ for chunk in response:
-#### ⚙️ Parameters - -
-
- -
-
- -**prompt:** `str` - -The input text that serves as the starting point for generating the response. -Note: The prompt will be pre-processed and modified before reaching the model. - - -
-
- -
-
- -**model:** `typing.Optional[str]` - -The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). -Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - -
-
- -
-
- -**num_generations:** `typing.Optional[int]` — The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. - - -
-
- -
-
- -**max_tokens:** `typing.Optional[int]` - -The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - -This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. - -Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. - - -
-
- -
-
- -**truncate:** `typing.Optional[GenerateStreamRequestTruncate]` - -One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - -Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - -If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - -
-
- -
-
- -**temperature:** `typing.Optional[float]` - -A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. -Defaults to `0.75`, min value of `0.0`, max value of `5.0`. - - -
-
- -
-
- -**seed:** `typing.Optional[int]` - -If specified, the backend will make a best effort to sample tokens -deterministically, such that repeated requests with the same -seed and parameters should return the same result. However, -determinism cannot be totally guaranteed. -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - -
-
- -
-
- -**preset:** `typing.Optional[str]` - -Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). -When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. - - -
-
- +#### ⚙️ Parameters +
-**end_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. +
+
+ +**name:** `str` — A human-readable name for the connector.
@@ -1222,7 +156,7 @@ When a preset is specified, the `prompt` parameter becomes optional, and any inc
-**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text. +**url:** `str` — The URL of the connector that will be used to search for documents.
@@ -1230,11 +164,7 @@ When a preset is specified, the `prompt` parameter becomes optional, and any inc
-**k:** `typing.Optional[int]` - -Ensures only the top `k` most likely tokens are considered for generation at each step. -Defaults to `0`, min value of `0`, max value of `500`. - +**description:** `typing.Optional[str]` — A description of the connector.
@@ -1242,11 +172,7 @@ Defaults to `0`, min value of `0`, max value of `500`.
-**p:** `typing.Optional[float]` - -Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. -Defaults to `0.75`. min value of `0.01`, max value of `0.99`. - +**excludes:** `typing.Optional[typing.Sequence[str]]` — A list of fields to exclude from the prompt (fields remain in the document).
@@ -1254,12 +180,7 @@ Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
-**frequency_penalty:** `typing.Optional[float]` - -Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - -Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - +**oauth:** `typing.Optional[CreateConnectorOAuth]` — The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.
@@ -1267,14 +188,7 @@ Using `frequency_penalty` in combination with `presence_penalty` is not supporte
-**presence_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - -Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - -Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - +**active:** `typing.Optional[bool]` — Whether the connector is active or not.
@@ -1282,13 +196,7 @@ Using `frequency_penalty` in combination with `presence_penalty` is not supporte
-**return_likelihoods:** `typing.Optional[GenerateStreamRequestReturnLikelihoods]` - -One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. - -If `GENERATION` is selected, the token likelihoods will only be provided for generated text. - -If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. +**continue_on_failure:** `typing.Optional[bool]` — Whether a chat request should continue or not if the request to this connector fails.
@@ -1296,7 +204,7 @@ If `ALL` is selected, the token likelihoods will be provided both for the prompt
-**raw_prompting:** `typing.Optional[bool]` — When enabled, the user's prompt will be sent to the model without any pre-processing.
+**service_auth:** `typing.Optional[CreateConnectorServiceAuth]` — The service-to-service authentication configuration for the connector. Cannot be specified if oauth is specified.
@@ -1316,7 +224,7 @@ If `ALL` is selected, the token likelihoods will be provided both for the prompt
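+Since `oauth` and `service_auth` are mutually exclusive, pick one. A hedged sketch of registering a connector with service-to-service auth (the import path for `CreateConnectorServiceAuth` under the new `v1` layout is assumed):
+
+```python
+from cohere import Client
+from cohere.v1.types import CreateConnectorServiceAuth  # assumed module path after the v1 move
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+client.v1.create(
+    name="Internal Docs",
+    url="https://connector.example.com/search",  # placeholder URL
+    service_auth=CreateConnectorServiceAuth(
+        type="bearer",
+        token="YOUR_CONNECTOR_TOKEN",
+    ),
+)
+```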
-
client.generate(...) +
client.v1.get(...)
@@ -1328,10 +236,7 @@ If `ALL` is selected, the token likelihoods will be provided both for the prompt
- -This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - -Generates realistic text conditioned on a given input. +Returns the details of a model, provided its name.
@@ -1352,8 +257,8 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.generate( - prompt="Please explain to me how LLMs work", +client.v1.get( + model="command-r", ) ``` @@ -1370,11 +275,7 @@ client.generate(
-**prompt:** `str` - -The input text that serves as the starting point for generating the response. -Note: The prompt will be pre-processed and modified before reaching the model. - +**model:** `str`
@@ -1382,60 +283,70 @@ Note: The prompt will be pre-processed and modified before reaching the model.
-**model:** `typing.Optional[str]` - -The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). -Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
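+A short sketch of reading fields off the result (`name` and `context_length` are assumed `GetModelResponse` attributes, not confirmed against the generated types):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+model = client.v1.get(model="command-r")
+print(model.name, model.context_length)  # assumed attributes
+```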
+ +
-
-
- -**num_generations:** `typing.Optional[int]` — The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. -
+
+
client.v1.cancel(...)
-**max_tokens:** `typing.Optional[int]` - -The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. +#### 📝 Description -This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. +
+
-Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. +
+
- +This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation. +
+
+#### 🔌 Usage +
-**truncate:** `typing.Optional[GenerateRequestTruncate]` +
+
-One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. +```python +from cohere import Client -Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. +client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", +) +client.v1.cancel( + id="id", +) -If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - +``` +
+
+#### ⚙️ Parameters +
-**temperature:** `typing.Optional[float]` - -A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. -Defaults to `0.75`, min value of `0.0`, max value of `5.0`. +
+
+**id:** `str` — The ID of the embed job to cancel.
@@ -1443,116 +354,131 @@ Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
-**seed:** `typing.Optional[int]` +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
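+A defensive sketch of cancelling a job that may already be gone (the `NotFoundError` import path under the relocated `v1.errors` package is assumed):
+
+```python
+from cohere import Client
+from cohere.v1.errors import NotFoundError  # assumed module path
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+try:
+    client.v1.cancel(id="id")
+except NotFoundError:
+    pass  # the job ID is unknown, or the job has already finished
+```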
+
+
+
-If specified, the backend will make a best effort to sample tokens -deterministically, such that repeated requests with the same -seed and parameters should return the same result. However, -determinism cannot be totally guaranteed. -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -
+
+
client.v1.get_usage()
-**preset:** `typing.Optional[str]` - -Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). -When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. +#### 📝 Description - -
-
+
+
-**end_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
-
+View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all its users.
+
+
+#### 🔌 Usage +
-**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text. - -
-
-
-**k:** `typing.Optional[int]` +```python +from cohere import Client -Ensures only the top `k` most likely tokens are considered for generation at each step. -Defaults to `0`, min value of `0`, max value of `500`. +client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", +) +client.v1.get_usage() - +``` +
+
+#### ⚙️ Parameters +
-**p:** `typing.Optional[float]` - -Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. -Defaults to `0.75`. min value of `0.01`, max value of `0.99`. +
+
+**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
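+A small sketch of checking usage against the documented cap (the `organization_usage` attribute and its byte units are assumed from the response schema):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+usage = client.v1.get_usage()
+limit_bytes = 10 * 1024**3  # the documented 10GB Organization cap
+if usage.organization_usage and int(usage.organization_usage) > limit_bytes * 0.9:
+    print("Approaching the 10GB dataset storage limit")
+```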
+
+
-
-
- -**frequency_penalty:** `typing.Optional[float]` - -Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - -Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. -
+
+
client.v1.delete(...)
-**presence_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +#### 📝 Description -Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. +
+
-Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. +
+
- +Delete a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information. +
+
+#### 🔌 Usage +
-**return_likelihoods:** `typing.Optional[GenerateRequestReturnLikelihoods]` +
+
-One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. +```python +from cohere import Client -If `GENERATION` is selected, the token likelihoods will only be provided for generated text. +client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", +) +client.v1.delete( + id="id", +) -If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. - +``` +
+
+#### ⚙️ Parameters +
-**raw_prompting:** `typing.Optional[bool]` — When enabled, the user's prompt will be sent to the model without any pre-processing. +
+
+ +**id:** `str` — The ID of the connector to delete.
@@ -1572,7 +498,7 @@ If `ALL` is selected, the token likelihoods will be provided both for the prompt
-
client.embed(...) +
client.v1.update(...)
@@ -1584,11 +510,7 @@ If `ALL` is selected, the token likelihoods will be provided both for the prompt
-This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents. - -Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page. - -If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search). +Update a connector by ID. Omitted fields will not be updated. See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.
@@ -1609,7 +531,9 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.embed() +client.v1.update( + id="id", +) ``` @@ -1625,7 +549,7 @@ client.embed()
-**texts:** `typing.Optional[typing.Sequence[str]]` — An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality. +**id:** `str` — The ID of the connector to update.
@@ -1633,11 +557,7 @@ client.embed()
-**images:** `typing.Optional[typing.Sequence[str]]` - -An array of image data URIs for the model to embed. Maximum number of images per call is `1`. - -The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB. +**name:** `typing.Optional[str]` — A human-readable name for the connector.
@@ -1645,22 +565,7 @@ The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/We
-**model:** `typing.Optional[str]` - -Defaults to embed-english-v2.0 - -The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - -Available models and corresponding embedding dimensions: - -* `embed-english-v3.0` 1024 -* `embed-multilingual-v3.0` 1024 -* `embed-english-light-v3.0` 384 -* `embed-multilingual-light-v3.0` 384 - -* `embed-english-v2.0` 4096 -* `embed-english-light-v2.0` 1024 -* `embed-multilingual-v2.0` 768 +**url:** `typing.Optional[str]` — The URL of the connector that will be used to search for documents.
@@ -1668,7 +573,7 @@ Available models and corresponding embedding dimensions:
-**input_type:** `typing.Optional[EmbedInputType]` +**excludes:** `typing.Optional[typing.Sequence[str]]` — A list of fields to exclude from the prompt (fields remain in the document).
@@ -1676,15 +581,15 @@ Available models and corresponding embedding dimensions:
-**embedding_types:** `typing.Optional[typing.Sequence[EmbeddingType]]` +**oauth:** `typing.Optional[CreateConnectorOAuth]` — The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. + +
+
-Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types. +
+
-* `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. -* `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. -* `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. -* `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. -* `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. +**active:** `typing.Optional[bool]`
@@ -1692,13 +597,15 @@ Specifies the types of embeddings you want to get back. Not required and default
-**truncate:** `typing.Optional[EmbedRequestTruncate]` - -One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. +**continue_on_failure:** `typing.Optional[bool]` + +
+
-Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. +
+
-If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
+**service_auth:** `typing.Optional[CreateConnectorServiceAuth]` — The service to service authentication configuration for the connector. Cannot be specified if oauth is specified.
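+
+Since `oauth` and `service_auth` cannot both be specified, an update sets at most one of them. A minimal sketch of a partial update (the connector ID and field values are placeholders):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+
+# Only the fields provided are updated; omitted fields keep their current values.
+client.v1.update(
+    id="my-connector-id",         # placeholder connector ID
+    name="Internal Wiki",         # new human-readable name
+    excludes=["internal_notes"],  # excluded from the prompt, kept in the document
+)
+```
+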
@@ -1718,7 +625,7 @@ If `NONE` is selected, when the input exceeds the maximum input token length an
-
client.rerank(...) +
client.v1.o_auth_authorize(...)
@@ -1730,7 +637,7 @@ If `NONE` is selected, when the input exceeds the maximum input token length an
-This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.
+Authorize the connector with the given ID for the connector oauth app. See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information.
@@ -1751,9 +658,8 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.rerank( - query="query", - documents=["documents"], +client.v1.o_auth_authorize( + id="id", ) ``` @@ -1770,7 +676,7 @@ client.rerank(
-**query:** `str` — The search query +**id:** `str` — The ID of the connector to authorize.
@@ -1778,14 +684,7 @@ client.rerank(
-**documents:** `typing.Sequence[RerankRequestDocumentsItem]` - -A list of document objects or strings to rerank. -If a document is provided the text fields is required and all other fields will be preserved in the response. - -The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000. - -We recommend a maximum of 1,000 documents for optimal endpoint performance. +**after_token_redirect:** `typing.Optional[str]` — The URL to redirect to after the connector has been authorized.
@@ -1793,34 +692,154 @@ We recommend a maximum of 1,000 documents for optimal endpoint performance.
-**model:** `typing.Optional[str]` — The identifier of the model to use, one of : `rerank-english-v3.0`, `rerank-multilingual-v3.0`, `rerank-english-v2.0`, `rerank-multilingual-v2.0` +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+ +
+ + + + +
+## V1 V1 +
client.v1.v1.chat_stream(...)
-**top_n:** `typing.Optional[int]` — The number of most relevant documents or indices to return, defaults to the length of the documents - +#### 📝 Description + +
+
+ +
+
+
+Generates a text response to a user message.
+To learn how to use the Chat API and RAG, follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).
+
+
+ +#### 🔌 Usage
-**rank_fields:** `typing.Optional[typing.Sequence[str]]` — If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). If not provided, the model will use the default text field for ranking. - +
+
+
+```python
+from cohere import Client
+from cohere.v1 import (
+    ChatConnector,
+    ChatMessage,
+    TextResponseFormat,
+    Tool,
+    ToolCall,
+    ToolParameterDefinitionsValue,
+    ToolResult,
+)
+from cohere.v1.v1 import ChatStreamRequestConnectorsSearchOptions
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+response = client.v1.v1.chat_stream(
+    message="string",
+    model="string",
+    preamble="string",
+    chat_history=[
+        ChatMessage(
+            message="string",
+            tool_calls=[
+                ToolCall(
+                    name="string",
+                    parameters={"string": {"key": "value"}},
+                )
+            ],
+        )
+    ],
+    conversation_id="string",
+    prompt_truncation="OFF",
+    connectors=[
+        ChatConnector(
+            id="string",
+            user_access_token="string",
+            continue_on_failure=True,
+            options={"string": {"key": "value"}},
+        )
+    ],
+    search_queries_only=True,
+    documents=[{"string": {"key": "value"}}],
+    citation_quality="fast",
+    temperature=1.1,
+    max_tokens=1,
+    max_input_tokens=1,
+    k=1,
+    p=1.1,
+    seed=1,
+    stop_sequences=["string"],
+    connectors_search_options=ChatStreamRequestConnectorsSearchOptions(
+        seed=1,
+    ),
+    frequency_penalty=1.1,
+    presence_penalty=1.1,
+    raw_prompting=True,
+    return_prompt=True,
+    tools=[
+        Tool(
+            name="string",
+            description="string",
+            parameter_definitions={
+                "string": ToolParameterDefinitionsValue(
+                    description="string",
+                    type="string",
+                    required=True,
+                )
+            },
+        )
+    ],
+    tool_results=[
+        ToolResult(
+            call=ToolCall(
+                name="string",
+                parameters={"string": {"key": "value"}},
+            ),
+            outputs=[{"string": {"key": "value"}}],
+        )
+    ],
+    force_single_step=True,
+    response_format=TextResponseFormat(),
+    safety_mode="CONTEXTUAL",
+)
+for chunk in response:
+    print(chunk)
+
+```
+
+
+#### ⚙️ Parameters +
-**return_documents:** `typing.Optional[bool]` +
+
+ +**message:** `str` + +Text input for the model to respond to. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -- If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request. -- If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
@@ -1828,7 +847,7 @@ We recommend a maximum of 1,000 documents for optimal endpoint performance.
-**max_chunks_per_doc:** `typing.Optional[int]` — The maximum number of chunks to produce internally from a document
+**accepts:** `typing.Optional[typing.Literal["text/event-stream"]]` — Pass `text/event-stream` to receive the streamed response as server-sent events. The default is `\n`-delimited events.
@@ -1836,75 +855,99 @@ We recommend a maximum of 1,000 documents for optimal endpoint performance.
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**model:** `typing.Optional[str]` + +Defaults to `command-r-plus-08-2024`. + +The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + +Compatible Deployments: Cohere Platform, Private Deployments +
-
-
+
+
+
+**preamble:** `typing.Optional[str]`
+
+When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
+
+The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
+
+Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+
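+
+A minimal sketch of overriding the default preamble (the preamble text is illustrative; the same parameter applies to `chat_stream`):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+
+# The preamble is added once, at the start of the conversation, with the SYSTEM role.
+response = client.v1.v1.chat(
+    message="Summarize our meeting notes in two sentences.",
+    preamble="You are a concise assistant that always answers in formal English.",
+)
+```
+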
-
-
client.classify(...)
-#### 📝 Description +**chat_history:** `typing.Optional[typing.Sequence[Message]]` -
-
+A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. -
-
+Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. -This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference. -Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. -
-
+The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + +
-#### 🔌 Usage -
-
-
+**conversation_id:** `typing.Optional[str]`
-```python
-from cohere import Client
+An alternative to `chat_history`.
-client = Client(
-    client_name="YOUR_CLIENT_NAME",
-    token="YOUR_TOKEN",
-)
-client.classify(
-    inputs=["inputs"],
-)
+Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
-```
+Compatible Deployments: Cohere Platform
+
+
+ +
+
+**prompt_truncation:** `typing.Optional[ChatStreamRequestPromptTruncation]`
+
+Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
+
+Dictates how the prompt will be constructed.
+
+With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
+
+With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
+
+With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
+
+Compatible Deployments:
+  - AUTO: Cohere Platform Only
+  - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments
+
+
-#### ⚙️ Parameters -
-
-
+**connectors:** `typing.Optional[typing.Sequence[ChatConnector]]` -**inputs:** `typing.Sequence[str]` +Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. + +When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + +Compatible Deployments: Cohere Platform -A list of up to 96 texts to be classified. Each one must be a non-empty string. -There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models). -Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.
@@ -1912,10 +955,14 @@ Note: by default the `truncate` parameter is set to `END`, so tokens exceeding t
-**examples:** `typing.Optional[typing.Sequence[ClassifyExample]]` +**search_queries_only:** `typing.Optional[bool]` + +Defaults to `false`. + +When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`. -Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
@@ -1923,7 +970,30 @@ Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) tra
-**model:** `typing.Optional[str]` — The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID.
+**documents:** `typing.Optional[typing.Sequence[ChatDocument]]`
+
+A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
+
+Example:
+```
+[
+    { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
+    { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
+]
+```
+
+Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
+
+Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
+
+An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
+
+An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
+
+See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
+
+Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+
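+
+A minimal sketch of passing documents with the optional `id` and `_excludes` fields described above (all field values are illustrative):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+
+documents = [
+    {
+        "id": "penguins-1",              # referenced in citations, not shown to the model
+        "title": "Tall penguins",
+        "text": "Emperor penguins are the tallest.",
+        "internal_ref": "doc-store/42",  # hidden from the model via _excludes below
+        "_excludes": ["internal_ref"],
+    },
+]
+
+response = client.v1.v1.chat(
+    message="How tall are emperor penguins?",
+    documents=documents,
+)
+```
+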
@@ -1931,7 +1001,14 @@ Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) tra
-**preset:** `typing.Optional[str]` — The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. +**citation_quality:** `typing.Optional[ChatStreamRequestCitationQuality]` + +Defaults to `"accurate"`. + +Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -1939,11 +1016,16 @@ Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) tra
-**truncate:** `typing.Optional[ClassifyRequestTruncate]` +**temperature:** `typing.Optional[float]` + +Defaults to `0.3`. + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + +Randomness can be further maximized by increasing the value of the `p` parameter. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. -Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. -If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
@@ -1951,73 +1033,55 @@ If `NONE` is selected, when the input exceeds the maximum input token length an
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
-
-
+**max_tokens:** `typing.Optional[int]` + +The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
-
-
client.summarize(...)
-#### 📝 Description +**max_input_tokens:** `typing.Optional[int]` -
-
+The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. -
-
+Input will be truncated according to the `prompt_truncation` parameter. - -This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - -Generates a summary in English for a given text. -
-
+Compatible Deployments: Cohere Platform + +
-#### 🔌 Usage -
-
-
+**k:** `typing.Optional[int]` -```python -from cohere import Client +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.summarize( - text="text", -) +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -``` -
-
+
-#### ⚙️ Parameters -
-
-
+**p:** `typing.Optional[float]` + +Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. +Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -**text:** `str` — The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.
@@ -2025,7 +1089,15 @@ client.summarize(
-**length:** `typing.Optional[SummarizeRequestLength]` — One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.
+**seed:** `typing.Optional[int]`
+
+If specified, the backend will make a best effort to sample tokens
+deterministically, such that repeated requests with the same
+seed and parameters should return the same result. However,
+determinism cannot be totally guaranteed.
+
+Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+
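+
+A minimal sketch of best-effort deterministic sampling with a fixed seed (shown with the non-streaming `chat` endpoint; outputs should usually, but not always, match):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+
+kwargs = dict(message="Write a haiku about tides.", temperature=0.3, seed=42)
+first = client.v1.v1.chat(**kwargs)
+second = client.v1.v1.chat(**kwargs)
+
+# Repeated requests with the same seed and parameters should return the same text,
+# though determinism is not guaranteed.
+print(first.text == second.text)
+```
+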
@@ -2033,7 +1105,12 @@ client.summarize(
-**format:** `typing.Optional[SummarizeRequestFormat]` — One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text. +**stop_sequences:** `typing.Optional[typing.Sequence[str]]` + +A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2041,7 +1118,14 @@ client.summarize(
-**model:** `typing.Optional[str]` — The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better. +**frequency_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2049,7 +1133,14 @@ client.summarize(
-**extractiveness:** `typing.Optional[SummarizeRequestExtractiveness]` — One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text. +**presence_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + +Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2057,7 +1148,13 @@ client.summarize(
-**temperature:** `typing.Optional[float]` — Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1. +**raw_prompting:** `typing.Optional[bool]` + +When enabled, the user's prompt will be sent to the model without +any pre-processing. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2065,7 +1162,7 @@ client.summarize(
-**additional_command:** `typing.Optional[str]` — A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". Eg. "focusing on the next steps" or "written by Yoda" +**return_prompt:** `typing.Optional[bool]` — The prompt is returned in the `prompt` response field when this is enabled.
@@ -2073,79 +1170,81 @@ client.summarize(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
-
-
+**tools:** `typing.Optional[typing.Sequence[Tool]]` + +A list of available tools (functions) that the model may suggest invoking before producing a text response. + +When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
-
-
client.tokenize(...)
-#### 📝 Description +**tool_results:** `typing.Optional[typing.Sequence[ToolResult]]` -
-
+A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. +Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. -
-
+**Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list.
+```
+tool_results = [
+    {
+        "call": {
+            "name": <tool name>,
+            "parameters": {
+                <param name>: <param value>
+            }
+        },
+        "outputs": [{
+            <key>: <value>
+        }]
+    },
+    ...
+]
+```
+**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
-This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.
-
-
+Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + +
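+
+A minimal sketch of the same structure using the SDK's typed objects with the non-streaming `chat` endpoint (the tool name, parameters, and outputs are illustrative):
+
+```python
+from cohere import Client
+from cohere.v1 import Tool, ToolCall, ToolParameterDefinitionsValue, ToolResult
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+
+get_weather = Tool(
+    name="get_weather",
+    description="Returns the current weather for a city.",
+    parameter_definitions={
+        "city": ToolParameterDefinitionsValue(
+            description="Name of the city.",
+            type="str",
+            required=True,
+        )
+    },
+)
+
+# A single-object tool output is wrapped in a list, as required by `outputs`.
+weather_result = ToolResult(
+    call=ToolCall(name="get_weather", parameters={"city": "Toronto"}),
+    outputs=[{"temperature_c": 12, "conditions": "overcast"}],
+)
+
+response = client.v1.v1.chat(
+    message="What's the weather in Toronto?",
+    tools=[get_weather],  # `tools` must accompany `tool_results`
+    tool_results=[weather_result],
+)
+```
+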
-#### 🔌 Usage -
+**force_single_step:** `typing.Optional[bool]` — Forces the chat to be single step. Defaults to `false`. + +
+
+
-```python -from cohere import Client - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.tokenize( - text="tokenize me! :D", - model="command", -) - -``` -
-
+**response_format:** `typing.Optional[ResponseFormat]` +
-#### ⚙️ Parameters -
-
-
+**safety_mode:** `typing.Optional[ChatStreamRequestSafetyMode]` -**text:** `str` — The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters. - -
-
+Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. +When `NONE` is specified, the safety instruction will be omitted. -
-
+Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + +**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -**model:** `str` — An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model.
@@ -2165,7 +1264,7 @@ client.tokenize(
-
client.detokenize(...) +
client.v1.v1.chat(...)
@@ -2177,7 +1276,8 @@ client.tokenize(
-This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.
+Generates a text response to a user message.
+To learn how to use the Chat API and RAG, follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).
@@ -2193,14 +1293,24 @@ This endpoint takes tokens using byte-pair encoding and returns their text repre
 
 ```python
 from cohere import Client
+from cohere.v1 import ChatMessage
 
 client = Client(
     client_name="YOUR_CLIENT_NAME",
     token="YOUR_TOKEN",
 )
-client.detokenize(
-    tokens=[1],
-    model="model",
+client.v1.v1.chat(
+    message="Can you give me a global market overview of solar panels?",
+    chat_history=[
+        ChatMessage(
+            message="Hi!",
+        ),
+        ChatMessage(
+            message="How can I help you today?",
+        ),
+    ],
+    prompt_truncation="OFF",
+    temperature=0.3,
 )
 
 ```
@@ -2217,7 +1327,7 @@ client.detokenize(
-**tokens:** `typing.Sequence[int]` — The list of tokens to be detokenized. +**message:** `str` + +Text input for the model to respond to. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2225,7 +1340,7 @@ client.detokenize(
-**model:** `str` — An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.
+**accepts:** `typing.Optional[typing.Literal["text/event-stream"]]` — Pass `text/event-stream` to receive the streamed response as server-sent events. The default is `\n`-delimited events.
@@ -2233,167 +1348,219 @@ client.detokenize(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**model:** `typing.Optional[str]` + +Defaults to `command-r-plus-08-2024`. + +The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + +Compatible Deployments: Cohere Platform, Private Deployments +
- -
+
+
+ +**preamble:** `typing.Optional[str]` + +When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. + +The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
-
-
client.check_api_key()
-#### 📝 Description +**chat_history:** `typing.Optional[typing.Sequence[Message]]` + +A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. + +Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. + +The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + +
+
+**conversation_id:** `typing.Optional[str]`
+
+An alternative to `chat_history`.
+
+Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
+
+Compatible Deployments: Cohere Platform
+
+
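+
+A minimal sketch of a persisted conversation (the ID is an arbitrary placeholder):
+
+```python
+from cohere import Client
+
+client = Client(
+    client_name="YOUR_CLIENT_NAME",
+    token="YOUR_TOKEN",
+)
+
+# First turn: creates the persisted conversation.
+client.v1.v1.chat(
+    message="Hi, my name is Ada.",
+    conversation_id="user-1234-session",
+)
+
+# Later turn: the same ID resumes the conversation, so no chat_history is needed.
+reply = client.v1.v1.chat(
+    message="What did I say my name was?",
+    conversation_id="user-1234-session",
+)
+```
+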
+
+
-Checks that the api key in the Authorization header is valid and active +**prompt_truncation:** `typing.Optional[ChatRequestPromptTruncation]` + +Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. + +Dictates how the prompt will be constructed. + +With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. + +With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. + +With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. + +Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + +
+ +
+
+ +**connectors:** `typing.Optional[typing.Sequence[ChatConnector]]` + +Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. + +When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + +Compatible Deployments: Cohere Platform + +
-#### 🔌 Usage -
+**search_queries_only:** `typing.Optional[bool]` + +Defaults to `false`. + +When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + +
+
+
-```python
-from cohere import Client
+**documents:** `typing.Optional[typing.Sequence[ChatDocument]]`
-client = Client(
-    client_name="YOUR_CLIENT_NAME",
-    token="YOUR_TOKEN",
-)
-client.check_api_key()
+A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
+Example:
+```
-
+[
+    { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
+    { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
+]
+```
+
+Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
+
+Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
+
+An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
+
+An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
+
+See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
+
+Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+
+
-#### ⚙️ Parameters
-
-
-
+**citation_quality:** `typing.Optional[ChatRequestCitationQuality]` + +Defaults to `"accurate"`. + +Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
-
-
+
+
+ +**temperature:** `typing.Optional[float]` + +Defaults to `0.3`. + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + +Randomness can be further maximized by increasing the value of the `p` parameter. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
-
-## V2 -
client.v2.chat_stream(...)
-#### 📝 Description +**max_tokens:** `typing.Optional[int]` -
-
+The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. -
-
+Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. -
-
+
-#### 🔌 Usage -
-
-
+**max_input_tokens:** `typing.Optional[int]`
-```python
-from cohere import Client
-from cohere.v2 import (
-    ChatMessage2_User,
-    ResponseFormat2_Text,
-    Tool2,
-    Tool2Function,
-)
+The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.
-client = Client(
-    client_name="YOUR_CLIENT_NAME",
-    token="YOUR_TOKEN",
-)
-response = client.v2.chat_stream(
-    model="string",
-    messages=[
-        ChatMessage2_User(
-            content="string",
-            documents=[{"string": {"key": "value"}}],
-        )
-    ],
-    tools=[
-        Tool2(
-            function=Tool2Function(
-                name="string",
-                description="string",
-                parameters={"string": {"key": "value"}},
-            ),
-        )
-    ],
-    citation_mode="FAST",
-    response_format=ResponseFormat2_Text(),
-    safety_mode="CONTEXTUAL",
-    max_tokens=1,
-    stop_sequences=["string"],
-    temperature=1.1,
-    seed=1,
-    frequency_penalty=1.1,
-    presence_penalty=1.1,
-    k=1.1,
-    p=1.1,
-    return_prompt=True,
-)
-for chunk in response:
-    yield chunk
+Input will be truncated according to the `prompt_truncation` parameter.
-```
-
-
+Compatible Deployments: Cohere Platform + +
-#### ⚙️ Parameters -
-
-
+**k:** `typing.Optional[int]` + +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
@@ -2401,7 +1568,13 @@ for chunk in response:
-**messages:** `ChatMessages` +**p:** `typing.Optional[float]` + +Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. +Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2409,11 +1582,14 @@ for chunk in response:
-**tools:** `typing.Optional[typing.Sequence[Tool2]]` +**seed:** `typing.Optional[int]` -A list of available tools (functions) that the model may suggest invoking before producing a text response. +If specified, the backend will make a best effort to sample tokens +deterministically, such that repeated requests with the same +seed and parameters should return the same result. However, +determinism cannot be totally guaranteed. -When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
@@ -2422,10 +1598,11 @@ When `tools` is passed (without `tool_results`), the `text` content in the respo
-**citation_mode:** `typing.Optional[V2ChatStreamRequestCitationMode]` +**stop_sequences:** `typing.Optional[typing.Sequence[str]]` + +A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. -Defaults to `"accurate"`. -Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
@@ -2434,7 +1611,14 @@ Dictates the approach taken to generating citations as part of the RAG flow by a
-**response_format:** `typing.Optional[ResponseFormat2]` +**frequency_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -2442,14 +1626,11 @@ Dictates the approach taken to generating citations as part of the RAG flow by a
-**safety_mode:** `typing.Optional[V2ChatStreamRequestSafetyMode]` - -Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. -When `NONE` is specified, the safety instruction will be omitted. +**presence_penalty:** `typing.Optional[float]` -Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. +Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2460,7 +1641,12 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. +**raw_prompting:** `typing.Optional[bool]` + +When enabled, the user's prompt will be sent to the model without +any pre-processing. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
@@ -2469,8 +1655,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - +**return_prompt:** `typing.Optional[bool]` — The prompt is returned in the `prompt` response field when this is enabled.
@@ -2478,13 +1663,13 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**temperature:** `typing.Optional[float]` +**tools:** `typing.Optional[typing.Sequence[Tool]]` -Defaults to `0.3`. +A list of available tools (functions) that the model may suggest invoking before producing a text response. -A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. +When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. -Randomness can be further maximized by increasing the value of the `p` parameter. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
@@ -2493,24 +1678,31 @@ Randomness can be further maximized by increasing the value of the `p` paramete
-**seed:** `typing.Optional[int]` - -If specified, the backend will make a best effort to sample tokens -deterministically, such that repeated requests with the same -seed and parameters should return the same result. However, -determinism cannot be totally guaranteed. - - -
-
+**tool_results:** `typing.Optional[typing.Sequence[ToolResult]]` -
-
+A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. +Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. -**frequency_penalty:** `typing.Optional[float]` +**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. +``` +tool_results = [ + { + "call": { + "name": , + "parameters": { + : + } + }, + "outputs": [{ + : + }] + }, + ... +] +``` +**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
@@ -2519,11 +1711,7 @@ Used to reduce repetitiveness of generated tokens. The higher the value, the str
-**presence_penalty:** `typing.Optional[float]` - -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - +**force_single_step:** `typing.Optional[bool]` — Forces the chat to be single step. Defaults to `false`.
@@ -2531,11 +1719,7 @@ Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty
-**k:** `typing.Optional[float]` - -Ensures only the top `k` most likely tokens are considered for generation at each step. -Defaults to `0`, min value of `0`, max value of `500`. - +**response_format:** `typing.Optional[ResponseFormat]`
@@ -2543,19 +1727,17 @@ Defaults to `0`, min value of `0`, max value of `500`.
-**p:** `typing.Optional[float]` +**safety_mode:** `typing.Optional[ChatRequestSafetyMode]` -Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. -Defaults to `0.75`. min value of `0.01`, max value of `0.99`. +Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. +When `NONE` is specified, the safety instruction will be omitted. - -
-
+Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. -
-
+**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments -**return_prompt:** `typing.Optional[bool]` — Whether to return the prompt in the response.
@@ -2575,7 +1757,7 @@ Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
-
client.v2.chat(...) +
client.v1.v1.generate_stream(...)
@@ -2587,7 +1769,10 @@ Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
-Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides.
+
+This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
+
+Generates realistic text conditioned on a given input.
@@ -2603,21 +1788,31 @@ Generates a message from the model in response to a provided conversation. To le
 
 ```python
 from cohere import Client
-from cohere.v2 import ChatMessage2_Tool
 
 client = Client(
     client_name="YOUR_CLIENT_NAME",
     token="YOUR_TOKEN",
 )
-client.v2.chat(
-    model="model",
-    messages=[
-        ChatMessage2_Tool(
-            tool_call_id="messages",
-            tool_content=["messages"],
-        )
-    ],
+response = client.v1.v1.generate_stream(
+    prompt="string",
+    model="string",
+    num_generations=1,
+    max_tokens=1,
+    truncate="NONE",
+    temperature=1.1,
+    seed=1,
+    preset="string",
+    end_sequences=["string"],
+    stop_sequences=["string"],
+    k=1,
+    p=1.1,
+    frequency_penalty=1.1,
+    presence_penalty=1.1,
+    return_likelihoods="GENERATION",
+    raw_prompting=True,
 )
+for chunk in response:
+    print(chunk)
 
 ```
 
@@ -2633,15 +1828,11 @@
-**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - -
-
+**prompt:** `str` -
-
+The input text that serves as the starting point for generating the response. +Note: The prompt will be pre-processed and modified before reaching the model. -**messages:** `ChatMessages`
@@ -2649,12 +1840,10 @@ client.v2.chat(
-**tools:** `typing.Optional[typing.Sequence[Tool2]]` - -A list of available tools (functions) that the model may suggest invoking before producing a text response. - -When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. +**model:** `typing.Optional[str]` +The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). +Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
@@ -2662,19 +1851,8 @@ When `tools` is passed (without `tool_results`), the `text` content in the respo
-**citation_mode:** `typing.Optional[V2ChatRequestCitationMode]` - -Defaults to `"accurate"`. -Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - - -
-
- -
-
+**num_generations:** `typing.Optional[int]` — The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. -**response_format:** `typing.Optional[ResponseFormat2]`
@@ -2682,16 +1860,13 @@ Dictates the approach taken to generating citations as part of the RAG flow by a
-**safety_mode:** `typing.Optional[V2ChatRequestSafetyMode]` - -Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. -When `NONE` is specified, the safety instruction will be omitted. +**max_tokens:** `typing.Optional[int]` -Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. +The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. -**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. +This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. -Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
@@ -2700,17 +1875,13 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - -
-
+**truncate:** `typing.Optional[GenerateStreamRequestTruncate]` -
-
+One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. -**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. +Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. +If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
@@ -2720,11 +1891,8 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **temperature:** `typing.Optional[float]` -Defaults to `0.3`. - -A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - -Randomness can be further maximized by increasing the value of the `p` parameter. +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. +Defaults to `0.75`, min value of `0.0`, max value of `5.0`. @@ -2739,6 +1907,7 @@ If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2747,10 +1916,10 @@ determinism cannot be totally guaranteed.
-**frequency_penalty:** `typing.Optional[float]` +**preset:** `typing.Optional[str]` -Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. +Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). +When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
@@ -2759,11 +1928,15 @@ Used to reduce repetitiveness of generated tokens. The higher the value, the str
-**presence_penalty:** `typing.Optional[float]` +**end_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. + +
+
-Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. +
+
+**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
@@ -2771,7 +1944,7 @@ Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty
-**k:** `typing.Optional[float]` +**k:** `typing.Optional[int]` Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. @@ -2795,73 +1968,53 @@ Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
-**return_prompt:** `typing.Optional[bool]` — Whether to return the prompt in the response. - -
-
- -
-
+**frequency_penalty:** `typing.Optional[float]` -**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
-
-
+Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. +Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. +
-
-## EmbedJobs -
client.embed_jobs.list()
-#### 📝 Description +**presence_penalty:** `typing.Optional[float]` -
-
+Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -
-
+Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. -The list embed job endpoint allows users to view all embed jobs history for that specific user. -
-
+Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. + +
-#### 🔌 Usage -
-
-
+**return_likelihoods:** `typing.Optional[GenerateStreamRequestReturnLikelihoods]`
-```python
-from cohere import Client
+One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
-client = Client(
-    client_name="YOUR_CLIENT_NAME",
-    token="YOUR_TOKEN",
-)
-client.embed_jobs.list()
+If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
-```
-
-
+If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. +
-#### ⚙️ Parameters -
+**raw_prompting:** `typing.Optional[bool]` — When enabled, the user's prompt will be sent to the model without any pre-processing. + +
+
+
@@ -2877,7 +2030,7 @@ client.embed_jobs.list()
-
client.embed_jobs.create(...) +
client.v1.v1.generate(...)
@@ -2889,7 +2042,10 @@ client.embed_jobs.list()
-This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings.
+
+This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
+
+Generates realistic text conditioned on a given input.
@@ -2910,10 +2066,8 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.embed_jobs.create( - model="model", - dataset_id="dataset_id", - input_type="search_document", +client.v1.v1.generate( + prompt="Please explain to me how LLMs work", ) ``` @@ -2930,16 +2084,10 @@ client.embed_jobs.create(
-**model:** `str` - -ID of the embedding model. - -Available models and corresponding embedding dimensions: +**prompt:** `str` -- `embed-english-v3.0` : 1024 -- `embed-multilingual-v3.0` : 1024 -- `embed-english-light-v3.0` : 384 -- `embed-multilingual-light-v3.0` : 384 +The input text that serves as the starting point for generating the response. +Note: The prompt will be pre-processed and modified before reaching the model.
@@ -2948,7 +2096,10 @@ Available models and corresponding embedding dimensions:
-**dataset_id:** `str` — ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated` +**model:** `typing.Optional[str]` + +The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). +Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
@@ -2956,7 +2107,8 @@ Available models and corresponding embedding dimensions:
-**input_type:** `EmbedInputType` +**num_generations:** `typing.Optional[int]` — The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. +
@@ -2964,23 +2116,14 @@ Available models and corresponding embedding dimensions:
-**name:** `typing.Optional[str]` — The name of the embed job. - -
-
+**max_tokens:** `typing.Optional[int]` -
-
+The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. -**embedding_types:** `typing.Optional[typing.Sequence[EmbeddingType]]` +This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. -Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types. +Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. -* `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. -* `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. -* `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. -* `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. -* `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
@@ -2988,12 +2131,13 @@ Specifies the types of embeddings you want to get back. Not required and default
-**truncate:** `typing.Optional[CreateEmbedJobRequestTruncate]` +**truncate:** `typing.Optional[GenerateRequestTruncate]` -One of `START|END` to specify how the API will handle inputs longer than the maximum token length. +One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. +If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
@@ -3001,70 +2145,78 @@ Passing `START` will discard the start of the input. `END` will discard the end
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**temperature:** `typing.Optional[float]` + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. +Defaults to `0.75`, min value of `0.0`, max value of `5.0`. +
- -
+
+
+ +**seed:** `typing.Optional[int]` + +If specified, the backend will make a best effort to sample tokens +deterministically, such that repeated requests with the same +seed and parameters should return the same result. However, +determinism cannot be totally guaranteed. +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
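Since the determinism guarantee is best-effort, a quick way to sanity-check it is to issue the same request twice with a fixed seed. A minimal sketch, assuming the client is configured as in the usage example above (the prompt and parameter values are illustrative):

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# With identical parameters and the same seed, repeated requests should
# (best-effort, not guaranteed) produce the same generation.
first = client.v1.v1.generate(prompt="Name one color.", seed=42, max_tokens=5)
second = client.v1.v1.generate(prompt="Name one color.", seed=42, max_tokens=5)
```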
-
-
client.embed_jobs.get(...)
-#### 📝 Description +**preset:** `typing.Optional[str]` -
-
+Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). +When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. + + +
+
-This API retrieves the details about an embed job started by the same user. -
-
+**end_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. +
-#### 🔌 Usage -
+**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
+
+</dd>
+
+
-```python -from cohere import Client +**k:** `typing.Optional[int]` -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.embed_jobs.get( - id="id", -) +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. -``` -
-
+ -#### ⚙️ Parameters -
-
-
+**p:** `typing.Optional[float]`
+
+Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
+Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

-**id:** `str` — The ID of the embed job to retrieve.
@@ -3072,70 +2224,49 @@ client.embed_jobs.get(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
-
-
+**frequency_penalty:** `typing.Optional[float]` +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + +Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. + -
-
client.embed_jobs.cancel(...)
-#### 📝 Description +**presence_penalty:** `typing.Optional[float]` -
-
+Defaults to `0.0`, min value of `0.0`, max value of `1.0`. -
-
+Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. -This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation. -
-
+Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. + +
-#### 🔌 Usage -
-
-
+**return_likelihoods:** `typing.Optional[GenerateRequestReturnLikelihoods]` -```python -from cohere import Client +One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.embed_jobs.cancel( - id="id", -) +If `GENERATION` is selected, the token likelihoods will only be provided for generated text. -``` -
-
+If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. +
-#### ⚙️ Parameters - -
-
-
-**id:** `str` — The ID of the embed job to cancel. +**raw_prompting:** `typing.Optional[bool]` — When enabled, the user's prompt will be sent to the model without any pre-processing.
@@ -3155,8 +2286,7 @@ client.embed_jobs.cancel(
-## Datasets -
client.datasets.list(...) +
client.v1.v1.embed(...)
@@ -3168,7 +2298,11 @@ client.embed_jobs.cancel(
-List datasets that have been created.
+This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.
+
+Embeddings can be used to create text classifiers as well as to power semantic search. To learn more about embeddings, see the embedding page.
+
+If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search).
@@ -3189,7 +2323,7 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.datasets.list() +client.v1.v1.embed() ``` @@ -3205,7 +2339,7 @@ client.datasets.list()
-**dataset_type:** `typing.Optional[str]` — optional filter by dataset type +**texts:** `typing.Optional[typing.Sequence[str]]` — An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.
@@ -3213,7 +2347,11 @@ client.datasets.list()
-**before:** `typing.Optional[dt.datetime]` — optional filter before a date
+**images:** `typing.Optional[typing.Sequence[str]]`
+
+An array of image data URIs for the model to embed. Maximum number of images per call is `1`.
+
+The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and have a maximum size of 5MB.
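Since the endpoint expects a data URI rather than a raw file, the image has to be base64-encoded first. A minimal sketch, assuming a local `photo.png` under the 5MB limit (the file name and the `input_type` value are illustrative assumptions):

```python
import base64

from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Build a data URI from a local PNG; the endpoint accepts image/jpeg or image/png.
with open("photo.png", "rb") as f:
    data_uri = "data:image/png;base64," + base64.b64encode(f.read()).decode("utf-8")

response = client.v1.v1.embed(
    images=[data_uri],
    model="embed-english-v3.0",
    input_type="image",  # assumed: the input type used for image inputs
)
```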
@@ -3221,7 +2359,22 @@ client.datasets.list()
-**after:** `typing.Optional[dt.datetime]` — optional filter after a date
+**model:** `typing.Optional[str]`
+
+Defaults to `embed-english-v2.0`.
+
+The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
+
+Available models and corresponding embedding dimensions:
+
+* `embed-english-v3.0` 1024
+* `embed-multilingual-v3.0` 1024
+* `embed-english-light-v3.0` 384
+* `embed-multilingual-light-v3.0` 384
+
+* `embed-english-v2.0` 4096
+* `embed-english-light-v2.0` 1024
+* `embed-multilingual-v2.0` 768
@@ -3229,7 +2382,7 @@ client.datasets.list()
-**limit:** `typing.Optional[float]` — optional limit to number of results +**input_type:** `typing.Optional[EmbedInputType]`
@@ -3237,7 +2390,15 @@ client.datasets.list()
-**offset:** `typing.Optional[float]` — optional offset to start of results +**embedding_types:** `typing.Optional[typing.Sequence[EmbeddingType]]` + +Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types. + +* `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. +* `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. +* `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. +* `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. +* `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
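For example, compressed `int8` embeddings can be requested alongside the default floats. A minimal sketch, assuming a v3 model (the texts are illustrative):

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# int8 (like uint8/binary/ubinary) is only valid for v3 models; float works for all models.
response = client.v1.v1.embed(
    texts=["hello", "goodbye"],
    model="embed-english-v3.0",
    input_type="search_document",
    embedding_types=["float", "int8"],
)
```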
@@ -3245,7 +2406,13 @@ client.datasets.list()
-**validation_status:** `typing.Optional[DatasetValidationStatus]` — optional filter by validation status +**truncate:** `typing.Optional[EmbedRequestTruncate]` + +One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + +Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + +If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
@@ -3265,7 +2432,7 @@ client.datasets.list()
-
client.datasets.create(...) +
client.v1.v1.rerank(...)
@@ -3277,7 +2444,7 @@ client.datasets.list()
-Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information. +This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.
@@ -3298,9 +2465,9 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.datasets.create( - name="name", - type="embed-input", +client.v1.v1.rerank( + query="query", + documents=["documents"], ) ``` @@ -3312,46 +2479,12 @@ client.datasets.create( #### ⚙️ Parameters
-
- -
-
- -**name:** `str` — The name of the uploaded dataset. - -
-
- -
-
- -**type:** `DatasetType` — The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`. - -
-
- -
-
- -**data:** `from __future__ import annotations - -core.File` — See core.File for more documentation - -
-
- -
-
- -**keep_original_file:** `typing.Optional[bool]` — Indicates if the original file should be stored. - -
-
+
-**skip_malformed_input:** `typing.Optional[bool]` — Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field. +**query:** `str` — The search query
@@ -3359,7 +2492,14 @@ core.File` — See core.File for more documentation
-**keep_fields:** `typing.Optional[typing.Union[str, typing.Sequence[str]]]` — List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail.
+**documents:** `typing.Sequence[RerankRequestDocumentsItem]`
+
+A list of document objects or strings to rerank.
+If a document is provided, the `text` field is required and all other fields will be preserved in the response.
+
+The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.
+
+We recommend a maximum of 1,000 documents for optimal endpoint performance.
@@ -3367,7 +2507,7 @@ core.File` — See core.File for more documentation
-**optional_fields:** `typing.Optional[typing.Union[str, typing.Sequence[str]]]` — List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass. +**model:** `typing.Optional[str]` — The identifier of the model to use, one of : `rerank-english-v3.0`, `rerank-multilingual-v3.0`, `rerank-english-v2.0`, `rerank-multilingual-v2.0`
@@ -3375,7 +2515,7 @@ core.File` — See core.File for more documentation
-**text_separator:** `typing.Optional[str]` — Raw .txt uploads will be split into entries using the text_separator value. +**top_n:** `typing.Optional[int]` — The number of most relevant documents or indices to return, defaults to the length of the documents
@@ -3383,7 +2523,7 @@ core.File` — See core.File for more documentation
-**csv_delimiter:** `typing.Optional[str]` — The delimiter used for .csv uploads.
+**rank_fields:** `typing.Optional[typing.Sequence[str]]` — If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on the order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, and text sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). If not provided, the model will use the default text field for ranking.
@@ -3391,7 +2531,10 @@ core.File` — See core.File for more documentation
-**dry_run:** `typing.Optional[bool]` — flag to enable dry_run mode
+**return_documents:** `typing.Optional[bool]`
+
+- If false, returns results without the doc text - the API will return a list of {index, relevance score} where index is inferred from the list passed into the request.
+- If true, returns results with the doc text passed in - the API will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
@@ -3399,9 +2542,7 @@ core.File` — See core.File for more documentation
-**eval_data:** `from __future__ import annotations - -typing.Optional[core.File]` — See core.File for more documentation +**max_chunks_per_doc:** `typing.Optional[int]` — The maximum number of chunks to produce internally from a document
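Putting the parameters above together, a minimal sketch of reranking document objects on selected fields (the query, documents, and field names are illustrative):

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Rerank two document objects on their title and text fields, returning
# only the single most relevant document along with its text.
response = client.v1.v1.rerank(
    query="What is the capital of the United States?",
    documents=[
        {"title": "Carson City", "text": "Carson City is the capital city of Nevada."},
        {"title": "Washington, D.C.", "text": "Washington, D.C. is the capital of the United States."},
    ],
    model="rerank-english-v3.0",
    rank_fields=["title", "text"],
    top_n=1,
    return_documents=True,
)
```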
@@ -3421,7 +2562,7 @@ typing.Optional[core.File]` — See core.File for more documentation
-
client.datasets.get_usage() +
client.v1.v1.classify(...)
@@ -3433,7 +2574,8 @@ typing.Optional[core.File]` — See core.File for more documentation
-View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users. +This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference. +Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
@@ -3454,7 +2596,9 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.datasets.get_usage() +client.v1.v1.classify( + inputs=["inputs"], +) ``` @@ -3470,70 +2614,50 @@ client.datasets.get_usage()
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
- -
- +**inputs:** `typing.Sequence[str]` +A list of up to 96 texts to be classified. Each one must be a non-empty string. +There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models). +Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts. + -
-
client.datasets.get(...)
-#### 📝 Description +**examples:** `typing.Optional[typing.Sequence[ClassifyExample]]` -
-
+An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`. +Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. + +
+
-Retrieve a dataset by ID. See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information. -
-
+**model:** `typing.Optional[str]` — The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID. +
-#### 🔌 Usage -
-
-
- -```python -from cohere import Client - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.datasets.get( - id="id", -) - -``` -
-
+**preset:** `typing.Optional[str]` — The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. +
-#### ⚙️ Parameters -
-
-
+**truncate:** `typing.Optional[ClassifyRequestTruncate]` -**id:** `str` +One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. +Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. +If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
@@ -3553,7 +2677,7 @@ client.datasets.get(
-
client.datasets.delete(...) +
client.v1.v1.summarize(...)
@@ -3565,7 +2689,10 @@ client.datasets.get(
-Delete a dataset by ID. Datasets are automatically deleted after 30 days, but they can also be deleted manually. + +This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + +Generates a summary in English for a given text.
@@ -3586,8 +2713,8 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.datasets.delete( - id="id", +client.v1.v1.summarize( + text="text", ) ``` @@ -3604,7 +2731,7 @@ client.datasets.delete(
-**id:** `str` +**text:** `str` — The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.
@@ -3612,69 +2739,39 @@ client.datasets.delete(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**length:** `typing.Optional[SummarizeRequestLength]` — One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.
- -
- - - - -
-## Connectors -
client.connectors.list(...)
-#### 📝 Description - -
-
+**format:** `typing.Optional[SummarizeRequestFormat]` — One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text. + +
+
-Returns a list of connectors ordered by descending creation date (newer first). See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information. -
-
+**model:** `typing.Optional[str]` — The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better. +
-#### 🔌 Usage -
-
-
- -```python -from cohere import Client - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.connectors.list() - -``` -
-
+**extractiveness:** `typing.Optional[SummarizeRequestExtractiveness]` — One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text. +
-#### ⚙️ Parameters - -
-
-
-**limit:** `typing.Optional[float]` — Maximum number of connectors to return [0, 100]. +**temperature:** `typing.Optional[float]` — Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.
@@ -3682,7 +2779,7 @@ client.connectors.list()
-**offset:** `typing.Optional[float]` — Number of connectors to skip before returning results [0, inf].
+**additional_command:** `typing.Optional[str]` — A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda".
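Combining the knobs above, a minimal sketch of a short, bulleted, more abstractive summary (the input text is illustrative):

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# length/format/extractiveness default to "auto"; set them explicitly to steer the output.
response = client.v1.v1.summarize(
    text=(
        "The tower is 324 metres (1,063 ft) tall, about the same height as an "
        "81-storey building, and the tallest structure in Paris. Its base is "
        "square, measuring 125 metres (410 ft) on each side. During its "
        "construction, the Eiffel Tower surpassed the Washington Monument to "
        "become the tallest man-made structure in the world."
    ),
    length="short",
    format="bullets",
    extractiveness="low",
    temperature=0.3,
)
```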
@@ -3702,7 +2799,7 @@ client.connectors.list()
-
client.connectors.create(...) +
client.v1.v1.tokenize(...)
@@ -3714,7 +2811,7 @@ client.connectors.list()
-Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. See ['Creating and Deploying a Connector'](https://docs.cohere.com/docs/creating-and-deploying-a-connector) for more information. +This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.
@@ -3735,9 +2832,9 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.connectors.create( - name="name", - url="url", +client.v1.v1.tokenize( + text="tokenize me! :D", + model="command", ) ``` @@ -3754,7 +2851,7 @@ client.connectors.create(
-**name:** `str` — A human-readable name for the connector. +**text:** `str` — The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.
@@ -3762,7 +2859,7 @@ client.connectors.create(
-**url:** `str` — The URL of the connector that will be used to search for documents.
+**model:** `str` — The name of the model. Tokenization will use the tokenizer associated with that model.
@@ -3770,39 +2867,71 @@ client.connectors.create(
-**description:** `typing.Optional[str]` — A description of the connector. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+ +
-
-
-**excludes:** `typing.Optional[typing.Sequence[str]]` — A list of fields to exclude from the prompt (fields remain in the document). -
+
+
client.v1.v1.detokenize(...)
-**oauth:** `typing.Optional[CreateConnectorOAuth]` — The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. - +#### 📝 Description + +
+
+ +
+
+ +This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page. +
+
+#### 🔌 Usage +
-**active:** `typing.Optional[bool]` — Whether the connector is active or not. - +
+
+ +```python +from cohere import Client + +client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", +) +client.v1.v1.detokenize( + tokens=[1], + model="model", +) + +``` +
+
+#### ⚙️ Parameters +
-**continue_on_failure:** `typing.Optional[bool]` — Whether a chat request should continue or not if the request to this connector fails. +
+
+ +**tokens:** `typing.Sequence[int]` — The list of tokens to be detokenized.
@@ -3810,7 +2939,7 @@ client.connectors.create(
-**service_auth:** `typing.Optional[CreateConnectorServiceAuth]` — The service to service authentication configuration for the connector. Cannot be specified if oauth is specified.
+**model:** `str` — The name of the model. Detokenization will use the tokenizer associated with that model.
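Tokenize and detokenize are inverses when the same model is used for both, which the following round-trip sketch illustrates (it assumes the tokenize and detokenize responses expose `tokens` and `text` fields, matching their response types in this SDK):

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Round-trip: encode a string into token ids, then decode with the same model.
tokens = client.v1.v1.tokenize(text="tokenize me! :D", model="command").tokens
text = client.v1.v1.detokenize(tokens=tokens, model="command").text
```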
@@ -3830,7 +2959,7 @@ client.connectors.create(
-
client.connectors.get(...) +
client.v1.v1.check_api_key()
@@ -3842,7 +2971,7 @@ client.connectors.create(
-Retrieve a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.
+Checks that the API key in the Authorization header is valid and active.
@@ -3863,9 +2992,7 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.connectors.get( - id="id", -) +client.v1.v1.check_api_key() ``` @@ -3881,14 +3008,6 @@ client.connectors.get(
-**id:** `str` — The ID of the connector to retrieve. - -
-
- -
-
- **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
@@ -3901,24 +3020,10 @@ client.connectors.get(
-
client.connectors.delete(...) -
-
- -#### 📝 Description - +
client.v1.v1.list_finetuned_models(...)
-
-
- -Delete a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information. -
-
-
-
- #### 🔌 Usage
@@ -3934,9 +3039,7 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.connectors.delete( - id="id", -) +client.v1.v1.list_finetuned_models() ```
@@ -3952,7 +3055,7 @@ client.connectors.delete(
-**id:** `str` — The ID of the connector to delete. +**page_size:** `typing.Optional[int]` — Maximum number of results to be returned by the server. If 0, defaults to 50.
@@ -3960,35 +3063,45 @@ client.connectors.delete(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**page_token:** `typing.Optional[str]` — Request a specific page of the list results.
- -
- - - - -
-
client.connectors.update(...)
-#### 📝 Description +**order_by:** `typing.Optional[str]` -
-
+Comma separated list of fields. For example: "created_at,name". The default +sorting order is ascending. To specify descending order for a field, append +" desc" to the field name. For example: "created_at desc,name". + +Supported sorting fields: + +- created_at (default) + +
+
-Update a connector by ID. Omitted fields will not be updated. See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+ + +
+ +
client.v1.v1.create_finetuned_model(...) +
+
#### 🔌 Usage @@ -4000,13 +3113,22 @@ Update a connector by ID. Omitted fields will not be updated. See ['Managing you ```python from cohere import Client +from cohere.v1.finetuning.finetuning import BaseModel, FinetunedModel, Settings client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.connectors.update( - id="id", +client.v1.v1.create_finetuned_model( + request=FinetunedModel( + name="api-test", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_CHAT", + ), + dataset_id="my-dataset-id", + ), + ), ) ``` @@ -4023,7 +3145,7 @@ client.connectors.update(
-**id:** `str` — The ID of the connector to update. +**request:** `FinetunedModel`
@@ -4031,55 +3153,56 @@ client.connectors.update(
-**name:** `typing.Optional[str]` — A human-readable name for the connector. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+
+
-
-
-**url:** `typing.Optional[str]` — The URL of the connector that will be used to search for documents. -
+
+
client.v1.v1.get_finetuned_model(...)
-**excludes:** `typing.Optional[typing.Sequence[str]]` — A list of fields to exclude from the prompt (fields remain in the document). - -
-
+#### 🔌 Usage
-**oauth:** `typing.Optional[CreateConnectorOAuth]` — The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. - -
-
-
-**active:** `typing.Optional[bool]` - +```python +from cohere import Client + +client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", +) +client.v1.v1.get_finetuned_model( + id="id", +) + +``` +
+
+#### ⚙️ Parameters +
-**continue_on_failure:** `typing.Optional[bool]` - -
-
-
-**service_auth:** `typing.Optional[CreateConnectorServiceAuth]` — The service to service authentication configuration for the connector. Cannot be specified if oauth is specified. +**id:** `str` — The fine-tuned model ID.
@@ -4099,24 +3222,10 @@ client.connectors.update(
-
client.connectors.o_auth_authorize(...) -
-
- -#### 📝 Description - -
-
- +
client.v1.v1.delete_finetuned_model(...)
-Authorize the connector with the given ID for the connector oauth app. See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information. -
-
-
-
- #### 🔌 Usage
@@ -4132,7 +3241,7 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.connectors.o_auth_authorize( +client.v1.v1.delete_finetuned_model( id="id", ) @@ -4150,15 +3259,7 @@ client.connectors.o_auth_authorize(
-**id:** `str` — The ID of the connector to authorize. - -
-
- -
-
- -**after_token_redirect:** `typing.Optional[str]` — The URL to redirect to after the connector has been authorized. +**id:** `str` — The fine-tuned model ID.
@@ -4178,25 +3279,10 @@ client.connectors.o_auth_authorize(
-## Models -
client.models.get(...) -
-
- -#### 📝 Description - -
-
- +
client.v1.v1.update_finetuned_model(...)
-Returns the details of a model, provided its name. -
-
-
-
- #### 🔌 Usage
@@ -4207,13 +3293,21 @@ Returns the details of a model, provided its name. ```python from cohere import Client +from cohere.v1.finetuning.finetuning import BaseModel, Settings client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.models.get( - model="command-r", +client.v1.v1.update_finetuned_model( + id="id", + name="name", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_UNSPECIFIED", + ), + dataset_id="dataset_id", + ), ) ``` @@ -4230,7 +3324,7 @@ client.models.get(
-**model:** `str` +**id:** `str` — FinetunedModel ID.
@@ -4238,71 +3332,47 @@ client.models.get(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**name:** `str` — FinetunedModel name (e.g. `foobar`).
-
-
+
+
+**settings:** `Settings` — FinetunedModel settings such as dataset, hyperparameters... +
-
-
client.models.list(...)
-#### 📝 Description - -
-
+**creator_id:** `typing.Optional[str]` — User ID of the creator. + +
+
-Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models. -
-
+**organization_id:** `typing.Optional[str]` — Organization ID. +
-#### 🔌 Usage - -
-
-
-```python -from cohere import Client - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.models.list() - -``` -
-
+**status:** `typing.Optional[Status]` — Current stage in the life-cycle of the fine-tuned model. +
-#### ⚙️ Parameters - -
-
-
-**page_size:** `typing.Optional[float]` - -Maximum number of models to include in a page -Defaults to `20`, min value of `1`, max value of `1000`. +**created_at:** `typing.Optional[dt.datetime]` — Creation timestamp.
@@ -4310,7 +3380,7 @@ Defaults to `20`, min value of `1`, max value of `1000`.
-**page_token:** `typing.Optional[str]` — Page token provided in the `next_page_token` field of a previous response. +**updated_at:** `typing.Optional[dt.datetime]` — Latest update timestamp.
@@ -4318,7 +3388,7 @@ Defaults to `20`, min value of `1`, max value of `1000`.
-**endpoint:** `typing.Optional[CompatibleEndpoint]` — When provided, filters the list of models to only those that are compatible with the specified endpoint. +**completed_at:** `typing.Optional[dt.datetime]` — Timestamp for the completed fine-tuning.
@@ -4326,7 +3396,7 @@ Defaults to `20`, min value of `1`, max value of `1000`.
-**default_only:** `typing.Optional[bool]` — When provided, filters the list of models to only the default model to the endpoint. This parameter is only valid when `endpoint` is provided. +**last_used:** `typing.Optional[dt.datetime]` — Timestamp for the latest request to this fine-tuned model.
@@ -4346,8 +3416,7 @@ Defaults to `20`, min value of `1`, max value of `1000`.
-## /finetuning -
client.finetuning.list_finetuned_models(...) +
client.v1.v1.list_events(...)
@@ -4366,7 +3435,9 @@ client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.finetuning.list_finetuned_models() +client.v1.v1.list_events( + finetuned_model_id="finetuned_model_id", +) ```
@@ -4382,6 +3453,14 @@ client.finetuning.list_finetuned_models()
+**finetuned_model_id:** `str` — The parent fine-tuned model ID. + +
+
+ +
+
+ **page_size:** `typing.Optional[int]` — Maximum number of results to be returned by the server. If 0, defaults to 50.
@@ -4426,7 +3505,7 @@ Supported sorting fields:
-
client.finetuning.create_finetuned_model(...) +
client.v1.v1.list_training_step_metrics(...)
@@ -4440,22 +3519,13 @@ Supported sorting fields: ```python from cohere import Client -from cohere.finetuning.finetuning import BaseModel, FinetunedModel, Settings client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.finetuning.create_finetuned_model( - request=FinetunedModel( - name="api-test", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_CHAT", - ), - dataset_id="my-dataset-id", - ), - ), +client.v1.v1.list_training_step_metrics( + finetuned_model_id="finetuned_model_id", ) ``` @@ -4472,7 +3542,7 @@ client.finetuning.create_finetuned_model(
-**request:** `FinetunedModel` +**finetuned_model_id:** `str` — The parent fine-tuned model ID.
@@ -4480,78 +3550,52 @@ client.finetuning.create_finetuned_model(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**page_size:** `typing.Optional[int]` — Maximum number of results to be returned by the server. If 0, defaults to 50.
-
-
- - - - -
-
client.finetuning.get_finetuned_model(...)
-#### 🔌 Usage - -
-
+**page_token:** `typing.Optional[str]` — Request a specific page of the list results. + +
+
-```python -from cohere import Client - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.finetuning.get_finetuned_model( - id="id", -) - -``` +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +
-#### ⚙️ Parameters + + +
+ +## V2 +
client.v2.chat_stream(...)
+#### 📝 Description +
-**id:** `str` — The fine-tuned model ID. - -
-
-
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. - -
-
+Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides.
- - -
- -
client.finetuning.delete_finetuned_model(...) -
-
#### 🔌 Usage

@@ -4563,14 +3607,52 @@
 
 ```python
 from cohere import Client
+from cohere.v2.v2 import (
+    CitationOptions,
+    TextResponseFormat,
+    Tool,
+    ToolFunction,
+    UserMessage,
+)
 
 client = Client(
     client_name="YOUR_CLIENT_NAME",
     token="YOUR_TOKEN",
 )
-client.finetuning.delete_finetuned_model(
-    id="id",
+response = client.v2.chat_stream(
+    model="string",
+    messages=[
+        UserMessage(
+            content="string",
+        )
+    ],
+    tools=[
+        Tool(
+            function=ToolFunction(
+                name="string",
+                description="string",
+                parameters={"string": {"key": "value"}},
+            ),
+        )
+    ],
+    documents=["string"],
+    citation_options=CitationOptions(
+        mode="FAST",
+    ),
+    response_format=TextResponseFormat(),
+    safety_mode="CONTEXTUAL",
+    max_tokens=1,
+    stop_sequences=["string"],
+    temperature=1.1,
+    seed=1,
+    frequency_penalty=1.1,
+    presence_penalty=1.1,
+    k=1.1,
+    p=1.1,
+    return_prompt=True,
 )
+for chunk in response:
+    print(chunk)
 
 ```
@@ -4586,7 +3668,7 @@ client.finetuning.delete_finetuned_model(
-**id:** `str` — The fine-tuned model ID. +**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
@@ -4594,64 +3676,63 @@ client.finetuning.delete_finetuned_model(
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**messages:** `ChatMessages`
- -
+
+
+ +**tools:** `typing.Optional[typing.Sequence[Tool]]` + +A list of available tools (functions) that the model may suggest invoking before producing a text response. + +When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. +
-
-
client.finetuning.update_finetuned_model(...)
-#### 🔌 Usage +**documents:** `typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]` — A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata. -
-
+ +
+
-```python -from cohere import Client -from cohere.finetuning.finetuning import BaseModel, Settings - -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.finetuning.update_finetuned_model( - id="id", - name="name", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_UNSPECIFIED", - ), - dataset_id="dataset_id", - ), -) - -``` -
-
+**citation_options:** `typing.Optional[CitationOptions]` +
-#### ⚙️ Parameters -
+**response_format:** `typing.Optional[ResponseFormat]` + +
+
+
-**id:** `str` — FinetunedModel ID. +**safety_mode:** `typing.Optional[V2ChatStreamRequestSafetyMode]` + +Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. +When `NONE` is specified, the safety instruction will be omitted. + +Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + +**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments +
@@ -4659,7 +3740,8 @@ client.finetuning.update_finetuned_model(
-**name:** `str` — FinetunedModel name (e.g. `foobar`). +**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. +
@@ -4667,7 +3749,8 @@ client.finetuning.update_finetuned_model(
-**settings:** `Settings` — FinetunedModel settings such as dataset, hyperparameters... +**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. +
@@ -4675,7 +3758,14 @@ client.finetuning.update_finetuned_model(
-**creator_id:** `typing.Optional[str]` — User ID of the creator. +**temperature:** `typing.Optional[float]` + +Defaults to `0.3`. + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + +Randomness can be further maximized by increasing the value of the `p` parameter. +
@@ -4683,7 +3773,13 @@ client.finetuning.update_finetuned_model(
-**organization_id:** `typing.Optional[str]` — Organization ID. +**seed:** `typing.Optional[int]` + +If specified, the backend will make a best effort to sample tokens +deterministically, such that repeated requests with the same +seed and parameters should return the same result. However, +determinism cannot be totally guaranteed. +
@@ -4691,7 +3787,11 @@ client.finetuning.update_finetuned_model(
-**status:** `typing.Optional[Status]` — Current stage in the life-cycle of the fine-tuned model. +**frequency_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. +
@@ -4699,7 +3799,11 @@ client.finetuning.update_finetuned_model(
-**created_at:** `typing.Optional[dt.datetime]` — Creation timestamp. +**presence_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. +
@@ -4707,7 +3811,11 @@ client.finetuning.update_finetuned_model(
-**updated_at:** `typing.Optional[dt.datetime]` — Latest update timestamp. +**k:** `typing.Optional[float]` + +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. +
@@ -4715,7 +3823,11 @@ client.finetuning.update_finetuned_model(
-**completed_at:** `typing.Optional[dt.datetime]` — Timestamp for the completed fine-tuning.
+**p:** `typing.Optional[float]`
+
+Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
+Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
+
@@ -4723,7 +3835,7 @@ client.finetuning.update_finetuned_model(
-**last_used:** `typing.Optional[dt.datetime]` — Timestamp for the latest request to this fine-tuned model. +**return_prompt:** `typing.Optional[bool]` — Whether to return the prompt in the response.
@@ -4743,10 +3855,24 @@ client.finetuning.update_finetuned_model(
-
client.finetuning.list_events(...) +
client.v2.chat(...) +
+
+ +#### 📝 Description + +
+
+
+Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. +
+
+
+
+ #### 🔌 Usage
@@ -4757,13 +3883,19 @@ client.finetuning.update_finetuned_model( ```python from cohere import Client +from cohere.v2.v2 import UserMessage client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) -client.finetuning.list_events( - finetuned_model_id="finetuned_model_id", +client.v2.chat( + model="model", + messages=[ + UserMessage( + content="content", + ) + ], ) ``` @@ -4780,7 +3912,7 @@ client.finetuning.list_events(
-**finetuned_model_id:** `str` — The parent fine-tuned model ID. +**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
@@ -4788,7 +3920,7 @@ client.finetuning.list_events(
-**page_size:** `typing.Optional[int]` — Maximum number of results to be returned by the server. If 0, defaults to 50. +**messages:** `ChatMessages`
@@ -4796,7 +3928,12 @@ client.finetuning.list_events(
-**page_token:** `typing.Optional[str]` — Request a specific page of the list results. +**tools:** `typing.Optional[typing.Sequence[Tool]]` + +A list of available tools (functions) that the model may suggest invoking before producing a text response. + +When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. +
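A minimal tool-calling sketch against `client.v2.chat`, reusing the `Tool`/`ToolFunction`/`UserMessage` imports from the streaming example above (the function name, parameter schema, and model choice are illustrative):

```python
from cohere import Client
from cohere.v2.v2 import Tool, ToolFunction, UserMessage

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# With tools passed (and no tool results yet), the reply's text is empty and
# any calls the model wants to make appear in the tool_calls field.
response = client.v2.chat(
    model="command-r-plus",
    messages=[UserMessage(content="What is the weather in Toronto?")],
    tools=[
        Tool(
            function=ToolFunction(
                name="get_weather",
                description="Returns the current weather in a given city.",
                parameters={
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            ),
        )
    ],
)
```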
@@ -4804,15 +3941,16 @@ client.finetuning.list_events(
-**order_by:** `typing.Optional[str]` +**documents:** `typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]` — A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata. -Comma separated list of fields. For example: "created_at,name". The default -sorting order is ascending. To specify descending order for a field, append -" desc" to the field name. For example: "created_at desc,name". + +
+
-Supported sorting fields: +
+
-- created_at (default) +**citation_options:** `typing.Optional[CitationOptions]`
@@ -4820,56 +3958,108 @@ Supported sorting fields:
-**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +**response_format:** `typing.Optional[ResponseFormat]`
+ +
+
+ +**safety_mode:** `typing.Optional[V2ChatRequestSafetyMode]` + +Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. +When `NONE` is specified, the safety instruction will be omitted. + +Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + +**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + +
+
+
+ +**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. +
-
-
client.finetuning.list_training_step_metrics(...)
-#### 🔌 Usage +**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + + +
+
+**temperature:** `typing.Optional[float]` + +Defaults to `0.3`. + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + +Randomness can be further maximized by increasing the value of the `p` parameter. + + +
+
+
-```python -from cohere import Client +**seed:** `typing.Optional[int]` -client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", -) -client.finetuning.list_training_step_metrics( - finetuned_model_id="finetuned_model_id", -) +If specified, the backend will make a best effort to sample tokens +deterministically, such that repeated requests with the same +seed and parameters should return the same result. However, +determinism cannot be totally guaranteed. -``` +
+ +
+
+ +**frequency_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + +
-#### ⚙️ Parameters -
+**presence_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + +
+
+
-**finetuned_model_id:** `str` — The parent fine-tuned model ID. +**k:** `typing.Optional[float]` + +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. +
@@ -4877,7 +4067,11 @@ client.finetuning.list_training_step_metrics(
-**page_size:** `typing.Optional[int]` — Maximum number of results to be returned by the server. If 0, defaults to 50.
+**p:** `typing.Optional[float]`
+
+Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
+Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
+
@@ -4885,7 +4079,7 @@ client.finetuning.list_training_step_metrics(
-**page_token:** `typing.Optional[str]` — Request a specific page of the list results. +**return_prompt:** `typing.Optional[bool]` — Whether to return the prompt in the response.
diff --git a/src/cohere/__init__.py b/src/cohere/__init__.py index adad8ada0..940619bda 100644 --- a/src/cohere/__init__.py +++ b/src/cohere/__init__.py @@ -1,524 +1,28 @@ # This file was auto-generated by Fern from our API Definition. -from .types import ( - ApiMeta, - ApiMetaApiVersion, - ApiMetaBilledUnits, - ApiMetaTokens, - AuthTokenType, - ChatCitation, - ChatCitationGenerationEvent, - ChatConnector, - ChatDataMetrics, - ChatDocument, - ChatMessage, - ChatRequestCitationQuality, - ChatRequestConnectorsSearchOptions, - ChatRequestPromptTruncation, - ChatRequestSafetyMode, - ChatSearchQueriesGenerationEvent, - ChatSearchQuery, - ChatSearchResult, - ChatSearchResultConnector, - ChatSearchResultsEvent, - ChatStreamEndEvent, - ChatStreamEndEventFinishReason, - ChatStreamEvent, - ChatStreamRequestCitationQuality, - ChatStreamRequestConnectorsSearchOptions, - ChatStreamRequestPromptTruncation, - ChatStreamRequestSafetyMode, - ChatStreamStartEvent, - ChatTextGenerationEvent, - ChatToolCallsChunkEvent, - ChatToolCallsGenerationEvent, - CheckApiKeyResponse, - ClassifyDataMetrics, - ClassifyExample, - ClassifyRequestTruncate, - ClassifyResponse, - ClassifyResponseClassificationsItem, - ClassifyResponseClassificationsItemClassificationType, - ClassifyResponseClassificationsItemLabelsValue, - ClientClosedRequestErrorBody, - CompatibleEndpoint, - Connector, - ConnectorAuthStatus, - ConnectorOAuth, - CreateConnectorOAuth, - CreateConnectorResponse, - CreateConnectorServiceAuth, - CreateEmbedJobResponse, - Dataset, - DatasetPart, - DatasetType, - DatasetValidationStatus, - DeleteConnectorResponse, - DetokenizeResponse, - EmbedByTypeResponse, - EmbedByTypeResponseEmbeddings, - EmbedFloatsResponse, - EmbedInputType, - EmbedJob, - EmbedJobStatus, - EmbedJobTruncate, - EmbedRequestTruncate, - EmbedResponse, - EmbedResponse_EmbeddingsByType, - EmbedResponse_EmbeddingsFloats, - EmbeddingType, - FinetuneDatasetMetrics, - FinishReason, - GatewayTimeoutErrorBody, - GenerateRequestReturnLikelihoods, - GenerateRequestTruncate, - GenerateStreamEnd, - GenerateStreamEndResponse, - GenerateStreamError, - GenerateStreamEvent, - GenerateStreamRequestReturnLikelihoods, - GenerateStreamRequestTruncate, - GenerateStreamText, - GenerateStreamedResponse, - GenerateStreamedResponse_StreamEnd, - GenerateStreamedResponse_StreamError, - GenerateStreamedResponse_TextGeneration, - Generation, - GetConnectorResponse, - GetModelResponse, - JsonResponseFormat, - LabelMetric, - ListConnectorsResponse, - ListEmbedJobResponse, - ListModelsResponse, - Message, - Message_Chatbot, - Message_System, - Message_Tool, - Message_User, - Metrics, - MetricsEmbedData, - MetricsEmbedDataFieldsItem, - NonStreamedChatResponse, - NotImplementedErrorBody, - OAuthAuthorizeResponse, - ParseInfo, - RerankDocument, - RerankRequestDocumentsItem, - RerankResponse, - RerankResponseResultsItem, - RerankResponseResultsItemDocument, - RerankerDataMetrics, - ResponseFormat, - ResponseFormat_JsonObject, - ResponseFormat_Text, - SingleGeneration, - SingleGenerationInStream, - SingleGenerationTokenLikelihoodsItem, - StreamedChatResponse, - StreamedChatResponse_CitationGeneration, - StreamedChatResponse_SearchQueriesGeneration, - StreamedChatResponse_SearchResults, - StreamedChatResponse_StreamEnd, - StreamedChatResponse_StreamStart, - StreamedChatResponse_TextGeneration, - StreamedChatResponse_ToolCallsChunk, - StreamedChatResponse_ToolCallsGeneration, - SummarizeRequestExtractiveness, - SummarizeRequestFormat, - SummarizeRequestLength, - SummarizeResponse, - 
TextResponseFormat, - TokenizeResponse, - TooManyRequestsErrorBody, - Tool, - ToolCall, - ToolCallDelta, - ToolMessage, - ToolParameterDefinitionsValue, - ToolResult, - UnprocessableEntityErrorBody, - UpdateConnectorResponse, -) -from .errors import ( - BadRequestError, - ClientClosedRequestError, - ForbiddenError, - GatewayTimeoutError, - InternalServerError, - NotFoundError, - NotImplementedError, - ServiceUnavailableError, - TooManyRequestsError, - UnauthorizedError, - UnprocessableEntityError, -) -from . import connectors, datasets, embed_jobs, finetuning, models, v2 +from .types import ChatDocument, ComponentsSchemasTextContent, RerankDocument +from . import v1, v2 from .aws_client import AwsClient from .bedrock_client import BedrockClient from .client import AsyncClient, Client from .client_v2 import AsyncClientV2, ClientV2 -from .datasets import ( - DatasetsCreateResponse, - DatasetsCreateResponseDatasetPartsItem, - DatasetsGetResponse, - DatasetsGetUsageResponse, - DatasetsListResponse, -) -from .embed_jobs import CreateEmbedJobRequestTruncate from .environment import ClientEnvironment from .sagemaker_client import SagemakerClient -from .v2 import ( - AssistantMessage, - AssistantMessageContent, - AssistantMessageContentItem, - AssistantMessageContentItem_Text, - AssistantMessageResponse, - AssistantMessageResponseContentItem, - AssistantMessageResponseContentItem_Text, - ChatContentDeltaEvent, - ChatContentDeltaEventDelta, - ChatContentDeltaEventDeltaMessage, - ChatContentDeltaEventDeltaMessageContent, - ChatContentEndEvent, - ChatContentStartEvent, - ChatContentStartEventDelta, - ChatContentStartEventDeltaMessage, - ChatContentStartEventDeltaMessageContent, - ChatFinishReason, - ChatMessage2, - ChatMessage2_Assistant, - ChatMessage2_System, - ChatMessage2_Tool, - ChatMessage2_User, - ChatMessageEndEvent, - ChatMessageEndEventDelta, - ChatMessageStartEvent, - ChatMessageStartEventDelta, - ChatMessageStartEventDeltaMessage, - ChatMessages, - ChatStreamEventType, - ChatToolCallDeltaEvent, - ChatToolCallDeltaEventDelta, - ChatToolCallDeltaEventDeltaToolCall, - ChatToolCallDeltaEventDeltaToolCallFunction, - ChatToolCallEndEvent, - ChatToolCallStartEvent, - ChatToolCallStartEventDelta, - ChatToolCallStartEventDeltaToolCall, - ChatToolCallStartEventDeltaToolCallFunction, - ChatToolPlanDeltaEvent, - ChatToolPlanDeltaEventDelta, - Citation, - CitationEndEvent, - CitationStartEvent, - CitationStartEventDelta, - CitationStartEventDeltaMessage, - Content, - Content_Text, - DocumentSource, - JsonResponseFormat2, - NonStreamedChatResponse2, - ResponseFormat2, - ResponseFormat2_JsonObject, - ResponseFormat2_Text, - Source, - Source_Document, - Source_Tool, - StreamedChatResponse2, - StreamedChatResponse2_CitationEnd, - StreamedChatResponse2_CitationStart, - StreamedChatResponse2_ContentDelta, - StreamedChatResponse2_ContentEnd, - StreamedChatResponse2_ContentStart, - StreamedChatResponse2_MessageEnd, - StreamedChatResponse2_MessageStart, - StreamedChatResponse2_ToolCallDelta, - StreamedChatResponse2_ToolCallEnd, - StreamedChatResponse2_ToolCallStart, - StreamedChatResponse2_ToolPlanDelta, - SystemMessage, - SystemMessageContent, - SystemMessageContentItem, - SystemMessageContentItem_Text, - TextContent, - TextResponseFormat2, - Tool2, - Tool2Function, - ToolCall2, - ToolCall2Function, - ToolMessage2, - ToolSource, - Usage, - UsageBilledUnits, - UsageTokens, - UserMessage, - UserMessageContent, - V2ChatRequestCitationMode, - V2ChatRequestSafetyMode, - V2ChatStreamRequestCitationMode, - 
V2ChatStreamRequestSafetyMode, -) from .version import __version__ __all__ = [ - "ApiMeta", - "ApiMetaApiVersion", - "ApiMetaBilledUnits", - "ApiMetaTokens", - "AssistantMessage", - "AssistantMessageContent", - "AssistantMessageContentItem", - "AssistantMessageContentItem_Text", - "AssistantMessageResponse", - "AssistantMessageResponseContentItem", - "AssistantMessageResponseContentItem_Text", "AsyncClient", "AsyncClientV2", - "AuthTokenType", "AwsClient", - "BadRequestError", "BedrockClient", - "ChatCitation", - "ChatCitationGenerationEvent", - "ChatConnector", - "ChatContentDeltaEvent", - "ChatContentDeltaEventDelta", - "ChatContentDeltaEventDeltaMessage", - "ChatContentDeltaEventDeltaMessageContent", - "ChatContentEndEvent", - "ChatContentStartEvent", - "ChatContentStartEventDelta", - "ChatContentStartEventDeltaMessage", - "ChatContentStartEventDeltaMessageContent", - "ChatDataMetrics", "ChatDocument", - "ChatFinishReason", - "ChatMessage", - "ChatMessage2", - "ChatMessage2_Assistant", - "ChatMessage2_System", - "ChatMessage2_Tool", - "ChatMessage2_User", - "ChatMessageEndEvent", - "ChatMessageEndEventDelta", - "ChatMessageStartEvent", - "ChatMessageStartEventDelta", - "ChatMessageStartEventDeltaMessage", - "ChatMessages", - "ChatRequestCitationQuality", - "ChatRequestConnectorsSearchOptions", - "ChatRequestPromptTruncation", - "ChatRequestSafetyMode", - "ChatSearchQueriesGenerationEvent", - "ChatSearchQuery", - "ChatSearchResult", - "ChatSearchResultConnector", - "ChatSearchResultsEvent", - "ChatStreamEndEvent", - "ChatStreamEndEventFinishReason", - "ChatStreamEvent", - "ChatStreamEventType", - "ChatStreamRequestCitationQuality", - "ChatStreamRequestConnectorsSearchOptions", - "ChatStreamRequestPromptTruncation", - "ChatStreamRequestSafetyMode", - "ChatStreamStartEvent", - "ChatTextGenerationEvent", - "ChatToolCallDeltaEvent", - "ChatToolCallDeltaEventDelta", - "ChatToolCallDeltaEventDeltaToolCall", - "ChatToolCallDeltaEventDeltaToolCallFunction", - "ChatToolCallEndEvent", - "ChatToolCallStartEvent", - "ChatToolCallStartEventDelta", - "ChatToolCallStartEventDeltaToolCall", - "ChatToolCallStartEventDeltaToolCallFunction", - "ChatToolCallsChunkEvent", - "ChatToolCallsGenerationEvent", - "ChatToolPlanDeltaEvent", - "ChatToolPlanDeltaEventDelta", - "CheckApiKeyResponse", - "Citation", - "CitationEndEvent", - "CitationStartEvent", - "CitationStartEventDelta", - "CitationStartEventDeltaMessage", - "ClassifyDataMetrics", - "ClassifyExample", - "ClassifyRequestTruncate", - "ClassifyResponse", - "ClassifyResponseClassificationsItem", - "ClassifyResponseClassificationsItemClassificationType", - "ClassifyResponseClassificationsItemLabelsValue", "Client", - "ClientClosedRequestError", - "ClientClosedRequestErrorBody", "ClientEnvironment", "ClientV2", - "CompatibleEndpoint", - "Connector", - "ConnectorAuthStatus", - "ConnectorOAuth", - "Content", - "Content_Text", - "CreateConnectorOAuth", - "CreateConnectorResponse", - "CreateConnectorServiceAuth", - "CreateEmbedJobRequestTruncate", - "CreateEmbedJobResponse", - "Dataset", - "DatasetPart", - "DatasetType", - "DatasetValidationStatus", - "DatasetsCreateResponse", - "DatasetsCreateResponseDatasetPartsItem", - "DatasetsGetResponse", - "DatasetsGetUsageResponse", - "DatasetsListResponse", - "DeleteConnectorResponse", - "DetokenizeResponse", - "DocumentSource", - "EmbedByTypeResponse", - "EmbedByTypeResponseEmbeddings", - "EmbedFloatsResponse", - "EmbedInputType", - "EmbedJob", - "EmbedJobStatus", - "EmbedJobTruncate", - "EmbedRequestTruncate", - 
"EmbedResponse", - "EmbedResponse_EmbeddingsByType", - "EmbedResponse_EmbeddingsFloats", - "EmbeddingType", - "FinetuneDatasetMetrics", - "FinishReason", - "ForbiddenError", - "GatewayTimeoutError", - "GatewayTimeoutErrorBody", - "GenerateRequestReturnLikelihoods", - "GenerateRequestTruncate", - "GenerateStreamEnd", - "GenerateStreamEndResponse", - "GenerateStreamError", - "GenerateStreamEvent", - "GenerateStreamRequestReturnLikelihoods", - "GenerateStreamRequestTruncate", - "GenerateStreamText", - "GenerateStreamedResponse", - "GenerateStreamedResponse_StreamEnd", - "GenerateStreamedResponse_StreamError", - "GenerateStreamedResponse_TextGeneration", - "Generation", - "GetConnectorResponse", - "GetModelResponse", - "InternalServerError", - "JsonResponseFormat", - "JsonResponseFormat2", - "LabelMetric", - "ListConnectorsResponse", - "ListEmbedJobResponse", - "ListModelsResponse", - "Message", - "Message_Chatbot", - "Message_System", - "Message_Tool", - "Message_User", - "Metrics", - "MetricsEmbedData", - "MetricsEmbedDataFieldsItem", - "NonStreamedChatResponse", - "NonStreamedChatResponse2", - "NotFoundError", - "NotImplementedError", - "NotImplementedErrorBody", - "OAuthAuthorizeResponse", - "ParseInfo", + "ComponentsSchemasTextContent", "RerankDocument", - "RerankRequestDocumentsItem", - "RerankResponse", - "RerankResponseResultsItem", - "RerankResponseResultsItemDocument", - "RerankerDataMetrics", - "ResponseFormat", - "ResponseFormat2", - "ResponseFormat2_JsonObject", - "ResponseFormat2_Text", - "ResponseFormat_JsonObject", - "ResponseFormat_Text", "SagemakerClient", - "ServiceUnavailableError", - "SingleGeneration", - "SingleGenerationInStream", - "SingleGenerationTokenLikelihoodsItem", - "Source", - "Source_Document", - "Source_Tool", - "StreamedChatResponse", - "StreamedChatResponse2", - "StreamedChatResponse2_CitationEnd", - "StreamedChatResponse2_CitationStart", - "StreamedChatResponse2_ContentDelta", - "StreamedChatResponse2_ContentEnd", - "StreamedChatResponse2_ContentStart", - "StreamedChatResponse2_MessageEnd", - "StreamedChatResponse2_MessageStart", - "StreamedChatResponse2_ToolCallDelta", - "StreamedChatResponse2_ToolCallEnd", - "StreamedChatResponse2_ToolCallStart", - "StreamedChatResponse2_ToolPlanDelta", - "StreamedChatResponse_CitationGeneration", - "StreamedChatResponse_SearchQueriesGeneration", - "StreamedChatResponse_SearchResults", - "StreamedChatResponse_StreamEnd", - "StreamedChatResponse_StreamStart", - "StreamedChatResponse_TextGeneration", - "StreamedChatResponse_ToolCallsChunk", - "StreamedChatResponse_ToolCallsGeneration", - "SummarizeRequestExtractiveness", - "SummarizeRequestFormat", - "SummarizeRequestLength", - "SummarizeResponse", - "SystemMessage", - "SystemMessageContent", - "SystemMessageContentItem", - "SystemMessageContentItem_Text", - "TextContent", - "TextResponseFormat", - "TextResponseFormat2", - "TokenizeResponse", - "TooManyRequestsError", - "TooManyRequestsErrorBody", - "Tool", - "Tool2", - "Tool2Function", - "ToolCall", - "ToolCall2", - "ToolCall2Function", - "ToolCallDelta", - "ToolMessage", - "ToolMessage2", - "ToolParameterDefinitionsValue", - "ToolResult", - "ToolSource", - "UnauthorizedError", - "UnprocessableEntityError", - "UnprocessableEntityErrorBody", - "UpdateConnectorResponse", - "Usage", - "UsageBilledUnits", - "UsageTokens", - "UserMessage", - "UserMessageContent", - "V2ChatRequestCitationMode", - "V2ChatRequestSafetyMode", - "V2ChatStreamRequestCitationMode", - "V2ChatStreamRequestSafetyMode", "__version__", - "connectors", - 
"datasets", - "embed_jobs", - "finetuning", - "models", + "v1", "v2", ] diff --git a/src/cohere/base_client.py b/src/cohere/base_client.py index 910b0dafb..8d019f985 100644 --- a/src/cohere/base_client.py +++ b/src/cohere/base_client.py @@ -6,79 +6,11 @@ import httpx from .core.api_error import ApiError from .core.client_wrapper import SyncClientWrapper +from .v1.client import V1Client from .v2.client import V2Client -from .embed_jobs.client import EmbedJobsClient -from .datasets.client import DatasetsClient -from .connectors.client import ConnectorsClient -from .models.client import ModelsClient -from .finetuning.client import FinetuningClient -from .types.message import Message -from .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation -from .types.chat_connector import ChatConnector -from .types.chat_document import ChatDocument -from .types.chat_stream_request_citation_quality import ChatStreamRequestCitationQuality -from .types.tool import Tool -from .types.tool_result import ToolResult -from .types.response_format import ResponseFormat -from .types.chat_stream_request_safety_mode import ChatStreamRequestSafetyMode -from .core.request_options import RequestOptions -from .types.streamed_chat_response import StreamedChatResponse -from .core.serialization import convert_and_respect_annotation_metadata -from .core.unchecked_base_model import construct_type -import json -from .errors.bad_request_error import BadRequestError -from .errors.unauthorized_error import UnauthorizedError -from .errors.forbidden_error import ForbiddenError -from .errors.not_found_error import NotFoundError -from .errors.unprocessable_entity_error import UnprocessableEntityError -from .types.unprocessable_entity_error_body import UnprocessableEntityErrorBody -from .errors.too_many_requests_error import TooManyRequestsError -from .types.too_many_requests_error_body import TooManyRequestsErrorBody -from .errors.client_closed_request_error import ClientClosedRequestError -from .types.client_closed_request_error_body import ClientClosedRequestErrorBody -from .errors.internal_server_error import InternalServerError -from .errors.not_implemented_error import NotImplementedError -from .types.not_implemented_error_body import NotImplementedErrorBody -from .errors.service_unavailable_error import ServiceUnavailableError -from .errors.gateway_timeout_error import GatewayTimeoutError -from .types.gateway_timeout_error_body import GatewayTimeoutErrorBody -from json.decoder import JSONDecodeError -from .types.chat_request_prompt_truncation import ChatRequestPromptTruncation -from .types.chat_request_citation_quality import ChatRequestCitationQuality -from .types.chat_request_safety_mode import ChatRequestSafetyMode -from .types.non_streamed_chat_response import NonStreamedChatResponse -from .types.generate_stream_request_truncate import GenerateStreamRequestTruncate -from .types.generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods -from .types.generate_streamed_response import GenerateStreamedResponse -from .types.generate_request_truncate import GenerateRequestTruncate -from .types.generate_request_return_likelihoods import GenerateRequestReturnLikelihoods -from .types.generation import Generation -from .types.embed_input_type import EmbedInputType -from .types.embedding_type import EmbeddingType -from .types.embed_request_truncate import EmbedRequestTruncate -from .types.embed_response import EmbedResponse -from .types.rerank_request_documents_item import 
RerankRequestDocumentsItem -from .types.rerank_response import RerankResponse -from .types.classify_example import ClassifyExample -from .types.classify_request_truncate import ClassifyRequestTruncate -from .types.classify_response import ClassifyResponse -from .types.summarize_request_length import SummarizeRequestLength -from .types.summarize_request_format import SummarizeRequestFormat -from .types.summarize_request_extractiveness import SummarizeRequestExtractiveness -from .types.summarize_response import SummarizeResponse -from .types.tokenize_response import TokenizeResponse -from .types.detokenize_response import DetokenizeResponse -from .types.check_api_key_response import CheckApiKeyResponse from .core.client_wrapper import AsyncClientWrapper +from .v1.client import AsyncV1Client from .v2.client import AsyncV2Client -from .embed_jobs.client import AsyncEmbedJobsClient -from .datasets.client import AsyncDatasetsClient -from .connectors.client import AsyncConnectorsClient -from .models.client import AsyncModelsClient -from .finetuning.client import AsyncFinetuningClient - -# this is used as the default value for optional parameters -OMIT = typing.cast(typing.Any, ...) class BaseCohere: @@ -145,6018 +77,76 @@ def __init__( else httpx.Client(timeout=_defaulted_timeout), timeout=_defaulted_timeout, ) + self.v1 = V1Client(client_wrapper=self._client_wrapper) self.v2 = V2Client(client_wrapper=self._client_wrapper) - self.embed_jobs = EmbedJobsClient(client_wrapper=self._client_wrapper) - self.datasets = DatasetsClient(client_wrapper=self._client_wrapper) - self.connectors = ConnectorsClient(client_wrapper=self._client_wrapper) - self.models = ModelsClient(client_wrapper=self._client_wrapper) - self.finetuning = FinetuningClient(client_wrapper=self._client_wrapper) - - def chat_stream( - self, - *, - message: str, - accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, - model: typing.Optional[str] = OMIT, - preamble: typing.Optional[str] = OMIT, - chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, - conversation_id: typing.Optional[str] = OMIT, - prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT, - connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, - search_queries_only: typing.Optional[bool] = OMIT, - documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, - citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT, - temperature: typing.Optional[float] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - return_prompt: typing.Optional[bool] = OMIT, - tools: typing.Optional[typing.Sequence[Tool]] = OMIT, - tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, - force_single_step: typing.Optional[bool] = OMIT, - response_format: typing.Optional[ResponseFormat] = OMIT, - safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> typing.Iterator[StreamedChatResponse]: - """ - Generates a text response to a user message. 
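For orientation, the change above removes the top-level endpoint methods from `BaseCohere` and routes them through the new `v1` sub-client (alongside the existing `v2`). A minimal sketch of the call-site change, assuming the regenerated `V1Client` keeps the signatures of the methods deleted below:

```
import cohere

client = cohere.Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Before this patch, Chat was a method on the client itself:
#   client.chat(message="What year was Cohere founded?")
# After it, the same endpoint is reached through the v1 sub-client
# (assuming V1Client mirrors the removed method's signature):
response = client.v1.chat(message="What year was Cohere founded?")
print(response.text)
```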
- To learn how to use the Chat API with Streaming and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). - - Parameters - ---------- - message : str - Text input for the model to respond to. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - accepts : typing.Optional[typing.Literal["text/event-stream"]] - Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. - - model : typing.Optional[str] - Defaults to `command-r-plus-08-2024`. - - The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - - Compatible Deployments: Cohere Platform, Private Deployments - - - preamble : typing.Optional[str] - When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. - - The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - chat_history : typing.Optional[typing.Sequence[Message]] - A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. - - Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. - - The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - conversation_id : typing.Optional[str] - An alternative to `chat_history`. - - Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. - - Compatible Deployments: Cohere Platform - - - prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation] - Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. - - Dictates how the prompt will be constructed. - - With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. - - With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. - - With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. 
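To make the `chat_history`/`preamble` description above concrete, a minimal sketch using the pre-regeneration call site and the typed `Message_*` variants this patch removes from the top-level exports:

```
from cohere import Client, Message_Chatbot, Message_User

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

# Prior turns go in `chat_history`; the current turn goes in `message`.
# The preamble replaces the default SYSTEM prompt for the whole conversation.
response = client.chat(
    message="And roughly how many people live there?",
    preamble="You are a concise geography assistant.",
    chat_history=[
        Message_User(message="What is the capital of France?"),
        Message_Chatbot(message="The capital of France is Paris."),
    ],
)
```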
- - Compatible Deployments: - - AUTO: Cohere Platform Only - - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments - - - connectors : typing.Optional[typing.Sequence[ChatConnector]] - Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. - - When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). - - Compatible Deployments: Cohere Platform - - - search_queries_only : typing.Optional[bool] - Defaults to `false`. - - When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - documents : typing.Optional[typing.Sequence[ChatDocument]] - A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. - - Example: - ``` - [ - { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, - { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ] - ``` - - Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. - - Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. - - An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. - - An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. - - See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - citation_quality : typing.Optional[ChatStreamRequestCitationQuality] - Defaults to `"accurate"`. - - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - temperature : typing.Optional[float] - Defaults to `0.3`. - - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - - Randomness can be further maximized by increasing the value of the `p` parameter. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_input_tokens : typing.Optional[int] - The maximum number of input tokens to send to the model. 
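A short sketch of the document-mode RAG flow described by the `documents` and `citation_quality` parameters above, against the pre-regeneration client; the document contents are illustrative only, and the citation fields assume the `ChatCitation` shape used elsewhere in this SDK:

```
from cohere import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

response = client.chat(
    message="Where do emperor penguins live, and how tall are they?",
    documents=[
        {"id": "doc-0", "title": "Tall penguins", "text": "Emperor penguins are the tallest."},
        {"id": "doc-1", "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica."},
    ],
    citation_quality="accurate",
)
# Each citation points back at the optional `id` fields supplied above.
for citation in response.citations or []:
    print(citation.document_ids, "->", citation.text)
```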
If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. - - Input will be truncated according to the `prompt_truncation` parameter. - - Compatible Deployments: Cohere Platform - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`. min value of `0.01`, max value of `0.99`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - stop_sequences : typing.Optional[typing.Sequence[str]] - A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - frequency_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without - any pre-processing. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - return_prompt : typing.Optional[bool] - The prompt is returned in the `prompt` response field when this is enabled. - - tools : typing.Optional[typing.Sequence[Tool]] - A list of available tools (functions) that the model may suggest invoking before producing a text response. - - When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - tool_results : typing.Optional[typing.Sequence[ToolResult]] - A list of results from invoking tools recommended by the model in the previous chat turn. 
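The sampling parameters documented above (`temperature`, `k`, `p`, `seed`, `stop_sequences`) compose as follows; a hedged sketch with illustrative values:

```
from cohere import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

response = client.chat(
    message="Suggest one name for a seabird-watching app.",
    temperature=0.3,        # lower = less random generations
    k=40,                   # keep only the 40 most likely tokens per step
    p=0.9,                  # then keep the smallest set covering 90% probability (p acts after k)
    seed=42,                # best-effort determinism across repeated requests
    stop_sequences=["\n"],  # cut generation at the first newline, excluded from the output
)
```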
Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. - Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. - - **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. - ``` - tool_results = [ - { - "call": { - "name": <tool name>, - "parameters": { - <param name>: <param value> - } - }, - "outputs": [{ - <key>: <value> - }] - }, - ... - ] - ``` - **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - +class AsyncBaseCohere: + """ + Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propagate to these functions. - force_single_step : typing.Optional[bool] - Forces the chat to be single step. Defaults to `false`. + Parameters + ---------- + base_url : typing.Optional[str] + The base url to use for requests from the client. - response_format : typing.Optional[ResponseFormat] + environment : ClientEnvironment + The environment to use for requests from the client. from .environment import ClientEnvironment - safety_mode : typing.Optional[ChatStreamRequestSafetyMode] - Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. - When `NONE` is specified, the safety instruction will be omitted. - Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. - **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + Defaults to ClientEnvironment.PRODUCTION - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - request_options : typing.Optional[RequestOptions] - Request-specific configuration. + client_name : typing.Optional[str] + token : typing.Optional[typing.Union[str, typing.Callable[[], str]]] + timeout : typing.Optional[float] + The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case this default is not enforced. - Yields - ------ - typing.Iterator[StreamedChatResponse] + follow_redirects : typing.Optional[bool] + Whether the default httpx client follows redirects or not, this is irrelevant if a custom httpx client is passed in. + httpx_client : typing.Optional[httpx.AsyncClient] + The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
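The `AsyncBaseCohere` constructor documented above takes the same configuration as the sync client; a minimal sketch of instantiating it with an explicit timeout (values illustrative):

```
import asyncio
from cohere import AsyncClient

client = AsyncClient(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",  # when omitted, falls back to the CO_API_KEY environment variable
    timeout=60,          # seconds; defaults to 300 unless a custom httpx client is supplied
)

async def main() -> None:
    # Pre-regeneration the coroutine lives on the client itself; after this
    # patch the equivalent call would go through `client.v1`.
    response = await client.chat(message="Hello!")
    print(response.text)

asyncio.run(main())
```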
- Examples - -------- - from cohere import ( - ChatConnector, - ChatStreamRequestConnectorsSearchOptions, - Client, - Message_Chatbot, - ResponseFormat_Text, - Tool, - ToolCall, - ToolParameterDefinitionsValue, - ToolResult, - ) + Examples + -------- + from cohere import AsyncClient - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - response = client.chat_stream( - message="string", - model="string", - preamble="string", - chat_history=[ - Message_Chatbot( - message="string", - tool_calls=[ - ToolCall( - name="string", - parameters={"string": {"key": "value"}}, - ) - ], - ) - ], - conversation_id="string", - prompt_truncation="OFF", - connectors=[ - ChatConnector( - id="string", - user_access_token="string", - continue_on_failure=True, - options={"string": {"key": "value"}}, - ) - ], - search_queries_only=True, - documents=[{"string": {"key": "value"}}], - citation_quality="fast", - temperature=1.1, - max_tokens=1, - max_input_tokens=1, - k=1, - p=1.1, - seed=1, - stop_sequences=["string"], - connectors_search_options=ChatStreamRequestConnectorsSearchOptions( - seed=1, - ), - frequency_penalty=1.1, - presence_penalty=1.1, - raw_prompting=True, - return_prompt=True, - tools=[ - Tool( - name="string", - description="string", - parameter_definitions={ - "string": ToolParameterDefinitionsValue( - description="string", - type="string", - required=True, - ) - }, - ) - ], - tool_results=[ - ToolResult( - call=ToolCall( - name="string", - parameters={"string": {"key": "value"}}, - ), - outputs=[{"string": {"key": "value"}}], - ) - ], - force_single_step=True, - response_format=ResponseFormat_Text(), - safety_mode="CONTEXTUAL", - ) - for chunk in response: - yield chunk - """ - with self._client_wrapper.httpx_client.stream( - "v1/chat", - method="POST", - json={ - "message": message, - "model": model, - "preamble": preamble, - "chat_history": convert_and_respect_annotation_metadata( - object_=chat_history, annotation=typing.Sequence[Message], direction="write" - ), - "conversation_id": conversation_id, - "prompt_truncation": prompt_truncation, - "connectors": convert_and_respect_annotation_metadata( - object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" - ), - "search_queries_only": search_queries_only, - "documents": documents, - "citation_quality": citation_quality, - "temperature": temperature, - "max_tokens": max_tokens, - "max_input_tokens": max_input_tokens, - "k": k, - "p": p, - "seed": seed, - "stop_sequences": stop_sequences, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "raw_prompting": raw_prompting, - "return_prompt": return_prompt, - "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool], direction="write" - ), - "tool_results": convert_and_respect_annotation_metadata( - object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" - ), - "force_single_step": force_single_step, - "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat, direction="write" - ), - "safety_mode": safety_mode, - "stream": True, - }, - headers={ - "Accepts": str(accepts) if accepts is not None else None, - }, - request_options=request_options, - omit=OMIT, - ) as _response: - try: - if 200 <= _response.status_code < 300: - for _text in _response.iter_lines(): - try: - if len(_text) == 0: - continue - yield typing.cast( - StreamedChatResponse, - construct_type( - type_=StreamedChatResponse, # type: 
ignore - object_=json.loads(_text), - ), - ) - except: - pass - return - _response.read() - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + """ - def chat( + def __init__( self, *, - message: str, - accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, - model: typing.Optional[str] = OMIT, - preamble: typing.Optional[str] = OMIT, - chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, - conversation_id: typing.Optional[str] = OMIT, - prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT, - connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, - search_queries_only: typing.Optional[bool] = OMIT, - documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, - citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT, - temperature: typing.Optional[float] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - 
max_input_tokens: typing.Optional[int] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - return_prompt: typing.Optional[bool] = OMIT, - tools: typing.Optional[typing.Sequence[Tool]] = OMIT, - tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, - force_single_step: typing.Optional[bool] = OMIT, - response_format: typing.Optional[ResponseFormat] = OMIT, - safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> NonStreamedChatResponse: - """ - Generates a text response to a user message. - To learn how to use the Chat API with Streaming and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). - - Parameters - ---------- - message : str - Text input for the model to respond to. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - accepts : typing.Optional[typing.Literal["text/event-stream"]] - Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. - - model : typing.Optional[str] - Defaults to `command-r-plus-08-2024`. - - The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - - Compatible Deployments: Cohere Platform, Private Deployments - - - preamble : typing.Optional[str] - When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. - - The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - chat_history : typing.Optional[typing.Sequence[Message]] - A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. - - Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. - - The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - conversation_id : typing.Optional[str] - An alternative to `chat_history`. - - Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. - - Compatible Deployments: Cohere Platform - - - prompt_truncation : typing.Optional[ChatRequestPromptTruncation] - Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. - - Dictates how the prompt will be constructed. 
- - With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. - - With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. - - With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - - Compatible Deployments: - - AUTO: Cohere Platform Only - - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments - - - connectors : typing.Optional[typing.Sequence[ChatConnector]] - Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. - - When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). - - Compatible Deployments: Cohere Platform - - - search_queries_only : typing.Optional[bool] - Defaults to `false`. - - When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - documents : typing.Optional[typing.Sequence[ChatDocument]] - A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. - - Example: - ``` - [ - { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, - { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ] - ``` - - Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. - - Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. - - An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. - - An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. - - See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - citation_quality : typing.Optional[ChatRequestCitationQuality] - Defaults to `"accurate"`. - - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. 
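A short sketch of connector-grounded RAG as described above, using the managed `web-search` connector and the `"fast"` citation mode (pre-regeneration call site):

```
from cohere import ChatConnector, Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

# The reply is enriched with results retrieved by the web-search connector.
response = client.chat(
    message="Summarize this week's solar-panel industry news.",
    connectors=[ChatConnector(id="web-search")],
    citation_quality="fast",
)
print(response.text)
```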
- - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - temperature : typing.Optional[float] - Defaults to `0.3`. - - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - - Randomness can be further maximized by increasing the value of the `p` parameter. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_input_tokens : typing.Optional[int] - The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. - - Input will be truncated according to the `prompt_truncation` parameter. - - Compatible Deployments: Cohere Platform - - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`. min value of `0.01`, max value of `0.99`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - stop_sequences : typing.Optional[typing.Sequence[str]] - A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - frequency_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without - any pre-processing. 
- - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - return_prompt : typing.Optional[bool] - The prompt is returned in the `prompt` response field when this is enabled. - - tools : typing.Optional[typing.Sequence[Tool]] - A list of available tools (functions) that the model may suggest invoking before producing a text response. - - When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - tool_results : typing.Optional[typing.Sequence[ToolResult]] - A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. - Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. - - **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. - ``` - tool_results = [ - { - "call": { - "name": <tool name>, - "parameters": { - <param name>: <param value> - } - }, - "outputs": [{ - <key>: <value> - }] - }, - ... - ] - ``` - **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - force_single_step : typing.Optional[bool] - Forces the chat to be single step. Defaults to `false`. - - response_format : typing.Optional[ResponseFormat] - - safety_mode : typing.Optional[ChatRequestSafetyMode] - Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. - When `NONE` is specified, the safety instruction will be omitted. - - Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. - - **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - request_options : typing.Optional[RequestOptions] - Request-specific configuration.
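Putting the `tools`/`tool_results` contract above together, a hedged two-step sketch; `query_weather` and its output shape are hypothetical, and the second step assumes the model suggested at least one call:

```
from cohere import Client, Tool, ToolParameterDefinitionsValue, ToolResult

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

tools = [
    Tool(
        name="query_weather",  # hypothetical tool, for illustration only
        description="Look up the current weather for a city.",
        parameter_definitions={
            "city": ToolParameterDefinitionsValue(
                description="Name of the city", type="str", required=True
            )
        },
    )
]

# Step 1: the model replies with tool_calls and an empty `text`.
first = client.chat(message="What's the weather in Toronto?", tools=tools)

# Step 2: invoke the tool yourself, then send the outputs back.
# `outputs` must be a list of objects, even for a single result.
tool_results = [
    ToolResult(call=first.tool_calls[0], outputs=[{"temperature_c": 11}])
]
second = client.chat(
    message="What's the weather in Toronto?",
    tools=tools,
    tool_results=tool_results,
)
print(second.text)
```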
- - Returns - ------- - NonStreamedChatResponse - - - Examples - -------- - from cohere import Client, Message_Tool - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.chat( - message="Can you give me a global market overview of solar panels?", - chat_history=[Message_Tool(), Message_Tool()], - prompt_truncation="OFF", - temperature=0.3, - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/chat", - method="POST", - json={ - "message": message, - "model": model, - "preamble": preamble, - "chat_history": convert_and_respect_annotation_metadata( - object_=chat_history, annotation=typing.Sequence[Message], direction="write" - ), - "conversation_id": conversation_id, - "prompt_truncation": prompt_truncation, - "connectors": convert_and_respect_annotation_metadata( - object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" - ), - "search_queries_only": search_queries_only, - "documents": documents, - "citation_quality": citation_quality, - "temperature": temperature, - "max_tokens": max_tokens, - "max_input_tokens": max_input_tokens, - "k": k, - "p": p, - "seed": seed, - "stop_sequences": stop_sequences, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "raw_prompting": raw_prompting, - "return_prompt": return_prompt, - "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool], direction="write" - ), - "tool_results": convert_and_respect_annotation_metadata( - object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" - ), - "force_single_step": force_single_step, - "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat, direction="write" - ), - "safety_mode": safety_mode, - "stream": False, - }, - headers={ - "Accepts": str(accepts) if accepts is not None else None, - }, - request_options=request_options, - omit=OMIT, + base_url: typing.Optional[str] = None, + environment: ClientEnvironment = ClientEnvironment.PRODUCTION, + client_name: typing.Optional[str] = None, + token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CO_API_KEY"), + timeout: typing.Optional[float] = None, + follow_redirects: typing.Optional[bool] = True, + httpx_client: typing.Optional[httpx.AsyncClient] = None, + ): + _defaulted_timeout = timeout if timeout is not None else 300 if httpx_client is None else None + if token is None: + raise ApiError(body="The client must be instantiated be either passing in token or setting CO_API_KEY") + self._client_wrapper = AsyncClientWrapper( + base_url=_get_base_url(base_url=base_url, environment=environment), + client_name=client_name, + token=token, + httpx_client=httpx_client + if httpx_client is not None + else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects) + if follow_redirects is not None + else httpx.AsyncClient(timeout=_defaulted_timeout), + timeout=_defaulted_timeout, ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - NonStreamedChatResponse, - construct_type( - type_=NonStreamedChatResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], 
- construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def generate_stream( - self, - *, - prompt: str, - model: typing.Optional[str] = OMIT, - num_generations: typing.Optional[int] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT, - temperature: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - preset: typing.Optional[str] = OMIT, - end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> typing.Iterator[GenerateStreamedResponse]: - """ - - This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - - Generates realistic text conditioned on a given input. 
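For the legacy Generate endpoint described below, a minimal streaming sketch against the pre-regeneration client; new code should follow the Chat migration guide referenced above. The `"text-generation"` event type is assumed from the `GenerateStreamText` variant of the streamed union:

```
from cohere import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

stream = client.generate_stream(prompt="Write one sentence about the sea.", max_tokens=50)
for event in stream:
    if event.event_type == "text-generation":
        print(event.text, end="", flush=True)
```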
- - Parameters - ---------- - prompt : str - The input text that serves as the starting point for generating the response. - Note: The prompt will be pre-processed and modified before reaching the model. - - - model : typing.Optional[str] - The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). - Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - - num_generations : typing.Optional[int] - The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. - - Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. - - - truncate : typing.Optional[GenerateStreamRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - temperature : typing.Optional[float] - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. - Defaults to `0.75`, min value of `0.0`, max value of `5.0`. - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - preset : typing.Optional[str] - Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). - When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. - - - end_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. - - stop_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text. - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`. 
min value of `0.01`, max value of `0.99`. - - - frequency_penalty : typing.Optional[float] - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - - - return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods] - One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. - - If `GENERATION` is selected, the token likelihoods will only be provided for generated text. - - If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without any pre-processing. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Yields - ------ - typing.Iterator[GenerateStreamedResponse] - - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - response = client.generate_stream( - prompt="string", - model="string", - num_generations=1, - max_tokens=1, - truncate="NONE", - temperature=1.1, - seed=1, - preset="string", - end_sequences=["string"], - stop_sequences=["string"], - k=1, - p=1.1, - frequency_penalty=1.1, - presence_penalty=1.1, - return_likelihoods="GENERATION", - raw_prompting=True, - ) - for chunk in response: - yield chunk - """ - with self._client_wrapper.httpx_client.stream( - "v1/generate", - method="POST", - json={ - "prompt": prompt, - "model": model, - "num_generations": num_generations, - "max_tokens": max_tokens, - "truncate": truncate, - "temperature": temperature, - "seed": seed, - "preset": preset, - "end_sequences": end_sequences, - "stop_sequences": stop_sequences, - "k": k, - "p": p, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "return_likelihoods": return_likelihoods, - "raw_prompting": raw_prompting, - "stream": True, - }, - request_options=request_options, - omit=OMIT, - ) as _response: - try: - if 200 <= _response.status_code < 300: - for _text in _response.iter_lines(): - try: - if len(_text) == 0: - continue - yield typing.cast( - GenerateStreamedResponse, - construct_type( - type_=GenerateStreamedResponse, # type: ignore - object_=json.loads(_text), - ), - ) - except: - pass - return - _response.read() - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - 
typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def generate( - self, - *, - prompt: str, - model: typing.Optional[str] = OMIT, - num_generations: typing.Optional[int] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - truncate: typing.Optional[GenerateRequestTruncate] = OMIT, - temperature: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - preset: typing.Optional[str] = OMIT, - end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> Generation: - """ - - This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - - Generates realistic text conditioned on a given input.
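# The non-streaming variant returns a single Generation object instead of an
# iterator. A compact sketch of requesting several candidates and reading them
# back; it assumes Generation carries a `generations` list whose items expose
# `text` and, when `return_likelihoods` is set, `likelihood` (field names are
# assumptions based on the types referenced in this file):
#
from cohere import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
result = client.generate(
    prompt="Please explain to me how LLMs work",
    num_generations=2,                # up to 5 candidates per call
    return_likelihoods="GENERATION",  # attach likelihoods to the generated text
)
for candidate in result.generations:
    print(candidate.likelihood, candidate.text)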
- - Parameters - ---------- - prompt : str - The input text that serves as the starting point for generating the response. - Note: The prompt will be pre-processed and modified before reaching the model. - - - model : typing.Optional[str] - The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). - Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - - num_generations : typing.Optional[int] - The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. - - Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. - - - truncate : typing.Optional[GenerateRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - temperature : typing.Optional[float] - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. - Defaults to `0.75`, min value of `0.0`, max value of `5.0`. - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - preset : typing.Optional[str] - Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). - When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. - - - end_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. - - stop_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text. - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - - - frequency_penalty : typing.Optional[float] - Used to reduce repetitiveness of generated tokens.
The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - - - return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods] - One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. - - If `GENERATION` is selected, the token likelihoods will only be provided for generated text. - - If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without any pre-processing. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - Generation - - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.generate( - prompt="Please explain to me how LLMs work", - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/generate", - method="POST", - json={ - "prompt": prompt, - "model": model, - "num_generations": num_generations, - "max_tokens": max_tokens, - "truncate": truncate, - "temperature": temperature, - "seed": seed, - "preset": preset, - "end_sequences": end_sequences, - "stop_sequences": stop_sequences, - "k": k, - "p": p, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "return_likelihoods": return_likelihoods, - "raw_prompting": raw_prompting, - "stream": False, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - Generation, - construct_type( - type_=Generation, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - 
typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def embed( - self, - *, - texts: typing.Optional[typing.Sequence[str]] = OMIT, - images: typing.Optional[typing.Sequence[str]] = OMIT, - model: typing.Optional[str] = OMIT, - input_type: typing.Optional[EmbedInputType] = OMIT, - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT, - truncate: typing.Optional[EmbedRequestTruncate] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> EmbedResponse: - """ - This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents. - - Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page. - - If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search). - - Parameters - ---------- - texts : typing.Optional[typing.Sequence[str]] - An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality. - - images : typing.Optional[typing.Sequence[str]] - An array of image data URIs for the model to embed. Maximum number of images per call is `1`. - - The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and have a maximum size of 5MB. - - model : typing.Optional[str] - Defaults to `embed-english-v2.0`. - - The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
- - Available models and corresponding embedding dimensions: - - * `embed-english-v3.0` 1024 - * `embed-multilingual-v3.0` 1024 - * `embed-english-light-v3.0` 384 - * `embed-multilingual-light-v3.0` 384 - - * `embed-english-v2.0` 4096 - * `embed-english-light-v2.0` 1024 - * `embed-multilingual-v2.0` 768 - - input_type : typing.Optional[EmbedInputType] - - embedding_types : typing.Optional[typing.Sequence[EmbeddingType]] - Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types. - - * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. - * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. - * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. - * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. - * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. - - truncate : typing.Optional[EmbedRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - EmbedResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.embed() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/embed", - method="POST", - json={ - "texts": texts, - "images": images, - "model": model, - "input_type": input_type, - "embedding_types": embedding_types, - "truncate": truncate, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - EmbedResponse, - construct_type( - type_=EmbedResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - 
TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def rerank( - self, - *, - query: str, - documents: typing.Sequence[RerankRequestDocumentsItem], - model: typing.Optional[str] = OMIT, - top_n: typing.Optional[int] = OMIT, - rank_fields: typing.Optional[typing.Sequence[str]] = OMIT, - return_documents: typing.Optional[bool] = OMIT, - max_chunks_per_doc: typing.Optional[int] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> RerankResponse: - """ - This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score. - - Parameters - ---------- - query : str - The search query - - documents : typing.Sequence[RerankRequestDocumentsItem] - A list of document objects or strings to rerank. - If a document is provided, the text field is required and all other fields will be preserved in the response. - - The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000. - - We recommend a maximum of 1,000 documents for optimal endpoint performance. - - model : typing.Optional[str] - The identifier of the model to use, one of: `rerank-english-v3.0`, `rerank-multilingual-v3.0`, `rerank-english-v2.0`, `rerank-multilingual-v2.0` - - top_n : typing.Optional[int] - The number of most relevant documents or indices to return, defaults to the length of the documents - - rank_fields : typing.Optional[typing.Sequence[str]] - If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). If not provided, the model will use the default text field for ranking. - - return_documents : typing.Optional[bool] - - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.
- - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request. - - max_chunks_per_doc : typing.Optional[int] - The maximum number of chunks to produce internally from a document - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - RerankResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.rerank( - query="query", - documents=["documents"], - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/rerank", - method="POST", - json={ - "model": model, - "query": query, - "documents": convert_and_respect_annotation_metadata( - object_=documents, annotation=typing.Sequence[RerankRequestDocumentsItem], direction="write" - ), - "top_n": top_n, - "rank_fields": rank_fields, - "return_documents": return_documents, - "max_chunks_per_doc": max_chunks_per_doc, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - RerankResponse, - construct_type( - type_=RerankResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise 
GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def classify( - self, - *, - inputs: typing.Sequence[str], - examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT, - model: typing.Optional[str] = OMIT, - preset: typing.Optional[str] = OMIT, - truncate: typing.Optional[ClassifyRequestTruncate] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> ClassifyResponse: - """ - This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference. - Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. - - Parameters - ---------- - inputs : typing.Sequence[str] - A list of up to 96 texts to be classified. Each one must be a non-empty string. - There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models). - Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts. - - examples : typing.Optional[typing.Sequence[ClassifyExample]] - An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`. - Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. - - model : typing.Optional[str] - The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID. - - preset : typing.Optional[str] - The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. - - truncate : typing.Optional[ClassifyRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. 
- If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ClassifyResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.classify( - inputs=["inputs"], - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/classify", - method="POST", - json={ - "inputs": inputs, - "examples": convert_and_respect_annotation_metadata( - object_=examples, annotation=typing.Sequence[ClassifyExample], direction="write" - ), - "model": model, - "preset": preset, - "truncate": truncate, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ClassifyResponse, - construct_type( - type_=ClassifyResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise 
ApiError(status_code=_response.status_code, body=_response_json) - - def summarize( - self, - *, - text: str, - length: typing.Optional[SummarizeRequestLength] = OMIT, - format: typing.Optional[SummarizeRequestFormat] = OMIT, - model: typing.Optional[str] = OMIT, - extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT, - temperature: typing.Optional[float] = OMIT, - additional_command: typing.Optional[str] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> SummarizeResponse: - """ - - This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - - Generates a summary in English for a given text. - - Parameters - ---------- - text : str - The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English. - - length : typing.Optional[SummarizeRequestLength] - One of `short`, `medium`, `long`, or `auto`, defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text. - - format : typing.Optional[SummarizeRequestFormat] - One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text. - - model : typing.Optional[str] - The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better. - - extractiveness : typing.Optional[SummarizeRequestExtractiveness] - One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text. - - temperature : typing.Optional[float] - Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1. - - additional_command : typing.Optional[str] - A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda". - - request_options : typing.Optional[RequestOptions] - Request-specific configuration.
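# The options above map one-to-one onto keyword arguments. A short sketch of
# the legacy summarize endpoint with the non-default knobs filled in; the
# argument values are illustrative, and reading `.summary` off the response is
# an assumption about SummarizeResponse:
#
from cohere import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
response = client.summarize(
    text="<up to 100,000 characters of English text to condense>",
    length="short",        # short | medium | long | auto
    format="bullets",      # paragraph | bullets | auto
    extractiveness="low",  # low | medium | high | auto; low paraphrases more
    temperature=0.5,       # ranges 0-5; the sweet spot is typically 0 to 1
    additional_command="focusing on the next steps",
)
print(response.summary)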
- - Returns - ------- - SummarizeResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.summarize( - text="text", - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/summarize", - method="POST", - json={ - "text": text, - "length": length, - "format": format, - "model": model, - "extractiveness": extractiveness, - "temperature": temperature, - "additional_command": additional_command, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - SummarizeResponse, - construct_type( - type_=SummarizeResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def tokenize( - self, *, text: str, model: str, request_options: typing.Optional[RequestOptions] = None - ) -> TokenizeResponse: - """ - This endpoint splits input text into 
smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page. - - Parameters - ---------- - text : str - The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters. - - model : str - An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - TokenizeResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.tokenize( - text="tokenize me! :D", - model="command", - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/tokenize", - method="POST", - json={ - "text": text, - "model": model, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - TokenizeResponse, - construct_type( - type_=TokenizeResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - 
object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def detokenize( - self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None - ) -> DetokenizeResponse: - """ - This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page. - - Parameters - ---------- - tokens : typing.Sequence[int] - The list of tokens to be detokenized. - - model : str - An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DetokenizeResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.detokenize( - tokens=[1], - model="model", - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/detokenize", - method="POST", - json={ - "tokens": tokens, - "model": model, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DetokenizeResponse, - construct_type( - type_=DetokenizeResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 
503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def check_api_key(self, *, request_options: typing.Optional[RequestOptions] = None) -> CheckApiKeyResponse: - """ - Checks that the api key in the Authorization header is valid and active - - Parameters - ---------- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - CheckApiKeyResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.check_api_key() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/check-api-key", - method="POST", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - CheckApiKeyResponse, - construct_type( - type_=CheckApiKeyResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - 
construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - -class AsyncBaseCohere: - """ - Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configurations that will propagate to these functions. - - Parameters - ---------- - base_url : typing.Optional[str] - The base url to use for requests from the client. - - environment : ClientEnvironment - The environment to use for requests from the client. from .environment import ClientEnvironment - - - - Defaults to ClientEnvironment.PRODUCTION - - - - client_name : typing.Optional[str] - token : typing.Optional[typing.Union[str, typing.Callable[[], str]]] - timeout : typing.Optional[float] - The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case this default is not enforced. - - follow_redirects : typing.Optional[bool] - Whether the default httpx client follows redirects or not, this is irrelevant if a custom httpx client is passed in. - - httpx_client : typing.Optional[httpx.AsyncClient] - The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration. - - Examples - -------- - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - """ - - def __init__( - self, - *, - base_url: typing.Optional[str] = None, - environment: ClientEnvironment = ClientEnvironment.PRODUCTION, - client_name: typing.Optional[str] = None, - token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CO_API_KEY"), - timeout: typing.Optional[float] = None, - follow_redirects: typing.Optional[bool] = True, - httpx_client: typing.Optional[httpx.AsyncClient] = None, - ): - _defaulted_timeout = timeout if timeout is not None else 300 if httpx_client is None else None - if token is None: - raise ApiError(body="The client must be instantiated by either passing in token or setting CO_API_KEY") - self._client_wrapper = AsyncClientWrapper( - base_url=_get_base_url(base_url=base_url, environment=environment), - client_name=client_name, - token=token, - httpx_client=httpx_client - if httpx_client is not None - else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.AsyncClient(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, - ) - self.v2 = AsyncV2Client(client_wrapper=self._client_wrapper) - self.embed_jobs = AsyncEmbedJobsClient(client_wrapper=self._client_wrapper) - self.datasets = AsyncDatasetsClient(client_wrapper=self._client_wrapper) - self.connectors = AsyncConnectorsClient(client_wrapper=self._client_wrapper) - self.models = AsyncModelsClient(client_wrapper=self._client_wrapper) - self.finetuning = AsyncFinetuningClient(client_wrapper=self._client_wrapper) - - async def chat_stream( - self, - *, - message: str, - accepts: typing.Optional[typing.Literal["text/event-stream"]] = None,
- model: typing.Optional[str] = OMIT, - preamble: typing.Optional[str] = OMIT, - chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, - conversation_id: typing.Optional[str] = OMIT, - prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT, - connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, - search_queries_only: typing.Optional[bool] = OMIT, - documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, - citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT, - temperature: typing.Optional[float] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - return_prompt: typing.Optional[bool] = OMIT, - tools: typing.Optional[typing.Sequence[Tool]] = OMIT, - tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, - force_single_step: typing.Optional[bool] = OMIT, - response_format: typing.Optional[ResponseFormat] = OMIT, - safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> typing.AsyncIterator[StreamedChatResponse]: - """ - Generates a text response to a user message. - To learn how to use the Chat API with Streaming and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). - - Parameters - ---------- - message : str - Text input for the model to respond to. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - accepts : typing.Optional[typing.Literal["text/event-stream"]] - Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. - - model : typing.Optional[str] - Defaults to `command-r-plus-08-2024`. - - The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - - Compatible Deployments: Cohere Platform, Private Deployments - - - preamble : typing.Optional[str] - When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. - - The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - chat_history : typing.Optional[typing.Sequence[Message]] - A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. - - Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. - - The chat_history parameter should not be used for `SYSTEM` messages in most cases. 
Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - conversation_id : typing.Optional[str] - An alternative to `chat_history`. - - Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string. - - Compatible Deployments: Cohere Platform - - - prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation] - Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. - - Dictates how the prompt will be constructed. - - With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. - - With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. - - With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - - Compatible Deployments: - - AUTO: Cohere Platform Only - - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments - - - connectors : typing.Optional[typing.Sequence[ChatConnector]] - Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. - - When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). - - Compatible Deployments: Cohere Platform - - - search_queries_only : typing.Optional[bool] - Defaults to `false`. - - When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - documents : typing.Optional[typing.Sequence[ChatDocument]] - A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. - - Example: - ``` - [ - { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, - { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ] - ``` - - Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. - - Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. - - An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. - - An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object.
The "_excludes" field will not be passed to the model. - - See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - citation_quality : typing.Optional[ChatStreamRequestCitationQuality] - Defaults to `"accurate"`. - - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - temperature : typing.Optional[float] - Defaults to `0.3`. - - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - - Randomness can be further maximized by increasing the value of the `p` parameter. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_input_tokens : typing.Optional[int] - The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. - - Input will be truncated according to the `prompt_truncation` parameter. - - Compatible Deployments: Cohere Platform - - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - stop_sequences : typing.Optional[typing.Sequence[str]] - A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - frequency_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
- - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without - any pre-processing. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - return_prompt : typing.Optional[bool] - The prompt is returned in the `prompt` response field when this is enabled. - - tools : typing.Optional[typing.Sequence[Tool]] - A list of available tools (functions) that the model may suggest invoking before producing a text response. - - When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - tool_results : typing.Optional[typing.Sequence[ToolResult]] - A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. - Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. - - **Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list. - ``` - tool_results = [ - { - "call": { - "name": <tool name>, - "parameters": { - <param name>: <param value> - } - }, - "outputs": [{ - <key>: <value> - }] - }, - ... - ] - ``` - **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. A concrete sketch of this payload follows the parameter list below. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - force_single_step : typing.Optional[bool] - Forces the chat to be single-step. Defaults to `false`. - - response_format : typing.Optional[ResponseFormat] - - safety_mode : typing.Optional[ChatStreamRequestSafetyMode] - Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. - When `NONE` is specified, the safety instruction will be omitted. - - Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. - - **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - request_options : typing.Optional[RequestOptions] - Request-specific configuration.
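- For illustration (not part of the original reference), a well-formed `tool_results` payload matching the template above; the tool name, parameter, and output keys are hypothetical: - ``` - tool_results = [ - { - "call": { - "name": "query_daily_sales_report", - "parameters": {"day": "2023-09-29"} - }, - "outputs": [ - {"date": "2023-09-29", "summary": "Total sales amount: 10000"} - ] - } - ] - ```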
- - Yields - ------ - typing.AsyncIterator[StreamedChatResponse] - - - Examples - -------- - import asyncio - - from cohere import ( - AsyncClient, - ChatConnector, - Message_Chatbot, - ResponseFormat_Text, - Tool, - ToolCall, - ToolParameterDefinitionsValue, - ToolResult, - ) - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - response = await client.chat_stream( - message="string", - model="string", - preamble="string", - chat_history=[ - Message_Chatbot( - message="string", - tool_calls=[ - ToolCall( - name="string", - parameters={"string": {"key": "value"}}, - ) - ], - ) - ], - conversation_id="string", - prompt_truncation="OFF", - connectors=[ - ChatConnector( - id="string", - user_access_token="string", - continue_on_failure=True, - options={"string": {"key": "value"}}, - ) - ], - search_queries_only=True, - documents=[{"string": {"key": "value"}}], - citation_quality="fast", - temperature=1.1, - max_tokens=1, - max_input_tokens=1, - k=1, - p=1.1, - seed=1, - stop_sequences=["string"], - frequency_penalty=1.1, - presence_penalty=1.1, - raw_prompting=True, - return_prompt=True, - tools=[ - Tool( - name="string", - description="string", - parameter_definitions={ - "string": ToolParameterDefinitionsValue( - description="string", - type="string", - required=True, - ) - }, - ) - ], - tool_results=[ - ToolResult( - call=ToolCall( - name="string", - parameters={"string": {"key": "value"}}, - ), - outputs=[{"string": {"key": "value"}}], - ) - ], - force_single_step=True, - response_format=ResponseFormat_Text(), - safety_mode="CONTEXTUAL", - ) - async for chunk in response: - print(chunk) - - - asyncio.run(main()) - """ - async with self._client_wrapper.httpx_client.stream( - "v1/chat", - method="POST", - json={ - "message": message, - "model": model, - "preamble": preamble, - "chat_history": convert_and_respect_annotation_metadata( - object_=chat_history, annotation=typing.Sequence[Message], direction="write" - ), - "conversation_id": conversation_id, - "prompt_truncation": prompt_truncation, - "connectors": convert_and_respect_annotation_metadata( - object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" - ), - "search_queries_only": search_queries_only, - "documents": documents, - "citation_quality": citation_quality, - "temperature": temperature, - "max_tokens": max_tokens, - "max_input_tokens": max_input_tokens, - "k": k, - "p": p, - "seed": seed, - "stop_sequences": stop_sequences, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "raw_prompting": raw_prompting, - "return_prompt": return_prompt, - "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool], direction="write" - ), - "tool_results": convert_and_respect_annotation_metadata( - object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" - ), - "force_single_step": force_single_step, - "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat, direction="write" - ), - "safety_mode": safety_mode, - "stream": True, - }, - headers={ - "Accepts": str(accepts) if accepts is not None else None, - }, - request_options=request_options, - omit=OMIT, - ) as _response: - try: - if 200 <= _response.status_code < 300: - async for _text in _response.aiter_lines(): - try: - if
len(_text) == 0: - continue - yield typing.cast( - StreamedChatResponse, - construct_type( - type_=StreamedChatResponse, # type: ignore - object_=json.loads(_text), - ), - ) - except: - pass - return - await _response.aread() - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def chat( - self, - *, - message: str, - accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, - model: typing.Optional[str] = OMIT, - preamble: typing.Optional[str] = OMIT, - chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, - conversation_id: typing.Optional[str] = OMIT, - prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT, - connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, - search_queries_only: typing.Optional[bool] = OMIT, - documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, - citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT, - temperature: typing.Optional[float] = OMIT, - 
max_tokens: typing.Optional[int] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - return_prompt: typing.Optional[bool] = OMIT, - tools: typing.Optional[typing.Sequence[Tool]] = OMIT, - tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, - force_single_step: typing.Optional[bool] = OMIT, - response_format: typing.Optional[ResponseFormat] = OMIT, - safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> NonStreamedChatResponse: - """ - Generates a text response to a user message. - To learn how to use the Chat API with Streaming and RAG, follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). - - Parameters - ---------- - message : str - Text input for the model to respond to. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - accepts : typing.Optional[typing.Literal["text/event-stream"]] - Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n`-delimited events. - - model : typing.Optional[str] - Defaults to `command-r-plus-08-2024`. - - The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. - - Compatible Deployments: Cohere Platform, Private Deployments - - - preamble : typing.Optional[str] - When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. - - The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - chat_history : typing.Optional[typing.Sequence[Message]] - A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. - - Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. - - The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - conversation_id : typing.Optional[str] - An alternative to `chat_history`. - - Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string. - - Compatible Deployments: Cohere Platform - - - prompt_truncation : typing.Optional[ChatRequestPromptTruncation] - Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
- - Dictates how the prompt will be constructed. - - With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. - - With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. - - With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - - Compatible Deployments: - - AUTO: Cohere Platform Only - - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments - - - connectors : typing.Optional[typing.Sequence[ChatConnector]] - Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. - - When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). - - Compatible Deployments: Cohere Platform - - - search_queries_only : typing.Optional[bool] - Defaults to `false`. - - When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - documents : typing.Optional[typing.Sequence[ChatDocument]] - A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. - - Example: - ``` - [ - { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, - { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ] - ``` - - Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. - - Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. - - An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. - - An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. - - See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - citation_quality : typing.Optional[ChatRequestCitationQuality] - Defaults to `"accurate"`. - - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. 
- - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - temperature : typing.Optional[float] - Defaults to `0.3`. - - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. - - Randomness can be further maximized by increasing the value of the `p` parameter. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - max_input_tokens : typing.Optional[int] - The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. - - Input will be truncated according to the `prompt_truncation` parameter. - - Compatible Deployments: Cohere Platform - - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - stop_sequences : typing.Optional[typing.Sequence[str]] - A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - frequency_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without - any pre-processing.
- - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - return_prompt : typing.Optional[bool] - The prompt is returned in the `prompt` response field when this is enabled. - - tools : typing.Optional[typing.Sequence[Tool]] - A list of available tools (functions) that the model may suggest invoking before producing a text response. - - When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - tool_results : typing.Optional[typing.Sequence[ToolResult]] - A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. - Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. - - **Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list. - ``` - tool_results = [ - { - "call": { - "name": <tool name>, - "parameters": { - <param name>: <param value> - } - }, - "outputs": [{ - <key>: <value> - }] - }, - ... - ] - ``` - **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - force_single_step : typing.Optional[bool] - Forces the chat to be single-step. Defaults to `false`. - - response_format : typing.Optional[ResponseFormat] - - safety_mode : typing.Optional[ChatRequestSafetyMode] - Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. - When `NONE` is specified, the safety instruction will be omitted. - - Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. - - **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. - - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - request_options : typing.Optional[RequestOptions] - Request-specific configuration.
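- As an illustrative sketch (assuming, per the RAG docs referenced above, that the non-streamed response exposes a `text` field with document-grounded citations), a chat call that grounds the reply in the example `documents` from this parameter list: - ``` - response = await client.chat( - message="Where do emperor penguins live?", - documents=[ - {"title": "Tall penguins", "text": "Emperor penguins are the tallest."}, - {"title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica."}, - ], - ) - print(response.text) # the reply; citations reference the documents above - ```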
- - Returns - ------- - NonStreamedChatResponse - - - Examples - -------- - import asyncio - - from cohere import AsyncClient, Message_Tool - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.chat( - message="Can you give me a global market overview of solar panels?", - chat_history=[Message_Tool(), Message_Tool()], - prompt_truncation="OFF", - temperature=0.3, - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/chat", - method="POST", - json={ - "message": message, - "model": model, - "preamble": preamble, - "chat_history": convert_and_respect_annotation_metadata( - object_=chat_history, annotation=typing.Sequence[Message], direction="write" - ), - "conversation_id": conversation_id, - "prompt_truncation": prompt_truncation, - "connectors": convert_and_respect_annotation_metadata( - object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" - ), - "search_queries_only": search_queries_only, - "documents": documents, - "citation_quality": citation_quality, - "temperature": temperature, - "max_tokens": max_tokens, - "max_input_tokens": max_input_tokens, - "k": k, - "p": p, - "seed": seed, - "stop_sequences": stop_sequences, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "raw_prompting": raw_prompting, - "return_prompt": return_prompt, - "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool], direction="write" - ), - "tool_results": convert_and_respect_annotation_metadata( - object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" - ), - "force_single_step": force_single_step, - "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat, direction="write" - ), - "safety_mode": safety_mode, - "stream": False, - }, - headers={ - "Accepts": str(accepts) if accepts is not None else None, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - NonStreamedChatResponse, - construct_type( - type_=NonStreamedChatResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - 
object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def generate_stream( - self, - *, - prompt: str, - model: typing.Optional[str] = OMIT, - num_generations: typing.Optional[int] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT, - temperature: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - preset: typing.Optional[str] = OMIT, - end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> typing.AsyncIterator[GenerateStreamedResponse]: - """ - - This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - - Generates realistic text conditioned on a given input. - - Parameters - ---------- - prompt : str - The input text that serves as the starting point for generating the response. - Note: The prompt will be pre-processed and modified before reaching the model. - - - model : typing.Optional[str] - The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). - Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - - num_generations : typing.Optional[int] - The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. 
- - This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. - - Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. - - - truncate : typing.Optional[GenerateStreamRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - temperature : typing.Optional[float] - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. - Defaults to `0.75`, min value of `0.0`, max value of `5.0`. - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - preset : typing.Optional[str] - Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). - When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. - - - end_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. - - stop_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text. - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - - - frequency_penalty : typing.Optional[float] - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
- - - return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods] - One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. - - If `GENERATION` is selected, the token likelihoods will only be provided for generated text. - - If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without any pre-processing. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Yields - ------ - typing.AsyncIterator[GenerateStreamedResponse] - - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - response = await client.generate_stream( - prompt="string", - model="string", - num_generations=1, - max_tokens=1, - truncate="NONE", - temperature=1.1, - seed=1, - preset="string", - end_sequences=["string"], - stop_sequences=["string"], - k=1, - p=1.1, - frequency_penalty=1.1, - presence_penalty=1.1, - return_likelihoods="GENERATION", - raw_prompting=True, - ) - async for chunk in response: - print(chunk) - - - asyncio.run(main()) - """ - async with self._client_wrapper.httpx_client.stream( - "v1/generate", - method="POST", - json={ - "prompt": prompt, - "model": model, - "num_generations": num_generations, - "max_tokens": max_tokens, - "truncate": truncate, - "temperature": temperature, - "seed": seed, - "preset": preset, - "end_sequences": end_sequences, - "stop_sequences": stop_sequences, - "k": k, - "p": p, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "return_likelihoods": return_likelihoods, - "raw_prompting": raw_prompting, - "stream": True, - }, - request_options=request_options, - omit=OMIT, - ) as _response: - try: - if 200 <= _response.status_code < 300: - async for _text in _response.aiter_lines(): - try: - if len(_text) == 0: - continue - yield typing.cast( - GenerateStreamedResponse, - construct_type( - type_=GenerateStreamedResponse, # type: ignore - object_=json.loads(_text), - ), - ) - except: - pass - return - await _response.aread() - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( -
type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def generate( - self, - *, - prompt: str, - model: typing.Optional[str] = OMIT, - num_generations: typing.Optional[int] = OMIT, - max_tokens: typing.Optional[int] = OMIT, - truncate: typing.Optional[GenerateRequestTruncate] = OMIT, - temperature: typing.Optional[float] = OMIT, - seed: typing.Optional[int] = OMIT, - preset: typing.Optional[str] = OMIT, - end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[float] = OMIT, - frequency_penalty: typing.Optional[float] = OMIT, - presence_penalty: typing.Optional[float] = OMIT, - return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT, - raw_prompting: typing.Optional[bool] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> Generation: - """ - - This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - - Generates realistic text conditioned on a given input. - - Parameters - ---------- - prompt : str - The input text that serves as the starting point for generating the response. - Note: The prompt will be pre-processed and modified before reaching the model. - - - model : typing.Optional[str] - The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). - Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - - num_generations : typing.Optional[int] - The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. - - - max_tokens : typing.Optional[int] - The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. 
- - This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. - - Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. - - - truncate : typing.Optional[GenerateRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - temperature : typing.Optional[float] - A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. - Defaults to `0.75`, min value of `0.0`, max value of `5.0`. - - - seed : typing.Optional[int] - If specified, the backend will make a best effort to sample tokens - deterministically, such that repeated requests with the same - seed and parameters should return the same result. However, - determinism cannot be totally guaranteed. - Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments - - - preset : typing.Optional[str] - Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). - When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. - - - end_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. - - stop_sequences : typing.Optional[typing.Sequence[str]] - The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text. - - k : typing.Optional[int] - Ensures only the top `k` most likely tokens are considered for generation at each step. - Defaults to `0`, min value of `0`, max value of `500`. - - - p : typing.Optional[float] - Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. - Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - - - frequency_penalty : typing.Optional[float] - Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. - - - presence_penalty : typing.Optional[float] - Defaults to `0.0`, min value of `0.0`, max value of `1.0`. - - Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. - - Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
- - - return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods] - One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. - - If `GENERATION` is selected, the token likelihoods will only be provided for generated text. - - If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. - - raw_prompting : typing.Optional[bool] - When enabled, the user's prompt will be sent to the model without any pre-processing. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - Generation - - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.generate( - prompt="Please explain to me how LLMs work", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/generate", - method="POST", - json={ - "prompt": prompt, - "model": model, - "num_generations": num_generations, - "max_tokens": max_tokens, - "truncate": truncate, - "temperature": temperature, - "seed": seed, - "preset": preset, - "end_sequences": end_sequences, - "stop_sequences": stop_sequences, - "k": k, - "p": p, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, - "return_likelihoods": return_likelihoods, - "raw_prompting": raw_prompting, - "stream": False, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - Generation, - construct_type( - type_=Generation, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code 
== 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def embed( - self, - *, - texts: typing.Optional[typing.Sequence[str]] = OMIT, - images: typing.Optional[typing.Sequence[str]] = OMIT, - model: typing.Optional[str] = OMIT, - input_type: typing.Optional[EmbedInputType] = OMIT, - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT, - truncate: typing.Optional[EmbedRequestTruncate] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> EmbedResponse: - """ - This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents. - - Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page. - - If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search). - - Parameters - ---------- - texts : typing.Optional[typing.Sequence[str]] - An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality. - - images : typing.Optional[typing.Sequence[str]] - An array of image data URIs for the model to embed. Maximum number of images per call is `1`. - - The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and have a maximum size of 5MB. - - model : typing.Optional[str] - Defaults to `embed-english-v2.0`. - - The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. - - Available models and corresponding embedding dimensions: - - * `embed-english-v3.0` 1024 - * `embed-multilingual-v3.0` 1024 - * `embed-english-light-v3.0` 384 - * `embed-multilingual-light-v3.0` 384 - - * `embed-english-v2.0` 4096 - * `embed-english-light-v2.0` 1024 - * `embed-multilingual-v2.0` 768 - - input_type : typing.Optional[EmbedInputType] - - embedding_types : typing.Optional[typing.Sequence[EmbeddingType]] - Specifies the types of embeddings you want to get back. Not required; the default is None, which returns the Embed Floats response type. Can be one or more of the following types. - - * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. - * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. - * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
- * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. - * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. - - truncate : typing.Optional[EmbedRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - EmbedResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.embed() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/embed", - method="POST", - json={ - "texts": texts, - "images": images, - "model": model, - "input_type": input_type, - "embedding_types": embedding_types, - "truncate": truncate, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - EmbedResponse, - construct_type( - type_=EmbedResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if 
_response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def rerank( - self, - *, - query: str, - documents: typing.Sequence[RerankRequestDocumentsItem], - model: typing.Optional[str] = OMIT, - top_n: typing.Optional[int] = OMIT, - rank_fields: typing.Optional[typing.Sequence[str]] = OMIT, - return_documents: typing.Optional[bool] = OMIT, - max_chunks_per_doc: typing.Optional[int] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> RerankResponse: - """ - This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score. - - Parameters - ---------- - query : str - The search query - - documents : typing.Sequence[RerankRequestDocumentsItem] - A list of document objects or strings to rerank. - If a document object is provided, its text field is required and all other fields will be preserved in the response. - - The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000. - - We recommend a maximum of 1,000 documents for optimal endpoint performance. - - model : typing.Optional[str] - The identifier of the model to use, one of: `rerank-english-v3.0`, `rerank-multilingual-v3.0`, `rerank-english-v2.0`, `rerank-multilingual-v2.0` - - top_n : typing.Optional[int] - The number of most relevant documents or indices to return, defaults to the length of the documents - - rank_fields : typing.Optional[typing.Sequence[str]] - If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). If not provided, the model will use the default text field for ranking. - - return_documents : typing.Optional[bool] - - If false, returns results without the doc text - the API will return a list of {index, relevance score} where index is inferred from the list passed into the request. - - If true, returns results with the doc text passed in - the API will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request. - - max_chunks_per_doc : typing.Optional[int] - The maximum number of chunks to produce internally from a document - - request_options : typing.Optional[RequestOptions] - Request-specific configuration.
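
Editor's note: the interaction of `rank_fields` and `return_documents` documented above is easier to see in code. Below is a minimal sketch against the async surface this hunk removes; the document field names and values are illustrative, not taken from the patch.

    import asyncio

    from cohere import AsyncClient

    async def main() -> None:
        client = AsyncClient(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
        # Document objects: the text field is required; any other fields are
        # preserved in the response and can be used for ranking via rank_fields.
        documents = [
            {"title": "Shark facts", "text": "Sharks are cartilaginous fish."},
            {"title": "Penguin facts", "text": "Penguins live in cold climates."},
        ]
        response = await client.rerank(
            query="Which animals live in cold climates?",
            documents=documents,
            rank_fields=["title", "text"],  # title is considered first, then text
            top_n=1,
            return_documents=True,  # include the document text in each result
        )
        for result in response.results:
            print(result.index, result.relevance_score)

    asyncio.run(main())

With `return_documents=False` each result carries only the index and relevance score, which keeps responses small when the caller already holds the documents.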
- - Returns - ------- - RerankResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.rerank( - query="query", - documents=["documents"], - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/rerank", - method="POST", - json={ - "model": model, - "query": query, - "documents": convert_and_respect_annotation_metadata( - object_=documents, annotation=typing.Sequence[RerankRequestDocumentsItem], direction="write" - ), - "top_n": top_n, - "rank_fields": rank_fields, - "return_documents": return_documents, - "max_chunks_per_doc": max_chunks_per_doc, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - RerankResponse, - construct_type( - type_=RerankResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) 
- raise ApiError(status_code=_response.status_code, body=_response_json) - - async def classify( - self, - *, - inputs: typing.Sequence[str], - examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT, - model: typing.Optional[str] = OMIT, - preset: typing.Optional[str] = OMIT, - truncate: typing.Optional[ClassifyRequestTruncate] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> ClassifyResponse: - """ - This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference. - Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. - - Parameters - ---------- - inputs : typing.Sequence[str] - A list of up to 96 texts to be classified. Each one must be a non-empty string. - There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models). - Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts. - - examples : typing.Optional[typing.Sequence[ClassifyExample]] - An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`. - Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. - - model : typing.Optional[str] - The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID. - - preset : typing.Optional[str] - The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. - - truncate : typing.Optional[ClassifyRequestTruncate] - One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
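
Editor's note: the "at least 2 examples per unique label" rule above is the easiest requirement to trip over, so a minimal sketch of the call removed here may help. It assumes `ClassifyExample` is importable from the package root, as elsewhere in this patch, and the response attribute names are assumed from the v1 classify response shape.

    import asyncio

    from cohere import AsyncClient, ClassifyExample

    async def main() -> None:
        client = AsyncClient(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
        # Each unique label must have at least two examples associated with it.
        examples = [
            ClassifyExample(text="I love this!", label="positive"),
            ClassifyExample(text="What a great day.", label="positive"),
            ClassifyExample(text="This is awful.", label="negative"),
            ClassifyExample(text="I am very disappointed.", label="negative"),
        ]
        response = await client.classify(
            inputs=["The service was wonderful"],
            examples=examples,
        )
        print(response.classifications[0].prediction)

    asyncio.run(main())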
- - Returns - ------- - ClassifyResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.classify( - inputs=["inputs"], - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/classify", - method="POST", - json={ - "inputs": inputs, - "examples": convert_and_respect_annotation_metadata( - object_=examples, annotation=typing.Sequence[ClassifyExample], direction="write" - ), - "model": model, - "preset": preset, - "truncate": truncate, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ClassifyResponse, - construct_type( - type_=ClassifyResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def summarize( - self, - *, - 
text: str, - length: typing.Optional[SummarizeRequestLength] = OMIT, - format: typing.Optional[SummarizeRequestFormat] = OMIT, - model: typing.Optional[str] = OMIT, - extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT, - temperature: typing.Optional[float] = OMIT, - additional_command: typing.Optional[str] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> SummarizeResponse: - """ - - This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - - Generates a summary in English for a given text. - - Parameters - ---------- - text : str - The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English. - - length : typing.Optional[SummarizeRequestLength] - One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text. - - format : typing.Optional[SummarizeRequestFormat] - One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text. - - model : typing.Optional[str] - The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better. - - extractiveness : typing.Optional[SummarizeRequestExtractiveness] - One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text. - - temperature : typing.Optional[float] - Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1. - - additional_command : typing.Optional[str] - A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". Eg. "focusing on the next steps" or "written by Yoda" - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
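
Editor's note: the legacy summarize endpoint combines several enum-style knobs (`length`, `format`, `extractiveness`) with a float `temperature`. A hedged sketch of the method deleted here, with the `.summary` attribute assumed from the v1 response shape:

    import asyncio

    from cohere import AsyncClient

    async def main() -> None:
        client = AsyncClient(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
        response = await client.summarize(
            text="<an English text of up to 100,000 characters>",
            length="short",        # one of short, medium, long, auto
            format="bullets",      # one of paragraph, bullets, auto
            extractiveness="low",  # paraphrase rather than quote verbatim
            temperature=0.3,       # stay at the predictable end of the 0-5 range
        )
        print(response.summary)

    asyncio.run(main())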
- - Returns - ------- - SummarizeResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.summarize( - text="text", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/summarize", - method="POST", - json={ - "text": text, - "length": length, - "format": format, - "model": model, - "extractiveness": extractiveness, - "temperature": temperature, - "additional_command": additional_command, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - SummarizeResponse, - construct_type( - type_=SummarizeResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def tokenize( - self, *, text: str, model: str, request_options: 
typing.Optional[RequestOptions] = None - ) -> TokenizeResponse: - """ - This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page. - - Parameters - ---------- - text : str - The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters. - - model : str - An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - TokenizeResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.tokenize( - text="tokenize me! :D", - model="command", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/tokenize", - method="POST", - json={ - "text": text, - "model": model, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - TokenizeResponse, - construct_type( - type_=TokenizeResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), 
- ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def detokenize( - self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None - ) -> DetokenizeResponse: - """ - This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page. - - Parameters - ---------- - tokens : typing.Sequence[int] - The list of tokens to be detokenized. - - model : str - An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DetokenizeResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.detokenize( - tokens=[1], - model="model", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/detokenize", - method="POST", - json={ - "tokens": tokens, - "model": model, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DetokenizeResponse, - construct_type( - type_=DetokenizeResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: 
ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def check_api_key(self, *, request_options: typing.Optional[RequestOptions] = None) -> CheckApiKeyResponse: - """ - Checks that the api key in the Authorization header is valid and active - - Parameters - ---------- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - CheckApiKeyResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.check_api_key() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/check-api-key", - method="POST", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - CheckApiKeyResponse, - construct_type( - type_=CheckApiKeyResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - 
type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) + self.v1 = AsyncV1Client(client_wrapper=self._client_wrapper) + self.v2 = AsyncV2Client(client_wrapper=self._client_wrapper) def _get_base_url(*, base_url: typing.Optional[str] = None, environment: ClientEnvironment) -> str: diff --git a/src/cohere/connectors/__init__.py b/src/cohere/connectors/__init__.py deleted file mode 100644 index f3ea2659b..000000000 --- a/src/cohere/connectors/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - diff --git a/src/cohere/core/client_wrapper.py b/src/cohere/core/client_wrapper.py index 54cd9787c..bad0ef86f 100644 --- a/src/cohere/core/client_wrapper.py +++ b/src/cohere/core/client_wrapper.py @@ -24,7 +24,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "cohere", - "X-Fern-SDK-Version": "5.9.2", + "X-Fern-SDK-Version": "5.10.0", } if self._client_name is not None: headers["X-Client-Name"] = self._client_name diff --git a/src/cohere/datasets/__init__.py b/src/cohere/datasets/__init__.py deleted file mode 100644 index 3778ddcf3..000000000 --- a/src/cohere/datasets/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from .types import ( - DatasetsCreateResponse, - DatasetsCreateResponseDatasetPartsItem, - DatasetsGetResponse, - DatasetsGetUsageResponse, - DatasetsListResponse, -) - -__all__ = [ - "DatasetsCreateResponse", - "DatasetsCreateResponseDatasetPartsItem", - "DatasetsGetResponse", - "DatasetsGetUsageResponse", - "DatasetsListResponse", -] diff --git a/src/cohere/datasets/client.py b/src/cohere/datasets/client.py deleted file mode 100644 index ca9038a4c..000000000 --- a/src/cohere/datasets/client.py +++ /dev/null @@ -1,1846 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
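
Editor's note: this hunk bumps the SDK version from 5.9.2 to 5.10.0, wires the nested `v1`/`v2` clients onto the async client, and deletes the standalone `connectors` and `datasets` modules (the `datasets` client deletion begins here). A hedged before/after sketch of what a downstream call site would presumably look like once this lands; the exact placement of methods under `client.v1` is inferred from the new wiring, not confirmed by this hunk.

    # Before this regeneration (cohere 5.9.2): endpoints hang off the client root.
    response = await client.embed(texts=["hello"], model="embed-english-v3.0")

    # After (cohere 5.10.0): the same call is assumed to move under the v1 client.
    response = await client.v1.embed(texts=["hello"], model="embed-english-v3.0")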
- -import typing -from ..core.client_wrapper import SyncClientWrapper -import datetime as dt -from ..types.dataset_validation_status import DatasetValidationStatus -from ..core.request_options import RequestOptions -from .types.datasets_list_response import DatasetsListResponse -from ..core.datetime_utils import serialize_datetime -from ..core.unchecked_base_model import construct_type -from ..errors.bad_request_error import BadRequestError -from ..errors.unauthorized_error import UnauthorizedError -from ..errors.forbidden_error import ForbiddenError -from ..errors.not_found_error import NotFoundError -from ..errors.unprocessable_entity_error import UnprocessableEntityError -from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody -from ..errors.too_many_requests_error import TooManyRequestsError -from ..types.too_many_requests_error_body import TooManyRequestsErrorBody -from ..errors.client_closed_request_error import ClientClosedRequestError -from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody -from ..errors.internal_server_error import InternalServerError -from ..errors.not_implemented_error import NotImplementedError -from ..types.not_implemented_error_body import NotImplementedErrorBody -from ..errors.service_unavailable_error import ServiceUnavailableError -from ..errors.gateway_timeout_error import GatewayTimeoutError -from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody -from json.decoder import JSONDecodeError -from ..core.api_error import ApiError -from ..types.dataset_type import DatasetType -from .. import core -from .types.datasets_create_response import DatasetsCreateResponse -from .types.datasets_get_usage_response import DatasetsGetUsageResponse -from .types.datasets_get_response import DatasetsGetResponse -from ..core.jsonable_encoder import jsonable_encoder -from ..core.client_wrapper import AsyncClientWrapper - -# this is used as the default value for optional parameters -OMIT = typing.cast(typing.Any, ...) - - -class DatasetsClient: - def __init__(self, *, client_wrapper: SyncClientWrapper): - self._client_wrapper = client_wrapper - - def list( - self, - *, - dataset_type: typing.Optional[str] = None, - before: typing.Optional[dt.datetime] = None, - after: typing.Optional[dt.datetime] = None, - limit: typing.Optional[float] = None, - offset: typing.Optional[float] = None, - validation_status: typing.Optional[DatasetValidationStatus] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> DatasetsListResponse: - """ - List datasets that have been created. - - Parameters - ---------- - dataset_type : typing.Optional[str] - optional filter by dataset type - - before : typing.Optional[dt.datetime] - optional filter before a date - - after : typing.Optional[dt.datetime] - optional filter after a date - - limit : typing.Optional[float] - optional limit to number of results - - offset : typing.Optional[float] - optional offset to start of results - - validation_status : typing.Optional[DatasetValidationStatus] - optional filter by validation status - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsListResponse - A successful response. 
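
Editor's note: the datetime filters above take `dt.datetime` values and are serialized with `serialize_datetime` in the request construction that follows. A minimal sketch of the sync call removed here; the `validation_status` value and the `.datasets` attribute are assumed from the v1 datasets schema.

    import datetime as dt

    from cohere import Client

    client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
    # List embed-input datasets that finished validation within the last week.
    response = client.datasets.list(
        dataset_type="embed-input",
        after=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=7),
        validation_status="validated",
        limit=10,
    )
    for dataset in response.datasets or []:
        print(dataset.id, dataset.name)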
- - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.datasets.list() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/datasets", - method="GET", - params={ - "datasetType": dataset_type, - "before": serialize_datetime(before) if before is not None else None, - "after": serialize_datetime(after) if after is not None else None, - "limit": limit, - "offset": offset, - "validationStatus": validation_status, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsListResponse, - construct_type( - type_=DatasetsListResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def create( - self, - *, - name: str, - type: DatasetType, - data: core.File, - keep_original_file: typing.Optional[bool] = None, - skip_malformed_input: typing.Optional[bool] = 
None, - keep_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - optional_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - text_separator: typing.Optional[str] = None, - csv_delimiter: typing.Optional[str] = None, - dry_run: typing.Optional[bool] = None, - eval_data: typing.Optional[core.File] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> DatasetsCreateResponse: - """ - Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information. - - Parameters - ---------- - name : str - The name of the uploaded dataset. - - type : DatasetType - The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`. - - data : core.File - See core.File for more documentation - - keep_original_file : typing.Optional[bool] - Indicates if the original file should be stored. - - skip_malformed_input : typing.Optional[bool] - Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field. - - keep_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]] - List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail. - - optional_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]] - List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass. - - text_separator : typing.Optional[str] - Raw .txt uploads will be split into entries using the text_separator value. - - csv_delimiter : typing.Optional[str] - The delimiter used for .csv uploads. - - dry_run : typing.Optional[bool] - flag to enable dry_run mode - - eval_data : typing.Optional[core.File] - See core.File for more documentation - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsCreateResponse - A successful response. 
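
Editor's note: `data` and `eval_data` are `core.File` values, i.e. file-like objects sent as multipart uploads (see the `files={...}` mapping in the request construction below). A hedged sketch of an upload with the sync client removed here; the file name and the `.id` attribute on the response are illustrative assumptions.

    from cohere import Client

    client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
    # Upload a JSONL file of {"text": ...} rows as an embed-input dataset.
    with open("embed_input.jsonl", "rb") as f:
        created = client.datasets.create(
            name="my-embed-dataset",
            type="embed-input",
            data=f,
            keep_original_file=True,
        )
    print(created.id)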
- - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.datasets.create( - name="name", - type="embed-input", - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/datasets", - method="POST", - params={ - "name": name, - "type": type, - "keep_original_file": keep_original_file, - "skip_malformed_input": skip_malformed_input, - "keep_fields": keep_fields, - "optional_fields": optional_fields, - "text_separator": text_separator, - "csv_delimiter": csv_delimiter, - "dry_run": dry_run, - }, - data={}, - files={ - "data": data, - "eval_data": eval_data, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsCreateResponse, - construct_type( - type_=DatasetsCreateResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def 
get_usage(self, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetUsageResponse: - """ - View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users. - - Parameters - ---------- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsGetUsageResponse - A successful response. - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.datasets.get_usage() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/datasets/usage", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsGetUsageResponse, - construct_type( - type_=DatasetsGetUsageResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - 
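
Editor's note: a quick sketch pairing the usage check with the 10GB per-Organization cap documented above; the `organization_usage` field name is an assumption about the v1 usage response, not confirmed by this patch.

    from cohere import Client

    client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
    usage = client.datasets.get_usage()
    # Each Organization can store up to 10GB across all of its users.
    print("bytes used by organization:", usage.organization_usage)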
- def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetResponse: - """ - Retrieve a dataset by ID. See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information. - - Parameters - ---------- - id : str - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsGetResponse - A successful response. - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.datasets.get( - id="id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/datasets/{jsonable_encoder(id)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsGetResponse, - construct_type( - type_=DatasetsGetResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, 
body=_response_json) - - def delete( - self, id: str, *, request_options: typing.Optional[RequestOptions] = None - ) -> typing.Dict[str, typing.Optional[typing.Any]]: - """ - Delete a dataset by ID. Datasets are automatically deleted after 30 days, but they can also be deleted manually. - - Parameters - ---------- - id : str - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - typing.Dict[str, typing.Optional[typing.Any]] - A successful response. - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.datasets.delete( - id="id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/datasets/{jsonable_encoder(id)}", - method="DELETE", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - typing.Dict[str, typing.Optional[typing.Any]], - construct_type( - type_=typing.Dict[str, typing.Optional[typing.Any]], # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except 
JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - -class AsyncDatasetsClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - self._client_wrapper = client_wrapper - - async def list( - self, - *, - dataset_type: typing.Optional[str] = None, - before: typing.Optional[dt.datetime] = None, - after: typing.Optional[dt.datetime] = None, - limit: typing.Optional[float] = None, - offset: typing.Optional[float] = None, - validation_status: typing.Optional[DatasetValidationStatus] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> DatasetsListResponse: - """ - List datasets that have been created. - - Parameters - ---------- - dataset_type : typing.Optional[str] - optional filter by dataset type - - before : typing.Optional[dt.datetime] - optional filter before a date - - after : typing.Optional[dt.datetime] - optional filter after a date - - limit : typing.Optional[float] - optional limit to number of results - - offset : typing.Optional[float] - optional offset to start of results - - validation_status : typing.Optional[DatasetValidationStatus] - optional filter by validation status - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsListResponse - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.datasets.list() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/datasets", - method="GET", - params={ - "datasetType": dataset_type, - "before": serialize_datetime(before) if before is not None else None, - "after": serialize_datetime(after) if after is not None else None, - "limit": limit, - "offset": offset, - "validationStatus": validation_status, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsListResponse, - construct_type( - type_=DatasetsListResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - 
object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def create( - self, - *, - name: str, - type: DatasetType, - data: core.File, - keep_original_file: typing.Optional[bool] = None, - skip_malformed_input: typing.Optional[bool] = None, - keep_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - optional_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - text_separator: typing.Optional[str] = None, - csv_delimiter: typing.Optional[str] = None, - dry_run: typing.Optional[bool] = None, - eval_data: typing.Optional[core.File] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> DatasetsCreateResponse: - """ - Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information. - - Parameters - ---------- - name : str - The name of the uploaded dataset. - - type : DatasetType - The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`. - - data : core.File - See core.File for more documentation - - keep_original_file : typing.Optional[bool] - Indicates if the original file should be stored. - - skip_malformed_input : typing.Optional[bool] - Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field. - - keep_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]] - List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail. - - optional_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]] - List of names of fields that will be persisted in the Dataset. 
By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass. - - text_separator : typing.Optional[str] - Raw .txt uploads will be split into entries using the text_separator value. - - csv_delimiter : typing.Optional[str] - The delimiter used for .csv uploads. - - dry_run : typing.Optional[bool] - flag to enable dry_run mode - - eval_data : typing.Optional[core.File] - See core.File for more documentation - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsCreateResponse - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.datasets.create( - name="name", - type="embed-input", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/datasets", - method="POST", - params={ - "name": name, - "type": type, - "keep_original_file": keep_original_file, - "skip_malformed_input": skip_malformed_input, - "keep_fields": keep_fields, - "optional_fields": optional_fields, - "text_separator": text_separator, - "csv_delimiter": csv_delimiter, - "dry_run": dry_run, - }, - data={}, - files={ - "data": data, - "eval_data": eval_data, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsCreateResponse, - construct_type( - type_=DatasetsCreateResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - 
typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def get_usage(self, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetUsageResponse: - """ - View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users. - - Parameters - ---------- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsGetUsageResponse - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.datasets.get_usage() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/datasets/usage", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsGetUsageResponse, - construct_type( - type_=DatasetsGetUsageResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - 
object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetResponse: - """ - Retrieve a dataset by ID. See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information. - - Parameters - ---------- - id : str - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DatasetsGetResponse - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.datasets.get( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/datasets/{jsonable_encoder(id)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DatasetsGetResponse, - construct_type( - type_=DatasetsGetResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - 
typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def delete( - self, id: str, *, request_options: typing.Optional[RequestOptions] = None - ) -> typing.Dict[str, typing.Optional[typing.Any]]: - """ - Delete a dataset by ID. Datasets are automatically deleted after 30 days, but they can also be deleted manually. - - Parameters - ---------- - id : str - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - typing.Dict[str, typing.Optional[typing.Any]] - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.datasets.delete( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/datasets/{jsonable_encoder(id)}", - method="DELETE", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - typing.Dict[str, typing.Optional[typing.Any]], - construct_type( - type_=typing.Dict[str, typing.Optional[typing.Any]], # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise 
TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) diff --git a/src/cohere/datasets/types/__init__.py b/src/cohere/datasets/types/__init__.py deleted file mode 100644 index 5d0f602a2..000000000 --- a/src/cohere/datasets/types/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from .datasets_create_response import DatasetsCreateResponse -from .datasets_create_response_dataset_parts_item import DatasetsCreateResponseDatasetPartsItem -from .datasets_get_response import DatasetsGetResponse -from .datasets_get_usage_response import DatasetsGetUsageResponse -from .datasets_list_response import DatasetsListResponse - -__all__ = [ - "DatasetsCreateResponse", - "DatasetsCreateResponseDatasetPartsItem", - "DatasetsGetResponse", - "DatasetsGetUsageResponse", - "DatasetsListResponse", -] diff --git a/src/cohere/datasets/types/datasets_create_response_dataset_parts_item.py b/src/cohere/datasets/types/datasets_create_response_dataset_parts_item.py deleted file mode 100644 index 4204e9c01..000000000 --- a/src/cohere/datasets/types/datasets_create_response_dataset_parts_item.py +++ /dev/null @@ -1,37 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 - - -class DatasetsCreateResponseDatasetPartsItem(UncheckedBaseModel): - """ - the underlying files that make up the dataset - """ - - name: typing.Optional[str] = pydantic.Field(default=None) - """ - the name of the dataset part - """ - - num_rows: typing.Optional[float] = pydantic.Field(default=None) - """ - the number of rows in the dataset part - """ - - samples: typing.Optional[typing.List[str]] = None - part_kind: typing.Optional[str] = pydantic.Field(default=None) - """ - the kind of dataset part - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/embed_jobs/__init__.py b/src/cohere/embed_jobs/__init__.py deleted file mode 100644 index 923ab6de0..000000000 --- a/src/cohere/embed_jobs/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from .types import CreateEmbedJobRequestTruncate - -__all__ = ["CreateEmbedJobRequestTruncate"] diff --git a/src/cohere/embed_jobs/client.py b/src/cohere/embed_jobs/client.py deleted file mode 100644 index a667c7549..000000000 --- a/src/cohere/embed_jobs/client.py +++ /dev/null @@ -1,1421 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing -from ..core.client_wrapper import SyncClientWrapper -from ..core.request_options import RequestOptions -from ..types.list_embed_job_response import ListEmbedJobResponse -from ..core.unchecked_base_model import construct_type -from ..errors.bad_request_error import BadRequestError -from ..errors.unauthorized_error import UnauthorizedError -from ..errors.forbidden_error import ForbiddenError -from ..errors.not_found_error import NotFoundError -from ..errors.unprocessable_entity_error import UnprocessableEntityError -from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody -from ..errors.too_many_requests_error import TooManyRequestsError -from ..types.too_many_requests_error_body import TooManyRequestsErrorBody -from ..errors.client_closed_request_error import ClientClosedRequestError -from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody -from ..errors.internal_server_error import InternalServerError -from ..errors.not_implemented_error import NotImplementedError -from ..types.not_implemented_error_body import NotImplementedErrorBody -from ..errors.service_unavailable_error import ServiceUnavailableError -from ..errors.gateway_timeout_error import GatewayTimeoutError -from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody -from json.decoder import JSONDecodeError -from ..core.api_error import ApiError -from ..types.embed_input_type import EmbedInputType -from ..types.embedding_type import EmbeddingType -from .types.create_embed_job_request_truncate import CreateEmbedJobRequestTruncate -from ..types.create_embed_job_response import CreateEmbedJobResponse -from ..types.embed_job import EmbedJob -from ..core.jsonable_encoder import jsonable_encoder -from ..core.client_wrapper import AsyncClientWrapper - -# this is used as the default value for optional parameters -OMIT = typing.cast(typing.Any, ...) 
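Before the `EmbedJobsClient` definition that follows, a sketch of the lifecycle its four methods implement (create a job against a validated `embed-input` Dataset, poll `get`, optionally `cancel`), again assuming the pre-regeneration `cohere.Client`; the `job_id` attribute and the "processing" status string are assumptions about `CreateEmbedJobResponse` and `EmbedJobStatus`, which this diff does not show.

import time

import cohere  # pre-regeneration package layout assumed

client = cohere.Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Launch an async embed job; the dataset must be of type `embed-input` with
# validation status `Validated` (see the `create` docstring below).
job = client.embed_jobs.create(
    model="embed-english-v3.0",  # 1024-dim, per the docstring below
    dataset_id="my-embed-input-dataset",
    input_type="search_document",
    embedding_types=["float"],
    truncate="END",  # discard the end of inputs over the token limit
)

deadline = time.time() + 3600
while True:
    state = client.embed_jobs.get(id=job.job_id)
    if state.status != "processing":  # assumed in-progress status value
        break
    if time.time() > deadline:
        # Cancelling bills for embeddings processed so far and discards
        # partial results (see the `cancel` docstring below).
        client.embed_jobs.cancel(id=job.job_id)
        break
    time.sleep(30)

print(f"Embed job finished with status: {state.status}")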
- - -class EmbedJobsClient: - def __init__(self, *, client_wrapper: SyncClientWrapper): - self._client_wrapper = client_wrapper - - def list(self, *, request_options: typing.Optional[RequestOptions] = None) -> ListEmbedJobResponse: - """ - The list embed job endpoint allows users to view all embed jobs history for that specific user. - - Parameters - ---------- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListEmbedJobResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.embed_jobs.list() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/embed-jobs", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListEmbedJobResponse, - construct_type( - type_=ListEmbedJobResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise 
ApiError(status_code=_response.status_code, body=_response_json) - - def create( - self, - *, - model: str, - dataset_id: str, - input_type: EmbedInputType, - name: typing.Optional[str] = OMIT, - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT, - truncate: typing.Optional[CreateEmbedJobRequestTruncate] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> CreateEmbedJobResponse: - """ - This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings. - - Parameters - ---------- - model : str - ID of the embedding model. - - Available models and corresponding embedding dimensions: - - - `embed-english-v3.0` : 1024 - - `embed-multilingual-v3.0` : 1024 - - `embed-english-light-v3.0` : 384 - - `embed-multilingual-light-v3.0` : 384 - - - dataset_id : str - ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated` - - input_type : EmbedInputType - - name : typing.Optional[str] - The name of the embed job. - - embedding_types : typing.Optional[typing.Sequence[EmbeddingType]] - Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types. - - * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. - * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. - * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. - * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. - * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. - - truncate : typing.Optional[CreateEmbedJobRequestTruncate] - One of `START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
- - Returns - ------- - CreateEmbedJobResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.embed_jobs.create( - model="model", - dataset_id="dataset_id", - input_type="search_document", - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/embed-jobs", - method="POST", - json={ - "model": model, - "dataset_id": dataset_id, - "input_type": input_type, - "name": name, - "embedding_types": embedding_types, - "truncate": truncate, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - CreateEmbedJobResponse, - construct_type( - type_=CreateEmbedJobResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EmbedJob: - """ - This API retrieves 
the details about an embed job started by the same user. - - Parameters - ---------- - id : str - The ID of the embed job to retrieve. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - EmbedJob - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.embed_jobs.get( - id="id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/embed-jobs/{jsonable_encoder(id)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - EmbedJob, - construct_type( - type_=EmbedJob, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: - """ - This API allows users to cancel an active embed job. 
Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation. - - Parameters - ---------- - id : str - The ID of the embed job to cancel. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - None - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.embed_jobs.cancel( - id="id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/embed-jobs/{jsonable_encoder(id)}/cancel", - method="POST", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - -class AsyncEmbedJobsClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - 
self._client_wrapper = client_wrapper - - async def list(self, *, request_options: typing.Optional[RequestOptions] = None) -> ListEmbedJobResponse: - """ - The list embed job endpoint allows users to view all embed jobs history for that specific user. - - Parameters - ---------- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListEmbedJobResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.embed_jobs.list() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/embed-jobs", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListEmbedJobResponse, - construct_type( - type_=ListEmbedJobResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - 
raise ApiError(status_code=_response.status_code, body=_response_json) - - async def create( - self, - *, - model: str, - dataset_id: str, - input_type: EmbedInputType, - name: typing.Optional[str] = OMIT, - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT, - truncate: typing.Optional[CreateEmbedJobRequestTruncate] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> CreateEmbedJobResponse: - """ - This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings. - - Parameters - ---------- - model : str - ID of the embedding model. - - Available models and corresponding embedding dimensions: - - - `embed-english-v3.0` : 1024 - - `embed-multilingual-v3.0` : 1024 - - `embed-english-light-v3.0` : 384 - - `embed-multilingual-light-v3.0` : 384 - - - dataset_id : str - ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated` - - input_type : EmbedInputType - - name : typing.Optional[str] - The name of the embed job. - - embedding_types : typing.Optional[typing.Sequence[EmbeddingType]] - Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types. - - * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. - * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. - * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. - * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. - * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. - - truncate : typing.Optional[CreateEmbedJobRequestTruncate] - One of `START|END` to specify how the API will handle inputs longer than the maximum token length. - - Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. - - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
- - Returns - ------- - CreateEmbedJobResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.embed_jobs.create( - model="model", - dataset_id="dataset_id", - input_type="search_document", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/embed-jobs", - method="POST", - json={ - "model": model, - "dataset_id": dataset_id, - "input_type": input_type, - "name": name, - "embedding_types": embedding_types, - "truncate": truncate, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - CreateEmbedJobResponse, - construct_type( - type_=CreateEmbedJobResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def get(self, id: 
str, *, request_options: typing.Optional[RequestOptions] = None) -> EmbedJob: - """ - This API retrieves the details about an embed job started by the same user. - - Parameters - ---------- - id : str - The ID of the embed job to retrieve. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - EmbedJob - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.embed_jobs.get( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/embed-jobs/{jsonable_encoder(id)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - EmbedJob, - construct_type( - type_=EmbedJob, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise 
ApiError(status_code=_response.status_code, body=_response_json) - - async def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: - """ - This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation. - - Parameters - ---------- - id : str - The ID of the embed job to cancel. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - None - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.embed_jobs.cancel( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/embed-jobs/{jsonable_encoder(id)}/cancel", - method="POST", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - 
object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) diff --git a/src/cohere/embed_jobs/types/__init__.py b/src/cohere/embed_jobs/types/__init__.py deleted file mode 100644 index ccdc7056a..000000000 --- a/src/cohere/embed_jobs/types/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from .create_embed_job_request_truncate import CreateEmbedJobRequestTruncate - -__all__ = ["CreateEmbedJobRequestTruncate"] diff --git a/src/cohere/embed_jobs/types/create_embed_job_request_truncate.py b/src/cohere/embed_jobs/types/create_embed_job_request_truncate.py deleted file mode 100644 index 30c12cb05..000000000 --- a/src/cohere/embed_jobs/types/create_embed_job_request_truncate.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -CreateEmbedJobRequestTruncate = typing.Union[typing.Literal["START", "END"], typing.Any] diff --git a/src/cohere/finetuning/client.py b/src/cohere/finetuning/client.py deleted file mode 100644 index 60f0ce961..000000000 --- a/src/cohere/finetuning/client.py +++ /dev/null @@ -1,1873 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing -from ..core.client_wrapper import SyncClientWrapper -from ..core.request_options import RequestOptions -from .finetuning.types.list_finetuned_models_response import ListFinetunedModelsResponse -from ..core.unchecked_base_model import construct_type -from ..errors.bad_request_error import BadRequestError -from ..errors.unauthorized_error import UnauthorizedError -from ..errors.forbidden_error import ForbiddenError -from ..errors.not_found_error import NotFoundError -from ..errors.internal_server_error import InternalServerError -from ..errors.service_unavailable_error import ServiceUnavailableError -from json.decoder import JSONDecodeError -from ..core.api_error import ApiError -from .finetuning.types.finetuned_model import FinetunedModel -from .finetuning.types.create_finetuned_model_response import CreateFinetunedModelResponse -from ..core.serialization import convert_and_respect_annotation_metadata -from .finetuning.types.get_finetuned_model_response import GetFinetunedModelResponse -from ..core.jsonable_encoder import jsonable_encoder -from .finetuning.types.delete_finetuned_model_response import DeleteFinetunedModelResponse -from .finetuning.types.settings import Settings -from .finetuning.types.status import Status -import datetime as dt -from .finetuning.types.update_finetuned_model_response import UpdateFinetunedModelResponse -from .finetuning.types.list_events_response import ListEventsResponse -from .finetuning.types.list_training_step_metrics_response import ListTrainingStepMetricsResponse -from ..core.client_wrapper import AsyncClientWrapper - -# this is used as the default value for optional parameters -OMIT = typing.cast(typing.Any, ...) 
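The embed_jobs client removed above wrapped a three-call lifecycle: POST v1/embed-jobs starts an asynchronous job, GET v1/embed-jobs/{id} polls it, and POST v1/embed-jobs/{id}/cancel stops it (after which, per the cancel docstring, partial results are unavailable and usage up to that point is still billed). A minimal sketch of that lifecycle against the pre-regeneration client follows; the model name, dataset ID, the job_id response field, and the status strings are illustrative assumptions, not taken from this diff.

import time

from cohere import Client
from cohere.core.api_error import ApiError

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

try:
    # POST v1/embed-jobs: start an asynchronous embedding job.
    job = client.embed_jobs.create(
        model="embed-english-v3.0",  # placeholder model name
        dataset_id="my-dataset-id",  # placeholder dataset ID
        input_type="search_document",
    )
    # GET v1/embed-jobs/{id}: poll a few times; the "processing" /
    # "complete" strings are assumed values of EmbedJobStatus.
    for _ in range(10):
        if client.embed_jobs.get(id=job.job_id).status != "processing":
            break
        time.sleep(30)
    else:
        # Still running after ~5 minutes: POST v1/embed-jobs/{id}/cancel.
        # Per the docstring above, partial results are then unavailable.
        client.embed_jobs.cancel(id=job.job_id)
except ApiError as exc:
    # Non-2xx responses surface as ApiError or one of the typed
    # subclasses raised by the status-code cascade in the client above.
    print(exc.status_code, exc.body)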
- - -class FinetuningClient: - def __init__(self, *, client_wrapper: SyncClientWrapper): - self._client_wrapper = client_wrapper - - def list_finetuned_models( - self, - *, - page_size: typing.Optional[int] = None, - page_token: typing.Optional[str] = None, - order_by: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListFinetunedModelsResponse: - """ - Parameters - ---------- - page_size : typing.Optional[int] - Maximum number of results to be returned by the server. If 0, defaults to 50. - - page_token : typing.Optional[str] - Request a specific page of the list results. - - order_by : typing.Optional[str] - Comma separated list of fields. For example: "created_at,name". The default - sorting order is ascending. To specify descending order for a field, append - " desc" to the field name. For example: "created_at desc,name". - - Supported sorting fields: - - - created_at (default) - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListFinetunedModelsResponse - A successful response. - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.list_finetuned_models() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/finetuning/finetuned-models", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - "order_by": order_by, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListFinetunedModelsResponse, - construct_type( - type_=ListFinetunedModelsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def create_finetuned_model( - self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None - ) -> CreateFinetunedModelResponse: - """ - Parameters - ---------- - request : FinetunedModel - - request_options : typing.Optional[RequestOptions] - Request-specific 
configuration. - - Returns - ------- - CreateFinetunedModelResponse - A successful response. - - Examples - -------- - from cohere import Client - from cohere.finetuning.finetuning import BaseModel, FinetunedModel, Settings - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.create_finetuned_model( - request=FinetunedModel( - name="api-test", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_CHAT", - ), - dataset_id="my-dataset-id", - ), - ), - ) - """ - _response = self._client_wrapper.httpx_client.request( - "v1/finetuning/finetuned-models", - method="POST", - json=convert_and_respect_annotation_metadata(object_=request, annotation=FinetunedModel, direction="write"), - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - CreateFinetunedModelResponse, - construct_type( - type_=CreateFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def get_finetuned_model( - self, id: str, *, request_options: typing.Optional[RequestOptions] = None - ) -> GetFinetunedModelResponse: - """ - Parameters - ---------- - id : str - The fine-tuned model ID. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - GetFinetunedModelResponse - A successful response. 
- - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.get_finetuned_model( - id="id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - GetFinetunedModelResponse, - construct_type( - type_=GetFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def delete_finetuned_model( - self, id: str, *, request_options: typing.Optional[RequestOptions] = None - ) -> DeleteFinetunedModelResponse: - """ - Parameters - ---------- - id : str - The fine-tuned model ID. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DeleteFinetunedModelResponse - A successful response. 
- - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.delete_finetuned_model( - id="id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", - method="DELETE", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DeleteFinetunedModelResponse, - construct_type( - type_=DeleteFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def update_finetuned_model( - self, - id: str, - *, - name: str, - settings: Settings, - creator_id: typing.Optional[str] = OMIT, - organization_id: typing.Optional[str] = OMIT, - status: typing.Optional[Status] = OMIT, - created_at: typing.Optional[dt.datetime] = OMIT, - updated_at: typing.Optional[dt.datetime] = OMIT, - completed_at: typing.Optional[dt.datetime] = OMIT, - last_used: typing.Optional[dt.datetime] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> UpdateFinetunedModelResponse: - """ - Parameters - ---------- - id : str - FinetunedModel ID. - - name : str - FinetunedModel name (e.g. `foobar`). - - settings : Settings - FinetunedModel settings such as dataset, hyperparameters... - - creator_id : typing.Optional[str] - User ID of the creator. - - organization_id : typing.Optional[str] - Organization ID. - - status : typing.Optional[Status] - Current stage in the life-cycle of the fine-tuned model. - - created_at : typing.Optional[dt.datetime] - Creation timestamp. - - updated_at : typing.Optional[dt.datetime] - Latest update timestamp. - - completed_at : typing.Optional[dt.datetime] - Timestamp for the completed fine-tuning. - - last_used : typing.Optional[dt.datetime] - Timestamp for the latest request to this fine-tuned model. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
- - Returns - ------- - UpdateFinetunedModelResponse - A successful response. - - Examples - -------- - from cohere import Client - from cohere.finetuning.finetuning import BaseModel, Settings - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.update_finetuned_model( - id="id", - name="name", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_UNSPECIFIED", - ), - dataset_id="dataset_id", - ), - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", - method="PATCH", - json={ - "name": name, - "creator_id": creator_id, - "organization_id": organization_id, - "settings": convert_and_respect_annotation_metadata( - object_=settings, annotation=Settings, direction="write" - ), - "status": status, - "created_at": created_at, - "updated_at": updated_at, - "completed_at": completed_at, - "last_used": last_used, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - UpdateFinetunedModelResponse, - construct_type( - type_=UpdateFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def list_events( - self, - finetuned_model_id: str, - *, - page_size: typing.Optional[int] = None, - page_token: typing.Optional[str] = None, - order_by: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListEventsResponse: - """ - Parameters - ---------- - finetuned_model_id : str - The parent fine-tuned model ID. - - page_size : typing.Optional[int] - Maximum number of results to be returned by the server. If 0, defaults to 50. - - page_token : typing.Optional[str] - Request a specific page of the list results. - - order_by : typing.Optional[str] - Comma separated list of fields. For example: "created_at,name". The default - sorting order is ascending. To specify descending order for a field, append - " desc" to the field name. 
For example: "created_at desc,name". - - Supported sorting fields: - - - created_at (default) - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListEventsResponse - A successful response. - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.list_events( - finetuned_model_id="finetuned_model_id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/events", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - "order_by": order_by, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListEventsResponse, - construct_type( - type_=ListEventsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def list_training_step_metrics( - self, - finetuned_model_id: str, - *, - page_size: typing.Optional[int] = None, - page_token: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListTrainingStepMetricsResponse: - """ - Parameters - ---------- - finetuned_model_id : str - The parent fine-tuned model ID. - - page_size : typing.Optional[int] - Maximum number of results to be returned by the server. If 0, defaults to 50. - - page_token : typing.Optional[str] - Request a specific page of the list results. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListTrainingStepMetricsResponse - A successful response. 
- - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.finetuning.list_training_step_metrics( - finetuned_model_id="finetuned_model_id", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/training-step-metrics", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListTrainingStepMetricsResponse, - construct_type( - type_=ListTrainingStepMetricsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - -class AsyncFinetuningClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - self._client_wrapper = client_wrapper - - async def list_finetuned_models( - self, - *, - page_size: typing.Optional[int] = None, - page_token: typing.Optional[str] = None, - order_by: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListFinetunedModelsResponse: - """ - Parameters - ---------- - page_size : typing.Optional[int] - Maximum number of results to be returned by the server. If 0, defaults to 50. - - page_token : typing.Optional[str] - Request a specific page of the list results. - - order_by : typing.Optional[str] - Comma separated list of fields. For example: "created_at,name". The default - sorting order is ascending. To specify descending order for a field, append - " desc" to the field name. For example: "created_at desc,name". - - Supported sorting fields: - - - created_at (default) - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListFinetunedModelsResponse - A successful response. 
- - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.list_finetuned_models() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/finetuning/finetuned-models", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - "order_by": order_by, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListFinetunedModelsResponse, - construct_type( - type_=ListFinetunedModelsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def create_finetuned_model( - self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None - ) -> CreateFinetunedModelResponse: - """ - Parameters - ---------- - request : FinetunedModel - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - CreateFinetunedModelResponse - A successful response. 
- - Examples - -------- - import asyncio - - from cohere import AsyncClient - from cohere.finetuning.finetuning import BaseModel, FinetunedModel, Settings - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.create_finetuned_model( - request=FinetunedModel( - name="api-test", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_CHAT", - ), - dataset_id="my-dataset-id", - ), - ), - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/finetuning/finetuned-models", - method="POST", - json=convert_and_respect_annotation_metadata(object_=request, annotation=FinetunedModel, direction="write"), - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - CreateFinetunedModelResponse, - construct_type( - type_=CreateFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def get_finetuned_model( - self, id: str, *, request_options: typing.Optional[RequestOptions] = None - ) -> GetFinetunedModelResponse: - """ - Parameters - ---------- - id : str - The fine-tuned model ID. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - GetFinetunedModelResponse - A successful response. 
- - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.get_finetuned_model( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - GetFinetunedModelResponse, - construct_type( - type_=GetFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def delete_finetuned_model( - self, id: str, *, request_options: typing.Optional[RequestOptions] = None - ) -> DeleteFinetunedModelResponse: - """ - Parameters - ---------- - id : str - The fine-tuned model ID. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - DeleteFinetunedModelResponse - A successful response. 
- - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.delete_finetuned_model( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", - method="DELETE", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - DeleteFinetunedModelResponse, - construct_type( - type_=DeleteFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def update_finetuned_model( - self, - id: str, - *, - name: str, - settings: Settings, - creator_id: typing.Optional[str] = OMIT, - organization_id: typing.Optional[str] = OMIT, - status: typing.Optional[Status] = OMIT, - created_at: typing.Optional[dt.datetime] = OMIT, - updated_at: typing.Optional[dt.datetime] = OMIT, - completed_at: typing.Optional[dt.datetime] = OMIT, - last_used: typing.Optional[dt.datetime] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> UpdateFinetunedModelResponse: - """ - Parameters - ---------- - id : str - FinetunedModel ID. - - name : str - FinetunedModel name (e.g. `foobar`). - - settings : Settings - FinetunedModel settings such as dataset, hyperparameters... - - creator_id : typing.Optional[str] - User ID of the creator. - - organization_id : typing.Optional[str] - Organization ID. - - status : typing.Optional[Status] - Current stage in the life-cycle of the fine-tuned model. - - created_at : typing.Optional[dt.datetime] - Creation timestamp. - - updated_at : typing.Optional[dt.datetime] - Latest update timestamp. - - completed_at : typing.Optional[dt.datetime] - Timestamp for the completed fine-tuning. - - last_used : typing.Optional[dt.datetime] - Timestamp for the latest request to this fine-tuned model. 
- - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - UpdateFinetunedModelResponse - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - from cohere.finetuning.finetuning import BaseModel, Settings - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.update_finetuned_model( - id="id", - name="name", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_UNSPECIFIED", - ), - dataset_id="dataset_id", - ), - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", - method="PATCH", - json={ - "name": name, - "creator_id": creator_id, - "organization_id": organization_id, - "settings": convert_and_respect_annotation_metadata( - object_=settings, annotation=Settings, direction="write" - ), - "status": status, - "created_at": created_at, - "updated_at": updated_at, - "completed_at": completed_at, - "last_used": last_used, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - UpdateFinetunedModelResponse, - construct_type( - type_=UpdateFinetunedModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def list_events( - self, - finetuned_model_id: str, - *, - page_size: typing.Optional[int] = None, - page_token: typing.Optional[str] = None, - order_by: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListEventsResponse: - """ - Parameters - ---------- - finetuned_model_id : str - The parent fine-tuned model ID. - - page_size : typing.Optional[int] - Maximum number of results to be returned by the server. If 0, defaults to 50. - - page_token : typing.Optional[str] - Request a specific page of the list results. 
- - order_by : typing.Optional[str] - Comma separated list of fields. For example: "created_at,name". The default - sorting order is ascending. To specify descending order for a field, append - " desc" to the field name. For example: "created_at desc,name". - - Supported sorting fields: - - - created_at (default) - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListEventsResponse - A successful response. - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.list_events( - finetuned_model_id="finetuned_model_id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/events", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - "order_by": order_by, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListEventsResponse, - construct_type( - type_=ListEventsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def list_training_step_metrics( - self, - finetuned_model_id: str, - *, - page_size: typing.Optional[int] = None, - page_token: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListTrainingStepMetricsResponse: - """ - Parameters - ---------- - finetuned_model_id : str - The parent fine-tuned model ID. - - page_size : typing.Optional[int] - Maximum number of results to be returned by the server. If 0, defaults to 50. - - page_token : typing.Optional[str] - Request a specific page of the list results. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListTrainingStepMetricsResponse - A successful response. 
- - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.finetuning.list_training_step_metrics( - finetuned_model_id="finetuned_model_id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/training-step-metrics", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListTrainingStepMetricsResponse, - construct_type( - type_=ListTrainingStepMetricsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) diff --git a/src/cohere/models/__init__.py b/src/cohere/models/__init__.py deleted file mode 100644 index f3ea2659b..000000000 --- a/src/cohere/models/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - diff --git a/src/cohere/models/client.py b/src/cohere/models/client.py deleted file mode 100644 index 8022ad214..000000000 --- a/src/cohere/models/client.py +++ /dev/null @@ -1,729 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
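The finetuning client deleted above keeps the same shape: each method maps to one REST call and raises a typed error per status code. A sketch of the create-then-monitor flow it supported is below; the Settings/BaseModel construction and the "created_at desc" ordering come from the docstrings above, while the finetuned_model, status, events, and next_page_token response fields are assumptions about the generated response models.

from cohere import Client
from cohere.finetuning.finetuning import BaseModel, FinetunedModel, Settings

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

# POST v1/finetuning/finetuned-models
created = client.finetuning.create_finetuned_model(
    request=FinetunedModel(
        name="api-test",
        settings=Settings(
            base_model=BaseModel(base_type="BASE_TYPE_CHAT"),
            dataset_id="my-dataset-id",  # placeholder dataset ID
        ),
    ),
)
model_id = created.finetuned_model.id  # response field names assumed

# GET v1/finetuning/finetuned-models/{id}: check the life-cycle stage.
print(client.finetuning.get_finetuned_model(id=model_id).finetuned_model.status)

# GET .../events, newest first per the order_by docstring, walking pages
# via page_token; the next_page_token field is assumed.
page_token = None
while True:
    page = client.finetuning.list_events(
        finetuned_model_id=model_id,
        page_size=50,
        page_token=page_token,
        order_by="created_at desc",
    )
    for event in page.events or []:  # "events" field name assumed
        print(event)
    page_token = page.next_page_token
    if not page_token:
        break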
-
-from ..core.client_wrapper import SyncClientWrapper
-import typing
-from ..core.request_options import RequestOptions
-from ..types.get_model_response import GetModelResponse
-from ..core.jsonable_encoder import jsonable_encoder
-from ..core.unchecked_base_model import construct_type
-from ..errors.bad_request_error import BadRequestError
-from ..errors.unauthorized_error import UnauthorizedError
-from ..errors.forbidden_error import ForbiddenError
-from ..errors.not_found_error import NotFoundError
-from ..errors.unprocessable_entity_error import UnprocessableEntityError
-from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody
-from ..errors.too_many_requests_error import TooManyRequestsError
-from ..types.too_many_requests_error_body import TooManyRequestsErrorBody
-from ..errors.client_closed_request_error import ClientClosedRequestError
-from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody
-from ..errors.internal_server_error import InternalServerError
-from ..errors.not_implemented_error import NotImplementedError
-from ..types.not_implemented_error_body import NotImplementedErrorBody
-from ..errors.service_unavailable_error import ServiceUnavailableError
-from ..errors.gateway_timeout_error import GatewayTimeoutError
-from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody
-from json.decoder import JSONDecodeError
-from ..core.api_error import ApiError
-from ..types.compatible_endpoint import CompatibleEndpoint
-from ..types.list_models_response import ListModelsResponse
-from ..core.client_wrapper import AsyncClientWrapper
-
-
-class ModelsClient:
-    def __init__(self, *, client_wrapper: SyncClientWrapper):
-        self._client_wrapper = client_wrapper
-
-    def get(self, model: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetModelResponse:
-        """
-        Returns the details of a model, provided its name.
-
-        Parameters
-        ----------
-        model : str
-
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
- - Returns - ------- - GetModelResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.models.get( - model="command-r", - ) - """ - _response = self._client_wrapper.httpx_client.request( - f"v1/models/{jsonable_encoder(model)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - GetModelResponse, - construct_type( - type_=GetModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - def list( - self, - *, - page_size: typing.Optional[float] = None, - page_token: typing.Optional[str] = None, - endpoint: typing.Optional[CompatibleEndpoint] = None, - default_only: typing.Optional[bool] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListModelsResponse: - """ - Returns a list of models available for use. 
The list contains models from Cohere as well as your fine-tuned models. - - Parameters - ---------- - page_size : typing.Optional[float] - Maximum number of models to include in a page - Defaults to `20`, min value of `1`, max value of `1000`. - - page_token : typing.Optional[str] - Page token provided in the `next_page_token` field of a previous response. - - endpoint : typing.Optional[CompatibleEndpoint] - When provided, filters the list of models to only those that are compatible with the specified endpoint. - - default_only : typing.Optional[bool] - When provided, filters the list of models to only the default model to the endpoint. This parameter is only valid when `endpoint` is provided. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - ListModelsResponse - OK - - Examples - -------- - from cohere import Client - - client = Client( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - client.models.list() - """ - _response = self._client_wrapper.httpx_client.request( - "v1/models", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - "endpoint": endpoint, - "default_only": default_only, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListModelsResponse, - construct_type( - type_=ListModelsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - 
type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - -class AsyncModelsClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - self._client_wrapper = client_wrapper - - async def get(self, model: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetModelResponse: - """ - Returns the details of a model, provided its name. - - Parameters - ---------- - model : str - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. - - Returns - ------- - GetModelResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.models.get( - model="command-r", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"v1/models/{jsonable_encoder(model)}", - method="GET", - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - GetModelResponse, - construct_type( - type_=GetModelResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - 
object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def list( - self, - *, - page_size: typing.Optional[float] = None, - page_token: typing.Optional[str] = None, - endpoint: typing.Optional[CompatibleEndpoint] = None, - default_only: typing.Optional[bool] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> ListModelsResponse: - """ - Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models. - - Parameters - ---------- - page_size : typing.Optional[float] - Maximum number of models to include in a page - Defaults to `20`, min value of `1`, max value of `1000`. - - page_token : typing.Optional[str] - Page token provided in the `next_page_token` field of a previous response. - - endpoint : typing.Optional[CompatibleEndpoint] - When provided, filters the list of models to only those that are compatible with the specified endpoint. - - default_only : typing.Optional[bool] - When provided, filters the list of models to only the default model to the endpoint. This parameter is only valid when `endpoint` is provided. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
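The `page_token` parameter pairs with the `next_page_token` field described above. A minimal pagination sketch under that assumption (the `models` and `next_page_token` attribute names on the response object are taken from the field docs here, not verified against the generated model):

    from cohere import Client

    client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

    page_token = None
    while True:
        page = client.models.list(page_size=50, page_token=page_token)
        for model in page.models or []:
            print(model.name)
        # Follow the token chain until the server stops returning one.
        page_token = page.next_page_token
        if not page_token:
            break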
- - Returns - ------- - ListModelsResponse - OK - - Examples - -------- - import asyncio - - from cohere import AsyncClient - - client = AsyncClient( - client_name="YOUR_CLIENT_NAME", - token="YOUR_TOKEN", - ) - - - async def main() -> None: - await client.models.list() - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - "v1/models", - method="GET", - params={ - "page_size": page_size, - "page_token": page_token, - "endpoint": endpoint, - "default_only": default_only, - }, - request_options=request_options, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - ListModelsResponse, - construct_type( - type_=ListModelsResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 400: - raise BadRequestError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 401: - raise UnauthorizedError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 403: - raise ForbiddenError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 404: - raise NotFoundError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - UnprocessableEntityErrorBody, - construct_type( - type_=UnprocessableEntityErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 429: - raise TooManyRequestsError( - typing.cast( - TooManyRequestsErrorBody, - construct_type( - type_=TooManyRequestsErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 499: - raise ClientClosedRequestError( - typing.cast( - ClientClosedRequestErrorBody, - construct_type( - type_=ClientClosedRequestErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 500: - raise InternalServerError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 501: - raise NotImplementedError( - typing.cast( - NotImplementedErrorBody, - construct_type( - type_=NotImplementedErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 503: - raise ServiceUnavailableError( - typing.cast( - typing.Optional[typing.Any], - construct_type( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ) - ) - if _response.status_code == 504: - raise GatewayTimeoutError( - typing.cast( - GatewayTimeoutErrorBody, - construct_type( - type_=GatewayTimeoutErrorBody, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) diff --git a/src/cohere/types/__init__.py b/src/cohere/types/__init__.py index ea6d61264..71595edcc 100644 --- a/src/cohere/types/__init__.py +++ b/src/cohere/types/__init__.py @@ 
-1,279 +1,7 @@ # This file was auto-generated by Fern from our API Definition. -from .api_meta import ApiMeta -from .api_meta_api_version import ApiMetaApiVersion -from .api_meta_billed_units import ApiMetaBilledUnits -from .api_meta_tokens import ApiMetaTokens -from .auth_token_type import AuthTokenType -from .chat_citation import ChatCitation -from .chat_citation_generation_event import ChatCitationGenerationEvent -from .chat_connector import ChatConnector -from .chat_data_metrics import ChatDataMetrics from .chat_document import ChatDocument -from .chat_message import ChatMessage -from .chat_request_citation_quality import ChatRequestCitationQuality -from .chat_request_connectors_search_options import ChatRequestConnectorsSearchOptions -from .chat_request_prompt_truncation import ChatRequestPromptTruncation -from .chat_request_safety_mode import ChatRequestSafetyMode -from .chat_search_queries_generation_event import ChatSearchQueriesGenerationEvent -from .chat_search_query import ChatSearchQuery -from .chat_search_result import ChatSearchResult -from .chat_search_result_connector import ChatSearchResultConnector -from .chat_search_results_event import ChatSearchResultsEvent -from .chat_stream_end_event import ChatStreamEndEvent -from .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason -from .chat_stream_event import ChatStreamEvent -from .chat_stream_request_citation_quality import ChatStreamRequestCitationQuality -from .chat_stream_request_connectors_search_options import ChatStreamRequestConnectorsSearchOptions -from .chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation -from .chat_stream_request_safety_mode import ChatStreamRequestSafetyMode -from .chat_stream_start_event import ChatStreamStartEvent -from .chat_text_generation_event import ChatTextGenerationEvent -from .chat_tool_calls_chunk_event import ChatToolCallsChunkEvent -from .chat_tool_calls_generation_event import ChatToolCallsGenerationEvent -from .check_api_key_response import CheckApiKeyResponse -from .classify_data_metrics import ClassifyDataMetrics -from .classify_example import ClassifyExample -from .classify_request_truncate import ClassifyRequestTruncate -from .classify_response import ClassifyResponse -from .classify_response_classifications_item import ClassifyResponseClassificationsItem -from .classify_response_classifications_item_classification_type import ( - ClassifyResponseClassificationsItemClassificationType, -) -from .classify_response_classifications_item_labels_value import ClassifyResponseClassificationsItemLabelsValue -from .client_closed_request_error_body import ClientClosedRequestErrorBody -from .compatible_endpoint import CompatibleEndpoint -from .connector import Connector -from .connector_auth_status import ConnectorAuthStatus -from .connector_o_auth import ConnectorOAuth -from .create_connector_o_auth import CreateConnectorOAuth -from .create_connector_response import CreateConnectorResponse -from .create_connector_service_auth import CreateConnectorServiceAuth -from .create_embed_job_response import CreateEmbedJobResponse -from .dataset import Dataset -from .dataset_part import DatasetPart -from .dataset_type import DatasetType -from .dataset_validation_status import DatasetValidationStatus -from .delete_connector_response import DeleteConnectorResponse -from .detokenize_response import DetokenizeResponse -from .embed_by_type_response import EmbedByTypeResponse -from .embed_by_type_response_embeddings import EmbedByTypeResponseEmbeddings -from 
.embed_floats_response import EmbedFloatsResponse -from .embed_input_type import EmbedInputType -from .embed_job import EmbedJob -from .embed_job_status import EmbedJobStatus -from .embed_job_truncate import EmbedJobTruncate -from .embed_request_truncate import EmbedRequestTruncate -from .embed_response import EmbedResponse, EmbedResponse_EmbeddingsByType, EmbedResponse_EmbeddingsFloats -from .embedding_type import EmbeddingType -from .finetune_dataset_metrics import FinetuneDatasetMetrics -from .finish_reason import FinishReason -from .gateway_timeout_error_body import GatewayTimeoutErrorBody -from .generate_request_return_likelihoods import GenerateRequestReturnLikelihoods -from .generate_request_truncate import GenerateRequestTruncate -from .generate_stream_end import GenerateStreamEnd -from .generate_stream_end_response import GenerateStreamEndResponse -from .generate_stream_error import GenerateStreamError -from .generate_stream_event import GenerateStreamEvent -from .generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods -from .generate_stream_request_truncate import GenerateStreamRequestTruncate -from .generate_stream_text import GenerateStreamText -from .generate_streamed_response import ( - GenerateStreamedResponse, - GenerateStreamedResponse_StreamEnd, - GenerateStreamedResponse_StreamError, - GenerateStreamedResponse_TextGeneration, -) -from .generation import Generation -from .get_connector_response import GetConnectorResponse -from .get_model_response import GetModelResponse -from .json_response_format import JsonResponseFormat -from .label_metric import LabelMetric -from .list_connectors_response import ListConnectorsResponse -from .list_embed_job_response import ListEmbedJobResponse -from .list_models_response import ListModelsResponse -from .message import Message, Message_Chatbot, Message_System, Message_Tool, Message_User -from .metrics import Metrics -from .metrics_embed_data import MetricsEmbedData -from .metrics_embed_data_fields_item import MetricsEmbedDataFieldsItem -from .non_streamed_chat_response import NonStreamedChatResponse -from .not_implemented_error_body import NotImplementedErrorBody -from .o_auth_authorize_response import OAuthAuthorizeResponse -from .parse_info import ParseInfo +from .components_schemas_text_content import ComponentsSchemasTextContent from .rerank_document import RerankDocument -from .rerank_request_documents_item import RerankRequestDocumentsItem -from .rerank_response import RerankResponse -from .rerank_response_results_item import RerankResponseResultsItem -from .rerank_response_results_item_document import RerankResponseResultsItemDocument -from .reranker_data_metrics import RerankerDataMetrics -from .response_format import ResponseFormat, ResponseFormat_JsonObject, ResponseFormat_Text -from .single_generation import SingleGeneration -from .single_generation_in_stream import SingleGenerationInStream -from .single_generation_token_likelihoods_item import SingleGenerationTokenLikelihoodsItem -from .streamed_chat_response import ( - StreamedChatResponse, - StreamedChatResponse_CitationGeneration, - StreamedChatResponse_SearchQueriesGeneration, - StreamedChatResponse_SearchResults, - StreamedChatResponse_StreamEnd, - StreamedChatResponse_StreamStart, - StreamedChatResponse_TextGeneration, - StreamedChatResponse_ToolCallsChunk, - StreamedChatResponse_ToolCallsGeneration, -) -from .summarize_request_extractiveness import SummarizeRequestExtractiveness -from .summarize_request_format import 
SummarizeRequestFormat -from .summarize_request_length import SummarizeRequestLength -from .summarize_response import SummarizeResponse -from .text_response_format import TextResponseFormat -from .tokenize_response import TokenizeResponse -from .too_many_requests_error_body import TooManyRequestsErrorBody -from .tool import Tool -from .tool_call import ToolCall -from .tool_call_delta import ToolCallDelta -from .tool_message import ToolMessage -from .tool_parameter_definitions_value import ToolParameterDefinitionsValue -from .tool_result import ToolResult -from .unprocessable_entity_error_body import UnprocessableEntityErrorBody -from .update_connector_response import UpdateConnectorResponse -__all__ = [ - "ApiMeta", - "ApiMetaApiVersion", - "ApiMetaBilledUnits", - "ApiMetaTokens", - "AuthTokenType", - "ChatCitation", - "ChatCitationGenerationEvent", - "ChatConnector", - "ChatDataMetrics", - "ChatDocument", - "ChatMessage", - "ChatRequestCitationQuality", - "ChatRequestConnectorsSearchOptions", - "ChatRequestPromptTruncation", - "ChatRequestSafetyMode", - "ChatSearchQueriesGenerationEvent", - "ChatSearchQuery", - "ChatSearchResult", - "ChatSearchResultConnector", - "ChatSearchResultsEvent", - "ChatStreamEndEvent", - "ChatStreamEndEventFinishReason", - "ChatStreamEvent", - "ChatStreamRequestCitationQuality", - "ChatStreamRequestConnectorsSearchOptions", - "ChatStreamRequestPromptTruncation", - "ChatStreamRequestSafetyMode", - "ChatStreamStartEvent", - "ChatTextGenerationEvent", - "ChatToolCallsChunkEvent", - "ChatToolCallsGenerationEvent", - "CheckApiKeyResponse", - "ClassifyDataMetrics", - "ClassifyExample", - "ClassifyRequestTruncate", - "ClassifyResponse", - "ClassifyResponseClassificationsItem", - "ClassifyResponseClassificationsItemClassificationType", - "ClassifyResponseClassificationsItemLabelsValue", - "ClientClosedRequestErrorBody", - "CompatibleEndpoint", - "Connector", - "ConnectorAuthStatus", - "ConnectorOAuth", - "CreateConnectorOAuth", - "CreateConnectorResponse", - "CreateConnectorServiceAuth", - "CreateEmbedJobResponse", - "Dataset", - "DatasetPart", - "DatasetType", - "DatasetValidationStatus", - "DeleteConnectorResponse", - "DetokenizeResponse", - "EmbedByTypeResponse", - "EmbedByTypeResponseEmbeddings", - "EmbedFloatsResponse", - "EmbedInputType", - "EmbedJob", - "EmbedJobStatus", - "EmbedJobTruncate", - "EmbedRequestTruncate", - "EmbedResponse", - "EmbedResponse_EmbeddingsByType", - "EmbedResponse_EmbeddingsFloats", - "EmbeddingType", - "FinetuneDatasetMetrics", - "FinishReason", - "GatewayTimeoutErrorBody", - "GenerateRequestReturnLikelihoods", - "GenerateRequestTruncate", - "GenerateStreamEnd", - "GenerateStreamEndResponse", - "GenerateStreamError", - "GenerateStreamEvent", - "GenerateStreamRequestReturnLikelihoods", - "GenerateStreamRequestTruncate", - "GenerateStreamText", - "GenerateStreamedResponse", - "GenerateStreamedResponse_StreamEnd", - "GenerateStreamedResponse_StreamError", - "GenerateStreamedResponse_TextGeneration", - "Generation", - "GetConnectorResponse", - "GetModelResponse", - "JsonResponseFormat", - "LabelMetric", - "ListConnectorsResponse", - "ListEmbedJobResponse", - "ListModelsResponse", - "Message", - "Message_Chatbot", - "Message_System", - "Message_Tool", - "Message_User", - "Metrics", - "MetricsEmbedData", - "MetricsEmbedDataFieldsItem", - "NonStreamedChatResponse", - "NotImplementedErrorBody", - "OAuthAuthorizeResponse", - "ParseInfo", - "RerankDocument", - "RerankRequestDocumentsItem", - "RerankResponse", - "RerankResponseResultsItem", - 
"RerankResponseResultsItemDocument", - "RerankerDataMetrics", - "ResponseFormat", - "ResponseFormat_JsonObject", - "ResponseFormat_Text", - "SingleGeneration", - "SingleGenerationInStream", - "SingleGenerationTokenLikelihoodsItem", - "StreamedChatResponse", - "StreamedChatResponse_CitationGeneration", - "StreamedChatResponse_SearchQueriesGeneration", - "StreamedChatResponse_SearchResults", - "StreamedChatResponse_StreamEnd", - "StreamedChatResponse_StreamStart", - "StreamedChatResponse_TextGeneration", - "StreamedChatResponse_ToolCallsChunk", - "StreamedChatResponse_ToolCallsGeneration", - "SummarizeRequestExtractiveness", - "SummarizeRequestFormat", - "SummarizeRequestLength", - "SummarizeResponse", - "TextResponseFormat", - "TokenizeResponse", - "TooManyRequestsErrorBody", - "Tool", - "ToolCall", - "ToolCallDelta", - "ToolMessage", - "ToolParameterDefinitionsValue", - "ToolResult", - "UnprocessableEntityErrorBody", - "UpdateConnectorResponse", -] +__all__ = ["ChatDocument", "ComponentsSchemasTextContent", "RerankDocument"] diff --git a/src/cohere/types/chat_data_metrics.py b/src/cohere/types/chat_data_metrics.py deleted file mode 100644 index fba215409..000000000 --- a/src/cohere/types/chat_data_metrics.py +++ /dev/null @@ -1,32 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class ChatDataMetrics(UncheckedBaseModel): - num_train_turns: typing.Optional[int] = pydantic.Field(default=None) - """ - The sum of all turns of valid train examples. - """ - - num_eval_turns: typing.Optional[int] = pydantic.Field(default=None) - """ - The sum of all turns of valid eval examples. - """ - - preamble: typing.Optional[str] = pydantic.Field(default=None) - """ - The preamble of this dataset. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/chat_stream_event.py b/src/cohere/types/chat_stream_event.py deleted file mode 100644 index 5c21ba71f..000000000 --- a/src/cohere/types/chat_stream_event.py +++ /dev/null @@ -1,17 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import typing -import pydantic - - -class ChatStreamEvent(UncheckedBaseModel): - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/classify_data_metrics.py b/src/cohere/types/classify_data_metrics.py deleted file mode 100644 index 7f045655e..000000000 --- a/src/cohere/types/classify_data_metrics.py +++ /dev/null @@ -1,20 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .label_metric import LabelMetric -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic - - -class ClassifyDataMetrics(UncheckedBaseModel): - label_metrics: typing.Optional[typing.List[LabelMetric]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/components_schemas_text_content.py b/src/cohere/types/components_schemas_text_content.py new file mode 100644 index 000000000..669400431 --- /dev/null +++ b/src/cohere/types/components_schemas_text_content.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..v2.v2.types.text_content import TextContent + +ComponentsSchemasTextContent = TextContent diff --git a/src/cohere/types/dataset.py b/src/cohere/types/dataset.py deleted file mode 100644 index c297f2955..000000000 --- a/src/cohere/types/dataset.py +++ /dev/null @@ -1,69 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import pydantic -import datetime as dt -from .dataset_type import DatasetType -from .dataset_validation_status import DatasetValidationStatus -import typing -import typing_extensions -from ..core.serialization import FieldMetadata -from .dataset_part import DatasetPart -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class Dataset(UncheckedBaseModel): - id: str = pydantic.Field() - """ - The dataset ID - """ - - name: str = pydantic.Field() - """ - The name of the dataset - """ - - created_at: dt.datetime = pydantic.Field() - """ - The creation date - """ - - updated_at: dt.datetime = pydantic.Field() - """ - The last update date - """ - - dataset_type: DatasetType - validation_status: DatasetValidationStatus - validation_error: typing.Optional[str] = pydantic.Field(default=None) - """ - Errors found during validation - """ - - schema_: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="schema")] = pydantic.Field( - default=None - ) - """ - the avro schema of the dataset - """ - - required_fields: typing.Optional[typing.List[str]] = None - preserve_fields: typing.Optional[typing.List[str]] = None - dataset_parts: typing.Optional[typing.List[DatasetPart]] = pydantic.Field(default=None) - """ - the underlying files that make up the dataset - """ - - validation_warnings: typing.Optional[typing.List[str]] = pydantic.Field(default=None) - """ - warnings found during validation - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/dataset_part.py b/src/cohere/types/dataset_part.py deleted file mode 100644 index 9f6a62fb8..000000000 --- a/src/cohere/types/dataset_part.py +++ /dev/null @@ -1,57 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
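The new components_schemas_text_content.py shown above keeps the old cohere.types import path alive as a pure re-export of the v2 TextContent. A quick identity check, assuming the package layout its relative import (..v2.v2.types.text_content) implies:

    from cohere.types import ComponentsSchemasTextContent
    from cohere.v2.v2.types.text_content import TextContent

    # The alias is the same class object, not a copy, so isinstance checks
    # written against either name keep working.
    assert ComponentsSchemasTextContent is TextContent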
- -from ..core.unchecked_base_model import UncheckedBaseModel -import pydantic -import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class DatasetPart(UncheckedBaseModel): - id: str = pydantic.Field() - """ - The dataset part ID - """ - - name: str = pydantic.Field() - """ - The name of the dataset part - """ - - url: typing.Optional[str] = pydantic.Field(default=None) - """ - The download url of the file - """ - - index: typing.Optional[int] = pydantic.Field(default=None) - """ - The index of the file - """ - - size_bytes: typing.Optional[int] = pydantic.Field(default=None) - """ - The size of the file in bytes - """ - - num_rows: typing.Optional[int] = pydantic.Field(default=None) - """ - The number of rows in the file - """ - - original_url: typing.Optional[str] = pydantic.Field(default=None) - """ - The download url of the original file - """ - - samples: typing.Optional[typing.List[str]] = pydantic.Field(default=None) - """ - The first few rows of the parsed file - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/dataset_type.py b/src/cohere/types/dataset_type.py deleted file mode 100644 index b9e0c1441..000000000 --- a/src/cohere/types/dataset_type.py +++ /dev/null @@ -1,17 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DatasetType = typing.Union[ - typing.Literal[ - "embed-input", - "embed-result", - "cluster-result", - "cluster-outliers", - "reranker-finetune-input", - "single-label-classification-finetune-input", - "chat-finetune-input", - "multi-label-classification-finetune-input", - ], - typing.Any, -] diff --git a/src/cohere/types/dataset_validation_status.py b/src/cohere/types/dataset_validation_status.py deleted file mode 100644 index 057e79892..000000000 --- a/src/cohere/types/dataset_validation_status.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DatasetValidationStatus = typing.Union[ - typing.Literal["unknown", "queued", "processing", "failed", "validated", "skipped"], typing.Any -] diff --git a/src/cohere/types/embed_job.py b/src/cohere/types/embed_job.py deleted file mode 100644 index f1eea4c3d..000000000 --- a/src/cohere/types/embed_job.py +++ /dev/null @@ -1,63 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
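DatasetType and DatasetValidationStatus above use the generator's open-enum idiom: a Literal union widened with typing.Any so values the server adds later still validate. A sketch of consuming such a value defensively:

    import typing

    # Mirrors the deleted alias: known literals plus an escape hatch.
    DatasetValidationStatus = typing.Union[
        typing.Literal["unknown", "queued", "processing", "failed", "validated", "skipped"],
        typing.Any,
    ]


    def is_terminal(status: DatasetValidationStatus) -> bool:
        # Unrecognized statuses fall through to False rather than raising.
        return status in ("failed", "validated", "skipped")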
- -from ..core.unchecked_base_model import UncheckedBaseModel -import pydantic -import typing -from .embed_job_status import EmbedJobStatus -import datetime as dt -from .embed_job_truncate import EmbedJobTruncate -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class EmbedJob(UncheckedBaseModel): - job_id: str = pydantic.Field() - """ - ID of the embed job - """ - - name: typing.Optional[str] = pydantic.Field(default=None) - """ - The name of the embed job - """ - - status: EmbedJobStatus = pydantic.Field() - """ - The status of the embed job - """ - - created_at: dt.datetime = pydantic.Field() - """ - The creation date of the embed job - """ - - input_dataset_id: str = pydantic.Field() - """ - ID of the input dataset - """ - - output_dataset_id: typing.Optional[str] = pydantic.Field(default=None) - """ - ID of the resulting output dataset - """ - - model: str = pydantic.Field() - """ - ID of the model used to embed - """ - - truncate: EmbedJobTruncate = pydantic.Field() - """ - The truncation option used - """ - - meta: typing.Optional[ApiMeta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/embed_job_status.py b/src/cohere/types/embed_job_status.py deleted file mode 100644 index 0fb8f727f..000000000 --- a/src/cohere/types/embed_job_status.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -EmbedJobStatus = typing.Union[typing.Literal["processing", "complete", "cancelling", "cancelled", "failed"], typing.Any] diff --git a/src/cohere/types/embed_response.py b/src/cohere/types/embed_response.py deleted file mode 100644 index d1d6083f1..000000000 --- a/src/cohere/types/embed_response.py +++ /dev/null @@ -1,51 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
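The deleted EmbedJob model above documented each field inline. For code that still handles raw embed-job payloads after this removal, a hypothetical dict-based summary using those same field names:

    import typing


    def summarize_job(job: typing.Dict[str, typing.Any]) -> str:
        # job_id, name, status and model mirror the removed EmbedJob fields;
        # name is optional, job_id is required.
        name = job.get("name") or job["job_id"]
        return f"{name}: {job['status']} (model={job['model']})"


    print(summarize_job({"job_id": "j-1", "status": "complete", "model": "embed-english-v3.0"}))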
- -from __future__ import annotations -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -from .embed_by_type_response_embeddings import EmbedByTypeResponseEmbeddings -import typing_extensions -from ..core.unchecked_base_model import UnionMetadata - - -class EmbedResponse_EmbeddingsFloats(UncheckedBaseModel): - response_type: typing.Literal["embeddings_floats"] = "embeddings_floats" - id: str - embeddings: typing.List[typing.List[float]] - texts: typing.List[str] - meta: typing.Optional[ApiMeta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class EmbedResponse_EmbeddingsByType(UncheckedBaseModel): - response_type: typing.Literal["embeddings_by_type"] = "embeddings_by_type" - id: str - embeddings: EmbedByTypeResponseEmbeddings - texts: typing.List[str] - meta: typing.Optional[ApiMeta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -EmbedResponse = typing_extensions.Annotated[ - typing.Union[EmbedResponse_EmbeddingsFloats, EmbedResponse_EmbeddingsByType], - UnionMetadata(discriminant="response_type"), -] diff --git a/src/cohere/types/finetune_dataset_metrics.py b/src/cohere/types/finetune_dataset_metrics.py deleted file mode 100644 index 7617091f6..000000000 --- a/src/cohere/types/finetune_dataset_metrics.py +++ /dev/null @@ -1,47 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class FinetuneDatasetMetrics(UncheckedBaseModel): - trainable_token_count: typing.Optional[int] = pydantic.Field(default=None) - """ - The number of tokens of valid examples that can be used for training. - """ - - total_examples: typing.Optional[int] = pydantic.Field(default=None) - """ - The overall number of examples. - """ - - train_examples: typing.Optional[int] = pydantic.Field(default=None) - """ - The number of training examples. - """ - - train_size_bytes: typing.Optional[int] = pydantic.Field(default=None) - """ - The size in bytes of all training examples. - """ - - eval_examples: typing.Optional[int] = pydantic.Field(default=None) - """ - Number of evaluation examples. - """ - - eval_size_bytes: typing.Optional[int] = pydantic.Field(default=None) - """ - The size in bytes of all eval examples. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/generate_stream_event.py b/src/cohere/types/generate_stream_event.py deleted file mode 100644 index 978131b12..000000000 --- a/src/cohere/types/generate_stream_event.py +++ /dev/null @@ -1,17 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
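EmbedResponse above is one of several discriminated unions removed here: UnionMetadata keys the variant off response_type. A sketch of the equivalent dispatch over plain payloads:

    import typing


    def embedding_count(resp: typing.Dict[str, typing.Any]) -> int:
        kind = resp["response_type"]
        if kind == "embeddings_floats":
            # embeddings is List[List[float]] in this variant.
            return len(resp["embeddings"])
        if kind == "embeddings_by_type":
            # embeddings maps embedding type names (e.g. "float", "int8") to lists.
            return sum(len(v) for v in resp["embeddings"].values())
        raise ValueError(f"unexpected response_type: {kind!r}")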
- -from ..core.unchecked_base_model import UncheckedBaseModel -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import typing -import pydantic - - -class GenerateStreamEvent(UncheckedBaseModel): - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/generate_streamed_response.py b/src/cohere/types/generate_streamed_response.py deleted file mode 100644 index d6b74f4bc..000000000 --- a/src/cohere/types/generate_streamed_response.py +++ /dev/null @@ -1,82 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -from .finish_reason import FinishReason -from .generate_stream_end_response import GenerateStreamEndResponse -import typing_extensions -from ..core.unchecked_base_model import UnionMetadata - - -class GenerateStreamedResponse_TextGeneration(UncheckedBaseModel): - """ - Response in content type stream when `stream` is `true` in the request parameters. Generation tokens are streamed with the GenerationStream response. The final response is of type GenerationFinalResponse. - """ - - event_type: typing.Literal["text-generation"] = "text-generation" - text: str - index: typing.Optional[int] = None - is_finished: bool - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class GenerateStreamedResponse_StreamEnd(UncheckedBaseModel): - """ - Response in content type stream when `stream` is `true` in the request parameters. Generation tokens are streamed with the GenerationStream response. The final response is of type GenerationFinalResponse. - """ - - event_type: typing.Literal["stream-end"] = "stream-end" - is_finished: bool - finish_reason: typing.Optional[FinishReason] = None - response: GenerateStreamEndResponse - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class GenerateStreamedResponse_StreamError(UncheckedBaseModel): - """ - Response in content type stream when `stream` is `true` in the request parameters. Generation tokens are streamed with the GenerationStream response. The final response is of type GenerationFinalResponse. 
- """ - - event_type: typing.Literal["stream-error"] = "stream-error" - index: typing.Optional[int] = None - is_finished: bool - finish_reason: FinishReason - err: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -GenerateStreamedResponse = typing_extensions.Annotated[ - typing.Union[ - GenerateStreamedResponse_TextGeneration, - GenerateStreamedResponse_StreamEnd, - GenerateStreamedResponse_StreamError, - ], - UnionMetadata(discriminant="event_type"), -] diff --git a/src/cohere/types/label_metric.py b/src/cohere/types/label_metric.py deleted file mode 100644 index bbc303f1e..000000000 --- a/src/cohere/types/label_metric.py +++ /dev/null @@ -1,32 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class LabelMetric(UncheckedBaseModel): - total_examples: typing.Optional[int] = pydantic.Field(default=None) - """ - Total number of examples for this label - """ - - label: typing.Optional[str] = pydantic.Field(default=None) - """ - value of the label - """ - - samples: typing.Optional[typing.List[str]] = pydantic.Field(default=None) - """ - samples for this label - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/list_connectors_response.py b/src/cohere/types/list_connectors_response.py deleted file mode 100644 index 4fc64ff9b..000000000 --- a/src/cohere/types/list_connectors_response.py +++ /dev/null @@ -1,24 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .connector import Connector -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class ListConnectorsResponse(UncheckedBaseModel): - connectors: typing.List[Connector] - total_count: typing.Optional[float] = pydantic.Field(default=None) - """ - Total number of connectors. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/list_embed_job_response.py b/src/cohere/types/list_embed_job_response.py deleted file mode 100644 index e2c1523f5..000000000 --- a/src/cohere/types/list_embed_job_response.py +++ /dev/null @@ -1,20 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .embed_job import EmbedJob -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic - - -class ListEmbedJobResponse(UncheckedBaseModel): - embed_jobs: typing.Optional[typing.List[EmbedJob]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/message.py b/src/cohere/types/message.py deleted file mode 100644 index d4edc57e8..000000000 --- a/src/cohere/types/message.py +++ /dev/null @@ -1,75 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .tool_call import ToolCall -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -from .tool_result import ToolResult -import typing_extensions -from ..core.unchecked_base_model import UnionMetadata - - -class Message_Chatbot(UncheckedBaseModel): - role: typing.Literal["CHATBOT"] = "CHATBOT" - message: str - tool_calls: typing.Optional[typing.List[ToolCall]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class Message_System(UncheckedBaseModel): - role: typing.Literal["SYSTEM"] = "SYSTEM" - message: str - tool_calls: typing.Optional[typing.List[ToolCall]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class Message_User(UncheckedBaseModel): - role: typing.Literal["USER"] = "USER" - message: str - tool_calls: typing.Optional[typing.List[ToolCall]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class Message_Tool(UncheckedBaseModel): - role: typing.Literal["TOOL"] = "TOOL" - tool_results: typing.Optional[typing.List[ToolResult]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -Message = typing_extensions.Annotated[ - typing.Union[Message_Chatbot, Message_System, Message_User, Message_Tool], UnionMetadata(discriminant="role") -] diff --git a/src/cohere/types/metrics.py b/src/cohere/types/metrics.py deleted file mode 100644 index f4a177c63..000000000 --- a/src/cohere/types/metrics.py +++ /dev/null @@ -1,22 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
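The removed Message union above discriminates on role; the CHATBOT, SYSTEM and USER variants carry message (plus optional tool_calls), while TOOL carries tool_results instead. A chat_history payload in that shape:

    # Roles follow the removed Literal values: "CHATBOT" | "SYSTEM" | "USER" | "TOOL".
    chat_history = [
        {"role": "SYSTEM", "message": "You are a terse assistant."},
        {"role": "USER", "message": "What is the capital of France?"},
        {"role": "CHATBOT", "message": "Paris."},
    ]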
- -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .finetune_dataset_metrics import FinetuneDatasetMetrics -from .metrics_embed_data import MetricsEmbedData -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic - - -class Metrics(UncheckedBaseModel): - finetune_dataset_metrics: typing.Optional[FinetuneDatasetMetrics] = None - embed_data: typing.Optional[MetricsEmbedData] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/metrics_embed_data.py b/src/cohere/types/metrics_embed_data.py deleted file mode 100644 index f7a8d5033..000000000 --- a/src/cohere/types/metrics_embed_data.py +++ /dev/null @@ -1,23 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from .metrics_embed_data_fields_item import MetricsEmbedDataFieldsItem -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class MetricsEmbedData(UncheckedBaseModel): - fields: typing.Optional[typing.List[MetricsEmbedDataFieldsItem]] = pydantic.Field(default=None) - """ - the fields in the dataset - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/metrics_embed_data_fields_item.py b/src/cohere/types/metrics_embed_data_fields_item.py deleted file mode 100644 index 42f8f8eee..000000000 --- a/src/cohere/types/metrics_embed_data_fields_item.py +++ /dev/null @@ -1,27 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class MetricsEmbedDataFieldsItem(UncheckedBaseModel): - name: typing.Optional[str] = pydantic.Field(default=None) - """ - the name of the field - """ - - count: typing.Optional[float] = pydantic.Field(default=None) - """ - the number of times the field appears in the dataset - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/parse_info.py b/src/cohere/types/parse_info.py deleted file mode 100644 index 6f93159b8..000000000 --- a/src/cohere/types/parse_info.py +++ /dev/null @@ -1,20 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
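Metrics, MetricsEmbedData and MetricsEmbedDataFieldsItem above form a chain in which every level is optional. A defensive traversal over a raw payload of the same shape:

    import typing


    def field_counts(metrics: typing.Dict[str, typing.Any]) -> typing.Dict[str, float]:
        # Each hop may be absent, mirroring the Optional fields above.
        fields = (metrics.get("embed_data") or {}).get("fields") or []
        return {f["name"]: f["count"] for f in fields if f.get("name") is not None}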
- -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic - - -class ParseInfo(UncheckedBaseModel): - separator: typing.Optional[str] = None - delimiter: typing.Optional[str] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/reranker_data_metrics.py b/src/cohere/types/reranker_data_metrics.py deleted file mode 100644 index bb14b4fe3..000000000 --- a/src/cohere/types/reranker_data_metrics.py +++ /dev/null @@ -1,47 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 - - -class RerankerDataMetrics(UncheckedBaseModel): - num_train_queries: typing.Optional[int] = pydantic.Field(default=None) - """ - The number of training queries. - """ - - num_train_relevant_passages: typing.Optional[int] = pydantic.Field(default=None) - """ - The sum of all relevant passages of valid training examples. - """ - - num_train_hard_negatives: typing.Optional[int] = pydantic.Field(default=None) - """ - The sum of all hard negatives of valid training examples. - """ - - num_eval_queries: typing.Optional[int] = pydantic.Field(default=None) - """ - The number of evaluation queries. - """ - - num_eval_relevant_passages: typing.Optional[int] = pydantic.Field(default=None) - """ - The sum of all relevant passages of valid eval examples. - """ - - num_eval_hard_negatives: typing.Optional[int] = pydantic.Field(default=None) - """ - The sum of all hard negatives of valid eval examples. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/types/response_format.py b/src/cohere/types/response_format.py deleted file mode 100644 index 458244ff5..000000000 --- a/src/cohere/types/response_format.py +++ /dev/null @@ -1,66 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ..core.serialization import FieldMetadata -from ..core.unchecked_base_model import UnionMetadata - - -class ResponseFormat_Text(UncheckedBaseModel): - """ - Configuration for forcing the model output to adhere to the specified format. Supported on [Command R 03-2024](https://docs.cohere.com/docs/command-r), [Command R+ 04-2024](https://docs.cohere.com/docs/command-r-plus) and newer models. - - The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. - - A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure. - - **Note**: When using `{ "type": "json_object" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _"Generate a JSON ..."_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length. 
- **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided). - """ - - type: typing.Literal["text"] = "text" - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class ResponseFormat_JsonObject(UncheckedBaseModel): - """ - Configuration for forcing the model output to adhere to the specified format. Supported on [Command R 03-2024](https://docs.cohere.com/docs/command-r), [Command R+ 04-2024](https://docs.cohere.com/docs/command-r-plus) and newer models. - - The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. - - A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure. - - **Note**: When using `{ "type": "json_object" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _"Generate a JSON ..."_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length. - **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided). - """ - - type: typing.Literal["json_object"] = "json_object" - schema_: typing_extensions.Annotated[ - typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]], FieldMetadata(alias="schema") - ] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -ResponseFormat = typing_extensions.Annotated[ - typing.Union[ResponseFormat_Text, ResponseFormat_JsonObject], UnionMetadata(discriminant="type") -] diff --git a/src/cohere/types/streamed_chat_response.py b/src/cohere/types/streamed_chat_response.py deleted file mode 100644 index 0576316a8..000000000 --- a/src/cohere/types/streamed_chat_response.py +++ /dev/null @@ -1,179 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ..core.unchecked_base_model import UncheckedBaseModel -import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -from .chat_search_query import ChatSearchQuery -from .chat_search_result import ChatSearchResult -from .chat_document import ChatDocument -from .chat_citation import ChatCitation -from .tool_call import ToolCall -from .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason -from .non_streamed_chat_response import NonStreamedChatResponse -from .tool_call_delta import ToolCallDelta -import typing_extensions -from ..core.unchecked_base_model import UnionMetadata - - -class StreamedChatResponse_StreamStart(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). 
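Per the removed docstrings above, a request payload that opts into JSON output; the "schema" key maps to the model's schema_ field via its alias, and the message explicitly asks for JSON as the note requires:

    response_format = {
        "type": "json_object",
        # Optional JSON Schema constraining the structure (up to 5 nesting levels).
        "schema": {
            "type": "object",
            "properties": {"title": {"type": "string"}, "year": {"type": "integer"}},
            "required": ["title"],
        },
    }
    message = "Generate a JSON describing a film, with fields title and year."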
- """ - - event_type: typing.Literal["stream-start"] = "stream-start" - generation_id: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_SearchQueriesGeneration(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - event_type: typing.Literal["search-queries-generation"] = "search-queries-generation" - search_queries: typing.List[ChatSearchQuery] - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_SearchResults(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - event_type: typing.Literal["search-results"] = "search-results" - search_results: typing.Optional[typing.List[ChatSearchResult]] = None - documents: typing.Optional[typing.List[ChatDocument]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_TextGeneration(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - event_type: typing.Literal["text-generation"] = "text-generation" - text: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_CitationGeneration(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - event_type: typing.Literal["citation-generation"] = "citation-generation" - citations: typing.List[ChatCitation] - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_ToolCallsGeneration(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - event_type: typing.Literal["tool-calls-generation"] = "tool-calls-generation" - text: typing.Optional[str] = None - tool_calls: typing.List[ToolCall] - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_StreamEnd(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). 
- """ - - event_type: typing.Literal["stream-end"] = "stream-end" - finish_reason: ChatStreamEndEventFinishReason - response: NonStreamedChatResponse - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse_ToolCallsChunk(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - event_type: typing.Literal["tool-calls-chunk"] = "tool-calls-chunk" - tool_call_delta: ToolCallDelta - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -StreamedChatResponse = typing_extensions.Annotated[ - typing.Union[ - StreamedChatResponse_StreamStart, - StreamedChatResponse_SearchQueriesGeneration, - StreamedChatResponse_SearchResults, - StreamedChatResponse_TextGeneration, - StreamedChatResponse_CitationGeneration, - StreamedChatResponse_ToolCallsGeneration, - StreamedChatResponse_StreamEnd, - StreamedChatResponse_ToolCallsChunk, - ], - UnionMetadata(discriminant="event_type"), -] diff --git a/src/cohere/v1/__init__.py b/src/cohere/v1/__init__.py new file mode 100644 index 000000000..53912f2c1 --- /dev/null +++ b/src/cohere/v1/__init__.py @@ -0,0 +1,236 @@ +# This file was auto-generated by Fern from our API Definition. + +from .types import ( + ApiMeta, + ApiMetaApiVersion, + ApiMetaBilledUnits, + ApiMetaTokens, + AuthTokenType, + ChatCitation, + ChatCitationGenerationEvent, + ChatConnector, + ChatMessage, + ChatSearchQueriesGenerationEvent, + ChatSearchQuery, + ChatSearchResult, + ChatSearchResultConnector, + ChatSearchResultsEvent, + ChatStreamEndEvent, + ChatStreamEndEventFinishReason, + ChatStreamEvent, + ChatStreamEventEventType, + ChatStreamStartEvent, + ChatTextGenerationEvent, + ChatToolCallsChunkEvent, + ChatToolCallsGenerationEvent, + ClassifyExample, + ClientClosedRequestErrorBody, + CompatibleEndpoint, + Connector, + ConnectorAuthStatus, + ConnectorOAuth, + CreateConnectorOAuth, + CreateConnectorResponse, + CreateConnectorServiceAuth, + DatasetsGetUsageResponse, + DeleteConnectorResponse, + EmbedByTypeResponse, + EmbedByTypeResponseEmbeddings, + EmbedFloatsResponse, + EmbedInputType, + EmbeddingType, + FinishReason, + GatewayTimeoutErrorBody, + GenerateStreamEnd, + GenerateStreamEndResponse, + GenerateStreamError, + GenerateStreamEvent, + GenerateStreamEventEventType, + GenerateStreamText, + GenerateStreamedResponse, + Generation, + GetModelResponse, + JsonResponseFormat, + ListModelsResponse, + Message, + NonStreamedChatResponse, + NotImplementedErrorBody, + OAuthAuthorizeResponse, + ResponseFormat, + SingleGeneration, + SingleGenerationInStream, + SingleGenerationTokenLikelihoodsItem, + StreamedChatResponse, + TextResponseFormat, + TooManyRequestsErrorBody, + Tool, + ToolCall, + ToolCallDelta, + ToolMessage, + ToolParameterDefinitionsValue, + ToolResult, + UnprocessableEntityErrorBody, + UpdateConnectorResponse, +) +from .errors import ( + BadRequestError, + ClientClosedRequestError, + ForbiddenError, + GatewayTimeoutError, + InternalServerError, + NotFoundError, + NotImplementedError, + ServiceUnavailableError, + TooManyRequestsError, + UnauthorizedError, + UnprocessableEntityError, +) +from . 
import finetuning, v1 +from .v1 import ( + ChatRequestCitationQuality, + ChatRequestConnectorsSearchOptions, + ChatRequestPromptTruncation, + ChatRequestSafetyMode, + ChatStreamRequestCitationQuality, + ChatStreamRequestConnectorsSearchOptions, + ChatStreamRequestPromptTruncation, + ChatStreamRequestSafetyMode, + CheckApiKeyResponse, + ClassifyRequestTruncate, + ClassifyResponse, + ClassifyResponseClassificationsItem, + ClassifyResponseClassificationsItemClassificationType, + ClassifyResponseClassificationsItemLabelsValue, + DetokenizeResponse, + EmbedRequestTruncate, + EmbedResponse, + GenerateRequestReturnLikelihoods, + GenerateRequestTruncate, + GenerateStreamRequestReturnLikelihoods, + GenerateStreamRequestTruncate, + RerankRequestDocumentsItem, + RerankResponse, + RerankResponseResultsItem, + RerankResponseResultsItemDocument, + SummarizeRequestExtractiveness, + SummarizeRequestFormat, + SummarizeRequestLength, + SummarizeResponse, + TokenizeResponse, +) + +__all__ = [ + "ApiMeta", + "ApiMetaApiVersion", + "ApiMetaBilledUnits", + "ApiMetaTokens", + "AuthTokenType", + "BadRequestError", + "ChatCitation", + "ChatCitationGenerationEvent", + "ChatConnector", + "ChatMessage", + "ChatRequestCitationQuality", + "ChatRequestConnectorsSearchOptions", + "ChatRequestPromptTruncation", + "ChatRequestSafetyMode", + "ChatSearchQueriesGenerationEvent", + "ChatSearchQuery", + "ChatSearchResult", + "ChatSearchResultConnector", + "ChatSearchResultsEvent", + "ChatStreamEndEvent", + "ChatStreamEndEventFinishReason", + "ChatStreamEvent", + "ChatStreamEventEventType", + "ChatStreamRequestCitationQuality", + "ChatStreamRequestConnectorsSearchOptions", + "ChatStreamRequestPromptTruncation", + "ChatStreamRequestSafetyMode", + "ChatStreamStartEvent", + "ChatTextGenerationEvent", + "ChatToolCallsChunkEvent", + "ChatToolCallsGenerationEvent", + "CheckApiKeyResponse", + "ClassifyExample", + "ClassifyRequestTruncate", + "ClassifyResponse", + "ClassifyResponseClassificationsItem", + "ClassifyResponseClassificationsItemClassificationType", + "ClassifyResponseClassificationsItemLabelsValue", + "ClientClosedRequestError", + "ClientClosedRequestErrorBody", + "CompatibleEndpoint", + "Connector", + "ConnectorAuthStatus", + "ConnectorOAuth", + "CreateConnectorOAuth", + "CreateConnectorResponse", + "CreateConnectorServiceAuth", + "DatasetsGetUsageResponse", + "DeleteConnectorResponse", + "DetokenizeResponse", + "EmbedByTypeResponse", + "EmbedByTypeResponseEmbeddings", + "EmbedFloatsResponse", + "EmbedInputType", + "EmbedRequestTruncate", + "EmbedResponse", + "EmbeddingType", + "FinishReason", + "ForbiddenError", + "GatewayTimeoutError", + "GatewayTimeoutErrorBody", + "GenerateRequestReturnLikelihoods", + "GenerateRequestTruncate", + "GenerateStreamEnd", + "GenerateStreamEndResponse", + "GenerateStreamError", + "GenerateStreamEvent", + "GenerateStreamEventEventType", + "GenerateStreamRequestReturnLikelihoods", + "GenerateStreamRequestTruncate", + "GenerateStreamText", + "GenerateStreamedResponse", + "Generation", + "GetModelResponse", + "InternalServerError", + "JsonResponseFormat", + "ListModelsResponse", + "Message", + "NonStreamedChatResponse", + "NotFoundError", + "NotImplementedError", + "NotImplementedErrorBody", + "OAuthAuthorizeResponse", + "RerankRequestDocumentsItem", + "RerankResponse", + "RerankResponseResultsItem", + "RerankResponseResultsItemDocument", + "ResponseFormat", + "ServiceUnavailableError", + "SingleGeneration", + "SingleGenerationInStream", + "SingleGenerationTokenLikelihoodsItem", + 
"StreamedChatResponse", + "SummarizeRequestExtractiveness", + "SummarizeRequestFormat", + "SummarizeRequestLength", + "SummarizeResponse", + "TextResponseFormat", + "TokenizeResponse", + "TooManyRequestsError", + "TooManyRequestsErrorBody", + "Tool", + "ToolCall", + "ToolCallDelta", + "ToolMessage", + "ToolParameterDefinitionsValue", + "ToolResult", + "UnauthorizedError", + "UnprocessableEntityError", + "UnprocessableEntityErrorBody", + "UpdateConnectorResponse", + "finetuning", + "v1", +] diff --git a/src/cohere/connectors/client.py b/src/cohere/v1/client.py similarity index 73% rename from src/cohere/connectors/client.py rename to src/cohere/v1/client.py index 0b29de18e..29be61be0 100644 --- a/src/cohere/connectors/client.py +++ b/src/cohere/v1/client.py @@ -2,70 +2,84 @@ import typing from ..core.client_wrapper import SyncClientWrapper +from .v1.client import V1Client as v1_v1_client_V1Client +from .types.compatible_endpoint import CompatibleEndpoint from ..core.request_options import RequestOptions -from ..types.list_connectors_response import ListConnectorsResponse +from .types.list_models_response import ListModelsResponse from ..core.unchecked_base_model import construct_type -from ..errors.bad_request_error import BadRequestError -from ..errors.unauthorized_error import UnauthorizedError -from ..errors.forbidden_error import ForbiddenError -from ..errors.not_found_error import NotFoundError -from ..errors.unprocessable_entity_error import UnprocessableEntityError -from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody -from ..errors.too_many_requests_error import TooManyRequestsError -from ..types.too_many_requests_error_body import TooManyRequestsErrorBody -from ..errors.client_closed_request_error import ClientClosedRequestError -from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody -from ..errors.internal_server_error import InternalServerError -from ..errors.not_implemented_error import NotImplementedError -from ..types.not_implemented_error_body import NotImplementedErrorBody -from ..errors.service_unavailable_error import ServiceUnavailableError -from ..errors.gateway_timeout_error import GatewayTimeoutError -from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody +from .errors.bad_request_error import BadRequestError +from .errors.unauthorized_error import UnauthorizedError +from .errors.forbidden_error import ForbiddenError +from .errors.not_found_error import NotFoundError +from .errors.unprocessable_entity_error import UnprocessableEntityError +from .types.unprocessable_entity_error_body import UnprocessableEntityErrorBody +from .errors.too_many_requests_error import TooManyRequestsError +from .types.too_many_requests_error_body import TooManyRequestsErrorBody +from .errors.client_closed_request_error import ClientClosedRequestError +from .types.client_closed_request_error_body import ClientClosedRequestErrorBody +from .errors.internal_server_error import InternalServerError +from .errors.not_implemented_error import NotImplementedError +from .types.not_implemented_error_body import NotImplementedErrorBody +from .errors.service_unavailable_error import ServiceUnavailableError +from .errors.gateway_timeout_error import GatewayTimeoutError +from .types.gateway_timeout_error_body import GatewayTimeoutErrorBody from json.decoder import JSONDecodeError from ..core.api_error import ApiError -from ..types.create_connector_o_auth import CreateConnectorOAuth -from ..types.create_connector_service_auth import 
CreateConnectorServiceAuth -from ..types.create_connector_response import CreateConnectorResponse +from .types.create_connector_o_auth import CreateConnectorOAuth +from .types.create_connector_service_auth import CreateConnectorServiceAuth +from .types.create_connector_response import CreateConnectorResponse from ..core.serialization import convert_and_respect_annotation_metadata -from ..types.get_connector_response import GetConnectorResponse +from .types.get_model_response import GetModelResponse from ..core.jsonable_encoder import jsonable_encoder -from ..types.delete_connector_response import DeleteConnectorResponse -from ..types.update_connector_response import UpdateConnectorResponse -from ..types.o_auth_authorize_response import OAuthAuthorizeResponse +from .types.datasets_get_usage_response import DatasetsGetUsageResponse +from .types.delete_connector_response import DeleteConnectorResponse +from .types.update_connector_response import UpdateConnectorResponse +from .types.o_auth_authorize_response import OAuthAuthorizeResponse from ..core.client_wrapper import AsyncClientWrapper +from .v1.client import AsyncV1Client as v1_v1_client_AsyncV1Client # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) -class ConnectorsClient: +class V1Client: def __init__(self, *, client_wrapper: SyncClientWrapper): self._client_wrapper = client_wrapper + self.v1 = v1_v1_client_V1Client(client_wrapper=self._client_wrapper) def list( self, *, - limit: typing.Optional[float] = None, - offset: typing.Optional[float] = None, + page_size: typing.Optional[float] = None, + page_token: typing.Optional[str] = None, + endpoint: typing.Optional[CompatibleEndpoint] = None, + default_only: typing.Optional[bool] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> ListConnectorsResponse: + ) -> ListModelsResponse: """ - Returns a list of connectors ordered by descending creation date (newer first). See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information. + Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models. Parameters ---------- - limit : typing.Optional[float] - Maximum number of connectors to return [0, 100]. + page_size : typing.Optional[float] + Maximum number of models to include in a page + Defaults to `20`, min value of `1`, max value of `1000`. - offset : typing.Optional[float] - Number of connectors to skip before returning results [0, inf]. + page_token : typing.Optional[str] + Page token provided in the `next_page_token` field of a previous response. + + endpoint : typing.Optional[CompatibleEndpoint] + When provided, filters the list of models to only those that are compatible with the specified endpoint. + + default_only : typing.Optional[bool] + When provided, filters the list of models to only the default model for the endpoint. This parameter is only valid when `endpoint` is provided. request_options : typing.Optional[RequestOptions] Request-specific configuration.
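For orientation, a pagination sketch built on the parameters above (it assumes a configured `Client` named `client`, and that `ListModelsResponse` exposes a `models` list plus the `next_page_token` mentioned for `page_token`; those attribute names are illustrative):

page = client.v1.list(page_size=50)
while True:
    for model in page.models:
        print(model.name)  # assumes each entry carries a name
    if not getattr(page, "next_page_token", None):
        break  # no token means this was the last page
    page = client.v1.list(page_size=50, page_token=page.next_page_token)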
Returns ------- - ListConnectorsResponse + ListModelsResponse OK Examples @@ -76,23 +90,25 @@ def list( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - client.connectors.list() + client.v1.list() """ _response = self._client_wrapper.httpx_client.request( - "v1/connectors", + "v1/models", method="GET", params={ - "limit": limit, - "offset": offset, + "page_size": page_size, + "page_token": page_token, + "endpoint": endpoint, + "default_only": default_only, }, request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - ListConnectorsResponse, + ListModelsResponse, construct_type( - type_=ListConnectorsResponse, # type: ignore + type_=ListModelsResponse, # type: ignore object_=_response.json(), ), ) @@ -269,7 +285,7 @@ def create( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - client.connectors.create( + client.v1.create( name="name", url="url", ) @@ -418,21 +434,20 @@ def create( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetConnectorResponse: + def get(self, model: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetModelResponse: """ - Retrieve a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information. + Returns the details of a model, provided its name. Parameters ---------- - id : str - The ID of the connector to retrieve. + model : str request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - GetConnectorResponse + GetModelResponse OK Examples @@ -443,21 +458,21 @@ def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = Non client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - client.connectors.get( - id="id", + client.v1.get( + model="command-r", ) """ _response = self._client_wrapper.httpx_client.request( - f"v1/connectors/{jsonable_encoder(id)}", + f"v1/models/{jsonable_encoder(model)}", method="GET", request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - GetConnectorResponse, + GetModelResponse, construct_type( - type_=GetConnectorResponse, # type: ignore + type_=GetModelResponse, # type: ignore object_=_response.json(), ), ) @@ -576,22 +591,21 @@ def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = Non raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DeleteConnectorResponse: + def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: """ - Delete a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information. + This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation. Parameters ---------- id : str - The ID of the connector to delete. + The ID of the embed job to cancel. request_options : typing.Optional[RequestOptions] Request-specific configuration. 
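A short usage sketch for the semantics described above (the job ID is a placeholder; since the method returns None, success is signalled simply by the absence of an exception):

# Cancellation is terminal: embeddings produced so far are still billed,
# and partial results are discarded rather than returned.
client.v1.cancel(id="embed-job-id")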
Returns ------- - DeleteConnectorResponse - OK + None Examples -------- @@ -601,24 +615,18 @@ def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - client.connectors.delete( + client.v1.cancel( id="id", ) """ _response = self._client_wrapper.httpx_client.request( - f"v1/connectors/{jsonable_encoder(id)}", - method="DELETE", + f"v1/embed-jobs/{jsonable_encoder(id)}/cancel", + method="POST", request_options=request_options, ) try: if 200 <= _response.status_code < 300: - return typing.cast( - DeleteConnectorResponse, - construct_type( - type_=DeleteConnectorResponse, # type: ignore - object_=_response.json(), - ), - ) + return if _response.status_code == 400: raise BadRequestError( typing.cast( @@ -734,53 +742,19 @@ def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update( - self, - id: str, - *, - name: typing.Optional[str] = OMIT, - url: typing.Optional[str] = OMIT, - excludes: typing.Optional[typing.Sequence[str]] = OMIT, - oauth: typing.Optional[CreateConnectorOAuth] = OMIT, - active: typing.Optional[bool] = OMIT, - continue_on_failure: typing.Optional[bool] = OMIT, - service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> UpdateConnectorResponse: + def get_usage(self, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetUsageResponse: """ - Update a connector by ID. Omitted fields will not be updated. See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information. + View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users. Parameters ---------- - id : str - The ID of the connector to update. - - name : typing.Optional[str] - A human-readable name for the connector. - - url : typing.Optional[str] - The URL of the connector that will be used to search for documents. - - excludes : typing.Optional[typing.Sequence[str]] - A list of fields to exclude from the prompt (fields remain in the document). - - oauth : typing.Optional[CreateConnectorOAuth] - The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. - - active : typing.Optional[bool] - - continue_on_failure : typing.Optional[bool] - - service_auth : typing.Optional[CreateConnectorServiceAuth] - The service to service authentication configuration for the connector. Cannot be specified if oauth is specified. - request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - UpdateConnectorResponse - OK + DatasetsGetUsageResponse + A successful response. 
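As an illustration, usage could be checked against the 10GB organization cap like this (the sketch assumes the response carries an `organization_usage` byte count; the actual field name may differ):

usage = client.v1.get_usage()
used_gb = int(usage.organization_usage) / 1e9  # assumed to be a byte count
print(f"{used_gb:.2f} GB used of the 10 GB organization quota")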
Examples -------- @@ -790,35 +764,19 @@ def update( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - client.connectors.update( - id="id", - ) + client.v1.get_usage() """ _response = self._client_wrapper.httpx_client.request( - f"v1/connectors/{jsonable_encoder(id)}", - method="PATCH", - json={ - "name": name, - "url": url, - "excludes": excludes, - "oauth": convert_and_respect_annotation_metadata( - object_=oauth, annotation=CreateConnectorOAuth, direction="write" - ), - "active": active, - "continue_on_failure": continue_on_failure, - "service_auth": convert_and_respect_annotation_metadata( - object_=service_auth, annotation=CreateConnectorServiceAuth, direction="write" - ), - }, + "v1/datasets/usage", + method="GET", request_options=request_options, - omit=OMIT, ) try: if 200 <= _response.status_code < 300: return typing.cast( - UpdateConnectorResponse, + DatasetsGetUsageResponse, construct_type( - type_=UpdateConnectorResponse, # type: ignore + type_=DatasetsGetUsageResponse, # type: ignore object_=_response.json(), ), ) @@ -937,30 +895,21 @@ def update( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def o_auth_authorize( - self, - id: str, - *, - after_token_redirect: typing.Optional[str] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> OAuthAuthorizeResponse: + def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DeleteConnectorResponse: """ - Authorize the connector with the given ID for the connector oauth app. See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information. + Delete a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information. Parameters ---------- id : str - The ID of the connector to authorize. - - after_token_redirect : typing.Optional[str] - The URL to redirect to after the connector has been authorized. + The ID of the connector to delete. request_options : typing.Optional[RequestOptions] Request-specific configuration. 
Returns ------- - OAuthAuthorizeResponse + DeleteConnectorResponse OK Examples @@ -971,24 +920,21 @@ def o_auth_authorize( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - client.connectors.o_auth_authorize( + client.v1.delete( id="id", ) """ _response = self._client_wrapper.httpx_client.request( - f"v1/connectors/{jsonable_encoder(id)}/oauth/authorize", - method="POST", - params={ - "after_token_redirect": after_token_redirect, - }, + f"v1/connectors/{jsonable_encoder(id)}", + method="DELETE", request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - OAuthAuthorizeResponse, + DeleteConnectorResponse, construct_type( - type_=OAuthAuthorizeResponse, # type: ignore + type_=DeleteConnectorResponse, # type: ignore object_=_response.json(), ), ) @@ -1107,70 +1053,91 @@ def o_auth_authorize( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - -class AsyncConnectorsClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - self._client_wrapper = client_wrapper - - async def list( + def update( self, + id: str, *, - limit: typing.Optional[float] = None, - offset: typing.Optional[float] = None, + name: typing.Optional[str] = OMIT, + url: typing.Optional[str] = OMIT, + excludes: typing.Optional[typing.Sequence[str]] = OMIT, + oauth: typing.Optional[CreateConnectorOAuth] = OMIT, + active: typing.Optional[bool] = OMIT, + continue_on_failure: typing.Optional[bool] = OMIT, + service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> ListConnectorsResponse: + ) -> UpdateConnectorResponse: """ - Returns a list of connectors ordered by descending creation date (newer first). See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information. + Update a connector by ID. Omitted fields will not be updated. See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information. Parameters ---------- - limit : typing.Optional[float] - Maximum number of connectors to return [0, 100]. + id : str + The ID of the connector to update. + + name : typing.Optional[str] + A human-readable name for the connector. + + url : typing.Optional[str] + The URL of the connector that will be used to search for documents. + + excludes : typing.Optional[typing.Sequence[str]] + A list of fields to exclude from the prompt (fields remain in the document). + + oauth : typing.Optional[CreateConnectorOAuth] + The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. + + active : typing.Optional[bool] - offset : typing.Optional[float] - Number of connectors to skip before returning results [0, inf]. + continue_on_failure : typing.Optional[bool] + + service_auth : typing.Optional[CreateConnectorServiceAuth] + The service to service authentication configuration for the connector. Cannot be specified if oauth is specified. request_options : typing.Optional[RequestOptions] Request-specific configuration. 
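Because omitted fields keep their current values, a partial update needs only the fields that change; a sketch with a placeholder connector ID:

# Deactivate the connector without touching its name, URL, or auth settings.
client.v1.update(id="connector-id", active=False)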
Returns ------- - ListConnectorsResponse + UpdateConnectorResponse OK Examples -------- - import asyncio - - from cohere import AsyncClient + from cohere import Client - client = AsyncClient( + client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - - - async def main() -> None: - await client.connectors.list() - - - asyncio.run(main()) + client.v1.update( + id="id", + ) """ - _response = await self._client_wrapper.httpx_client.request( - "v1/connectors", - method="GET", - params={ - "limit": limit, - "offset": offset, + _response = self._client_wrapper.httpx_client.request( + f"v1/connectors/{jsonable_encoder(id)}", + method="PATCH", + json={ + "name": name, + "url": url, + "excludes": excludes, + "oauth": convert_and_respect_annotation_metadata( + object_=oauth, annotation=CreateConnectorOAuth, direction="write" + ), + "active": active, + "continue_on_failure": continue_on_failure, + "service_auth": convert_and_respect_annotation_metadata( + object_=service_auth, annotation=CreateConnectorServiceAuth, direction="write" + ), }, request_options=request_options, + omit=OMIT, ) try: if 200 <= _response.status_code < 300: return typing.cast( - ListConnectorsResponse, + UpdateConnectorResponse, construct_type( - type_=ListConnectorsResponse, # type: ignore + type_=UpdateConnectorResponse, # type: ignore object_=_response.json(), ), ) @@ -1289,103 +1256,632 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def create( + def o_auth_authorize( self, + id: str, *, - name: str, - url: str, - description: typing.Optional[str] = OMIT, - excludes: typing.Optional[typing.Sequence[str]] = OMIT, - oauth: typing.Optional[CreateConnectorOAuth] = OMIT, - active: typing.Optional[bool] = OMIT, - continue_on_failure: typing.Optional[bool] = OMIT, - service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT, + after_token_redirect: typing.Optional[str] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> CreateConnectorResponse: + ) -> OAuthAuthorizeResponse: """ - Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. See ['Creating and Deploying a Connector'](https://docs.cohere.com/docs/creating-and-deploying-a-connector) for more information. + Authorize the connector with the given ID for the connector oauth app. See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information. Parameters ---------- - name : str - A human-readable name for the connector. - - url : str - The URL of the connector that will be used to search for documents. - - description : typing.Optional[str] - A description of the connector. - - excludes : typing.Optional[typing.Sequence[str]] - A list of fields to exclude from the prompt (fields remain in the document). - - oauth : typing.Optional[CreateConnectorOAuth] - The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. - - active : typing.Optional[bool] - Whether the connector is active or not. - - continue_on_failure : typing.Optional[bool] - Whether a chat request should continue or not if the request to this connector fails. + id : str + The ID of the connector to authorize. - service_auth : typing.Optional[CreateConnectorServiceAuth] - The service to service authentication configuration for the connector. Cannot be specified if oauth is specified. 
+ after_token_redirect : typing.Optional[str] + The URL to redirect to after the connector has been authorized. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - CreateConnectorResponse + OAuthAuthorizeResponse OK Examples -------- - import asyncio - - from cohere import AsyncClient + from cohere import Client - client = AsyncClient( + client = Client( client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN", ) - - - async def main() -> None: - await client.connectors.create( - name="name", - url="url", - ) - - - asyncio.run(main()) + client.v1.o_auth_authorize( + id="id", + ) """ - _response = await self._client_wrapper.httpx_client.request( - "v1/connectors", + _response = self._client_wrapper.httpx_client.request( + f"v1/connectors/{jsonable_encoder(id)}/oauth/authorize", method="POST", - json={ - "name": name, - "description": description, - "url": url, - "excludes": excludes, - "oauth": convert_and_respect_annotation_metadata( - object_=oauth, annotation=CreateConnectorOAuth, direction="write" - ), - "active": active, - "continue_on_failure": continue_on_failure, + params={ + "after_token_redirect": after_token_redirect, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + OAuthAuthorizeResponse, + construct_type( + type_=OAuthAuthorizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + 
type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + +class AsyncV1Client: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper + self.v1 = v1_v1_client_AsyncV1Client(client_wrapper=self._client_wrapper) + + async def list( + self, + *, + page_size: typing.Optional[float] = None, + page_token: typing.Optional[str] = None, + endpoint: typing.Optional[CompatibleEndpoint] = None, + default_only: typing.Optional[bool] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListModelsResponse: + """ + Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models. + + Parameters + ---------- + page_size : typing.Optional[float] + Maximum number of models to include in a page + Defaults to `20`, min value of `1`, max value of `1000`. + + page_token : typing.Optional[str] + Page token provided in the `next_page_token` field of a previous response. + + endpoint : typing.Optional[CompatibleEndpoint] + When provided, filters the list of models to only those that are compatible with the specified endpoint. + + default_only : typing.Optional[bool] + When provided, filters the list of models to only the default model for the endpoint. This parameter is only valid when `endpoint` is provided. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration.
+ + Returns + ------- + ListModelsResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.list() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/models", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + "endpoint": endpoint, + "default_only": default_only, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListModelsResponse, + construct_type( + type_=ListModelsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def create( + self, + *, + name: str, + url: str, + description: typing.Optional[str] = OMIT, + excludes: typing.Optional[typing.Sequence[str]] = OMIT, + oauth: 
typing.Optional[CreateConnectorOAuth] = OMIT, + active: typing.Optional[bool] = OMIT, + continue_on_failure: typing.Optional[bool] = OMIT, + service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> CreateConnectorResponse: + """ + Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. See ['Creating and Deploying a Connector'](https://docs.cohere.com/docs/creating-and-deploying-a-connector) for more information. + + Parameters + ---------- + name : str + A human-readable name for the connector. + + url : str + The URL of the connector that will be used to search for documents. + + description : typing.Optional[str] + A description of the connector. + + excludes : typing.Optional[typing.Sequence[str]] + A list of fields to exclude from the prompt (fields remain in the document). + + oauth : typing.Optional[CreateConnectorOAuth] + The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified. + + active : typing.Optional[bool] + Whether the connector is active or not. + + continue_on_failure : typing.Optional[bool] + Whether a chat request should continue or not if the request to this connector fails. + + service_auth : typing.Optional[CreateConnectorServiceAuth] + The service to service authentication configuration for the connector. Cannot be specified if oauth is specified. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + CreateConnectorResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.create( + name="name", + url="url", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/connectors", + method="POST", + json={ + "name": name, + "description": description, + "url": url, + "excludes": excludes, + "oauth": convert_and_respect_annotation_metadata( + object_=oauth, annotation=CreateConnectorOAuth, direction="write" + ), + "active": active, + "continue_on_failure": continue_on_failure, "service_auth": convert_and_respect_annotation_metadata( object_=service_auth, annotation=CreateConnectorServiceAuth, direction="write" ), }, request_options=request_options, - omit=OMIT, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + CreateConnectorResponse, + construct_type( + type_=CreateConnectorResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) 
+ ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get(self, model: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetModelResponse: + """ + Returns the details of a model, provided its name. + + Parameters + ---------- + model : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + GetModelResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.get( + model="command-r", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v1/models/{jsonable_encoder(model)}", + method="GET", + request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - CreateConnectorResponse, + GetModelResponse, construct_type( - type_=CreateConnectorResponse, # type: ignore + type_=GetModelResponse, # type: ignore object_=_response.json(), ), ) @@ -1504,22 +2000,21 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetConnectorResponse: + async def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: """ - Retrieve a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information. + This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. 
It's important to note that partial results will not be available to users after cancellation. Parameters ---------- id : str - The ID of the connector to retrieve. + The ID of the embed job to cancel. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - GetConnectorResponse - OK + None Examples -------- @@ -1534,7 +2029,7 @@ async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] async def main() -> None: - await client.connectors.get( + await client.v1.cancel( id="id", ) @@ -1542,16 +2037,171 @@ async def main() -> None: asyncio.run(main()) """ _response = await self._client_wrapper.httpx_client.request( - f"v1/connectors/{jsonable_encoder(id)}", + f"v1/embed-jobs/{jsonable_encoder(id)}/cancel", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get_usage(self, *, 
request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetUsageResponse: + """ + View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users. + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + DatasetsGetUsageResponse + A successful response. + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.get_usage() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/datasets/usage", method="GET", request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - GetConnectorResponse, + DatasetsGetUsageResponse, construct_type( - type_=GetConnectorResponse, # type: ignore + type_=DatasetsGetUsageResponse, # type: ignore object_=_response.json(), ), ) @@ -1702,7 +2352,7 @@ async def delete( async def main() -> None: - await client.connectors.delete( + await client.v1.delete( id="id", ) @@ -1899,7 +2549,7 @@ async def update( async def main() -> None: - await client.connectors.update( + await client.v1.update( id="id", ) @@ -2088,7 +2738,7 @@ async def o_auth_authorize( async def main() -> None: - await client.connectors.o_auth_authorize( + await client.v1.o_auth_authorize( id="id", ) diff --git a/src/cohere/errors/__init__.py b/src/cohere/v1/errors/__init__.py similarity index 100% rename from src/cohere/errors/__init__.py rename to src/cohere/v1/errors/__init__.py diff --git a/src/cohere/errors/bad_request_error.py b/src/cohere/v1/errors/bad_request_error.py similarity index 85% rename from src/cohere/errors/bad_request_error.py rename to src/cohere/v1/errors/bad_request_error.py index 9c13c61f9..66ac1e335 100644 --- a/src/cohere/errors/bad_request_error.py +++ b/src/cohere/v1/errors/bad_request_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError import typing diff --git a/src/cohere/errors/client_closed_request_error.py b/src/cohere/v1/errors/client_closed_request_error.py similarity index 88% rename from src/cohere/errors/client_closed_request_error.py rename to src/cohere/v1/errors/client_closed_request_error.py index 83e68c96d..2e08bde1c 100644 --- a/src/cohere/errors/client_closed_request_error.py +++ b/src/cohere/v1/errors/client_closed_request_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody diff --git a/src/cohere/errors/forbidden_error.py b/src/cohere/v1/errors/forbidden_error.py similarity index 85% rename from src/cohere/errors/forbidden_error.py rename to src/cohere/v1/errors/forbidden_error.py index d17eb4b96..cc295c87e 100644 --- a/src/cohere/errors/forbidden_error.py +++ b/src/cohere/v1/errors/forbidden_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.api_error import ApiError +from ...core.api_error import ApiError import typing diff --git a/src/cohere/errors/gateway_timeout_error.py b/src/cohere/v1/errors/gateway_timeout_error.py similarity index 87% rename from src/cohere/errors/gateway_timeout_error.py rename to src/cohere/v1/errors/gateway_timeout_error.py index 8d1617d80..354bb0488 100644 --- a/src/cohere/errors/gateway_timeout_error.py +++ b/src/cohere/v1/errors/gateway_timeout_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody diff --git a/src/cohere/errors/internal_server_error.py b/src/cohere/v1/errors/internal_server_error.py similarity index 85% rename from src/cohere/errors/internal_server_error.py rename to src/cohere/v1/errors/internal_server_error.py index db0460cb1..2bd9889f5 100644 --- a/src/cohere/errors/internal_server_error.py +++ b/src/cohere/v1/errors/internal_server_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError import typing diff --git a/src/cohere/errors/not_found_error.py b/src/cohere/v1/errors/not_found_error.py similarity index 85% rename from src/cohere/errors/not_found_error.py rename to src/cohere/v1/errors/not_found_error.py index a1235b87f..f5fb99391 100644 --- a/src/cohere/errors/not_found_error.py +++ b/src/cohere/v1/errors/not_found_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError import typing diff --git a/src/cohere/errors/not_implemented_error.py b/src/cohere/v1/errors/not_implemented_error.py similarity index 87% rename from src/cohere/errors/not_implemented_error.py rename to src/cohere/v1/errors/not_implemented_error.py index 7dc79f06c..e07f0b851 100644 --- a/src/cohere/errors/not_implemented_error.py +++ b/src/cohere/v1/errors/not_implemented_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError from ..types.not_implemented_error_body import NotImplementedErrorBody diff --git a/src/cohere/errors/service_unavailable_error.py b/src/cohere/v1/errors/service_unavailable_error.py similarity index 85% rename from src/cohere/errors/service_unavailable_error.py rename to src/cohere/v1/errors/service_unavailable_error.py index 20830fcc0..1cdf06fc3 100644 --- a/src/cohere/errors/service_unavailable_error.py +++ b/src/cohere/v1/errors/service_unavailable_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError import typing diff --git a/src/cohere/errors/too_many_requests_error.py b/src/cohere/v1/errors/too_many_requests_error.py similarity index 88% rename from src/cohere/errors/too_many_requests_error.py rename to src/cohere/v1/errors/too_many_requests_error.py index ca82908cd..091dffcab 100644 --- a/src/cohere/errors/too_many_requests_error.py +++ b/src/cohere/v1/errors/too_many_requests_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.api_error import ApiError +from ...core.api_error import ApiError from ..types.too_many_requests_error_body import TooManyRequestsErrorBody diff --git a/src/cohere/errors/unauthorized_error.py b/src/cohere/v1/errors/unauthorized_error.py similarity index 85% rename from src/cohere/errors/unauthorized_error.py rename to src/cohere/v1/errors/unauthorized_error.py index 1c00f98ab..b8bb12b44 100644 --- a/src/cohere/errors/unauthorized_error.py +++ b/src/cohere/v1/errors/unauthorized_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError import typing diff --git a/src/cohere/errors/unprocessable_entity_error.py b/src/cohere/v1/errors/unprocessable_entity_error.py similarity index 88% rename from src/cohere/errors/unprocessable_entity_error.py rename to src/cohere/v1/errors/unprocessable_entity_error.py index e27104ea7..061f6bec1 100644 --- a/src/cohere/errors/unprocessable_entity_error.py +++ b/src/cohere/v1/errors/unprocessable_entity_error.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.api_error import ApiError +from ...core.api_error import ApiError from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody diff --git a/src/cohere/finetuning/__init__.py b/src/cohere/v1/finetuning/__init__.py similarity index 100% rename from src/cohere/finetuning/__init__.py rename to src/cohere/v1/finetuning/__init__.py diff --git a/src/cohere/finetuning/finetuning/__init__.py b/src/cohere/v1/finetuning/finetuning/__init__.py similarity index 100% rename from src/cohere/finetuning/finetuning/__init__.py rename to src/cohere/v1/finetuning/finetuning/__init__.py diff --git a/src/cohere/finetuning/finetuning/types/__init__.py b/src/cohere/v1/finetuning/finetuning/types/__init__.py similarity index 100% rename from src/cohere/finetuning/finetuning/types/__init__.py rename to src/cohere/v1/finetuning/finetuning/types/__init__.py diff --git a/src/cohere/finetuning/finetuning/types/base_model.py b/src/cohere/v1/finetuning/finetuning/types/base_model.py similarity index 89% rename from src/cohere/finetuning/finetuning/types/base_model.py rename to src/cohere/v1/finetuning/finetuning/types/base_model.py index 44ad40c31..b327c5615 100644 --- a/src/cohere/finetuning/finetuning/types/base_model.py +++ b/src/cohere/v1/finetuning/finetuning/types/base_model.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. 
-from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .base_type import BaseType from .strategy import Strategy -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class BaseModel(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/base_type.py b/src/cohere/v1/finetuning/finetuning/types/base_type.py similarity index 100% rename from src/cohere/finetuning/finetuning/types/base_type.py rename to src/cohere/v1/finetuning/finetuning/types/base_type.py diff --git a/src/cohere/finetuning/finetuning/types/create_finetuned_model_response.py b/src/cohere/v1/finetuning/finetuning/types/create_finetuned_model_response.py similarity index 85% rename from src/cohere/finetuning/finetuning/types/create_finetuned_model_response.py rename to src/cohere/v1/finetuning/finetuning/types/create_finetuned_model_response.py index 7dd49fa29..eba71cabd 100644 --- a/src/cohere/finetuning/finetuning/types/create_finetuned_model_response.py +++ b/src/cohere/v1/finetuning/finetuning/types/create_finetuned_model_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing from .finetuned_model import FinetunedModel import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class CreateFinetunedModelResponse(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/delete_finetuned_model_response.py b/src/cohere/v1/finetuning/finetuning/types/delete_finetuned_model_response.py similarity index 100% rename from src/cohere/finetuning/finetuning/types/delete_finetuned_model_response.py rename to src/cohere/v1/finetuning/finetuning/types/delete_finetuned_model_response.py diff --git a/src/cohere/finetuning/finetuning/types/event.py b/src/cohere/v1/finetuning/finetuning/types/event.py similarity index 88% rename from src/cohere/finetuning/finetuning/types/event.py rename to src/cohere/v1/finetuning/finetuning/types/event.py index f7f1f93b0..f7ccae948 100644 --- a/src/cohere/finetuning/finetuning/types/event.py +++ b/src/cohere/v1/finetuning/finetuning/types/event.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .status import Status import datetime as dt -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class Event(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/finetuned_model.py b/src/cohere/v1/finetuning/finetuning/types/finetuned_model.py similarity index 94% rename from src/cohere/finetuning/finetuning/types/finetuned_model.py rename to src/cohere/v1/finetuning/finetuning/types/finetuned_model.py index d554af2d2..47fa810ed 100644 --- a/src/cohere/finetuning/finetuning/types/finetuned_model.py +++ b/src/cohere/v1/finetuning/finetuning/types/finetuned_model.py @@ -1,12 +1,12 @@ # This file was auto-generated by Fern from our API Definition. 
-from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .settings import Settings from .status import Status import datetime as dt -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class FinetunedModel(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/get_finetuned_model_response.py b/src/cohere/v1/finetuning/finetuning/types/get_finetuned_model_response.py similarity index 85% rename from src/cohere/finetuning/finetuning/types/get_finetuned_model_response.py rename to src/cohere/v1/finetuning/finetuning/types/get_finetuned_model_response.py index 1b06b0ed4..cdd98ab7b 100644 --- a/src/cohere/finetuning/finetuning/types/get_finetuned_model_response.py +++ b/src/cohere/v1/finetuning/finetuning/types/get_finetuned_model_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing from .finetuned_model import FinetunedModel import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class GetFinetunedModelResponse(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/hyperparameters.py b/src/cohere/v1/finetuning/finetuning/types/hyperparameters.py similarity index 91% rename from src/cohere/finetuning/finetuning/types/hyperparameters.py rename to src/cohere/v1/finetuning/finetuning/types/hyperparameters.py index 229549b46..2e02a37c7 100644 --- a/src/cohere/finetuning/finetuning/types/hyperparameters.py +++ b/src/cohere/v1/finetuning/finetuning/types/hyperparameters.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class Hyperparameters(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/list_events_response.py b/src/cohere/v1/finetuning/finetuning/types/list_events_response.py similarity index 89% rename from src/cohere/finetuning/finetuning/types/list_events_response.py rename to src/cohere/v1/finetuning/finetuning/types/list_events_response.py index 9439c2efe..6643fefe3 100644 --- a/src/cohere/finetuning/finetuning/types/list_events_response.py +++ b/src/cohere/v1/finetuning/finetuning/types/list_events_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. 
-from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing from .event import Event import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class ListEventsResponse(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/list_finetuned_models_response.py b/src/cohere/v1/finetuning/finetuning/types/list_finetuned_models_response.py similarity index 89% rename from src/cohere/finetuning/finetuning/types/list_finetuned_models_response.py rename to src/cohere/v1/finetuning/finetuning/types/list_finetuned_models_response.py index 36353317e..a51af657c 100644 --- a/src/cohere/finetuning/finetuning/types/list_finetuned_models_response.py +++ b/src/cohere/v1/finetuning/finetuning/types/list_finetuned_models_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing from .finetuned_model import FinetunedModel import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class ListFinetunedModelsResponse(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/list_training_step_metrics_response.py b/src/cohere/v1/finetuning/finetuning/types/list_training_step_metrics_response.py similarity index 89% rename from src/cohere/finetuning/finetuning/types/list_training_step_metrics_response.py rename to src/cohere/v1/finetuning/finetuning/types/list_training_step_metrics_response.py index cc465ab02..59836fc1b 100644 --- a/src/cohere/finetuning/finetuning/types/list_training_step_metrics_response.py +++ b/src/cohere/v1/finetuning/finetuning/types/list_training_step_metrics_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing from .training_step_metrics import TrainingStepMetrics import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class ListTrainingStepMetricsResponse(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/settings.py b/src/cohere/v1/finetuning/finetuning/types/settings.py similarity index 91% rename from src/cohere/finetuning/finetuning/types/settings.py rename to src/cohere/v1/finetuning/finetuning/types/settings.py index 9f1238bdb..9e53b8a22 100644 --- a/src/cohere/finetuning/finetuning/types/settings.py +++ b/src/cohere/v1/finetuning/finetuning/types/settings.py @@ -1,12 +1,12 @@ # This file was auto-generated by Fern from our API Definition. 
-from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel from .base_model import BaseModel import pydantic import typing from .hyperparameters import Hyperparameters from .wandb_config import WandbConfig -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class Settings(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/status.py b/src/cohere/v1/finetuning/finetuning/types/status.py similarity index 100% rename from src/cohere/finetuning/finetuning/types/status.py rename to src/cohere/v1/finetuning/finetuning/types/status.py diff --git a/src/cohere/finetuning/finetuning/types/strategy.py b/src/cohere/v1/finetuning/finetuning/types/strategy.py similarity index 100% rename from src/cohere/finetuning/finetuning/types/strategy.py rename to src/cohere/v1/finetuning/finetuning/types/strategy.py diff --git a/src/cohere/finetuning/finetuning/types/training_step_metrics.py b/src/cohere/v1/finetuning/finetuning/types/training_step_metrics.py similarity index 88% rename from src/cohere/finetuning/finetuning/types/training_step_metrics.py rename to src/cohere/v1/finetuning/finetuning/types/training_step_metrics.py index 18d323501..a132014fe 100644 --- a/src/cohere/finetuning/finetuning/types/training_step_metrics.py +++ b/src/cohere/v1/finetuning/finetuning/types/training_step_metrics.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing import datetime as dt import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class TrainingStepMetrics(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/update_finetuned_model_response.py b/src/cohere/v1/finetuning/finetuning/types/update_finetuned_model_response.py similarity index 85% rename from src/cohere/finetuning/finetuning/types/update_finetuned_model_response.py rename to src/cohere/v1/finetuning/finetuning/types/update_finetuned_model_response.py index 4bf0c4894..4f3f74954 100644 --- a/src/cohere/finetuning/finetuning/types/update_finetuned_model_response.py +++ b/src/cohere/v1/finetuning/finetuning/types/update_finetuned_model_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import typing from .finetuned_model import FinetunedModel import pydantic -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class UpdateFinetunedModelResponse(UncheckedBaseModel): diff --git a/src/cohere/finetuning/finetuning/types/wandb_config.py b/src/cohere/v1/finetuning/finetuning/types/wandb_config.py similarity index 87% rename from src/cohere/finetuning/finetuning/types/wandb_config.py rename to src/cohere/v1/finetuning/finetuning/types/wandb_config.py index 8c77de27b..1d45aa642 100644 --- a/src/cohere/finetuning/finetuning/types/wandb_config.py +++ b/src/cohere/v1/finetuning/finetuning/types/wandb_config.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ....core.unchecked_base_model import UncheckedBaseModel +from .....core.unchecked_base_model import UncheckedBaseModel import pydantic import typing -from ....core.pydantic_utilities import IS_PYDANTIC_V2 +from .....core.pydantic_utilities import IS_PYDANTIC_V2 class WandbConfig(UncheckedBaseModel): diff --git a/src/cohere/v1/types/__init__.py b/src/cohere/v1/types/__init__.py new file mode 100644 index 000000000..192fd2c34 --- /dev/null +++ b/src/cohere/v1/types/__init__.py @@ -0,0 +1,145 @@ +# This file was auto-generated by Fern from our API Definition. + +from .api_meta import ApiMeta +from .api_meta_api_version import ApiMetaApiVersion +from .api_meta_billed_units import ApiMetaBilledUnits +from .api_meta_tokens import ApiMetaTokens +from .auth_token_type import AuthTokenType +from .chat_citation import ChatCitation +from .chat_citation_generation_event import ChatCitationGenerationEvent +from .chat_connector import ChatConnector +from .chat_message import ChatMessage +from .chat_search_queries_generation_event import ChatSearchQueriesGenerationEvent +from .chat_search_query import ChatSearchQuery +from .chat_search_result import ChatSearchResult +from .chat_search_result_connector import ChatSearchResultConnector +from .chat_search_results_event import ChatSearchResultsEvent +from .chat_stream_end_event import ChatStreamEndEvent +from .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason +from .chat_stream_event import ChatStreamEvent +from .chat_stream_event_event_type import ChatStreamEventEventType +from .chat_stream_start_event import ChatStreamStartEvent +from .chat_text_generation_event import ChatTextGenerationEvent +from .chat_tool_calls_chunk_event import ChatToolCallsChunkEvent +from .chat_tool_calls_generation_event import ChatToolCallsGenerationEvent +from .classify_example import ClassifyExample +from .client_closed_request_error_body import ClientClosedRequestErrorBody +from .compatible_endpoint import CompatibleEndpoint +from .connector import Connector +from .connector_auth_status import ConnectorAuthStatus +from .connector_o_auth import ConnectorOAuth +from .create_connector_o_auth import CreateConnectorOAuth +from .create_connector_response import CreateConnectorResponse +from .create_connector_service_auth import CreateConnectorServiceAuth +from .datasets_get_usage_response import DatasetsGetUsageResponse +from .delete_connector_response import DeleteConnectorResponse +from .embed_by_type_response import EmbedByTypeResponse +from .embed_by_type_response_embeddings import EmbedByTypeResponseEmbeddings +from .embed_floats_response import EmbedFloatsResponse +from .embed_input_type import EmbedInputType +from .embedding_type import EmbeddingType +from .finish_reason import FinishReason +from .gateway_timeout_error_body import GatewayTimeoutErrorBody +from .generate_stream_end import GenerateStreamEnd +from .generate_stream_end_response import GenerateStreamEndResponse +from .generate_stream_error import GenerateStreamError +from .generate_stream_event import GenerateStreamEvent +from .generate_stream_event_event_type import GenerateStreamEventEventType +from .generate_stream_text import GenerateStreamText +from .generate_streamed_response import GenerateStreamedResponse +from .generation import Generation +from .get_model_response import GetModelResponse +from .json_response_format import JsonResponseFormat +from .list_models_response import ListModelsResponse +from .message import Message +from .non_streamed_chat_response import 
NonStreamedChatResponse +from .not_implemented_error_body import NotImplementedErrorBody +from .o_auth_authorize_response import OAuthAuthorizeResponse +from .response_format import ResponseFormat +from .single_generation import SingleGeneration +from .single_generation_in_stream import SingleGenerationInStream +from .single_generation_token_likelihoods_item import SingleGenerationTokenLikelihoodsItem +from .streamed_chat_response import StreamedChatResponse +from .text_response_format import TextResponseFormat +from .too_many_requests_error_body import TooManyRequestsErrorBody +from .tool import Tool +from .tool_call import ToolCall +from .tool_call_delta import ToolCallDelta +from .tool_message import ToolMessage +from .tool_parameter_definitions_value import ToolParameterDefinitionsValue +from .tool_result import ToolResult +from .unprocessable_entity_error_body import UnprocessableEntityErrorBody +from .update_connector_response import UpdateConnectorResponse + +__all__ = [ + "ApiMeta", + "ApiMetaApiVersion", + "ApiMetaBilledUnits", + "ApiMetaTokens", + "AuthTokenType", + "ChatCitation", + "ChatCitationGenerationEvent", + "ChatConnector", + "ChatMessage", + "ChatSearchQueriesGenerationEvent", + "ChatSearchQuery", + "ChatSearchResult", + "ChatSearchResultConnector", + "ChatSearchResultsEvent", + "ChatStreamEndEvent", + "ChatStreamEndEventFinishReason", + "ChatStreamEvent", + "ChatStreamEventEventType", + "ChatStreamStartEvent", + "ChatTextGenerationEvent", + "ChatToolCallsChunkEvent", + "ChatToolCallsGenerationEvent", + "ClassifyExample", + "ClientClosedRequestErrorBody", + "CompatibleEndpoint", + "Connector", + "ConnectorAuthStatus", + "ConnectorOAuth", + "CreateConnectorOAuth", + "CreateConnectorResponse", + "CreateConnectorServiceAuth", + "DatasetsGetUsageResponse", + "DeleteConnectorResponse", + "EmbedByTypeResponse", + "EmbedByTypeResponseEmbeddings", + "EmbedFloatsResponse", + "EmbedInputType", + "EmbeddingType", + "FinishReason", + "GatewayTimeoutErrorBody", + "GenerateStreamEnd", + "GenerateStreamEndResponse", + "GenerateStreamError", + "GenerateStreamEvent", + "GenerateStreamEventEventType", + "GenerateStreamText", + "GenerateStreamedResponse", + "Generation", + "GetModelResponse", + "JsonResponseFormat", + "ListModelsResponse", + "Message", + "NonStreamedChatResponse", + "NotImplementedErrorBody", + "OAuthAuthorizeResponse", + "ResponseFormat", + "SingleGeneration", + "SingleGenerationInStream", + "SingleGenerationTokenLikelihoodsItem", + "StreamedChatResponse", + "TextResponseFormat", + "TooManyRequestsErrorBody", + "Tool", + "ToolCall", + "ToolCallDelta", + "ToolMessage", + "ToolParameterDefinitionsValue", + "ToolResult", + "UnprocessableEntityErrorBody", + "UpdateConnectorResponse", +] diff --git a/src/cohere/types/api_meta.py b/src/cohere/v1/types/api_meta.py similarity index 87% rename from src/cohere/types/api_meta.py rename to src/cohere/v1/types/api_meta.py index 3aee6abb4..2d96aba64 100644 --- a/src/cohere/types/api_meta.py +++ b/src/cohere/v1/types/api_meta.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing from .api_meta_api_version import ApiMetaApiVersion from .api_meta_billed_units import ApiMetaBilledUnits from .api_meta_tokens import ApiMetaTokens -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/api_meta_api_version.py b/src/cohere/v1/types/api_meta_api_version.py similarity index 82% rename from src/cohere/types/api_meta_api_version.py rename to src/cohere/v1/types/api_meta_api_version.py index 1035b81cb..a96cda58e 100644 --- a/src/cohere/types/api_meta_api_version.py +++ b/src/cohere/v1/types/api_meta_api_version.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/api_meta_billed_units.py b/src/cohere/v1/types/api_meta_billed_units.py similarity index 89% rename from src/cohere/types/api_meta_billed_units.py rename to src/cohere/v1/types/api_meta_billed_units.py index c4c934d35..e7fe956c9 100644 --- a/src/cohere/types/api_meta_billed_units.py +++ b/src/cohere/v1/types/api_meta_billed_units.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ApiMetaBilledUnits(UncheckedBaseModel): diff --git a/src/cohere/types/api_meta_tokens.py b/src/cohere/v1/types/api_meta_tokens.py similarity index 86% rename from src/cohere/types/api_meta_tokens.py rename to src/cohere/v1/types/api_meta_tokens.py index c71a8f9be..f7c87c695 100644 --- a/src/cohere/types/api_meta_tokens.py +++ b/src/cohere/v1/types/api_meta_tokens.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ApiMetaTokens(UncheckedBaseModel): diff --git a/src/cohere/types/auth_token_type.py b/src/cohere/v1/types/auth_token_type.py similarity index 100% rename from src/cohere/types/auth_token_type.py rename to src/cohere/v1/types/auth_token_type.py diff --git a/src/cohere/types/chat_citation.py b/src/cohere/v1/types/chat_citation.py similarity index 92% rename from src/cohere/types/chat_citation.py rename to src/cohere/v1/types/chat_citation.py index 07f82a05d..012ff639e 100644 --- a/src/cohere/types/chat_citation.py +++ b/src/cohere/v1/types/chat_citation.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatCitation(UncheckedBaseModel): diff --git a/src/cohere/types/chat_citation_generation_event.py b/src/cohere/v1/types/chat_citation_generation_event.py similarity index 92% rename from src/cohere/types/chat_citation_generation_event.py rename to src/cohere/v1/types/chat_citation_generation_event.py index 36cd620bc..df192f323 100644 --- a/src/cohere/types/chat_citation_generation_event.py +++ b/src/cohere/v1/types/chat_citation_generation_event.py @@ -4,7 +4,7 @@ import typing from .chat_citation import ChatCitation import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatCitationGenerationEvent(ChatStreamEvent): diff --git a/src/cohere/types/chat_connector.py b/src/cohere/v1/types/chat_connector.py similarity index 92% rename from src/cohere/types/chat_connector.py rename to src/cohere/v1/types/chat_connector.py index 65e574740..49181c4d0 100644 --- a/src/cohere/types/chat_connector.py +++ b/src/cohere/v1/types/chat_connector.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatConnector(UncheckedBaseModel): diff --git a/src/cohere/types/chat_message.py b/src/cohere/v1/types/chat_message.py similarity index 87% rename from src/cohere/types/chat_message.py rename to src/cohere/v1/types/chat_message.py index 7a24cd541..66d90233e 100644 --- a/src/cohere/types/chat_message.py +++ b/src/cohere/v1/types/chat_message.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel -import pydantic +from ...core.unchecked_base_model import UncheckedBaseModel import typing +import pydantic from .tool_call import ToolCall -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatMessage(UncheckedBaseModel): @@ -14,6 +14,7 @@ class ChatMessage(UncheckedBaseModel): The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. """ + role: typing.Literal["USER"] = "USER" message: str = pydantic.Field() """ Contents of the chat message. 
diff --git a/src/cohere/types/chat_search_queries_generation_event.py b/src/cohere/v1/types/chat_search_queries_generation_event.py similarity index 92% rename from src/cohere/types/chat_search_queries_generation_event.py rename to src/cohere/v1/types/chat_search_queries_generation_event.py index eb0d6237d..f63da226f 100644 --- a/src/cohere/types/chat_search_queries_generation_event.py +++ b/src/cohere/v1/types/chat_search_queries_generation_event.py @@ -4,7 +4,7 @@ import typing from .chat_search_query import ChatSearchQuery import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatSearchQueriesGenerationEvent(ChatStreamEvent): diff --git a/src/cohere/types/chat_search_query.py b/src/cohere/v1/types/chat_search_query.py similarity index 87% rename from src/cohere/types/chat_search_query.py rename to src/cohere/v1/types/chat_search_query.py index f0695a93f..02199e2a9 100644 --- a/src/cohere/types/chat_search_query.py +++ b/src/cohere/v1/types/chat_search_query.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/types/chat_search_result.py b/src/cohere/v1/types/chat_search_result.py similarity index 91% rename from src/cohere/types/chat_search_result.py rename to src/cohere/v1/types/chat_search_result.py index 14ab32f58..f869d704d 100644 --- a/src/cohere/types/chat_search_result.py +++ b/src/cohere/v1/types/chat_search_result.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing from .chat_search_query import ChatSearchQuery from .chat_search_result_connector import ChatSearchResultConnector import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatSearchResult(UncheckedBaseModel): diff --git a/src/cohere/types/chat_search_result_connector.py b/src/cohere/v1/types/chat_search_result_connector.py similarity index 83% rename from src/cohere/types/chat_search_result_connector.py rename to src/cohere/v1/types/chat_search_result_connector.py index 22cab6a56..c771a136c 100644 --- a/src/cohere/types/chat_search_result_connector.py +++ b/src/cohere/v1/types/chat_search_result_connector.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/types/chat_search_results_event.py b/src/cohere/v1/types/chat_search_results_event.py similarity index 89% rename from src/cohere/types/chat_search_results_event.py rename to src/cohere/v1/types/chat_search_results_event.py index 85948ba85..0b16a8d20 100644 --- a/src/cohere/types/chat_search_results_event.py +++ b/src/cohere/v1/types/chat_search_results_event.py @@ -4,8 +4,8 @@ import typing from .chat_search_result import ChatSearchResult import pydantic -from .chat_document import ChatDocument -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...types.chat_document import ChatDocument +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatSearchResultsEvent(ChatStreamEvent): diff --git a/src/cohere/types/chat_stream_end_event.py b/src/cohere/v1/types/chat_stream_end_event.py similarity index 96% rename from src/cohere/types/chat_stream_end_event.py rename to src/cohere/v1/types/chat_stream_end_event.py index d42e34550..e3c84373a 100644 --- a/src/cohere/types/chat_stream_end_event.py +++ b/src/cohere/v1/types/chat_stream_end_event.py @@ -4,7 +4,7 @@ from .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason import pydantic from .non_streamed_chat_response import NonStreamedChatResponse -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/types/chat_stream_end_event_finish_reason.py b/src/cohere/v1/types/chat_stream_end_event_finish_reason.py similarity index 100% rename from src/cohere/types/chat_stream_end_event_finish_reason.py rename to src/cohere/v1/types/chat_stream_end_event_finish_reason.py diff --git a/src/cohere/datasets/types/datasets_create_response.py b/src/cohere/v1/types/chat_stream_event.py similarity index 76% rename from src/cohere/datasets/types/datasets_create_response.py rename to src/cohere/v1/types/chat_stream_event.py index 4e8bb19e0..ae0e1c8cf 100644 --- a/src/cohere/datasets/types/datasets_create_response.py +++ b/src/cohere/v1/types/chat_stream_event.py @@ -1,16 +1,14 @@ # This file was auto-generated by Fern from our API Definition. from ...core.unchecked_base_model import UncheckedBaseModel +from .chat_stream_event_event_type import ChatStreamEventEventType +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -class DatasetsCreateResponse(UncheckedBaseModel): - id: typing.Optional[str] = pydantic.Field(default=None) - """ - The dataset ID - """ +class ChatStreamEvent(UncheckedBaseModel): + event_type: ChatStreamEventEventType if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/v1/types/chat_stream_event_event_type.py b/src/cohere/v1/types/chat_stream_event_event_type.py new file mode 100644 index 000000000..be47dba3f --- /dev/null +++ b/src/cohere/v1/types/chat_stream_event_event_type.py @@ -0,0 +1,15 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +ChatStreamEventEventType = typing.Union[ + typing.Literal[ + "stream-start", + "search-queries-generation", + "search-results", + "text-generation", + "citation-generation", + "stream-end", + ], + typing.Any, +] diff --git a/src/cohere/types/chat_stream_start_event.py b/src/cohere/v1/types/chat_stream_start_event.py similarity index 91% rename from src/cohere/types/chat_stream_start_event.py rename to src/cohere/v1/types/chat_stream_start_event.py index e255ad1f0..40c34769e 100644 --- a/src/cohere/types/chat_stream_start_event.py +++ b/src/cohere/v1/types/chat_stream_start_event.py @@ -2,7 +2,7 @@ from .chat_stream_event import ChatStreamEvent import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/types/chat_text_generation_event.py b/src/cohere/v1/types/chat_text_generation_event.py similarity index 91% rename from src/cohere/types/chat_text_generation_event.py rename to src/cohere/v1/types/chat_text_generation_event.py index c7c6ae88f..d085075fb 100644 --- a/src/cohere/types/chat_text_generation_event.py +++ b/src/cohere/v1/types/chat_text_generation_event.py @@ -2,7 +2,7 @@ from .chat_stream_event import ChatStreamEvent import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/types/chat_tool_calls_chunk_event.py b/src/cohere/v1/types/chat_tool_calls_chunk_event.py similarity index 91% rename from src/cohere/types/chat_tool_calls_chunk_event.py rename to src/cohere/v1/types/chat_tool_calls_chunk_event.py index 80a32e822..c5c3a8d31 100644 --- a/src/cohere/types/chat_tool_calls_chunk_event.py +++ b/src/cohere/v1/types/chat_tool_calls_chunk_event.py @@ -2,7 +2,7 @@ from .chat_stream_event import ChatStreamEvent from .tool_call_delta import ToolCallDelta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing import pydantic diff --git a/src/cohere/types/chat_tool_calls_generation_event.py b/src/cohere/v1/types/chat_tool_calls_generation_event.py similarity index 92% rename from src/cohere/types/chat_tool_calls_generation_event.py rename to src/cohere/v1/types/chat_tool_calls_generation_event.py index f13810618..5d7f5572c 100644 --- a/src/cohere/types/chat_tool_calls_generation_event.py +++ b/src/cohere/v1/types/chat_tool_calls_generation_event.py @@ -4,7 +4,7 @@ import typing import pydantic from .tool_call import ToolCall -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ChatToolCallsGenerationEvent(ChatStreamEvent): diff --git a/src/cohere/types/classify_example.py b/src/cohere/v1/types/classify_example.py similarity index 81% rename from src/cohere/types/classify_example.py rename to src/cohere/v1/types/classify_example.py index 3132af4b5..cda1c809f 100644 --- a/src/cohere/types/classify_example.py +++ b/src/cohere/v1/types/classify_example.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/datasets/types/datasets_get_response.py b/src/cohere/v1/types/client_closed_request_error_body.py similarity index 82% rename from src/cohere/datasets/types/datasets_get_response.py rename to src/cohere/v1/types/client_closed_request_error_body.py index 9f621dea3..f178bf8bb 100644 --- a/src/cohere/datasets/types/datasets_get_response.py +++ b/src/cohere/v1/types/client_closed_request_error_body.py @@ -1,14 +1,13 @@ # This file was auto-generated by Fern from our API Definition. from ...core.unchecked_base_model import UncheckedBaseModel -from ...types.dataset import Dataset -from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic -class DatasetsGetResponse(UncheckedBaseModel): - dataset: Dataset +class ClientClosedRequestErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/types/compatible_endpoint.py b/src/cohere/v1/types/compatible_endpoint.py similarity index 100% rename from src/cohere/types/compatible_endpoint.py rename to src/cohere/v1/types/compatible_endpoint.py diff --git a/src/cohere/types/connector.py b/src/cohere/v1/types/connector.py similarity index 96% rename from src/cohere/types/connector.py rename to src/cohere/v1/types/connector.py index 77ae89c80..e9cd41d72 100644 --- a/src/cohere/types/connector.py +++ b/src/cohere/v1/types/connector.py @@ -1,12 +1,12 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing import datetime as dt from .connector_o_auth import ConnectorOAuth from .connector_auth_status import ConnectorAuthStatus -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class Connector(UncheckedBaseModel): diff --git a/src/cohere/types/connector_auth_status.py b/src/cohere/v1/types/connector_auth_status.py similarity index 100% rename from src/cohere/types/connector_auth_status.py rename to src/cohere/v1/types/connector_auth_status.py diff --git a/src/cohere/types/connector_o_auth.py b/src/cohere/v1/types/connector_o_auth.py similarity index 91% rename from src/cohere/types/connector_o_auth.py rename to src/cohere/v1/types/connector_o_auth.py index bbb090875..eabdf11bd 100644 --- a/src/cohere/types/connector_o_auth.py +++ b/src/cohere/v1/types/connector_o_auth.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ConnectorOAuth(UncheckedBaseModel): diff --git a/src/cohere/types/create_connector_o_auth.py b/src/cohere/v1/types/create_connector_o_auth.py similarity index 91% rename from src/cohere/types/create_connector_o_auth.py rename to src/cohere/v1/types/create_connector_o_auth.py index 6ff0e7b35..e2fbe375b 100644 --- a/src/cohere/types/create_connector_o_auth.py +++ b/src/cohere/v1/types/create_connector_o_auth.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class CreateConnectorOAuth(UncheckedBaseModel): diff --git a/src/cohere/types/create_connector_response.py b/src/cohere/v1/types/create_connector_response.py similarity index 81% rename from src/cohere/types/create_connector_response.py rename to src/cohere/v1/types/create_connector_response.py index b26aed526..a7e032725 100644 --- a/src/cohere/types/create_connector_response.py +++ b/src/cohere/v1/types/create_connector_response.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel from .connector import Connector -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing import pydantic diff --git a/src/cohere/types/create_connector_service_auth.py b/src/cohere/v1/types/create_connector_service_auth.py similarity index 86% rename from src/cohere/types/create_connector_service_auth.py rename to src/cohere/v1/types/create_connector_service_auth.py index faf087541..9c2b4cc37 100644 --- a/src/cohere/types/create_connector_service_auth.py +++ b/src/cohere/v1/types/create_connector_service_auth.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel from .auth_token_type import AuthTokenType import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/datasets/types/datasets_get_usage_response.py b/src/cohere/v1/types/datasets_get_usage_response.py similarity index 100% rename from src/cohere/datasets/types/datasets_get_usage_response.py rename to src/cohere/v1/types/datasets_get_usage_response.py diff --git a/src/cohere/types/delete_connector_response.py b/src/cohere/v1/types/delete_connector_response.py similarity index 100% rename from src/cohere/types/delete_connector_response.py rename to src/cohere/v1/types/delete_connector_response.py diff --git a/src/cohere/types/embed_by_type_response.py b/src/cohere/v1/types/embed_by_type_response.py similarity index 83% rename from src/cohere/types/embed_by_type_response.py rename to src/cohere/v1/types/embed_by_type_response.py index 04dac46c0..bde68761d 100644 --- a/src/cohere/types/embed_by_type_response.py +++ b/src/cohere/v1/types/embed_by_type_response.py @@ -1,14 +1,15 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel +import typing from .embed_by_type_response_embeddings import EmbedByTypeResponseEmbeddings import pydantic -import typing from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class EmbedByTypeResponse(UncheckedBaseModel): + response_type: typing.Literal["embeddings_by_type"] = "embeddings_by_type" id: str embeddings: EmbedByTypeResponseEmbeddings = pydantic.Field() """ diff --git a/src/cohere/types/embed_by_type_response_embeddings.py b/src/cohere/v1/types/embed_by_type_response_embeddings.py similarity index 91% rename from src/cohere/types/embed_by_type_response_embeddings.py rename to src/cohere/v1/types/embed_by_type_response_embeddings.py index 039c38cb1..5e054428b 100644 --- a/src/cohere/types/embed_by_type_response_embeddings.py +++ b/src/cohere/v1/types/embed_by_type_response_embeddings.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing_extensions import typing -from ..core.serialization import FieldMetadata +from ...core.serialization import FieldMetadata import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class EmbedByTypeResponseEmbeddings(UncheckedBaseModel): diff --git a/src/cohere/types/embed_floats_response.py b/src/cohere/v1/types/embed_floats_response.py similarity index 82% rename from src/cohere/types/embed_floats_response.py rename to src/cohere/v1/types/embed_floats_response.py index d462891c8..f6094f100 100644 --- a/src/cohere/types/embed_floats_response.py +++ b/src/cohere/v1/types/embed_floats_response.py @@ -1,13 +1,14 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class EmbedFloatsResponse(UncheckedBaseModel): + response_type: typing.Literal["embeddings_floats"] = "embeddings_floats" id: str embeddings: typing.List[typing.List[float]] = pydantic.Field() """ diff --git a/src/cohere/types/embed_input_type.py b/src/cohere/v1/types/embed_input_type.py similarity index 100% rename from src/cohere/types/embed_input_type.py rename to src/cohere/v1/types/embed_input_type.py diff --git a/src/cohere/types/embedding_type.py b/src/cohere/v1/types/embedding_type.py similarity index 100% rename from src/cohere/types/embedding_type.py rename to src/cohere/v1/types/embedding_type.py diff --git a/src/cohere/types/finish_reason.py b/src/cohere/v1/types/finish_reason.py similarity index 100% rename from src/cohere/types/finish_reason.py rename to src/cohere/v1/types/finish_reason.py diff --git a/src/cohere/v2/types/chat_stream_event_type.py b/src/cohere/v1/types/gateway_timeout_error_body.py similarity index 84% rename from src/cohere/v2/types/chat_stream_event_type.py rename to src/cohere/v1/types/gateway_timeout_error_body.py index 0bd3c55ba..880893f4b 100644 --- a/src/cohere/v2/types/chat_stream_event_type.py +++ b/src/cohere/v1/types/gateway_timeout_error_body.py @@ -1,15 +1,13 @@ # This file was auto-generated by Fern from our API Definition. from ...core.unchecked_base_model import UncheckedBaseModel -from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic -class ChatStreamEventType(UncheckedBaseModel): - """ - The streamed event types - """ +class GatewayTimeoutErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/types/generate_stream_end.py b/src/cohere/v1/types/generate_stream_end.py similarity index 93% rename from src/cohere/types/generate_stream_end.py rename to src/cohere/v1/types/generate_stream_end.py index de4d7b082..5d6786aae 100644 --- a/src/cohere/types/generate_stream_end.py +++ b/src/cohere/v1/types/generate_stream_end.py @@ -4,7 +4,7 @@ import typing from .finish_reason import FinishReason from .generate_stream_end_response import GenerateStreamEndResponse -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/generate_stream_end_response.py b/src/cohere/v1/types/generate_stream_end_response.py similarity index 84% rename from src/cohere/types/generate_stream_end_response.py rename to src/cohere/v1/types/generate_stream_end_response.py index ac4998548..3dd4207e9 100644 --- a/src/cohere/types/generate_stream_end_response.py +++ b/src/cohere/v1/types/generate_stream_end_response.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing from .single_generation_in_stream import SingleGenerationInStream -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/generate_stream_error.py b/src/cohere/v1/types/generate_stream_error.py similarity index 93% rename from src/cohere/types/generate_stream_error.py rename to src/cohere/v1/types/generate_stream_error.py index 53d4b6320..6d7211020 100644 --- a/src/cohere/types/generate_stream_error.py +++ b/src/cohere/v1/types/generate_stream_error.py @@ -4,7 +4,7 @@ import typing import pydantic from .finish_reason import FinishReason -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class GenerateStreamError(GenerateStreamEvent): diff --git a/src/cohere/datasets/types/datasets_list_response.py b/src/cohere/v1/types/generate_stream_event.py similarity index 75% rename from src/cohere/datasets/types/datasets_list_response.py rename to src/cohere/v1/types/generate_stream_event.py index 506223056..2b1b5e826 100644 --- a/src/cohere/datasets/types/datasets_list_response.py +++ b/src/cohere/v1/types/generate_stream_event.py @@ -1,14 +1,14 @@ # This file was auto-generated by Fern from our API Definition. from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...types.dataset import Dataset +from .generate_stream_event_event_type import GenerateStreamEventEventType from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import typing import pydantic -class DatasetsListResponse(UncheckedBaseModel): - datasets: typing.Optional[typing.List[Dataset]] = None +class GenerateStreamEvent(UncheckedBaseModel): + event_type: GenerateStreamEventEventType if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/v1/types/generate_stream_event_event_type.py b/src/cohere/v1/types/generate_stream_event_event_type.py new file mode 100644 index 000000000..2cd23ca9e --- /dev/null +++ b/src/cohere/v1/types/generate_stream_event_event_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +GenerateStreamEventEventType = typing.Union[typing.Literal["text-generation", "stream-end", "stream-error"], typing.Any] diff --git a/src/cohere/types/generate_stream_text.py b/src/cohere/v1/types/generate_stream_text.py similarity index 93% rename from src/cohere/types/generate_stream_text.py rename to src/cohere/v1/types/generate_stream_text.py index a8a959e51..838105a34 100644 --- a/src/cohere/types/generate_stream_text.py +++ b/src/cohere/v1/types/generate_stream_text.py @@ -3,7 +3,7 @@ from .generate_stream_event import GenerateStreamEvent import pydantic import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class GenerateStreamText(GenerateStreamEvent): diff --git a/src/cohere/v1/types/generate_streamed_response.py b/src/cohere/v1/types/generate_streamed_response.py new file mode 100644 index 000000000..f97dcd8fa --- /dev/null +++ b/src/cohere/v1/types/generate_streamed_response.py @@ -0,0 +1,8 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing +from .generate_stream_text import GenerateStreamText +from .generate_stream_end import GenerateStreamEnd +from .generate_stream_error import GenerateStreamError + +GenerateStreamedResponse = typing.Union[GenerateStreamText, GenerateStreamEnd, GenerateStreamError] diff --git a/src/cohere/types/generation.py b/src/cohere/v1/types/generation.py similarity index 87% rename from src/cohere/types/generation.py rename to src/cohere/v1/types/generation.py index abf5bdb0f..e956207ab 100644 --- a/src/cohere/types/generation.py +++ b/src/cohere/v1/types/generation.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .single_generation import SingleGeneration from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class Generation(UncheckedBaseModel): diff --git a/src/cohere/types/get_model_response.py b/src/cohere/v1/types/get_model_response.py similarity index 93% rename from src/cohere/types/get_model_response.py rename to src/cohere/v1/types/get_model_response.py index d73bd6c24..f23621c87 100644 --- a/src/cohere/types/get_model_response.py +++ b/src/cohere/v1/types/get_model_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .compatible_endpoint import CompatibleEndpoint -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class GetModelResponse(UncheckedBaseModel): diff --git a/src/cohere/types/json_response_format.py b/src/cohere/v1/types/json_response_format.py similarity index 84% rename from src/cohere/types/json_response_format.py rename to src/cohere/v1/types/json_response_format.py index f6a2005fd..d94e54ff5 100644 --- a/src/cohere/types/json_response_format.py +++ b/src/cohere/v1/types/json_response_format.py @@ -1,14 +1,15 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel -import typing_extensions +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.serialization import FieldMetadata +import typing_extensions +from ...core.serialization import FieldMetadata import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class JsonResponseFormat(UncheckedBaseModel): + type: typing.Literal["json_object"] = "json_object" schema_: typing_extensions.Annotated[ typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]], FieldMetadata(alias="schema") ] = pydantic.Field(default=None) diff --git a/src/cohere/types/list_models_response.py b/src/cohere/v1/types/list_models_response.py similarity index 86% rename from src/cohere/types/list_models_response.py rename to src/cohere/v1/types/list_models_response.py index 4a046fc84..4a54ebd21 100644 --- a/src/cohere/types/list_models_response.py +++ b/src/cohere/v1/types/list_models_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing from .get_model_response import GetModelResponse import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ListModelsResponse(UncheckedBaseModel): diff --git a/src/cohere/v1/types/message.py b/src/cohere/v1/types/message.py new file mode 100644 index 000000000..29d57cfc2 --- /dev/null +++ b/src/cohere/v1/types/message.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .chat_message import ChatMessage +from .tool_message import ToolMessage + +Message = typing.Union[ChatMessage, ToolMessage] diff --git a/src/cohere/types/non_streamed_chat_response.py b/src/cohere/v1/types/non_streamed_chat_response.py similarity index 93% rename from src/cohere/types/non_streamed_chat_response.py rename to src/cohere/v1/types/non_streamed_chat_response.py index 55c275271..14470f1e3 100644 --- a/src/cohere/types/non_streamed_chat_response.py +++ b/src/cohere/v1/types/non_streamed_chat_response.py @@ -1,17 +1,17 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing from .chat_citation import ChatCitation -from .chat_document import ChatDocument +from ...types.chat_document import ChatDocument from .chat_search_query import ChatSearchQuery from .chat_search_result import ChatSearchResult from .finish_reason import FinishReason from .tool_call import ToolCall from .message import Message from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class NonStreamedChatResponse(UncheckedBaseModel): diff --git a/src/cohere/types/not_implemented_error_body.py b/src/cohere/v1/types/not_implemented_error_body.py similarity index 80% rename from src/cohere/types/not_implemented_error_body.py rename to src/cohere/v1/types/not_implemented_error_body.py index 657825c31..c2d212986 100644 --- a/src/cohere/types/not_implemented_error_body.py +++ b/src/cohere/v1/types/not_implemented_error_body.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/o_auth_authorize_response.py b/src/cohere/v1/types/o_auth_authorize_response.py similarity index 84% rename from src/cohere/types/o_auth_authorize_response.py rename to src/cohere/v1/types/o_auth_authorize_response.py index fd9a84eb4..d4ca9f1d7 100644 --- a/src/cohere/types/o_auth_authorize_response.py +++ b/src/cohere/v1/types/o_auth_authorize_response.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class OAuthAuthorizeResponse(UncheckedBaseModel): diff --git a/src/cohere/v1/types/response_format.py b/src/cohere/v1/types/response_format.py new file mode 100644 index 000000000..e56c42c1f --- /dev/null +++ b/src/cohere/v1/types/response_format.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .text_response_format import TextResponseFormat +from .json_response_format import JsonResponseFormat + +ResponseFormat = typing.Union[TextResponseFormat, JsonResponseFormat] diff --git a/src/cohere/types/single_generation.py b/src/cohere/v1/types/single_generation.py similarity index 92% rename from src/cohere/types/single_generation.py rename to src/cohere/v1/types/single_generation.py index ec79c7c9c..68e1ad140 100644 --- a/src/cohere/types/single_generation.py +++ b/src/cohere/v1/types/single_generation.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .single_generation_token_likelihoods_item import SingleGenerationTokenLikelihoodsItem -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class SingleGeneration(UncheckedBaseModel): diff --git a/src/cohere/types/single_generation_in_stream.py b/src/cohere/v1/types/single_generation_in_stream.py similarity index 87% rename from src/cohere/types/single_generation_in_stream.py rename to src/cohere/v1/types/single_generation_in_stream.py index 67a873661..8c639df49 100644 --- a/src/cohere/types/single_generation_in_stream.py +++ b/src/cohere/v1/types/single_generation_in_stream.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing from .finish_reason import FinishReason -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class SingleGenerationInStream(UncheckedBaseModel): diff --git a/src/cohere/types/single_generation_token_likelihoods_item.py b/src/cohere/v1/types/single_generation_token_likelihoods_item.py similarity index 81% rename from src/cohere/types/single_generation_token_likelihoods_item.py rename to src/cohere/v1/types/single_generation_token_likelihoods_item.py index e5e1ab039..275dd91e8 100644 --- a/src/cohere/types/single_generation_token_likelihoods_item.py +++ b/src/cohere/v1/types/single_generation_token_likelihoods_item.py @@ -1,7 +1,7 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.unchecked_base_model import UncheckedBaseModel +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing import pydantic diff --git a/src/cohere/v1/types/streamed_chat_response.py b/src/cohere/v1/types/streamed_chat_response.py new file mode 100644 index 000000000..bab8837bb --- /dev/null +++ b/src/cohere/v1/types/streamed_chat_response.py @@ -0,0 +1,22 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .chat_stream_start_event import ChatStreamStartEvent +from .chat_search_queries_generation_event import ChatSearchQueriesGenerationEvent +from .chat_search_results_event import ChatSearchResultsEvent +from .chat_text_generation_event import ChatTextGenerationEvent +from .chat_citation_generation_event import ChatCitationGenerationEvent +from .chat_tool_calls_generation_event import ChatToolCallsGenerationEvent +from .chat_stream_end_event import ChatStreamEndEvent +from .chat_tool_calls_chunk_event import ChatToolCallsChunkEvent + +StreamedChatResponse = typing.Union[ + ChatStreamStartEvent, + ChatSearchQueriesGenerationEvent, + ChatSearchResultsEvent, + ChatTextGenerationEvent, + ChatCitationGenerationEvent, + ChatToolCallsGenerationEvent, + ChatStreamEndEvent, + ChatToolCallsChunkEvent, +] diff --git a/src/cohere/v2/types/tool_call2function.py b/src/cohere/v1/types/text_response_format.py similarity index 80% rename from src/cohere/v2/types/tool_call2function.py rename to src/cohere/v1/types/text_response_format.py index 1adce7fcd..022825811 100644 --- a/src/cohere/v2/types/tool_call2function.py +++ b/src/cohere/v1/types/text_response_format.py @@ -6,9 +6,8 @@ import pydantic -class ToolCall2Function(UncheckedBaseModel): - name: typing.Optional[str] = None - arguments: typing.Optional[str] = None +class TextResponseFormat(UncheckedBaseModel): + type: typing.Literal["text"] = "text" if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/types/too_many_requests_error_body.py b/src/cohere/v1/types/too_many_requests_error_body.py similarity index 80% rename from src/cohere/types/too_many_requests_error_body.py rename to src/cohere/v1/types/too_many_requests_error_body.py index 91fdb17ad..99f763209 100644 --- a/src/cohere/types/too_many_requests_error_body.py +++ b/src/cohere/v1/types/too_many_requests_error_body.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/tool.py b/src/cohere/v1/types/tool.py similarity index 92% rename from src/cohere/types/tool.py rename to src/cohere/v1/types/tool.py index 2d6bc4859..abe0d33fd 100644 --- a/src/cohere/types/tool.py +++ b/src/cohere/v1/types/tool.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. 
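`StreamedChatResponse` above is the union yielded by `chat_stream`. A minimal consumer sketch, assuming the `event_type` discriminator strings (`"text-generation"`, `"stream-end"`, etc.) match the generated event models:

```
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)
stream = client.v1.v1.chat_stream(message="Tell me a short story.")
for event in stream:
    # Each event is one StreamedChatResponse variant; event_type discriminates.
    if event.event_type == "text-generation":
        print(event.text, end="")
    elif event.event_type == "stream-end":
        print()
```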
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing from .tool_parameter_definitions_value import ToolParameterDefinitionsValue -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class Tool(UncheckedBaseModel): diff --git a/src/cohere/types/tool_call.py b/src/cohere/v1/types/tool_call.py similarity index 87% rename from src/cohere/types/tool_call.py rename to src/cohere/v1/types/tool_call.py index ac747f004..188a5f4ea 100644 --- a/src/cohere/types/tool_call.py +++ b/src/cohere/v1/types/tool_call.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import pydantic import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ToolCall(UncheckedBaseModel): diff --git a/src/cohere/types/tool_call_delta.py b/src/cohere/v1/types/tool_call_delta.py similarity index 89% rename from src/cohere/types/tool_call_delta.py rename to src/cohere/v1/types/tool_call_delta.py index 40403e68c..9c7dd2648 100644 --- a/src/cohere/types/tool_call_delta.py +++ b/src/cohere/v1/types/tool_call_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ToolCallDelta(UncheckedBaseModel): diff --git a/src/cohere/types/tool_message.py b/src/cohere/v1/types/tool_message.py similarity index 79% rename from src/cohere/types/tool_message.py rename to src/cohere/v1/types/tool_message.py index 623cbdea9..10158aad5 100644 --- a/src/cohere/types/tool_message.py +++ b/src/cohere/v1/types/tool_message.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing from .tool_result import ToolResult -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic @@ -12,6 +12,7 @@ class ToolMessage(UncheckedBaseModel): Represents tool result in the chat history. """ + role: typing.Literal["TOOL"] = "TOOL" tool_results: typing.Optional[typing.List[ToolResult]] = None if IS_PYDANTIC_V2: diff --git a/src/cohere/types/tool_parameter_definitions_value.py b/src/cohere/v1/types/tool_parameter_definitions_value.py similarity index 88% rename from src/cohere/types/tool_parameter_definitions_value.py rename to src/cohere/v1/types/tool_parameter_definitions_value.py index 806875f1e..c9fa0c2a8 100644 --- a/src/cohere/types/tool_parameter_definitions_value.py +++ b/src/cohere/v1/types/tool_parameter_definitions_value.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
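The hunk above adds a `role: typing.Literal["TOOL"]` default to `ToolMessage`, giving the `Message` union a serialized discriminator. A small sketch of what that buys, assuming `ToolMessage` is re-exported from `cohere.v1` like the other chat types and that the generated base model keeps its `.dict()` shim:

```
from cohere.v1 import ToolCall, ToolMessage, ToolResult

message = ToolMessage(
    tool_results=[
        ToolResult(
            call=ToolCall(name="get_weather", parameters={"city": "Toronto"}),
            outputs=[{"temperature": "12C"}],  # outputs must be a list of objects
        )
    ],
)
# The serialized payload now carries {"role": "TOOL", ...} without the caller
# setting it explicitly.
print(message.dict())
```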
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 class ToolParameterDefinitionsValue(UncheckedBaseModel): diff --git a/src/cohere/types/tool_result.py b/src/cohere/v1/types/tool_result.py similarity index 82% rename from src/cohere/types/tool_result.py rename to src/cohere/v1/types/tool_result.py index b98403171..c150150a2 100644 --- a/src/cohere/types/tool_result.py +++ b/src/cohere/v1/types/tool_result.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel from .tool_call import ToolCall import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/unprocessable_entity_error_body.py b/src/cohere/v1/types/unprocessable_entity_error_body.py similarity index 80% rename from src/cohere/types/unprocessable_entity_error_body.py rename to src/cohere/v1/types/unprocessable_entity_error_body.py index 7dbea4a02..34c3adea2 100644 --- a/src/cohere/types/unprocessable_entity_error_body.py +++ b/src/cohere/v1/types/unprocessable_entity_error_body.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/update_connector_response.py b/src/cohere/v1/types/update_connector_response.py similarity index 81% rename from src/cohere/types/update_connector_response.py rename to src/cohere/v1/types/update_connector_response.py index ac296dc65..46e526b45 100644 --- a/src/cohere/types/update_connector_response.py +++ b/src/cohere/v1/types/update_connector_response.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel from .connector import Connector -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing import pydantic diff --git a/src/cohere/v1/v1/__init__.py b/src/cohere/v1/v1/__init__.py new file mode 100644 index 000000000..6772b401d --- /dev/null +++ b/src/cohere/v1/v1/__init__.py @@ -0,0 +1,67 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from .types import ( + ChatRequestCitationQuality, + ChatRequestConnectorsSearchOptions, + ChatRequestPromptTruncation, + ChatRequestSafetyMode, + ChatStreamRequestCitationQuality, + ChatStreamRequestConnectorsSearchOptions, + ChatStreamRequestPromptTruncation, + ChatStreamRequestSafetyMode, + CheckApiKeyResponse, + ClassifyRequestTruncate, + ClassifyResponse, + ClassifyResponseClassificationsItem, + ClassifyResponseClassificationsItemClassificationType, + ClassifyResponseClassificationsItemLabelsValue, + DetokenizeResponse, + EmbedRequestTruncate, + EmbedResponse, + GenerateRequestReturnLikelihoods, + GenerateRequestTruncate, + GenerateStreamRequestReturnLikelihoods, + GenerateStreamRequestTruncate, + RerankRequestDocumentsItem, + RerankResponse, + RerankResponseResultsItem, + RerankResponseResultsItemDocument, + SummarizeRequestExtractiveness, + SummarizeRequestFormat, + SummarizeRequestLength, + SummarizeResponse, + TokenizeResponse, +) + +__all__ = [ + "ChatRequestCitationQuality", + "ChatRequestConnectorsSearchOptions", + "ChatRequestPromptTruncation", + "ChatRequestSafetyMode", + "ChatStreamRequestCitationQuality", + "ChatStreamRequestConnectorsSearchOptions", + "ChatStreamRequestPromptTruncation", + "ChatStreamRequestSafetyMode", + "CheckApiKeyResponse", + "ClassifyRequestTruncate", + "ClassifyResponse", + "ClassifyResponseClassificationsItem", + "ClassifyResponseClassificationsItemClassificationType", + "ClassifyResponseClassificationsItemLabelsValue", + "DetokenizeResponse", + "EmbedRequestTruncate", + "EmbedResponse", + "GenerateRequestReturnLikelihoods", + "GenerateRequestTruncate", + "GenerateStreamRequestReturnLikelihoods", + "GenerateStreamRequestTruncate", + "RerankRequestDocumentsItem", + "RerankResponse", + "RerankResponseResultsItem", + "RerankResponseResultsItemDocument", + "SummarizeRequestExtractiveness", + "SummarizeRequestFormat", + "SummarizeRequestLength", + "SummarizeResponse", + "TokenizeResponse", +] diff --git a/src/cohere/v1/v1/client.py b/src/cohere/v1/v1/client.py new file mode 100644 index 000000000..bedafb8e8 --- /dev/null +++ b/src/cohere/v1/v1/client.py @@ -0,0 +1,7870 @@ +# This file was auto-generated by Fern from our API Definition. 
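The re-exports above define the regenerated import layout: request-scoped aliases such as `ChatRequestPromptTruncation` now live under `cohere.v1.v1`, while shared models stay under `cohere.v1`. A hedged sketch using the `"AUTO_PRESERVE_ORDER"` value documented in the docstrings below:

```
from cohere import Client
from cohere.v1.v1 import ChatRequestPromptTruncation

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)
# The alias is a Literal union, so a plain string value type-checks.
prompt_truncation: ChatRequestPromptTruncation = "AUTO_PRESERVE_ORDER"
response = client.v1.v1.chat(
    message="Hello!",
    prompt_truncation=prompt_truncation,
)
print(response.text)
```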
+ +import typing +from ...core.client_wrapper import SyncClientWrapper +from ..types.message import Message +from .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation +from ..types.chat_connector import ChatConnector +from ...types.chat_document import ChatDocument +from .types.chat_stream_request_citation_quality import ChatStreamRequestCitationQuality +from ..types.tool import Tool +from ..types.tool_result import ToolResult +from ..types.response_format import ResponseFormat +from .types.chat_stream_request_safety_mode import ChatStreamRequestSafetyMode +from ...core.request_options import RequestOptions +from ..types.streamed_chat_response import StreamedChatResponse +from ...core.serialization import convert_and_respect_annotation_metadata +from ...core.unchecked_base_model import construct_type +import json +from ..errors.bad_request_error import BadRequestError +from ..errors.unauthorized_error import UnauthorizedError +from ..errors.forbidden_error import ForbiddenError +from ..errors.not_found_error import NotFoundError +from ..errors.unprocessable_entity_error import UnprocessableEntityError +from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody +from ..errors.too_many_requests_error import TooManyRequestsError +from ..types.too_many_requests_error_body import TooManyRequestsErrorBody +from ..errors.client_closed_request_error import ClientClosedRequestError +from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody +from ..errors.internal_server_error import InternalServerError +from ..errors.not_implemented_error import NotImplementedError +from ..types.not_implemented_error_body import NotImplementedErrorBody +from ..errors.service_unavailable_error import ServiceUnavailableError +from ..errors.gateway_timeout_error import GatewayTimeoutError +from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody +from json.decoder import JSONDecodeError +from ...core.api_error import ApiError +from .types.chat_request_prompt_truncation import ChatRequestPromptTruncation +from .types.chat_request_citation_quality import ChatRequestCitationQuality +from .types.chat_request_safety_mode import ChatRequestSafetyMode +from ..types.non_streamed_chat_response import NonStreamedChatResponse +from .types.generate_stream_request_truncate import GenerateStreamRequestTruncate +from .types.generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods +from ..types.generate_streamed_response import GenerateStreamedResponse +from .types.generate_request_truncate import GenerateRequestTruncate +from .types.generate_request_return_likelihoods import GenerateRequestReturnLikelihoods +from ..types.generation import Generation +from ..types.embed_input_type import EmbedInputType +from ..types.embedding_type import EmbeddingType +from .types.embed_request_truncate import EmbedRequestTruncate +from .types.embed_response import EmbedResponse +from .types.rerank_request_documents_item import RerankRequestDocumentsItem +from .types.rerank_response import RerankResponse +from ..types.classify_example import ClassifyExample +from .types.classify_request_truncate import ClassifyRequestTruncate +from .types.classify_response import ClassifyResponse +from .types.summarize_request_length import SummarizeRequestLength +from .types.summarize_request_format import SummarizeRequestFormat +from .types.summarize_request_extractiveness import SummarizeRequestExtractiveness +from .types.summarize_response 
import SummarizeResponse +from .types.tokenize_response import TokenizeResponse +from .types.detokenize_response import DetokenizeResponse +from .types.check_api_key_response import CheckApiKeyResponse +from ..finetuning.finetuning.types.list_finetuned_models_response import ListFinetunedModelsResponse +from ..finetuning.finetuning.types.finetuned_model import FinetunedModel +from ..finetuning.finetuning.types.create_finetuned_model_response import CreateFinetunedModelResponse +from ..finetuning.finetuning.types.get_finetuned_model_response import GetFinetunedModelResponse +from ...core.jsonable_encoder import jsonable_encoder +from ..finetuning.finetuning.types.delete_finetuned_model_response import DeleteFinetunedModelResponse +from ..finetuning.finetuning.types.settings import Settings +from ..finetuning.finetuning.types.status import Status +import datetime as dt +from ..finetuning.finetuning.types.update_finetuned_model_response import UpdateFinetunedModelResponse +from ..finetuning.finetuning.types.list_events_response import ListEventsResponse +from ..finetuning.finetuning.types.list_training_step_metrics_response import ListTrainingStepMetricsResponse +from ...core.client_wrapper import AsyncClientWrapper + +# this is used as the default value for optional parameters +OMIT = typing.cast(typing.Any, ...) + + +class V1Client: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._client_wrapper = client_wrapper + + def chat_stream( + self, + *, + message: str, + accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, + model: typing.Optional[str] = OMIT, + preamble: typing.Optional[str] = OMIT, + chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, + conversation_id: typing.Optional[str] = OMIT, + prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT, + connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, + search_queries_only: typing.Optional[bool] = OMIT, + documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, + citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT, + temperature: typing.Optional[float] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + max_input_tokens: typing.Optional[int] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + return_prompt: typing.Optional[bool] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, + force_single_step: typing.Optional[bool] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> typing.Iterator[StreamedChatResponse]: + """ + Generates a text response to a user message. + To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). + + Parameters + ---------- + message : str + Text input for the model to respond to. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + accepts : typing.Optional[typing.Literal["text/event-stream"]] + Pass text/event-stream to receive the streamed response as server-sent events. 
The default is `\n` delimited events. + + model : typing.Optional[str] + Defaults to `command-r-plus-08-2024`. + + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + + Compatible Deployments: Cohere Platform, Private Deployments + + + preamble : typing.Optional[str] + When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. + + The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + chat_history : typing.Optional[typing.Sequence[Message]] + A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. + + Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. + + The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + conversation_id : typing.Optional[str] + An alternative to `chat_history`. + + Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + + Compatible Deployments: Cohere Platform + + + prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation] + Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. + + Dictates how the prompt will be constructed. + + With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. + + With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. + + With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + + connectors : typing.Optional[typing.Sequence[ChatConnector]] + Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. + + When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). 
+ + Compatible Deployments: Cohere Platform + + + search_queries_only : typing.Optional[bool] + Defaults to `false`. + + When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + documents : typing.Optional[typing.Sequence[ChatDocument]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. + + Example: + ``` + [ + { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, + { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, + ] + ``` + + Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. + + Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. + + An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. + + An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. + + See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + citation_quality : typing.Optional[ChatStreamRequestCitationQuality] + Defaults to `"accurate"`. + + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + temperature : typing.Optional[float] + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_input_tokens : typing.Optional[int] + The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. + + Input will be truncated according to the `prompt_truncation` parameter. + + Compatible Deployments: Cohere Platform + + + k : typing.Optional[int] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. 
+ + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + stop_sequences : typing.Optional[typing.Sequence[str]] + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + frequency_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + presence_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without + any pre-processing. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + return_prompt : typing.Optional[bool] + The prompt is returned in the `prompt` response field when this is enabled. + + tools : typing.Optional[typing.Sequence[Tool]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. + + When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + tool_results : typing.Optional[typing.Sequence[ToolResult]] + A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. + Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. + + **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. 
+ ``` + tool_results = [ + { + "call": { + "name": <tool name>, + "parameters": { + <param name>: <param value> + } + }, + "outputs": [{ + <key>: <value> + }] + }, + ... + ] + ``` + **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + force_single_step : typing.Optional[bool] + Forces the chat to be single step. Defaults to `false`. + + response_format : typing.Optional[ResponseFormat] + + safety_mode : typing.Optional[ChatStreamRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Yields + ------ + typing.Iterator[StreamedChatResponse] + + + Examples + -------- + from cohere import Client + from cohere.v1 import ( + ChatConnector, + ChatMessage, + TextResponseFormat, + Tool, + ToolCall, + ToolParameterDefinitionsValue, + ToolResult, + ) + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + response = client.v1.v1.chat_stream( + message="string", + model="string", + preamble="string", + chat_history=[ + ChatMessage( + message="string", + tool_calls=[ + ToolCall( + name="string", + parameters={"string": {"key": "value"}}, + ) + ], + ) + ], + conversation_id="string", + prompt_truncation="OFF", + connectors=[ + ChatConnector( + id="string", + user_access_token="string", + continue_on_failure=True, + options={"string": {"key": "value"}}, + ) + ], + search_queries_only=True, + documents=[{"string": {"key": "value"}}], + citation_quality="fast", + temperature=1.1, + max_tokens=1, + max_input_tokens=1, + k=1, + p=1.1, + seed=1, + stop_sequences=["string"], + frequency_penalty=1.1, + presence_penalty=1.1, + raw_prompting=True, + return_prompt=True, + tools=[ + Tool( + name="string", + description="string", + parameter_definitions={ + "string": ToolParameterDefinitionsValue( + description="string", + type="string", + required=True, + ) + }, + ) + ], + tool_results=[ + ToolResult( + call=ToolCall( + name="string", + parameters={"string": {"key": "value"}}, + ), + outputs=[{"string": {"key": "value"}}], + ) + ], + force_single_step=True, + response_format=TextResponseFormat(), + safety_mode="CONTEXTUAL", + ) + for chunk in response: + yield chunk + """ + with self._client_wrapper.httpx_client.stream( + "v1/chat", + method="POST", + json={ + "message": message, + "model": model, + "preamble": preamble, + "chat_history": convert_and_respect_annotation_metadata( + object_=chat_history, annotation=typing.Sequence[Message], direction="write" + ), + "conversation_id": conversation_id, + "prompt_truncation": prompt_truncation, + "connectors": convert_and_respect_annotation_metadata( + object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" + ), +
"search_queries_only": search_queries_only, + "documents": documents, + "citation_quality": citation_quality, + "temperature": temperature, + "max_tokens": max_tokens, + "max_input_tokens": max_input_tokens, + "k": k, + "p": p, + "seed": seed, + "stop_sequences": stop_sequences, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "raw_prompting": raw_prompting, + "return_prompt": return_prompt, + "tools": convert_and_respect_annotation_metadata( + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "tool_results": convert_and_respect_annotation_metadata( + object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" + ), + "force_single_step": force_single_step, + "response_format": convert_and_respect_annotation_metadata( + object_=response_format, annotation=ResponseFormat, direction="write" + ), + "safety_mode": safety_mode, + "stream": True, + }, + headers={ + "Accepts": str(accepts) if accepts is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) as _response: + try: + if 200 <= _response.status_code < 300: + for _text in _response.iter_lines(): + try: + if len(_text) == 0: + continue + yield typing.cast( + StreamedChatResponse, + construct_type( + type_=StreamedChatResponse, # type: ignore + object_=json.loads(_text), + ), + ) + except: + pass + return + _response.read() + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore 
+ object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def chat( + self, + *, + message: str, + accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, + model: typing.Optional[str] = OMIT, + preamble: typing.Optional[str] = OMIT, + chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, + conversation_id: typing.Optional[str] = OMIT, + prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT, + connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, + search_queries_only: typing.Optional[bool] = OMIT, + documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, + citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT, + temperature: typing.Optional[float] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + max_input_tokens: typing.Optional[int] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + return_prompt: typing.Optional[bool] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, + force_single_step: typing.Optional[bool] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> NonStreamedChatResponse: + """ + Generates a text response to a user message. + To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). + + Parameters + ---------- + message : str + Text input for the model to respond to. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + accepts : typing.Optional[typing.Literal["text/event-stream"]] + Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. + + model : typing.Optional[str] + Defaults to `command-r-plus-08-2024`. + + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + + Compatible Deployments: Cohere Platform, Private Deployments + + + preamble : typing.Optional[str] + When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. + + The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. 
+ + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + chat_history : typing.Optional[typing.Sequence[Message]] + A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. + + Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. + + The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + conversation_id : typing.Optional[str] + An alternative to `chat_history`. + + Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + + Compatible Deployments: Cohere Platform + + + prompt_truncation : typing.Optional[ChatRequestPromptTruncation] + Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. + + Dictates how the prompt will be constructed. + + With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. + + With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. + + With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + + connectors : typing.Optional[typing.Sequence[ChatConnector]] + Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. + + When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + + Compatible Deployments: Cohere Platform + + + search_queries_only : typing.Optional[bool] + Defaults to `false`. + + When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + documents : typing.Optional[typing.Sequence[ChatDocument]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. + + Example: + ``` + [ + { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, + { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, + ] + ``` + + Keys and values from each document will be serialized to a string and passed to the model. 
The resulting generation will include citations that reference some of these documents. + + Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. + + An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. + + An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. + + See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + citation_quality : typing.Optional[ChatRequestCitationQuality] + Defaults to `"accurate"`. + + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + temperature : typing.Optional[float] + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_input_tokens : typing.Optional[int] + The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. + + Input will be truncated according to the `prompt_truncation` parameter. + + Compatible Deployments: Cohere Platform + + + k : typing.Optional[int] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + stop_sequences : typing.Optional[typing.Sequence[str]] + A list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + frequency_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + presence_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without + any pre-processing. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + return_prompt : typing.Optional[bool] + The prompt is returned in the `prompt` response field when this is enabled. + + tools : typing.Optional[typing.Sequence[Tool]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. + + When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + tool_results : typing.Optional[typing.Sequence[ToolResult]] + A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. + Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. + + **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. + ``` + tool_results = [ + { + "call": { + "name": <tool name>, + "parameters": { + <param name>: <param value> + } + }, + "outputs": [{ + <key>: <value> + }] + }, + ... + ] + ``` + **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + force_single_step : typing.Optional[bool] + Forces the chat to be single step. Defaults to `false`. + + response_format : typing.Optional[ResponseFormat] + + safety_mode : typing.Optional[ChatRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.
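The `tools`/`tool_results` docstrings above describe a two-call loop: the model proposes `tool_calls`, the caller executes them, and the outputs go back as `tool_results`. A hedged sketch of that round trip (the tool name and outputs are illustrative, not from the patch):

```
from cohere import Client
from cohere.v1 import Tool, ToolParameterDefinitionsValue, ToolResult

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)
tools = [
    Tool(
        name="daily_sales_report",
        description="Retrieves the sales report for a given day.",
        parameter_definitions={
            "day": ToolParameterDefinitionsValue(
                description="The day in YYYY-MM-DD format.", type="str", required=True
            )
        },
    )
]

# First turn: the model proposes tool calls; per the docs, `text` is "" here.
first = client.v1.v1.chat(message="How were sales on 2023-09-29?", tools=tools)

# Run the tools yourself, then send each output back wrapped in a list.
tool_results = [
    ToolResult(call=call, outputs=[{"revenue": "10000"}])
    for call in (first.tool_calls or [])
]
final = client.v1.v1.chat(message="", tools=tools, tool_results=tool_results)
print(final.text)
```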
+ + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + NonStreamedChatResponse + + + Examples + -------- + from cohere import Client + from cohere.v1 import ChatMessage + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.chat( + message="Can you give me a global market overview of solar panels?", + chat_history=[ + ChatMessage( + message="Hi!", + ), + ChatMessage( + message="How can I help you today?", + ), + ], + prompt_truncation="OFF", + temperature=0.3, + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/chat", + method="POST", + json={ + "message": message, + "model": model, + "preamble": preamble, + "chat_history": convert_and_respect_annotation_metadata( + object_=chat_history, annotation=typing.Sequence[Message], direction="write" + ), + "conversation_id": conversation_id, + "prompt_truncation": prompt_truncation, + "connectors": convert_and_respect_annotation_metadata( + object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" + ), + "search_queries_only": search_queries_only, + "documents": documents, + "citation_quality": citation_quality, + "temperature": temperature, + "max_tokens": max_tokens, + "max_input_tokens": max_input_tokens, + "k": k, + "p": p, + "seed": seed, + "stop_sequences": stop_sequences, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "raw_prompting": raw_prompting, + "return_prompt": return_prompt, + "tools": convert_and_respect_annotation_metadata( + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "tool_results": convert_and_respect_annotation_metadata( + object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" + ), + "force_single_step": force_single_step, + "response_format": convert_and_respect_annotation_metadata( + object_=response_format, annotation=ResponseFormat, direction="write" + ), + "safety_mode": safety_mode, + "stream": False, + }, + headers={ + "Accepts": str(accepts) if accepts is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + NonStreamedChatResponse, + construct_type( + type_=NonStreamedChatResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 
422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def generate_stream( + self, + *, + prompt: str, + model: typing.Optional[str] = OMIT, + num_generations: typing.Optional[int] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT, + temperature: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + preset: typing.Optional[str] = OMIT, + end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> typing.Iterator[GenerateStreamedResponse]: + """ + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + + Generates realistic text conditioned on a given input. + + Parameters + ---------- + prompt : str + The input text that serves as the starting point for generating the response. + Note: The prompt will be pre-processed and modified before reaching the model. + + + model : typing.Optional[str] + The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). + Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. 
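`generate_stream` above is the legacy streaming counterpart of Chat. A minimal consumer sketch, assuming the same `event_type` discriminator convention as the chat stream events:

```
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)
for event in client.v1.v1.generate_stream(
    prompt="Write a one-line toast for a retirement party.",
    max_tokens=60,
):
    if event.event_type == "text-generation":
        print(event.text, end="")
```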
+ + num_generations : typing.Optional[int] + The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + + This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. + + Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. + + + truncate : typing.Optional[GenerateStreamRequestTruncate] + One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + + Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + + If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. + + temperature : typing.Optional[float] + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. + Defaults to `0.75`, min value of `0.0`, max value of `5.0`. + + + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + preset : typing.Optional[str] + Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). + When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. + + + end_sequences : typing.Optional[typing.Sequence[str]] + The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. + + stop_sequences : typing.Optional[typing.Sequence[str]] + The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text. + + k : typing.Optional[int] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`, min value of `0.01`, max value of `0.99`. + + + frequency_penalty : typing.Optional[float] + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + + Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. + + + presence_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Can be used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. + + + return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods] + One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. + + If `GENERATION` is selected, the token likelihoods will only be provided for generated text. + + If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without any pre-processing. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Yields + ------ + typing.Iterator[GenerateStreamedResponse] + + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + response = client.v1.v1.generate_stream( + prompt="string", + model="string", + num_generations=1, + max_tokens=1, + truncate="NONE", + temperature=1.1, + seed=1, + preset="string", + end_sequences=["string"], + stop_sequences=["string"], + k=1, + p=1.1, + frequency_penalty=1.1, + presence_penalty=1.1, + return_likelihoods="GENERATION", + raw_prompting=True, + ) + for chunk in response: + print(chunk) + """ + with self._client_wrapper.httpx_client.stream( + "v1/generate", + method="POST", + json={ + "prompt": prompt, + "model": model, + "num_generations": num_generations, + "max_tokens": max_tokens, + "truncate": truncate, + "temperature": temperature, + "seed": seed, + "preset": preset, + "end_sequences": end_sequences, + "stop_sequences": stop_sequences, + "k": k, + "p": p, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "return_likelihoods": return_likelihoods, + "raw_prompting": raw_prompting, + "stream": True, + }, + request_options=request_options, + omit=OMIT, + ) as _response: + try: + if 200 <= _response.status_code < 300: + for _text in _response.iter_lines(): + try: + if len(_text) == 0: + continue + yield typing.cast( + GenerateStreamedResponse, + construct_type( + type_=GenerateStreamedResponse, # type: ignore + object_=json.loads(_text), + ), + ) + except: + pass + return + _response.read() + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + 
if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def generate( + self, + *, + prompt: str, + model: typing.Optional[str] = OMIT, + num_generations: typing.Optional[int] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + truncate: typing.Optional[GenerateRequestTruncate] = OMIT, + temperature: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + preset: typing.Optional[str] = OMIT, + end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> Generation: + """ + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + + Generates realistic text conditioned on a given input. + + Parameters + ---------- + prompt : str + The input text that serves as the starting point for generating the response. + Note: The prompt will be pre-processed and modified before reaching the model. + + + model : typing.Optional[str] + The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). + Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. + + num_generations : typing.Optional[int] + The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. 
+ + This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details. + + Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt. + + + truncate : typing.Optional[GenerateRequestTruncate] + One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + + Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + + If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. + + temperature : typing.Optional[float] + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details. + Defaults to `0.75`, min value of `0.0`, max value of `5.0`. + + + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + preset : typing.Optional[str] + Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate). + When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters. + + + end_sequences : typing.Optional[typing.Sequence[str]] + The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. + + stop_sequences : typing.Optional[typing.Sequence[str]] + The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text. + + k : typing.Optional[int] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`, min value of `0.01`, max value of `0.99`. + + + frequency_penalty : typing.Optional[float] + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + + Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. + + + presence_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models. 
+ + + return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods] + One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. + + If `GENERATION` is selected, the token likelihoods will only be provided for generated text. + + If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without any pre-processing. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Generation + + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.generate( + prompt="Please explain to me how LLMs work", + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/generate", + method="POST", + json={ + "prompt": prompt, + "model": model, + "num_generations": num_generations, + "max_tokens": max_tokens, + "truncate": truncate, + "temperature": temperature, + "seed": seed, + "preset": preset, + "end_sequences": end_sequences, + "stop_sequences": stop_sequences, + "k": k, + "p": p, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "return_likelihoods": return_likelihoods, + "raw_prompting": raw_prompting, + "stream": False, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + Generation, + construct_type( + type_=Generation, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + 
construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def embed( + self, + *, + texts: typing.Optional[typing.Sequence[str]] = OMIT, + images: typing.Optional[typing.Sequence[str]] = OMIT, + model: typing.Optional[str] = OMIT, + input_type: typing.Optional[EmbedInputType] = OMIT, + embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT, + truncate: typing.Optional[EmbedRequestTruncate] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> EmbedResponse: + """ + This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents. + + Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page. + + If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search). + + Parameters + ---------- + texts : typing.Optional[typing.Sequence[str]] + An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality. + + images : typing.Optional[typing.Sequence[str]] + An array of image data URIs for the model to embed. Maximum number of images per call is `1`. + + The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and have a maximum size of 5MB. + + model : typing.Optional[str] + Defaults to `embed-english-v2.0`. + + The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. + + Available models and corresponding embedding dimensions: + + * `embed-english-v3.0` 1024 + * `embed-multilingual-v3.0` 1024 + * `embed-english-light-v3.0` 384 + * `embed-multilingual-light-v3.0` 384 + + * `embed-english-v2.0` 4096 + * `embed-english-light-v2.0` 1024 + * `embed-multilingual-v2.0` 768 + + input_type : typing.Optional[EmbedInputType] + + embedding_types : typing.Optional[typing.Sequence[EmbeddingType]] + Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types: + + * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models. + * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models. + * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models. + * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. 
+ * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. + + truncate : typing.Optional[EmbedRequestTruncate] + One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + + Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + + If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EmbedResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.embed() + """ + _response = self._client_wrapper.httpx_client.request( + "v1/embed", + method="POST", + json={ + "texts": texts, + "images": images, + "model": model, + "input_type": input_type, + "embedding_types": embedding_types, + "truncate": truncate, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EmbedResponse, + construct_type( + type_=EmbedResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) 
+ ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def rerank( + self, + *, + query: str, + documents: typing.Sequence[RerankRequestDocumentsItem], + model: typing.Optional[str] = OMIT, + top_n: typing.Optional[int] = OMIT, + rank_fields: typing.Optional[typing.Sequence[str]] = OMIT, + return_documents: typing.Optional[bool] = OMIT, + max_chunks_per_doc: typing.Optional[int] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> RerankResponse: + """ + This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score. + + Parameters + ---------- + query : str + The search query. + + documents : typing.Sequence[RerankRequestDocumentsItem] + A list of document objects or strings to rerank. + If a document object is provided, the `text` field is required and all other fields will be preserved in the response. + + The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000. + + We recommend a maximum of 1,000 documents for optimal endpoint performance. + + model : typing.Optional[str] + The identifier of the model to use, one of: `rerank-english-v3.0`, `rerank-multilingual-v3.0`, `rerank-english-v2.0`, `rerank-multilingual-v2.0`. + + top_n : typing.Optional[int] + The number of most relevant documents or indices to return, defaults to the length of the documents. + + rank_fields : typing.Optional[typing.Sequence[str]] + If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). If not provided, the model will use the default text field for ranking. + + return_documents : typing.Optional[bool] + - If false, returns results without the doc text - the API will return a list of {index, relevance score} where index is inferred from the list passed into the request. + - If true, returns results with the doc text passed in - the API will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request. + + max_chunks_per_doc : typing.Optional[int] + The maximum number of chunks to produce internally from a document. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
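For orientation, here is a minimal rerank sketch following the parameter semantics above. It is a sketch, not part of the generated client: the document dicts and `rank_fields` usage mirror the docstring, while the `results[i].index` / `relevance_score` response fields are assumptions about the generated `RerankResponse` model.

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Documents may be plain strings or objects; for objects the `text`
# field is required and any extra fields are preserved in the response.
docs = [
    {"title": "Solar 101", "text": "Photovoltaic cells convert sunlight into electricity."},
    {"title": "Wind power", "text": "Turbines convert kinetic energy into electricity."},
]

response = client.v1.v1.rerank(
    query="How do solar panels work?",
    documents=docs,
    rank_fields=["title", "text"],  # rank on title first, then text
    top_n=1,
    return_documents=True,
)

# Assumed response shape: an ordered list of {index, document, relevance_score}.
for result in response.results:
    print(result.index, result.relevance_score)
```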
+ + Returns + ------- + RerankResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.rerank( + query="query", + documents=["documents"], + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/rerank", + method="POST", + json={ + "model": model, + "query": query, + "documents": convert_and_respect_annotation_metadata( + object_=documents, annotation=typing.Sequence[RerankRequestDocumentsItem], direction="write" + ), + "top_n": top_n, + "rank_fields": rank_fields, + "return_documents": return_documents, + "max_chunks_per_doc": max_chunks_per_doc, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + RerankResponse, + construct_type( + type_=RerankResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def classify( + 
self, + *, + inputs: typing.Sequence[str], + examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT, + model: typing.Optional[str] = OMIT, + preset: typing.Optional[str] = OMIT, + truncate: typing.Optional[ClassifyRequestTruncate] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> ClassifyResponse: + """ + This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference. + Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. + + Parameters + ---------- + inputs : typing.Sequence[str] + A list of up to 96 texts to be classified. Each one must be a non-empty string. + There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models). + Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts. + + examples : typing.Optional[typing.Sequence[ClassifyExample]] + An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`. + Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. + + model : typing.Optional[str] + The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID. + + preset : typing.Optional[str] + The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. + + truncate : typing.Optional[ClassifyRequestTruncate] + One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
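As a reading aid, a hedged sketch of the classify call documented above. The `ClassifyExample` import parallels the `ChatMessage` import shown earlier in this file; the `classifications[i].prediction` field is an assumption about the generated `ClassifyResponse` model.

```python
from cohere import Client
from cohere.v1 import ClassifyExample  # assumed re-export, by analogy with ChatMessage

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

# Each unique label requires at least 2 examples.
examples = [
    ClassifyExample(text="I love this product", label="positive"),
    ClassifyExample(text="Works exactly as advertised", label="positive"),
    ClassifyExample(text="Broke after one day", label="negative"),
    ClassifyExample(text="Would not recommend", label="negative"),
]

response = client.v1.v1.classify(
    inputs=["Great value for the price"],
    examples=examples,
)

# Assumed response shape: one classification per input.
for classification in response.classifications:
    print(classification.prediction)
```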
+ + Returns + ------- + ClassifyResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.classify( + inputs=["inputs"], + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/classify", + method="POST", + json={ + "inputs": inputs, + "examples": convert_and_respect_annotation_metadata( + object_=examples, annotation=typing.Sequence[ClassifyExample], direction="write" + ), + "model": model, + "preset": preset, + "truncate": truncate, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ClassifyResponse, + construct_type( + type_=ClassifyResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def summarize( + self, + *, + text: str, + length: typing.Optional[SummarizeRequestLength] = OMIT, + format: 
typing.Optional[SummarizeRequestFormat] = OMIT, + model: typing.Optional[str] = OMIT, + extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT, + temperature: typing.Optional[float] = OMIT, + additional_command: typing.Optional[str] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> SummarizeResponse: + """ + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + + Generates a summary in English for a given text. + + Parameters + ---------- + text : str + The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English. + + length : typing.Optional[SummarizeRequestLength] + One of `short`, `medium`, `long`, or `auto`, defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text. + + format : typing.Optional[SummarizeRequestFormat] + One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free-form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text. + + model : typing.Optional[str] + The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better. + + extractiveness : typing.Optional[SummarizeRequestExtractiveness] + One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text. + + temperature : typing.Optional[float] + Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1. + + additional_command : typing.Optional[str] + A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda". + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
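A short usage sketch for the legacy summarize endpoint as documented above; `response.summary` is an assumption about the generated `SummarizeResponse` model.

```python
from cohere import Client

client = Client(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)

response = client.v1.v1.summarize(
    text="Solar photovoltaic capacity has grown rapidly over the last decade, "
    "driven by falling module prices and supportive policy...",  # up to 100,000 characters
    length="short",
    format="bullets",
    extractiveness="low",
    temperature=0.3,
)

print(response.summary)  # assumed field on SummarizeResponse
```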
+ + Returns + ------- + SummarizeResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.summarize( + text="text", + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/summarize", + method="POST", + json={ + "text": text, + "length": length, + "format": format, + "model": model, + "extractiveness": extractiveness, + "temperature": temperature, + "additional_command": additional_command, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + SummarizeResponse, + construct_type( + type_=SummarizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def tokenize( + self, *, text: str, model: str, request_options: typing.Optional[RequestOptions] = None + ) -> TokenizeResponse: + """ + This endpoint splits input text 
into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page. + + Parameters + ---------- + text : str + The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters. + + model : str + An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + TokenizeResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.tokenize( + text="tokenize me! :D", + model="command", + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/tokenize", + method="POST", + json={ + "text": text, + "model": model, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + TokenizeResponse, + construct_type( + type_=TokenizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + 
object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def detokenize( + self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None + ) -> DetokenizeResponse: + """ + This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page. + + Parameters + ---------- + tokens : typing.Sequence[int] + The list of tokens to be detokenized. + + model : str + An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + DetokenizeResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.detokenize( + tokens=[1], + model="model", + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/detokenize", + method="POST", + json={ + "tokens": tokens, + "model": model, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + DetokenizeResponse, + construct_type( + type_=DetokenizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if 
_response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def check_api_key(self, *, request_options: typing.Optional[RequestOptions] = None) -> CheckApiKeyResponse: + """ + Checks that the api key in the Authorization header is valid and active + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + CheckApiKeyResponse + OK + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.check_api_key() + """ + _response = self._client_wrapper.httpx_client.request( + "v1/check-api-key", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + CheckApiKeyResponse, + construct_type( + type_=CheckApiKeyResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + 
typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def list_finetuned_models( + self, + *, + page_size: typing.Optional[int] = None, + page_token: typing.Optional[str] = None, + order_by: typing.Optional[str] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListFinetunedModelsResponse: + """ + Parameters + ---------- + page_size : typing.Optional[int] + Maximum number of results to be returned by the server. If 0, defaults to 50. + + page_token : typing.Optional[str] + Request a specific page of the list results. + + order_by : typing.Optional[str] + Comma separated list of fields. For example: "created_at,name". The default + sorting order is ascending. To specify descending order for a field, append + " desc" to the field name. For example: "created_at desc,name". + + Supported sorting fields: + + - created_at (default) + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + ListFinetunedModelsResponse + A successful response. + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.list_finetuned_models() + """ + _response = self._client_wrapper.httpx_client.request( + "v1/finetuning/finetuned-models", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + "order_by": order_by, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListFinetunedModelsResponse, + construct_type( + type_=ListFinetunedModelsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise 
ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def create_finetuned_model( + self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None + ) -> CreateFinetunedModelResponse: + """ + Parameters + ---------- + request : FinetunedModel + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + CreateFinetunedModelResponse + A successful response. + + Examples + -------- + from cohere import Client + from cohere.v1.finetuning.finetuning import BaseModel, FinetunedModel, Settings + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.create_finetuned_model( + request=FinetunedModel( + name="api-test", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_CHAT", + ), + dataset_id="my-dataset-id", + ), + ), + ) + """ + _response = self._client_wrapper.httpx_client.request( + "v1/finetuning/finetuned-models", + method="POST", + json=convert_and_respect_annotation_metadata(object_=request, annotation=FinetunedModel, direction="write"), + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + CreateFinetunedModelResponse, + construct_type( + type_=CreateFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_finetuned_model( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> GetFinetunedModelResponse: + """ + Parameters + ---------- + id : str + The fine-tuned model ID. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + GetFinetunedModelResponse + A successful response. 
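+
+        Notes
+        -----
+        Non-2xx statuses are raised as the typed errors listed above
+        (``NotFoundError``, ``BadRequestError``, ...), with ``ApiError`` as the
+        catch-all for anything unmapped. A minimal handling sketch, with
+        ``client`` constructed as in the Examples section below (the import
+        paths and the ``ft-123`` ID are assumptions, not taken from this diff):
+
+            from cohere.core.api_error import ApiError  # path assumed
+            from cohere.v1.errors import NotFoundError  # path assumed
+
+            try:
+                response = client.v1.v1.get_finetuned_model(id="ft-123")
+            except NotFoundError:
+                ...  # no fine-tuned model with this ID
+            except ApiError as e:
+                print(e.status_code, e.body)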
+ + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.get_finetuned_model( + id="id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + GetFinetunedModelResponse, + construct_type( + type_=GetFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def delete_finetuned_model( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> DeleteFinetunedModelResponse: + """ + Parameters + ---------- + id : str + The fine-tuned model ID. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + DeleteFinetunedModelResponse + A successful response. 
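+
+        Notes
+        -----
+        A 404 surfaces as ``NotFoundError`` rather than as a response object, so
+        cleanup code that should be idempotent can treat it as success. A small
+        sketch, with ``client`` constructed as in the Examples section below
+        (the ``ft-123`` ID and the import path are assumptions):
+
+            from cohere.v1.errors import NotFoundError  # path assumed
+
+            try:
+                client.v1.v1.delete_finetuned_model(id="ft-123")
+            except NotFoundError:
+                pass  # already deleted; nothing to do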
+ + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.delete_finetuned_model( + id="id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + DeleteFinetunedModelResponse, + construct_type( + type_=DeleteFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def update_finetuned_model( + self, + id: str, + *, + name: str, + settings: Settings, + creator_id: typing.Optional[str] = OMIT, + organization_id: typing.Optional[str] = OMIT, + status: typing.Optional[Status] = OMIT, + created_at: typing.Optional[dt.datetime] = OMIT, + updated_at: typing.Optional[dt.datetime] = OMIT, + completed_at: typing.Optional[dt.datetime] = OMIT, + last_used: typing.Optional[dt.datetime] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> UpdateFinetunedModelResponse: + """ + Parameters + ---------- + id : str + FinetunedModel ID. + + name : str + FinetunedModel name (e.g. `foobar`). + + settings : Settings + FinetunedModel settings such as dataset, hyperparameters... + + creator_id : typing.Optional[str] + User ID of the creator. + + organization_id : typing.Optional[str] + Organization ID. + + status : typing.Optional[Status] + Current stage in the life-cycle of the fine-tuned model. + + created_at : typing.Optional[dt.datetime] + Creation timestamp. + + updated_at : typing.Optional[dt.datetime] + Latest update timestamp. + + completed_at : typing.Optional[dt.datetime] + Timestamp for the completed fine-tuning. + + last_used : typing.Optional[dt.datetime] + Timestamp for the latest request to this fine-tuned model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + UpdateFinetunedModelResponse + A successful response. 
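+
+        Notes
+        -----
+        ``name`` and ``settings`` are required on every update, even when only
+        one of them changes, so a read-modify-write pattern is the simplest way
+        to make a partial change. A sketch, with ``client`` constructed as in
+        the Examples section below (the ``ft-123`` ID and the
+        ``finetuned_model`` attribute on the get response are assumptions, not
+        taken from this diff):
+
+            current = client.v1.v1.get_finetuned_model(id="ft-123").finetuned_model
+            client.v1.v1.update_finetuned_model(
+                id="ft-123",
+                name="renamed-model",
+                settings=current.settings,
+            )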
+ + Examples + -------- + from cohere import Client + from cohere.v1.finetuning.finetuning import BaseModel, Settings + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.update_finetuned_model( + id="id", + name="name", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_UNSPECIFIED", + ), + dataset_id="dataset_id", + ), + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", + method="PATCH", + json={ + "name": name, + "creator_id": creator_id, + "organization_id": organization_id, + "settings": convert_and_respect_annotation_metadata( + object_=settings, annotation=Settings, direction="write" + ), + "status": status, + "created_at": created_at, + "updated_at": updated_at, + "completed_at": completed_at, + "last_used": last_used, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + UpdateFinetunedModelResponse, + construct_type( + type_=UpdateFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def list_events( + self, + finetuned_model_id: str, + *, + page_size: typing.Optional[int] = None, + page_token: typing.Optional[str] = None, + order_by: typing.Optional[str] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListEventsResponse: + """ + Parameters + ---------- + finetuned_model_id : str + The parent fine-tuned model ID. + + page_size : typing.Optional[int] + Maximum number of results to be returned by the server. If 0, defaults to 50. + + page_token : typing.Optional[str] + Request a specific page of the list results. + + order_by : typing.Optional[str] + Comma separated list of fields. For example: "created_at,name". The default + sorting order is ascending. To specify descending order for a field, append + " desc" to the field name. For example: "created_at desc,name". 
+ + Supported sorting fields: + + - created_at (default) + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + ListEventsResponse + A successful response. + + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.list_events( + finetuned_model_id="finetuned_model_id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/events", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + "order_by": order_by, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListEventsResponse, + construct_type( + type_=ListEventsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def list_training_step_metrics( + self, + finetuned_model_id: str, + *, + page_size: typing.Optional[int] = None, + page_token: typing.Optional[str] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListTrainingStepMetricsResponse: + """ + Parameters + ---------- + finetuned_model_id : str + The parent fine-tuned model ID. + + page_size : typing.Optional[int] + Maximum number of results to be returned by the server. If 0, defaults to 50. + + page_token : typing.Optional[str] + Request a specific page of the list results. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + ListTrainingStepMetricsResponse + A successful response. 
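+
+        Notes
+        -----
+        Results are paginated: pass the token returned with one page to fetch
+        the next. A sketch that drains every page, with ``client`` constructed
+        as in the Examples section below (the ``next_page_token`` and
+        ``step_metrics`` response fields are assumptions, not taken from this
+        diff):
+
+            metrics = []
+            token = None
+            while True:
+                page = client.v1.v1.list_training_step_metrics(
+                    finetuned_model_id="ft-123",
+                    page_size=50,
+                    page_token=token,
+                )
+                metrics.extend(page.step_metrics)
+                token = page.next_page_token
+                if not token:
+                    break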
+ + Examples + -------- + from cohere import Client + + client = Client( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + client.v1.v1.list_training_step_metrics( + finetuned_model_id="finetuned_model_id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/training-step-metrics", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListTrainingStepMetricsResponse, + construct_type( + type_=ListTrainingStepMetricsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + +class AsyncV1Client: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper + + async def chat_stream( + self, + *, + message: str, + accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, + model: typing.Optional[str] = OMIT, + preamble: typing.Optional[str] = OMIT, + chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, + conversation_id: typing.Optional[str] = OMIT, + prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT, + connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, + search_queries_only: typing.Optional[bool] = OMIT, + documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, + citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT, + temperature: typing.Optional[float] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + max_input_tokens: typing.Optional[int] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + return_prompt: typing.Optional[bool] = OMIT, + 
tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, + force_single_step: typing.Optional[bool] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> typing.AsyncIterator[StreamedChatResponse]: + """ + Generates a text response to a user message. + To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). + + Parameters + ---------- + message : str + Text input for the model to respond to. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + accepts : typing.Optional[typing.Literal["text/event-stream"]] + Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. + + model : typing.Optional[str] + Defaults to `command-r-plus-08-2024`. + + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + + Compatible Deployments: Cohere Platform, Private Deployments + + + preamble : typing.Optional[str] + When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. + + The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + chat_history : typing.Optional[typing.Sequence[Message]] + A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. + + Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. + + The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + conversation_id : typing.Optional[str] + An alternative to `chat_history`. + + Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + + Compatible Deployments: Cohere Platform + + + prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation] + Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. + + Dictates how the prompt will be constructed. + + With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. 
+ + With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. + + With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + + connectors : typing.Optional[typing.Sequence[ChatConnector]] + Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. + + When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + + Compatible Deployments: Cohere Platform + + + search_queries_only : typing.Optional[bool] + Defaults to `false`. + + When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + documents : typing.Optional[typing.Sequence[ChatDocument]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. + + Example: + ``` + [ + { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, + { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, + ] + ``` + + Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. + + Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. + + An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. + + An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. + + See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + citation_quality : typing.Optional[ChatStreamRequestCitationQuality] + Defaults to `"accurate"`. + + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + temperature : typing.Optional[float] + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. 
+ + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_input_tokens : typing.Optional[int] + The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. + + Input will be truncated according to the `prompt_truncation` parameter. + + Compatible Deployments: Cohere Platform + + + k : typing.Optional[int] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + stop_sequences : typing.Optional[typing.Sequence[str]] + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + frequency_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + presence_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without + any pre-processing. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + return_prompt : typing.Optional[bool] + The prompt is returned in the `prompt` response field when this is enabled. + + tools : typing.Optional[typing.Sequence[Tool]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. 
+ + When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + tool_results : typing.Optional[typing.Sequence[ToolResult]] + A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. + Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. + + **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. + ``` + tool_results = [ + { + "call": { + "name": , + "parameters": { + : + } + }, + "outputs": [{ + : + }] + }, + ... + ] + ``` + **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + force_single_step : typing.Optional[bool] + Forces the chat to be single step. Defaults to `false`. + + response_format : typing.Optional[ResponseFormat] + + safety_mode : typing.Optional[ChatStreamRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+
+        Yields
+        ------
+        typing.AsyncIterator[StreamedChatResponse]
+
+
+        Examples
+        --------
+        import asyncio
+
+        from cohere import AsyncClient
+        from cohere.v1 import (
+            ChatConnector,
+            ChatMessage,
+            TextResponseFormat,
+            Tool,
+            ToolCall,
+            ToolParameterDefinitionsValue,
+            ToolResult,
+        )
+
+        client = AsyncClient(
+            client_name="YOUR_CLIENT_NAME",
+            token="YOUR_TOKEN",
+        )
+
+
+        async def main() -> None:
+            response = client.v1.v1.chat_stream(
+                message="string",
+                model="string",
+                preamble="string",
+                chat_history=[
+                    ChatMessage(
+                        message="string",
+                        tool_calls=[
+                            ToolCall(
+                                name="string",
+                                parameters={"string": {"key": "value"}},
+                            )
+                        ],
+                    )
+                ],
+                conversation_id="string",
+                prompt_truncation="OFF",
+                connectors=[
+                    ChatConnector(
+                        id="string",
+                        user_access_token="string",
+                        continue_on_failure=True,
+                        options={"string": {"key": "value"}},
+                    )
+                ],
+                search_queries_only=True,
+                documents=[{"string": {"key": "value"}}],
+                citation_quality="fast",
+                temperature=1.1,
+                max_tokens=1,
+                max_input_tokens=1,
+                k=1,
+                p=1.1,
+                seed=1,
+                stop_sequences=["string"],
+                frequency_penalty=1.1,
+                presence_penalty=1.1,
+                raw_prompting=True,
+                return_prompt=True,
+                tools=[
+                    Tool(
+                        name="string",
+                        description="string",
+                        parameter_definitions={
+                            "string": ToolParameterDefinitionsValue(
+                                description="string",
+                                type="string",
+                                required=True,
+                            )
+                        },
+                    )
+                ],
+                tool_results=[
+                    ToolResult(
+                        call=ToolCall(
+                            name="string",
+                            parameters={"string": {"key": "value"}},
+                        ),
+                        outputs=[{"string": {"key": "value"}}],
+                    )
+                ],
+                force_single_step=True,
+                response_format=TextResponseFormat(),
+                safety_mode="CONTEXTUAL",
+            )
+            async for chunk in response:
+                print(chunk)
+
+
+        asyncio.run(main())
+        """
+        async with self._client_wrapper.httpx_client.stream(
+            "v1/chat",
+            method="POST",
+            json={
+                "message": message,
+                "model": model,
+                "preamble": preamble,
+                "chat_history": convert_and_respect_annotation_metadata(
+                    object_=chat_history, annotation=typing.Sequence[Message], direction="write"
+                ),
+                "conversation_id": conversation_id,
+                "prompt_truncation": prompt_truncation,
+                "connectors": convert_and_respect_annotation_metadata(
+                    object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write"
+                ),
+                "search_queries_only": search_queries_only,
+                "documents": documents,
+                "citation_quality": citation_quality,
+                "temperature": temperature,
+                "max_tokens": max_tokens,
+                "max_input_tokens": max_input_tokens,
+                "k": k,
+                "p": p,
+                "seed": seed,
+                "stop_sequences": stop_sequences,
+                "frequency_penalty": frequency_penalty,
+                "presence_penalty": presence_penalty,
+                "raw_prompting": raw_prompting,
+                "return_prompt": return_prompt,
+                "tools": convert_and_respect_annotation_metadata(
+                    object_=tools, annotation=typing.Sequence[Tool], direction="write"
+                ),
+                "tool_results": convert_and_respect_annotation_metadata(
+                    object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write"
+                ),
+                "force_single_step": force_single_step,
+                "response_format": convert_and_respect_annotation_metadata(
+                    object_=response_format, annotation=ResponseFormat, direction="write"
+                ),
+                "safety_mode": safety_mode,
+                "stream": True,
+            },
+            headers={
+                "Accepts": str(accepts) if accepts is not None else None,
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            try:
+                if 200 <= _response.status_code < 300:
+                    async for _text
in _response.aiter_lines(): + try: + if len(_text) == 0: + continue + yield typing.cast( + StreamedChatResponse, + construct_type( + type_=StreamedChatResponse, # type: ignore + object_=json.loads(_text), + ), + ) + except: + pass + return + await _response.aread() + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def chat( + self, + *, + message: str, + accepts: typing.Optional[typing.Literal["text/event-stream"]] = None, + model: typing.Optional[str] = OMIT, + preamble: typing.Optional[str] = OMIT, + chat_history: typing.Optional[typing.Sequence[Message]] = OMIT, + conversation_id: typing.Optional[str] = OMIT, + prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT, + connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT, + search_queries_only: typing.Optional[bool] = OMIT, + documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT, + citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT, + temperature: 
typing.Optional[float] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + max_input_tokens: typing.Optional[int] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + return_prompt: typing.Optional[bool] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, + force_single_step: typing.Optional[bool] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> NonStreamedChatResponse: + """ + Generates a text response to a user message. + To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api). + + Parameters + ---------- + message : str + Text input for the model to respond to. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + accepts : typing.Optional[typing.Literal["text/event-stream"]] + Pass text/event-stream to receive the streamed response as server-sent events. The default is `\n` delimited events. + + model : typing.Optional[str] + Defaults to `command-r-plus-08-2024`. + + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + + Compatible Deployments: Cohere Platform, Private Deployments + + + preamble : typing.Optional[str] + When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. + + The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + chat_history : typing.Optional[typing.Sequence[Message]] + A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`. + + Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. + + The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + conversation_id : typing.Optional[str] + An alternative to `chat_history`. + + Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + + Compatible Deployments: Cohere Platform + + + prompt_truncation : typing.Optional[ChatRequestPromptTruncation] + Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases. 
+ + Dictates how the prompt will be constructed. + + With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance. + + With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. + + With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + + connectors : typing.Optional[typing.Sequence[ChatConnector]] + Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. + + When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + + Compatible Deployments: Cohere Platform + + + search_queries_only : typing.Optional[bool] + Defaults to `false`. + + When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + documents : typing.Optional[typing.Sequence[ChatDocument]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. + + Example: + ``` + [ + { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, + { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, + ] + ``` + + Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. + + Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words. + + An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model. + + An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. + + See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + citation_quality : typing.Optional[ChatRequestCitationQuality] + Defaults to `"accurate"`. + + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. 
+ + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + temperature : typing.Optional[float] + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + max_input_tokens : typing.Optional[int] + The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. + + Input will be truncated according to the `prompt_truncation` parameter. + + Compatible Deployments: Cohere Platform + + + k : typing.Optional[int] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + stop_sequences : typing.Optional[typing.Sequence[str]] + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + frequency_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + presence_penalty : typing.Optional[float] + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without + any pre-processing. 
+ + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + return_prompt : typing.Optional[bool] + The prompt is returned in the `prompt` response field when this is enabled. + + tools : typing.Optional[typing.Sequence[Tool]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. + + When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + tool_results : typing.Optional[typing.Sequence[ToolResult]] + A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well. + Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries. + + **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list. + ``` + tool_results = [ + { + "call": { + "name": , + "parameters": { + : + } + }, + "outputs": [{ + : + }] + }, + ... + ] + ``` + **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + force_single_step : typing.Optional[bool] + Forces the chat to be single step. Defaults to `false`. + + response_format : typing.Optional[ResponseFormat] + + safety_mode : typing.Optional[ChatRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + NonStreamedChatResponse + + + Examples + -------- + import asyncio + + from cohere import AsyncClient + from cohere.v1 import ChatMessage + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.chat( + message="Can you give me a global market overview of solar panels?", + chat_history=[ + ChatMessage( + message="Hi!", + ), + ChatMessage( + message="How can I help you today?", + ), + ], + prompt_truncation="OFF", + temperature=0.3, + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/chat", + method="POST", + json={ + "message": message, + "model": model, + "preamble": preamble, + "chat_history": convert_and_respect_annotation_metadata( + object_=chat_history, annotation=typing.Sequence[Message], direction="write" + ), + "conversation_id": conversation_id, + "prompt_truncation": prompt_truncation, + "connectors": convert_and_respect_annotation_metadata( + object_=connectors, annotation=typing.Sequence[ChatConnector], direction="write" + ), + "search_queries_only": search_queries_only, + "documents": documents, + "citation_quality": citation_quality, + "temperature": temperature, + "max_tokens": max_tokens, + "max_input_tokens": max_input_tokens, + "k": k, + "p": p, + "seed": seed, + "stop_sequences": stop_sequences, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "raw_prompting": raw_prompting, + "return_prompt": return_prompt, + "tools": convert_and_respect_annotation_metadata( + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "tool_results": convert_and_respect_annotation_metadata( + object_=tool_results, annotation=typing.Sequence[ToolResult], direction="write" + ), + "force_single_step": force_single_step, + "response_format": convert_and_respect_annotation_metadata( + object_=response_format, annotation=ResponseFormat, direction="write" + ), + "safety_mode": safety_mode, + "stream": False, + }, + headers={ + "Accepts": str(accepts) if accepts is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + NonStreamedChatResponse, + construct_type( + type_=NonStreamedChatResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( 
+ TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def generate_stream( + self, + *, + prompt: str, + model: typing.Optional[str] = OMIT, + num_generations: typing.Optional[int] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT, + temperature: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + preset: typing.Optional[str] = OMIT, + end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> typing.AsyncIterator[GenerateStreamedResponse]: + """ + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + + Generates realistic text conditioned on a given input. + + Parameters + ---------- + prompt : str + The input text that serves as the starting point for generating the response. + Note: The prompt will be pre-processed and modified before reaching the model. + + + model : typing.Optional[str] + The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). + Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. + + num_generations : typing.Optional[int] + The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. 
+
+            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
+
+            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
+
+
+        truncate : typing.Optional[GenerateStreamRequestTruncate]
+            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
+
+            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
+
+            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
+
+        temperature : typing.Optional[float]
+            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
+            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
+
+
+        seed : typing.Optional[int]
+            If specified, the backend will make a best effort to sample tokens
+            deterministically, such that repeated requests with the same
+            seed and parameters should return the same result. However,
+            determinism cannot be totally guaranteed.
+            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+
+
+        preset : typing.Optional[str]
+            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).
+            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
+
+
+        end_sequences : typing.Optional[typing.Sequence[str]]
+            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
+
+        stop_sequences : typing.Optional[typing.Sequence[str]]
+            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
+
+        k : typing.Optional[int]
+            Ensures only the top `k` most likely tokens are considered for generation at each step.
+            Defaults to `0`, min value of `0`, max value of `500`.
+
+
+        p : typing.Optional[float]
+            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
+            Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
+
+
+        frequency_penalty : typing.Optional[float]
+            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
+
+            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
+
+
+        presence_penalty : typing.Optional[float]
+            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
+
+            Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
+
+            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
+
+
+        return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods]
+            One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
+
+            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
+
+            If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
+
+        raw_prompting : typing.Optional[bool]
+            When enabled, the user's prompt will be sent to the model without any pre-processing.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Yields
+        ------
+        typing.AsyncIterator[GenerateStreamedResponse]
+
+
+        Examples
+        --------
+        import asyncio
+
+        from cohere import AsyncClient
+
+        client = AsyncClient(
+            client_name="YOUR_CLIENT_NAME",
+            token="YOUR_TOKEN",
+        )
+
+
+        async def main() -> None:
+            response = await client.v1.v1.generate_stream(
+                prompt="string",
+                model="string",
+                num_generations=1,
+                max_tokens=1,
+                truncate="NONE",
+                temperature=1.1,
+                seed=1,
+                preset="string",
+                end_sequences=["string"],
+                stop_sequences=["string"],
+                k=1,
+                p=1.1,
+                frequency_penalty=1.1,
+                presence_penalty=1.1,
+                return_likelihoods="GENERATION",
+                raw_prompting=True,
+            )
+            async for chunk in response:
+                print(chunk)
+
+
+        asyncio.run(main())
+        """
+        async with self._client_wrapper.httpx_client.stream(
+            "v1/generate",
+            method="POST",
+            json={
+                "prompt": prompt,
+                "model": model,
+                "num_generations": num_generations,
+                "max_tokens": max_tokens,
+                "truncate": truncate,
+                "temperature": temperature,
+                "seed": seed,
+                "preset": preset,
+                "end_sequences": end_sequences,
+                "stop_sequences": stop_sequences,
+                "k": k,
+                "p": p,
+                "frequency_penalty": frequency_penalty,
+                "presence_penalty": presence_penalty,
+                "return_likelihoods": return_likelihoods,
+                "raw_prompting": raw_prompting,
+                "stream": True,
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            try:
+                if 200 <= _response.status_code < 300:
+                    async for _text in _response.aiter_lines():
+                        try:
+                            if len(_text) == 0:
+                                continue
+                            yield typing.cast(
+                                GenerateStreamedResponse,
+                                construct_type(
+                                    type_=GenerateStreamedResponse,  # type: ignore
+                                    object_=json.loads(_text),
+                                ),
+                            )
+                        except:
+                            pass
+                    return
+                await _response.aread()
+                if _response.status_code == 400:
+                    raise BadRequestError(
+                        typing.cast(
+                            typing.Optional[typing.Any],
+                            construct_type(
+                                type_=typing.Optional[typing.Any],  # type: ignore
+                                object_=_response.json(),
+                            ),
+                        )
+                    )
+                if _response.status_code == 401:
+                    raise UnauthorizedError(
+                        typing.cast(
+                            typing.Optional[typing.Any],
+                            construct_type(
+                                type_=typing.Optional[typing.Any],  # type: ignore
+                                object_=_response.json(),
+                            ),
+                        )
+                    )
+                if _response.status_code == 403:
+                    raise ForbiddenError(
+                        typing.cast(
+                            typing.Optional[typing.Any],
+                            construct_type(
+                                type_=typing.Optional[typing.Any],  # type: ignore
+                                object_=_response.json(),
+                            ),
+                        )
+                    )
+                if _response.status_code == 404:
+                    raise NotFoundError(
+                        typing.cast(
+                            typing.Optional[typing.Any],
+                            construct_type(
+                                type_=typing.Optional[typing.Any],  # type: ignore
+                                object_=_response.json(),
+                            ),
+                        )
+                    )
+                if _response.status_code == 422:
+                    raise UnprocessableEntityError(
+                        typing.cast(
+                            UnprocessableEntityErrorBody,
+                            construct_type(
+                                type_=UnprocessableEntityErrorBody,  # type: ignore
+                                object_=_response.json(),
+                            ),
+                        )
+                    )
+                if _response.status_code == 429:
+                    raise TooManyRequestsError(
+                        typing.cast(
+                            TooManyRequestsErrorBody,
+                            construct_type(
+
type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def generate( + self, + *, + prompt: str, + model: typing.Optional[str] = OMIT, + num_generations: typing.Optional[int] = OMIT, + max_tokens: typing.Optional[int] = OMIT, + truncate: typing.Optional[GenerateRequestTruncate] = OMIT, + temperature: typing.Optional[float] = OMIT, + seed: typing.Optional[int] = OMIT, + preset: typing.Optional[str] = OMIT, + end_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, + k: typing.Optional[int] = OMIT, + p: typing.Optional[float] = OMIT, + frequency_penalty: typing.Optional[float] = OMIT, + presence_penalty: typing.Optional[float] = OMIT, + return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT, + raw_prompting: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> Generation: + """ + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + + Generates realistic text conditioned on a given input. + + Parameters + ---------- + prompt : str + The input text that serves as the starting point for generating the response. + Note: The prompt will be pre-processed and modified before reaching the model. + + + model : typing.Optional[str] + The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). + Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. + + num_generations : typing.Optional[int] + The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`. + + + max_tokens : typing.Optional[int] + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. 
+
+            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
+
+            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
+
+
+        truncate : typing.Optional[GenerateRequestTruncate]
+            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
+
+            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
+
+            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
+
+        temperature : typing.Optional[float]
+            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
+            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
+
+
+        seed : typing.Optional[int]
+            If specified, the backend will make a best effort to sample tokens
+            deterministically, such that repeated requests with the same
+            seed and parameters should return the same result. However,
+            determinism cannot be totally guaranteed.
+            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+
+
+        preset : typing.Optional[str]
+            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature, etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).
+            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
+
+
+        end_sequences : typing.Optional[typing.Sequence[str]]
+            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
+
+        stop_sequences : typing.Optional[typing.Sequence[str]]
+            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
+
+        k : typing.Optional[int]
+            Ensures only the top `k` most likely tokens are considered for generation at each step.
+            Defaults to `0`, min value of `0`, max value of `500`.
+
+
+        p : typing.Optional[float]
+            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
+            Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
+
+
+        frequency_penalty : typing.Optional[float]
+            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
+
+            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
+
+
+        presence_penalty : typing.Optional[float]
+            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
+
+            Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
+
+            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
+ + + return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods] + One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`. + + If `GENERATION` is selected, the token likelihoods will only be provided for generated text. + + If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text. + + raw_prompting : typing.Optional[bool] + When enabled, the user's prompt will be sent to the model without any pre-processing. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Generation + + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.generate( + prompt="Please explain to me how LLMs work", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/generate", + method="POST", + json={ + "prompt": prompt, + "model": model, + "num_generations": num_generations, + "max_tokens": max_tokens, + "truncate": truncate, + "temperature": temperature, + "seed": seed, + "preset": preset, + "end_sequences": end_sequences, + "stop_sequences": stop_sequences, + "k": k, + "p": p, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "return_likelihoods": return_likelihoods, + "raw_prompting": raw_prompting, + "stream": False, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + Generation, + construct_type( + type_=Generation, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if 
_response.status_code == 501:
+                raise NotImplementedError(
+                    typing.cast(
+                        NotImplementedErrorBody,
+                        construct_type(
+                            type_=NotImplementedErrorBody,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    )
+                )
+            if _response.status_code == 503:
+                raise ServiceUnavailableError(
+                    typing.cast(
+                        typing.Optional[typing.Any],
+                        construct_type(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    )
+                )
+            if _response.status_code == 504:
+                raise GatewayTimeoutError(
+                    typing.cast(
+                        GatewayTimeoutErrorBody,
+                        construct_type(
+                            type_=GatewayTimeoutErrorBody,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    )
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
+    async def embed(
+        self,
+        *,
+        texts: typing.Optional[typing.Sequence[str]] = OMIT,
+        images: typing.Optional[typing.Sequence[str]] = OMIT,
+        model: typing.Optional[str] = OMIT,
+        input_type: typing.Optional[EmbedInputType] = OMIT,
+        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
+        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EmbedResponse:
+        """
+        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.
+
+        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.
+
+        If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search).
+
+        Parameters
+        ----------
+        texts : typing.Optional[typing.Sequence[str]]
+            An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.
+
+        images : typing.Optional[typing.Sequence[str]]
+            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.
+
+            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and have a maximum size of 5MB.
+
+        model : typing.Optional[str]
+            Defaults to embed-english-v2.0
+
+            The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
+
+            Available models and corresponding embedding dimensions:
+
+            * `embed-english-v3.0` 1024
+            * `embed-multilingual-v3.0` 1024
+            * `embed-english-light-v3.0` 384
+            * `embed-multilingual-light-v3.0` 384
+
+            * `embed-english-v2.0` 4096
+            * `embed-english-light-v2.0` 1024
+            * `embed-multilingual-v2.0` 768
+
+        input_type : typing.Optional[EmbedInputType]
+
+        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]
+            Specifies the types of embeddings you want to get back. Not required, and the default is None, which returns the Embed Floats response type. Can be one or more of the following types.
+
+            * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
+            * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
+            * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
+ * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models. + * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models. + + truncate : typing.Optional[EmbedRequestTruncate] + One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + + Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + + If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EmbedResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.embed() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/embed", + method="POST", + json={ + "texts": texts, + "images": images, + "model": model, + "input_type": input_type, + "embedding_types": embedding_types, + "truncate": truncate, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EmbedResponse, + construct_type( + type_=EmbedResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if 
_response.status_code == 503:
+                raise ServiceUnavailableError(
+                    typing.cast(
+                        typing.Optional[typing.Any],
+                        construct_type(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    )
+                )
+            if _response.status_code == 504:
+                raise GatewayTimeoutError(
+                    typing.cast(
+                        GatewayTimeoutErrorBody,
+                        construct_type(
+                            type_=GatewayTimeoutErrorBody,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    )
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
+    async def rerank(
+        self,
+        *,
+        query: str,
+        documents: typing.Sequence[RerankRequestDocumentsItem],
+        model: typing.Optional[str] = OMIT,
+        top_n: typing.Optional[int] = OMIT,
+        rank_fields: typing.Optional[typing.Sequence[str]] = OMIT,
+        return_documents: typing.Optional[bool] = OMIT,
+        max_chunks_per_doc: typing.Optional[int] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> RerankResponse:
+        """
+        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.
+
+        Parameters
+        ----------
+        query : str
+            The search query
+
+        documents : typing.Sequence[RerankRequestDocumentsItem]
+            A list of document objects or strings to rerank.
+            If a document object is provided, the `text` field is required and all other fields will be preserved in the response.
+
+            The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.
+
+            We recommend a maximum of 1,000 documents for optimal endpoint performance.
+
+        model : typing.Optional[str]
+            The identifier of the model to use, one of: `rerank-english-v3.0`, `rerank-multilingual-v3.0`, `rerank-english-v2.0`, `rerank-multilingual-v2.0`
+
+        top_n : typing.Optional[int]
+            The number of most relevant documents or indices to return, defaults to the length of the documents
+
+        rank_fields : typing.Optional[typing.Sequence[str]]
+            If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on the order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). If not provided, the model will use the default text field for ranking.
+
+        return_documents : typing.Optional[bool]
+            - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.
+            - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
+
+        max_chunks_per_doc : typing.Optional[int]
+            The maximum number of chunks to produce internally from a document
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
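As a quick illustration of the documents/rank_fields contract described above, here is a minimal usage sketch. The query and document values are invented for illustration; on object documents only the `text` key is required, and extra keys such as `title` are preserved in the response.

import asyncio

from cohere import AsyncClient

client = AsyncClient(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)


async def main() -> None:
    # Keys listed in rank_fields are considered for reranking
    # in the given order ("title" first, then "text").
    response = await client.v1.v1.rerank(
        query="capital of the United States",
        documents=[
            {"title": "Geography", "text": "Washington, D.C. is the capital of the United States."},
            {"title": "Cooking", "text": "Bake the bread at 230C for 30 minutes."},
        ],
        rank_fields=["title", "text"],
        top_n=1,
        return_documents=True,
    )


asyncio.run(main())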
+ + Returns + ------- + RerankResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.rerank( + query="query", + documents=["documents"], + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/rerank", + method="POST", + json={ + "model": model, + "query": query, + "documents": convert_and_respect_annotation_metadata( + object_=documents, annotation=typing.Sequence[RerankRequestDocumentsItem], direction="write" + ), + "top_n": top_n, + "rank_fields": rank_fields, + "return_documents": return_documents, + "max_chunks_per_doc": max_chunks_per_doc, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + RerankResponse, + construct_type( + type_=RerankResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, 
body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def classify( + self, + *, + inputs: typing.Sequence[str], + examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT, + model: typing.Optional[str] = OMIT, + preset: typing.Optional[str] = OMIT, + truncate: typing.Optional[ClassifyRequestTruncate] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> ClassifyResponse: + """ + This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference. + Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. + + Parameters + ---------- + inputs : typing.Sequence[str] + A list of up to 96 texts to be classified. Each one must be a non-empty string. + There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models). + Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts. + + examples : typing.Optional[typing.Sequence[ClassifyExample]] + An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`. + Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly. + + model : typing.Optional[str] + The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID. + + preset : typing.Optional[str] + The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. + + truncate : typing.Optional[ClassifyRequestTruncate] + One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length. + Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model. + If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
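To make the `examples` constraint above concrete (each unique label needs at least 2 examples), a minimal sketch follows. The texts and labels are invented, and the `from cohere.v1 import ClassifyExample` path is an assumption that mirrors the `cohere.v1` re-exports used by the other examples in this patch.

import asyncio

from cohere import AsyncClient
from cohere.v1 import ClassifyExample

client = AsyncClient(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)


async def main() -> None:
    # Two examples per unique label, the documented minimum.
    response = await client.v1.v1.classify(
        inputs=["The order arrived a day early", "Never shopping here again"],
        examples=[
            ClassifyExample(text="I love this product", label="positive"),
            ClassifyExample(text="What a great experience", label="positive"),
            ClassifyExample(text="This was a waste of money", label="negative"),
            ClassifyExample(text="I am very disappointed", label="negative"),
        ],
    )


asyncio.run(main())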
+ + Returns + ------- + ClassifyResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.classify( + inputs=["inputs"], + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/classify", + method="POST", + json={ + "inputs": inputs, + "examples": convert_and_respect_annotation_metadata( + object_=examples, annotation=typing.Sequence[ClassifyExample], direction="write" + ), + "model": model, + "preset": preset, + "truncate": truncate, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ClassifyResponse, + construct_type( + type_=ClassifyResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def summarize( + self, 
+        *,
+        text: str,
+        length: typing.Optional[SummarizeRequestLength] = OMIT,
+        format: typing.Optional[SummarizeRequestFormat] = OMIT,
+        model: typing.Optional[str] = OMIT,
+        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,
+        temperature: typing.Optional[float] = OMIT,
+        additional_command: typing.Optional[str] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> SummarizeResponse:
+        """
+
+        This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
+
+        Generates a summary in English for a given text.
+
+        Parameters
+        ----------
+        text : str
+            The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.
+
+        length : typing.Optional[SummarizeRequestLength]
+            One of `short`, `medium`, `long`, or `auto`, defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.
+
+        format : typing.Optional[SummarizeRequestFormat]
+            One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free-form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.
+
+        model : typing.Optional[str]
+            The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better.
+
+        extractiveness : typing.Optional[SummarizeRequestExtractiveness]
+            One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.
+
+        temperature : typing.Optional[float]
+            Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.
+
+        additional_command : typing.Optional[str]
+            A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda"
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
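A short sketch of how the length/format/extractiveness knobs documented above combine; all values are drawn from the enumerations in the parameter descriptions, and the input text is a placeholder.

import asyncio

from cohere import AsyncClient

client = AsyncClient(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)


async def main() -> None:
    # Requests a short, bullet-style, strongly paraphrased summary
    # with low-randomness sampling.
    response = await client.v1.v1.summarize(
        text="<up to 100,000 characters of English text>",
        length="short",
        format="bullets",
        extractiveness="low",
        temperature=0.3,
        additional_command="focusing on the next steps",
    )


asyncio.run(main())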
+ + Returns + ------- + SummarizeResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.summarize( + text="text", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/summarize", + method="POST", + json={ + "text": text, + "length": length, + "format": format, + "model": model, + "extractiveness": extractiveness, + "temperature": temperature, + "additional_command": additional_command, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + SummarizeResponse, + construct_type( + type_=SummarizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def tokenize( + self, *, text: str, model: str, request_options: 
typing.Optional[RequestOptions] = None + ) -> TokenizeResponse: + """ + This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page. + + Parameters + ---------- + text : str + The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters. + + model : str + An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + TokenizeResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.tokenize( + text="tokenize me! :D", + model="command", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/tokenize", + method="POST", + json={ + "text": text, + "model": model, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + TokenizeResponse, + construct_type( + type_=TokenizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + 
object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def detokenize( + self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None + ) -> DetokenizeResponse: + """ + This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page. + + Parameters + ---------- + tokens : typing.Sequence[int] + The list of tokens to be detokenized. + + model : str + An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + DetokenizeResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.detokenize( + tokens=[1], + model="model", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/detokenize", + method="POST", + json={ + "tokens": tokens, + "model": model, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + DetokenizeResponse, + construct_type( + type_=DetokenizeResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + 
type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def check_api_key(self, *, request_options: typing.Optional[RequestOptions] = None) -> CheckApiKeyResponse: + """ + Checks that the api key in the Authorization header is valid and active + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + CheckApiKeyResponse + OK + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.check_api_key() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/check-api-key", + method="POST", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + CheckApiKeyResponse, + construct_type( + type_=CheckApiKeyResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + UnprocessableEntityErrorBody, + construct_type( + type_=UnprocessableEntityErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 429: + raise TooManyRequestsError( + typing.cast( + TooManyRequestsErrorBody, + construct_type( + type_=TooManyRequestsErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 499: + raise ClientClosedRequestError( + typing.cast( + ClientClosedRequestErrorBody, + construct_type( + type_=ClientClosedRequestErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + 
typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 501: + raise NotImplementedError( + typing.cast( + NotImplementedErrorBody, + construct_type( + type_=NotImplementedErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 504: + raise GatewayTimeoutError( + typing.cast( + GatewayTimeoutErrorBody, + construct_type( + type_=GatewayTimeoutErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def list_finetuned_models( + self, + *, + page_size: typing.Optional[int] = None, + page_token: typing.Optional[str] = None, + order_by: typing.Optional[str] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListFinetunedModelsResponse: + """ + Parameters + ---------- + page_size : typing.Optional[int] + Maximum number of results to be returned by the server. If 0, defaults to 50. + + page_token : typing.Optional[str] + Request a specific page of the list results. + + order_by : typing.Optional[str] + Comma separated list of fields. For example: "created_at,name". The default + sorting order is ascending. To specify descending order for a field, append + " desc" to the field name. For example: "created_at desc,name". + + Supported sorting fields: + + - created_at (default) + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + ListFinetunedModelsResponse + A successful response. 
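Since `page_size`/`page_token` imply cursor-style pagination, a sketch of walking every page follows. The `finetuned_models` and `next_page_token` attribute names are assumptions based on the usual shape of paginated list responses in this API, not verified against this patch.

import asyncio

from cohere import AsyncClient

client = AsyncClient(
    client_name="YOUR_CLIENT_NAME",
    token="YOUR_TOKEN",
)


async def main() -> None:
    page_token = None
    while True:
        response = await client.v1.v1.list_finetuned_models(
            page_size=50,
            page_token=page_token,
            order_by="created_at desc",
        )
        for model in response.finetuned_models:  # assumed field name
            print(model.name)
        page_token = getattr(response, "next_page_token", None)  # assumed field name
        if not page_token:
            break


asyncio.run(main())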
+ + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.list_finetuned_models() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/finetuning/finetuned-models", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + "order_by": order_by, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListFinetunedModelsResponse, + construct_type( + type_=ListFinetunedModelsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def create_finetuned_model( + self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None + ) -> CreateFinetunedModelResponse: + """ + Parameters + ---------- + request : FinetunedModel + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + CreateFinetunedModelResponse + A successful response. 
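+
+        Notes
+        -----
+        Creation starts a training job that completes asynchronously; the model
+        then moves through the stages enumerated by the generated ``Status``
+        type. A hedged polling sketch (the ``finetuned_model.id`` and
+        ``finetuned_model.status`` field paths are assumptions; check the
+        generated response models):
+
+            create_response = await client.v1.v1.create_finetuned_model(request=finetuned_model)
+            model_id = create_response.finetuned_model.id
+            get_response = await client.v1.v1.get_finetuned_model(id=model_id)
+            current_status = get_response.finetuned_model.status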
+ + Examples + -------- + import asyncio + + from cohere import AsyncClient + from cohere.v1.finetuning.finetuning import BaseModel, FinetunedModel, Settings + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.create_finetuned_model( + request=FinetunedModel( + name="api-test", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_CHAT", + ), + dataset_id="my-dataset-id", + ), + ), + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/finetuning/finetuned-models", + method="POST", + json=convert_and_respect_annotation_metadata(object_=request, annotation=FinetunedModel, direction="write"), + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + CreateFinetunedModelResponse, + construct_type( + type_=CreateFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get_finetuned_model( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> GetFinetunedModelResponse: + """ + Parameters + ---------- + id : str + The fine-tuned model ID. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + GetFinetunedModelResponse + A successful response. 
+ + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.get_finetuned_model( + id="id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + GetFinetunedModelResponse, + construct_type( + type_=GetFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def delete_finetuned_model( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> DeleteFinetunedModelResponse: + """ + Parameters + ---------- + id : str + The fine-tuned model ID. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + DeleteFinetunedModelResponse + A successful response. 
+ + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.delete_finetuned_model( + id="id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + DeleteFinetunedModelResponse, + construct_type( + type_=DeleteFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def update_finetuned_model( + self, + id: str, + *, + name: str, + settings: Settings, + creator_id: typing.Optional[str] = OMIT, + organization_id: typing.Optional[str] = OMIT, + status: typing.Optional[Status] = OMIT, + created_at: typing.Optional[dt.datetime] = OMIT, + updated_at: typing.Optional[dt.datetime] = OMIT, + completed_at: typing.Optional[dt.datetime] = OMIT, + last_used: typing.Optional[dt.datetime] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> UpdateFinetunedModelResponse: + """ + Parameters + ---------- + id : str + FinetunedModel ID. + + name : str + FinetunedModel name (e.g. `foobar`). + + settings : Settings + FinetunedModel settings such as dataset, hyperparameters... + + creator_id : typing.Optional[str] + User ID of the creator. + + organization_id : typing.Optional[str] + Organization ID. + + status : typing.Optional[Status] + Current stage in the life-cycle of the fine-tuned model. + + created_at : typing.Optional[dt.datetime] + Creation timestamp. + + updated_at : typing.Optional[dt.datetime] + Latest update timestamp. + + completed_at : typing.Optional[dt.datetime] + Timestamp for the completed fine-tuning. + + last_used : typing.Optional[dt.datetime] + Timestamp for the latest request to this fine-tuned model. 
+ + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + UpdateFinetunedModelResponse + A successful response. + + Examples + -------- + import asyncio + + from cohere import AsyncClient + from cohere.v1.finetuning.finetuning import BaseModel, Settings + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.update_finetuned_model( + id="id", + name="name", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_UNSPECIFIED", + ), + dataset_id="dataset_id", + ), + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(id)}", + method="PATCH", + json={ + "name": name, + "creator_id": creator_id, + "organization_id": organization_id, + "settings": convert_and_respect_annotation_metadata( + object_=settings, annotation=Settings, direction="write" + ), + "status": status, + "created_at": created_at, + "updated_at": updated_at, + "completed_at": completed_at, + "last_used": last_used, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + UpdateFinetunedModelResponse, + construct_type( + type_=UpdateFinetunedModelResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def list_events( + self, + finetuned_model_id: str, + *, + page_size: typing.Optional[int] = None, + page_token: typing.Optional[str] = None, + order_by: typing.Optional[str] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListEventsResponse: + """ + Parameters + ---------- + finetuned_model_id : str + The parent fine-tuned model ID. + + page_size : typing.Optional[int] + Maximum number of results to be returned by the server. If 0, defaults to 50. + + page_token : typing.Optional[str] + Request a specific page of the list results. 
+ + order_by : typing.Optional[str] + Comma-separated list of fields. For example: "created_at,name". The default + sorting order is ascending. To specify descending order for a field, append + " desc" to the field name. For example: "created_at desc,name". + + Supported sorting fields: + + - created_at (default) + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + ListEventsResponse + A successful response. + + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.list_events( + finetuned_model_id="finetuned_model_id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/events", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + "order_by": order_by, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListEventsResponse, + construct_type( + type_=ListEventsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def list_training_step_metrics( + self, + finetuned_model_id: str, + *, + page_size: typing.Optional[int] = None, + page_token: typing.Optional[str] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> ListTrainingStepMetricsResponse: + """ + Parameters + ---------- + finetuned_model_id : str + The parent fine-tuned model ID. + + page_size : typing.Optional[int] + Maximum number of results to be returned by the server. If 0, defaults to 50. + + page_token : typing.Optional[str] + Request a specific page of the list results. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + ListTrainingStepMetricsResponse + A successful response.
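+
+        Notes
+        -----
+        Pagination works the same way as in ``list_finetuned_models``: pass the
+        token returned with the previous page back in as ``page_token`` (see the
+        pagination sketch in that method's notes).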
+ + Examples + -------- + import asyncio + + from cohere import AsyncClient + + client = AsyncClient( + client_name="YOUR_CLIENT_NAME", + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.v1.v1.list_training_step_metrics( + finetuned_model_id="finetuned_model_id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/training-step-metrics", + method="GET", + params={ + "page_size": page_size, + "page_token": page_token, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + ListTrainingStepMetricsResponse, + construct_type( + type_=ListTrainingStepMetricsResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 400: + raise BadRequestError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 401: + raise UnauthorizedError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 500: + raise InternalServerError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 503: + raise ServiceUnavailableError( + typing.cast( + typing.Optional[typing.Any], + construct_type( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) diff --git a/src/cohere/v1/v1/types/__init__.py b/src/cohere/v1/v1/types/__init__.py new file mode 100644 index 000000000..24cf9801b --- /dev/null +++ b/src/cohere/v1/v1/types/__init__.py @@ -0,0 +1,67 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from .chat_request_citation_quality import ChatRequestCitationQuality +from .chat_request_connectors_search_options import ChatRequestConnectorsSearchOptions +from .chat_request_prompt_truncation import ChatRequestPromptTruncation +from .chat_request_safety_mode import ChatRequestSafetyMode +from .chat_stream_request_citation_quality import ChatStreamRequestCitationQuality +from .chat_stream_request_connectors_search_options import ChatStreamRequestConnectorsSearchOptions +from .chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation +from .chat_stream_request_safety_mode import ChatStreamRequestSafetyMode +from .check_api_key_response import CheckApiKeyResponse +from .classify_request_truncate import ClassifyRequestTruncate +from .classify_response import ClassifyResponse +from .classify_response_classifications_item import ClassifyResponseClassificationsItem +from .classify_response_classifications_item_classification_type import ( + ClassifyResponseClassificationsItemClassificationType, +) +from .classify_response_classifications_item_labels_value import ClassifyResponseClassificationsItemLabelsValue +from .detokenize_response import DetokenizeResponse +from .embed_request_truncate import EmbedRequestTruncate +from .embed_response import EmbedResponse +from .generate_request_return_likelihoods import GenerateRequestReturnLikelihoods +from .generate_request_truncate import GenerateRequestTruncate +from .generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods +from .generate_stream_request_truncate import GenerateStreamRequestTruncate +from .rerank_request_documents_item import RerankRequestDocumentsItem +from .rerank_response import RerankResponse +from .rerank_response_results_item import RerankResponseResultsItem +from .rerank_response_results_item_document import RerankResponseResultsItemDocument +from .summarize_request_extractiveness import SummarizeRequestExtractiveness +from .summarize_request_format import SummarizeRequestFormat +from .summarize_request_length import SummarizeRequestLength +from .summarize_response import SummarizeResponse +from .tokenize_response import TokenizeResponse + +__all__ = [ + "ChatRequestCitationQuality", + "ChatRequestConnectorsSearchOptions", + "ChatRequestPromptTruncation", + "ChatRequestSafetyMode", + "ChatStreamRequestCitationQuality", + "ChatStreamRequestConnectorsSearchOptions", + "ChatStreamRequestPromptTruncation", + "ChatStreamRequestSafetyMode", + "CheckApiKeyResponse", + "ClassifyRequestTruncate", + "ClassifyResponse", + "ClassifyResponseClassificationsItem", + "ClassifyResponseClassificationsItemClassificationType", + "ClassifyResponseClassificationsItemLabelsValue", + "DetokenizeResponse", + "EmbedRequestTruncate", + "EmbedResponse", + "GenerateRequestReturnLikelihoods", + "GenerateRequestTruncate", + "GenerateStreamRequestReturnLikelihoods", + "GenerateStreamRequestTruncate", + "RerankRequestDocumentsItem", + "RerankResponse", + "RerankResponseResultsItem", + "RerankResponseResultsItemDocument", + "SummarizeRequestExtractiveness", + "SummarizeRequestFormat", + "SummarizeRequestLength", + "SummarizeResponse", + "TokenizeResponse", +] diff --git a/src/cohere/types/chat_request_citation_quality.py b/src/cohere/v1/v1/types/chat_request_citation_quality.py similarity index 100% rename from src/cohere/types/chat_request_citation_quality.py rename to src/cohere/v1/v1/types/chat_request_citation_quality.py diff --git a/src/cohere/types/chat_request_connectors_search_options.py 
b/src/cohere/v1/v1/types/chat_request_connectors_search_options.py similarity index 90% rename from src/cohere/types/chat_request_connectors_search_options.py rename to src/cohere/v1/v1/types/chat_request_connectors_search_options.py index 2b86c7d29..4b9451b59 100644 --- a/src/cohere/types/chat_request_connectors_search_options.py +++ b/src/cohere/v1/v1/types/chat_request_connectors_search_options.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class ChatRequestConnectorsSearchOptions(UncheckedBaseModel): diff --git a/src/cohere/types/chat_request_prompt_truncation.py b/src/cohere/v1/v1/types/chat_request_prompt_truncation.py similarity index 100% rename from src/cohere/types/chat_request_prompt_truncation.py rename to src/cohere/v1/v1/types/chat_request_prompt_truncation.py diff --git a/src/cohere/types/chat_request_safety_mode.py b/src/cohere/v1/v1/types/chat_request_safety_mode.py similarity index 100% rename from src/cohere/types/chat_request_safety_mode.py rename to src/cohere/v1/v1/types/chat_request_safety_mode.py diff --git a/src/cohere/types/chat_stream_request_citation_quality.py b/src/cohere/v1/v1/types/chat_stream_request_citation_quality.py similarity index 100% rename from src/cohere/types/chat_stream_request_citation_quality.py rename to src/cohere/v1/v1/types/chat_stream_request_citation_quality.py diff --git a/src/cohere/types/chat_stream_request_connectors_search_options.py b/src/cohere/v1/v1/types/chat_stream_request_connectors_search_options.py similarity index 90% rename from src/cohere/types/chat_stream_request_connectors_search_options.py rename to src/cohere/v1/v1/types/chat_stream_request_connectors_search_options.py index 49ea7624f..7495f4640 100644 --- a/src/cohere/types/chat_stream_request_connectors_search_options.py +++ b/src/cohere/v1/v1/types/chat_stream_request_connectors_search_options.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class ChatStreamRequestConnectorsSearchOptions(UncheckedBaseModel): diff --git a/src/cohere/types/chat_stream_request_prompt_truncation.py b/src/cohere/v1/v1/types/chat_stream_request_prompt_truncation.py similarity index 100% rename from src/cohere/types/chat_stream_request_prompt_truncation.py rename to src/cohere/v1/v1/types/chat_stream_request_prompt_truncation.py diff --git a/src/cohere/types/chat_stream_request_safety_mode.py b/src/cohere/v1/v1/types/chat_stream_request_safety_mode.py similarity index 100% rename from src/cohere/types/chat_stream_request_safety_mode.py rename to src/cohere/v1/v1/types/chat_stream_request_safety_mode.py diff --git a/src/cohere/types/check_api_key_response.py b/src/cohere/v1/v1/types/check_api_key_response.py similarity index 82% rename from src/cohere/types/check_api_key_response.py rename to src/cohere/v1/v1/types/check_api_key_response.py index f838acff5..d242a5a2b 100644 --- a/src/cohere/types/check_api_key_response.py +++ b/src/cohere/v1/v1/types/check_api_key_response.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/classify_request_truncate.py b/src/cohere/v1/v1/types/classify_request_truncate.py similarity index 100% rename from src/cohere/types/classify_request_truncate.py rename to src/cohere/v1/v1/types/classify_request_truncate.py diff --git a/src/cohere/types/classify_response.py b/src/cohere/v1/v1/types/classify_response.py similarity index 80% rename from src/cohere/types/classify_response.py rename to src/cohere/v1/v1/types/classify_response.py index 1d2460d29..25d1827ca 100644 --- a/src/cohere/types/classify_response.py +++ b/src/cohere/v1/v1/types/classify_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .classify_response_classifications_item import ClassifyResponseClassificationsItem -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...types.api_meta import ApiMeta +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/classify_response_classifications_item.py b/src/cohere/v1/v1/types/classify_response_classifications_item.py similarity index 94% rename from src/cohere/types/classify_response_classifications_item.py rename to src/cohere/v1/v1/types/classify_response_classifications_item.py index 95d226684..77a4ae68c 100644 --- a/src/cohere/types/classify_response_classifications_item.py +++ b/src/cohere/v1/v1/types/classify_response_classifications_item.py @@ -1,13 +1,13 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic from .classify_response_classifications_item_labels_value import ClassifyResponseClassificationsItemLabelsValue from .classify_response_classifications_item_classification_type import ( ClassifyResponseClassificationsItemClassificationType, ) -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class ClassifyResponseClassificationsItem(UncheckedBaseModel): diff --git a/src/cohere/types/classify_response_classifications_item_classification_type.py b/src/cohere/v1/v1/types/classify_response_classifications_item_classification_type.py similarity index 100% rename from src/cohere/types/classify_response_classifications_item_classification_type.py rename to src/cohere/v1/v1/types/classify_response_classifications_item_classification_type.py diff --git a/src/cohere/types/classify_response_classifications_item_labels_value.py b/src/cohere/v1/v1/types/classify_response_classifications_item_labels_value.py similarity index 81% rename from src/cohere/types/classify_response_classifications_item_labels_value.py rename to src/cohere/v1/v1/types/classify_response_classifications_item_labels_value.py index f88f8e19c..0f7dd2efa 100644 --- a/src/cohere/types/classify_response_classifications_item_labels_value.py +++ b/src/cohere/v1/v1/types/classify_response_classifications_item_labels_value.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/types/detokenize_response.py b/src/cohere/v1/v1/types/detokenize_response.py similarity index 78% rename from src/cohere/types/detokenize_response.py rename to src/cohere/v1/v1/types/detokenize_response.py index b72b2dc8d..22cb43d51 100644 --- a/src/cohere/types/detokenize_response.py +++ b/src/cohere/v1/v1/types/detokenize_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import pydantic import typing -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...types.api_meta import ApiMeta +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class DetokenizeResponse(UncheckedBaseModel): diff --git a/src/cohere/types/embed_request_truncate.py b/src/cohere/v1/v1/types/embed_request_truncate.py similarity index 100% rename from src/cohere/types/embed_request_truncate.py rename to src/cohere/v1/v1/types/embed_request_truncate.py diff --git a/src/cohere/v1/v1/types/embed_response.py b/src/cohere/v1/v1/types/embed_response.py new file mode 100644 index 000000000..29cdbcce8 --- /dev/null +++ b/src/cohere/v1/v1/types/embed_response.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing +from ...types.embed_floats_response import EmbedFloatsResponse +from ...types.embed_by_type_response import EmbedByTypeResponse + +EmbedResponse = typing.Union[EmbedFloatsResponse, EmbedByTypeResponse] diff --git a/src/cohere/types/generate_request_return_likelihoods.py b/src/cohere/v1/v1/types/generate_request_return_likelihoods.py similarity index 100% rename from src/cohere/types/generate_request_return_likelihoods.py rename to src/cohere/v1/v1/types/generate_request_return_likelihoods.py diff --git a/src/cohere/types/generate_request_truncate.py b/src/cohere/v1/v1/types/generate_request_truncate.py similarity index 100% rename from src/cohere/types/generate_request_truncate.py rename to src/cohere/v1/v1/types/generate_request_truncate.py diff --git a/src/cohere/types/generate_stream_request_return_likelihoods.py b/src/cohere/v1/v1/types/generate_stream_request_return_likelihoods.py similarity index 100% rename from src/cohere/types/generate_stream_request_return_likelihoods.py rename to src/cohere/v1/v1/types/generate_stream_request_return_likelihoods.py diff --git a/src/cohere/types/generate_stream_request_truncate.py b/src/cohere/v1/v1/types/generate_stream_request_truncate.py similarity index 100% rename from src/cohere/types/generate_stream_request_truncate.py rename to src/cohere/v1/v1/types/generate_stream_request_truncate.py diff --git a/src/cohere/types/rerank_request_documents_item.py b/src/cohere/v1/v1/types/rerank_request_documents_item.py similarity index 72% rename from src/cohere/types/rerank_request_documents_item.py rename to src/cohere/v1/v1/types/rerank_request_documents_item.py index e8220eeac..6acd9c3be 100644 --- a/src/cohere/types/rerank_request_documents_item.py +++ b/src/cohere/v1/v1/types/rerank_request_documents_item.py @@ -1,6 +1,6 @@ # This file was auto-generated by Fern from our API Definition. import typing -from .rerank_document import RerankDocument +from ....types.rerank_document import RerankDocument RerankRequestDocumentsItem = typing.Union[str, RerankDocument] diff --git a/src/cohere/types/rerank_response.py b/src/cohere/v1/v1/types/rerank_response.py similarity index 82% rename from src/cohere/types/rerank_response.py rename to src/cohere/v1/v1/types/rerank_response.py index d83a6995e..53be31674 100644 --- a/src/cohere/types/rerank_response.py +++ b/src/cohere/v1/v1/types/rerank_response.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .rerank_response_results_item import RerankResponseResultsItem import pydantic -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...types.api_meta import ApiMeta +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class RerankResponse(UncheckedBaseModel): diff --git a/src/cohere/types/rerank_response_results_item.py b/src/cohere/v1/v1/types/rerank_response_results_item.py similarity index 92% rename from src/cohere/types/rerank_response_results_item.py rename to src/cohere/v1/v1/types/rerank_response_results_item.py index 052c53494..43f57576f 100644 --- a/src/cohere/types/rerank_response_results_item.py +++ b/src/cohere/v1/v1/types/rerank_response_results_item.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .rerank_response_results_item_document import RerankResponseResultsItemDocument import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class RerankResponseResultsItem(UncheckedBaseModel): diff --git a/src/cohere/types/rerank_response_results_item_document.py b/src/cohere/v1/v1/types/rerank_response_results_item_document.py similarity index 85% rename from src/cohere/types/rerank_response_results_item_document.py rename to src/cohere/v1/v1/types/rerank_response_results_item_document.py index 5fa05e4b5..55b47f254 100644 --- a/src/cohere/types/rerank_response_results_item_document.py +++ b/src/cohere/v1/v1/types/rerank_response_results_item_document.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import typing diff --git a/src/cohere/types/summarize_request_extractiveness.py b/src/cohere/v1/v1/types/summarize_request_extractiveness.py similarity index 100% rename from src/cohere/types/summarize_request_extractiveness.py rename to src/cohere/v1/v1/types/summarize_request_extractiveness.py diff --git a/src/cohere/types/summarize_request_format.py b/src/cohere/v1/v1/types/summarize_request_format.py similarity index 100% rename from src/cohere/types/summarize_request_format.py rename to src/cohere/v1/v1/types/summarize_request_format.py diff --git a/src/cohere/types/summarize_request_length.py b/src/cohere/v1/v1/types/summarize_request_length.py similarity index 100% rename from src/cohere/types/summarize_request_length.py rename to src/cohere/v1/v1/types/summarize_request_length.py diff --git a/src/cohere/types/summarize_response.py b/src/cohere/v1/v1/types/summarize_response.py similarity index 82% rename from src/cohere/types/summarize_response.py rename to src/cohere/v1/v1/types/summarize_response.py index 187a3565e..dc9197f7e 100644 --- a/src/cohere/types/summarize_response.py +++ b/src/cohere/v1/v1/types/summarize_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...types.api_meta import ApiMeta +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class SummarizeResponse(UncheckedBaseModel): diff --git a/src/cohere/types/tokenize_response.py b/src/cohere/v1/v1/types/tokenize_response.py similarity index 80% rename from src/cohere/types/tokenize_response.py rename to src/cohere/v1/v1/types/tokenize_response.py index e3f2dd985..b17f0ce71 100644 --- a/src/cohere/types/tokenize_response.py +++ b/src/cohere/v1/v1/types/tokenize_response.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...types.api_meta import ApiMeta +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class TokenizeResponse(UncheckedBaseModel): diff --git a/src/cohere/v2/__init__.py b/src/cohere/v2/__init__.py index 922af5e29..8b436aa56 100644 --- a/src/cohere/v2/__init__.py +++ b/src/cohere/v2/__init__.py @@ -1,13 +1,40 @@ # This file was auto-generated by Fern from our API Definition. from .types import ( + BadRequestErrorBody, + ClientClosedRequestErrorBody, + ForbiddenErrorBody, + GatewayTimeoutErrorBody, + InternalServerErrorBody, + NotFoundErrorBody, + NotImplementedErrorBody, + ServiceUnavailableErrorBody, + TooManyRequestsErrorBody, + UnauthorizedErrorBody, + UnprocessableEntityErrorBody, + V2ChatRequestDocumentsItem, + V2ChatRequestSafetyMode, + V2ChatStreamRequestDocumentsItem, + V2ChatStreamRequestSafetyMode, +) +from .errors import ( + BadRequestError, + ClientClosedRequestError, + ForbiddenError, + GatewayTimeoutError, + InternalServerError, + NotFoundError, + NotImplementedError, + ServiceUnavailableError, + TooManyRequestsError, + UnauthorizedError, + UnprocessableEntityError, +) +from . import v2 +from .v2 import ( AssistantMessage, AssistantMessageContent, - AssistantMessageContentItem, - AssistantMessageContentItem_Text, AssistantMessageResponse, - AssistantMessageResponseContentItem, - AssistantMessageResponseContentItem_Text, ChatContentDeltaEvent, ChatContentDeltaEventDelta, ChatContentDeltaEventDeltaMessage, @@ -18,11 +45,7 @@ ChatContentStartEventDeltaMessage, ChatContentStartEventDeltaMessageContent, ChatFinishReason, - ChatMessage2, - ChatMessage2_Assistant, - ChatMessage2_System, - ChatMessage2_Tool, - ChatMessage2_User, + ChatMessage, ChatMessageEndEvent, ChatMessageEndEventDelta, ChatMessageStartEvent, @@ -30,6 +53,7 @@ ChatMessageStartEventDeltaMessage, ChatMessages, ChatStreamEventType, + ChatStreamEventTypeType, ChatToolCallDeltaEvent, ChatToolCallDeltaEventDelta, ChatToolCallDeltaEventDeltaToolCall, @@ -43,63 +67,44 @@ ChatToolPlanDeltaEventDelta, Citation, CitationEndEvent, + CitationOptions, + CitationOptionsMode, CitationStartEvent, CitationStartEventDelta, CitationStartEventDeltaMessage, - Content, - Content_Text, + Document, + DocumentContent, DocumentSource, - JsonResponseFormat2, - NonStreamedChatResponse2, - ResponseFormat2, - ResponseFormat2_JsonObject, - ResponseFormat2_Text, + JsonResponseFormat, + NonStreamedChatResponse, + ResponseFormat, Source, - Source_Document, - Source_Tool, - StreamedChatResponse2, - StreamedChatResponse2_CitationEnd, - StreamedChatResponse2_CitationStart, - StreamedChatResponse2_ContentDelta, - StreamedChatResponse2_ContentEnd, - StreamedChatResponse2_ContentStart, - StreamedChatResponse2_MessageEnd, - StreamedChatResponse2_MessageStart, - StreamedChatResponse2_ToolCallDelta, - StreamedChatResponse2_ToolCallEnd, - StreamedChatResponse2_ToolCallStart, - StreamedChatResponse2_ToolPlanDelta, + StreamedChatResponse, SystemMessage, SystemMessageContent, - SystemMessageContentItem, - SystemMessageContentItem_Text, TextContent, - TextResponseFormat2, - Tool2, - Tool2Function, - ToolCall2, - ToolCall2Function, - ToolMessage2, + TextResponseFormat, + Tool, + ToolCall, + ToolCallFunction, + ToolContent, + ToolFunction, + ToolMessage, + ToolMessageToolContent, ToolSource, Usage, UsageBilledUnits, 
UsageTokens, UserMessage, UserMessageContent, - V2ChatRequestCitationMode, - V2ChatRequestSafetyMode, - V2ChatStreamRequestCitationMode, - V2ChatStreamRequestSafetyMode, ) __all__ = [ "AssistantMessage", "AssistantMessageContent", - "AssistantMessageContentItem", - "AssistantMessageContentItem_Text", "AssistantMessageResponse", - "AssistantMessageResponseContentItem", - "AssistantMessageResponseContentItem_Text", + "BadRequestError", + "BadRequestErrorBody", "ChatContentDeltaEvent", "ChatContentDeltaEventDelta", "ChatContentDeltaEventDeltaMessage", @@ -110,11 +115,7 @@ "ChatContentStartEventDeltaMessage", "ChatContentStartEventDeltaMessageContent", "ChatFinishReason", - "ChatMessage2", - "ChatMessage2_Assistant", - "ChatMessage2_System", - "ChatMessage2_Tool", - "ChatMessage2_User", + "ChatMessage", "ChatMessageEndEvent", "ChatMessageEndEventDelta", "ChatMessageStartEvent", @@ -122,6 +123,7 @@ "ChatMessageStartEventDeltaMessage", "ChatMessages", "ChatStreamEventType", + "ChatStreamEventTypeType", "ChatToolCallDeltaEvent", "ChatToolCallDeltaEventDelta", "ChatToolCallDeltaEventDeltaToolCall", @@ -135,51 +137,59 @@ "ChatToolPlanDeltaEventDelta", "Citation", "CitationEndEvent", + "CitationOptions", + "CitationOptionsMode", "CitationStartEvent", "CitationStartEventDelta", "CitationStartEventDeltaMessage", - "Content", - "Content_Text", + "ClientClosedRequestError", + "ClientClosedRequestErrorBody", + "Document", + "DocumentContent", "DocumentSource", - "JsonResponseFormat2", - "NonStreamedChatResponse2", - "ResponseFormat2", - "ResponseFormat2_JsonObject", - "ResponseFormat2_Text", + "ForbiddenError", + "ForbiddenErrorBody", + "GatewayTimeoutError", + "GatewayTimeoutErrorBody", + "InternalServerError", + "InternalServerErrorBody", + "JsonResponseFormat", + "NonStreamedChatResponse", + "NotFoundError", + "NotFoundErrorBody", + "NotImplementedError", + "NotImplementedErrorBody", + "ResponseFormat", + "ServiceUnavailableError", + "ServiceUnavailableErrorBody", "Source", - "Source_Document", - "Source_Tool", - "StreamedChatResponse2", - "StreamedChatResponse2_CitationEnd", - "StreamedChatResponse2_CitationStart", - "StreamedChatResponse2_ContentDelta", - "StreamedChatResponse2_ContentEnd", - "StreamedChatResponse2_ContentStart", - "StreamedChatResponse2_MessageEnd", - "StreamedChatResponse2_MessageStart", - "StreamedChatResponse2_ToolCallDelta", - "StreamedChatResponse2_ToolCallEnd", - "StreamedChatResponse2_ToolCallStart", - "StreamedChatResponse2_ToolPlanDelta", + "StreamedChatResponse", "SystemMessage", "SystemMessageContent", - "SystemMessageContentItem", - "SystemMessageContentItem_Text", "TextContent", - "TextResponseFormat2", - "Tool2", - "Tool2Function", - "ToolCall2", - "ToolCall2Function", - "ToolMessage2", + "TextResponseFormat", + "TooManyRequestsError", + "TooManyRequestsErrorBody", + "Tool", + "ToolCall", + "ToolCallFunction", + "ToolContent", + "ToolFunction", + "ToolMessage", + "ToolMessageToolContent", "ToolSource", + "UnauthorizedError", + "UnauthorizedErrorBody", + "UnprocessableEntityError", + "UnprocessableEntityErrorBody", "Usage", "UsageBilledUnits", "UsageTokens", "UserMessage", "UserMessageContent", - "V2ChatRequestCitationMode", + "V2ChatRequestDocumentsItem", "V2ChatRequestSafetyMode", - "V2ChatStreamRequestCitationMode", + "V2ChatStreamRequestDocumentsItem", "V2ChatStreamRequestSafetyMode", + "v2", ] diff --git a/src/cohere/v2/client.py b/src/cohere/v2/client.py index 34340141f..700b3a07e 100644 --- a/src/cohere/v2/client.py +++ b/src/cohere/v2/client.py @@ -2,38 +2,45 
@@ import typing from ..core.client_wrapper import SyncClientWrapper -from .types.chat_messages import ChatMessages -from .types.tool2 import Tool2 -from .types.v2chat_stream_request_citation_mode import V2ChatStreamRequestCitationMode -from .types.response_format2 import ResponseFormat2 +from .v2.types.chat_messages import ChatMessages +from .v2.types.tool import Tool +from .types.v2chat_stream_request_documents_item import V2ChatStreamRequestDocumentsItem +from .v2.types.citation_options import CitationOptions +from .v2.types.response_format import ResponseFormat from .types.v2chat_stream_request_safety_mode import V2ChatStreamRequestSafetyMode from ..core.request_options import RequestOptions -from .types.streamed_chat_response2 import StreamedChatResponse2 +from .v2.types.streamed_chat_response import StreamedChatResponse from ..core.serialization import convert_and_respect_annotation_metadata import httpx_sse from ..core.unchecked_base_model import construct_type import json -from ..errors.bad_request_error import BadRequestError -from ..errors.unauthorized_error import UnauthorizedError -from ..errors.forbidden_error import ForbiddenError -from ..errors.not_found_error import NotFoundError -from ..errors.unprocessable_entity_error import UnprocessableEntityError -from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody -from ..errors.too_many_requests_error import TooManyRequestsError -from ..types.too_many_requests_error_body import TooManyRequestsErrorBody -from ..errors.client_closed_request_error import ClientClosedRequestError -from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody -from ..errors.internal_server_error import InternalServerError -from ..errors.not_implemented_error import NotImplementedError -from ..types.not_implemented_error_body import NotImplementedErrorBody -from ..errors.service_unavailable_error import ServiceUnavailableError -from ..errors.gateway_timeout_error import GatewayTimeoutError -from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody +from .errors.bad_request_error import BadRequestError +from .types.bad_request_error_body import BadRequestErrorBody +from .errors.unauthorized_error import UnauthorizedError +from .types.unauthorized_error_body import UnauthorizedErrorBody +from .errors.forbidden_error import ForbiddenError +from .types.forbidden_error_body import ForbiddenErrorBody +from .errors.not_found_error import NotFoundError +from .types.not_found_error_body import NotFoundErrorBody +from .errors.unprocessable_entity_error import UnprocessableEntityError +from .types.unprocessable_entity_error_body import UnprocessableEntityErrorBody +from .errors.too_many_requests_error import TooManyRequestsError +from .types.too_many_requests_error_body import TooManyRequestsErrorBody +from .errors.client_closed_request_error import ClientClosedRequestError +from .types.client_closed_request_error_body import ClientClosedRequestErrorBody +from .errors.internal_server_error import InternalServerError +from .types.internal_server_error_body import InternalServerErrorBody +from .errors.not_implemented_error import NotImplementedError +from .types.not_implemented_error_body import NotImplementedErrorBody +from .errors.service_unavailable_error import ServiceUnavailableError +from .types.service_unavailable_error_body import ServiceUnavailableErrorBody +from .errors.gateway_timeout_error import GatewayTimeoutError +from .types.gateway_timeout_error_body import GatewayTimeoutErrorBody from 
json.decoder import JSONDecodeError from ..core.api_error import ApiError -from .types.v2chat_request_citation_mode import V2ChatRequestCitationMode +from .types.v2chat_request_documents_item import V2ChatRequestDocumentsItem from .types.v2chat_request_safety_mode import V2ChatRequestSafetyMode -from .types.non_streamed_chat_response2 import NonStreamedChatResponse2 +from .v2.types.non_streamed_chat_response import NonStreamedChatResponse from ..core.client_wrapper import AsyncClientWrapper # this is used as the default value for optional parameters @@ -49,9 +56,10 @@ def chat_stream( *, model: str, messages: ChatMessages, - tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - citation_mode: typing.Optional[V2ChatStreamRequestCitationMode] = OMIT, - response_format: typing.Optional[ResponseFormat2] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT, + citation_options: typing.Optional[CitationOptions] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, @@ -63,7 +71,7 @@ def chat_stream( p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> typing.Iterator[StreamedChatResponse2]: + ) -> typing.Iterator[StreamedChatResponse]: """ Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. @@ -74,18 +82,19 @@ def chat_stream( messages : ChatMessages - tools : typing.Optional[typing.Sequence[Tool2]] + tools : typing.Optional[typing.Sequence[Tool]] A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - citation_mode : typing.Optional[V2ChatStreamRequestCitationMode] - Defaults to `"accurate"`. - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata. - response_format : typing.Optional[ResponseFormat2] + citation_options : typing.Optional[CitationOptions] + + response_format : typing.Optional[ResponseFormat] safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode] Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. 
@@ -149,17 +158,18 @@ def chat_stream( Yields ------ - typing.Iterator[StreamedChatResponse2] + typing.Iterator[StreamedChatResponse] Examples -------- from cohere import Client - from cohere.v2 import ( - ChatMessage2_User, - ResponseFormat2_Text, - Tool2, - Tool2Function, + from cohere.v2.v2 import ( + CitationOptions, + TextResponseFormat, + Tool, + ToolFunction, + UserMessage, ) client = Client( @@ -169,22 +179,24 @@ def chat_stream( response = client.v2.chat_stream( model="string", messages=[ - ChatMessage2_User( + UserMessage( content="string", - documents=[{"string": {"key": "value"}}], ) ], tools=[ - Tool2( - function=Tool2Function( + Tool( + function=ToolFunction( name="string", description="string", parameters={"string": {"key": "value"}}, ), ) ], - citation_mode="FAST", - response_format=ResponseFormat2_Text(), + documents=["string"], + citation_options=CitationOptions( + mode="FAST", + ), + response_format=TextResponseFormat(), safety_mode="CONTEXTUAL", max_tokens=1, stop_sequences=["string"], @@ -208,11 +220,16 @@ def chat_stream( object_=messages, annotation=ChatMessages, direction="write" ), "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool2], direction="write" + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "documents": convert_and_respect_annotation_metadata( + object_=documents, annotation=typing.Sequence[V2ChatStreamRequestDocumentsItem], direction="write" + ), + "citation_options": convert_and_respect_annotation_metadata( + object_=citation_options, annotation=CitationOptions, direction="write" ), - "citation_mode": citation_mode, "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat2, direction="write" + object_=response_format, annotation=ResponseFormat, direction="write" ), "safety_mode": safety_mode, "max_tokens": max_tokens, @@ -235,9 +252,9 @@ def chat_stream( for _sse in _event_source.iter_sse(): try: yield typing.cast( - StreamedChatResponse2, + StreamedChatResponse, construct_type( - type_=StreamedChatResponse2, # type: ignore + type_=StreamedChatResponse, # type: ignore object_=json.loads(_sse.data), ), ) @@ -248,9 +265,9 @@ def chat_stream( if _response.status_code == 400: raise BadRequestError( typing.cast( - typing.Optional[typing.Any], + BadRequestErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=BadRequestErrorBody, # type: ignore object_=_response.json(), ), ) @@ -258,9 +275,9 @@ def chat_stream( if _response.status_code == 401: raise UnauthorizedError( typing.cast( - typing.Optional[typing.Any], + UnauthorizedErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=UnauthorizedErrorBody, # type: ignore object_=_response.json(), ), ) @@ -268,9 +285,9 @@ def chat_stream( if _response.status_code == 403: raise ForbiddenError( typing.cast( - typing.Optional[typing.Any], + ForbiddenErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ForbiddenErrorBody, # type: ignore object_=_response.json(), ), ) @@ -278,9 +295,9 @@ def chat_stream( if _response.status_code == 404: raise NotFoundError( typing.cast( - typing.Optional[typing.Any], + NotFoundErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=NotFoundErrorBody, # type: ignore object_=_response.json(), ), ) @@ -318,9 +335,9 @@ def chat_stream( if _response.status_code == 500: raise InternalServerError( typing.cast( - 
typing.Optional[typing.Any], + InternalServerErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=InternalServerErrorBody, # type: ignore object_=_response.json(), ), ) @@ -338,9 +355,9 @@ def chat_stream( if _response.status_code == 503: raise ServiceUnavailableError( typing.cast( - typing.Optional[typing.Any], + ServiceUnavailableErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ServiceUnavailableErrorBody, # type: ignore object_=_response.json(), ), ) @@ -365,9 +382,10 @@ def chat( *, model: str, messages: ChatMessages, - tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - citation_mode: typing.Optional[V2ChatRequestCitationMode] = OMIT, - response_format: typing.Optional[ResponseFormat2] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT, + citation_options: typing.Optional[CitationOptions] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, @@ -379,7 +397,7 @@ def chat( p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> NonStreamedChatResponse2: + ) -> NonStreamedChatResponse: """ Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. @@ -390,18 +408,19 @@ def chat( messages : ChatMessages - tools : typing.Optional[typing.Sequence[Tool2]] + tools : typing.Optional[typing.Sequence[Tool]] A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - citation_mode : typing.Optional[V2ChatRequestCitationMode] - Defaults to `"accurate"`. - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata. + + citation_options : typing.Optional[CitationOptions] - response_format : typing.Optional[ResponseFormat2] + response_format : typing.Optional[ResponseFormat] safety_mode : typing.Optional[V2ChatRequestSafetyMode] Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. 
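The non-streaming `chat` method gets the same treatment in the hunks that follow: `Tool`, `documents`, `citation_options`, and a `NonStreamedChatResponse` return type. A sketch under the same placeholder assumptions as above:

```python
# Non-streaming counterpart, sketched: the documents list and citation mode
# below are illustrative values, not defaults from this patch.
from cohere import Client
from cohere.v2.v2 import CitationOptions, UserMessage

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

response = client.v2.chat(
    model="command-r-plus",
    messages=[UserMessage(content="Summarise the attached notes.")],
    documents=["Meeting notes: ..."],
    citation_options=CitationOptions(mode="FAST"),
)
print(response)  # a NonStreamedChatResponse, per the updated return annotation
```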
@@ -465,13 +484,13 @@ def chat( Returns ------- - NonStreamedChatResponse2 + NonStreamedChatResponse Examples -------- from cohere import Client - from cohere.v2 import ChatMessage2_Tool + from cohere.v2.v2 import UserMessage client = Client( client_name="YOUR_CLIENT_NAME", @@ -480,9 +499,8 @@ def chat( client.v2.chat( model="model", messages=[ - ChatMessage2_Tool( - tool_call_id="messages", - tool_content=["messages"], + UserMessage( + content="content", ) ], ) @@ -496,11 +514,16 @@ def chat( object_=messages, annotation=ChatMessages, direction="write" ), "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool2], direction="write" + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "documents": convert_and_respect_annotation_metadata( + object_=documents, annotation=typing.Sequence[V2ChatRequestDocumentsItem], direction="write" + ), + "citation_options": convert_and_respect_annotation_metadata( + object_=citation_options, annotation=CitationOptions, direction="write" ), - "citation_mode": citation_mode, "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat2, direction="write" + object_=response_format, annotation=ResponseFormat, direction="write" ), "safety_mode": safety_mode, "max_tokens": max_tokens, @@ -520,18 +543,18 @@ def chat( try: if 200 <= _response.status_code < 300: return typing.cast( - NonStreamedChatResponse2, + NonStreamedChatResponse, construct_type( - type_=NonStreamedChatResponse2, # type: ignore + type_=NonStreamedChatResponse, # type: ignore object_=_response.json(), ), ) if _response.status_code == 400: raise BadRequestError( typing.cast( - typing.Optional[typing.Any], + BadRequestErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=BadRequestErrorBody, # type: ignore object_=_response.json(), ), ) @@ -539,9 +562,9 @@ def chat( if _response.status_code == 401: raise UnauthorizedError( typing.cast( - typing.Optional[typing.Any], + UnauthorizedErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=UnauthorizedErrorBody, # type: ignore object_=_response.json(), ), ) @@ -549,9 +572,9 @@ def chat( if _response.status_code == 403: raise ForbiddenError( typing.cast( - typing.Optional[typing.Any], + ForbiddenErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ForbiddenErrorBody, # type: ignore object_=_response.json(), ), ) @@ -559,9 +582,9 @@ def chat( if _response.status_code == 404: raise NotFoundError( typing.cast( - typing.Optional[typing.Any], + NotFoundErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=NotFoundErrorBody, # type: ignore object_=_response.json(), ), ) @@ -599,9 +622,9 @@ def chat( if _response.status_code == 500: raise InternalServerError( typing.cast( - typing.Optional[typing.Any], + InternalServerErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=InternalServerErrorBody, # type: ignore object_=_response.json(), ), ) @@ -619,9 +642,9 @@ def chat( if _response.status_code == 503: raise ServiceUnavailableError( typing.cast( - typing.Optional[typing.Any], + ServiceUnavailableErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ServiceUnavailableErrorBody, # type: ignore object_=_response.json(), ), ) @@ -651,9 +674,10 @@ async def chat_stream( *, model: str, messages: ChatMessages, - tools: typing.Optional[typing.Sequence[Tool2]] 
= OMIT, - citation_mode: typing.Optional[V2ChatStreamRequestCitationMode] = OMIT, - response_format: typing.Optional[ResponseFormat2] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT, + citation_options: typing.Optional[CitationOptions] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, @@ -665,7 +689,7 @@ async def chat_stream( p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> typing.AsyncIterator[StreamedChatResponse2]: + ) -> typing.AsyncIterator[StreamedChatResponse]: """ Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. @@ -676,18 +700,19 @@ async def chat_stream( messages : ChatMessages - tools : typing.Optional[typing.Sequence[Tool2]] + tools : typing.Optional[typing.Sequence[Tool]] A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - citation_mode : typing.Optional[V2ChatStreamRequestCitationMode] - Defaults to `"accurate"`. - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata. + + citation_options : typing.Optional[CitationOptions] - response_format : typing.Optional[ResponseFormat2] + response_format : typing.Optional[ResponseFormat] safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode] Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. 
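The async client mirrors the sync signature change; since `chat_stream` is declared as an async generator returning `typing.AsyncIterator[StreamedChatResponse]`, it is consumed with `async for`. A sketch with the same placeholder values:

```python
# Async variant, sketched: mirrors the sync call but drives the
# AsyncIterator[StreamedChatResponse] with `async for`.
import asyncio

from cohere import AsyncClient
from cohere.v2.v2 import UserMessage

async def main() -> None:
    client = AsyncClient(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
    async for event in client.v2.chat_stream(
        model="command-r-plus",  # placeholder model id
        messages=[UserMessage(content="hello")],
    ):
        print(event)

asyncio.run(main())
```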
@@ -751,7 +776,7 @@ async def chat_stream( Yields ------ - typing.AsyncIterator[StreamedChatResponse2] + typing.AsyncIterator[StreamedChatResponse] Examples @@ -759,11 +784,12 @@ async def chat_stream( import asyncio from cohere import AsyncClient - from cohere.v2 import ( - ChatMessage2_User, - ResponseFormat2_Text, - Tool2, - Tool2Function, + from cohere.v2.v2 import ( + CitationOptions, + TextResponseFormat, + Tool, + ToolFunction, + UserMessage, ) client = AsyncClient( @@ -776,22 +802,24 @@ async def main() -> None: response = await client.v2.chat_stream( model="string", messages=[ - ChatMessage2_User( + UserMessage( content="string", - documents=[{"string": {"key": "value"}}], ) ], tools=[ - Tool2( - function=Tool2Function( + Tool( + function=ToolFunction( name="string", description="string", parameters={"string": {"key": "value"}}, ), ) ], - citation_mode="FAST", - response_format=ResponseFormat2_Text(), + documents=["string"], + citation_options=CitationOptions( + mode="FAST", + ), + response_format=TextResponseFormat(), safety_mode="CONTEXTUAL", max_tokens=1, stop_sequences=["string"], @@ -818,11 +846,16 @@ async def main() -> None: object_=messages, annotation=ChatMessages, direction="write" ), "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool2], direction="write" + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "documents": convert_and_respect_annotation_metadata( + object_=documents, annotation=typing.Sequence[V2ChatStreamRequestDocumentsItem], direction="write" + ), + "citation_options": convert_and_respect_annotation_metadata( + object_=citation_options, annotation=CitationOptions, direction="write" ), - "citation_mode": citation_mode, "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat2, direction="write" + object_=response_format, annotation=ResponseFormat, direction="write" ), "safety_mode": safety_mode, "max_tokens": max_tokens, @@ -845,9 +878,9 @@ async def main() -> None: async for _sse in _event_source.aiter_sse(): try: yield typing.cast( - StreamedChatResponse2, + StreamedChatResponse, construct_type( - type_=StreamedChatResponse2, # type: ignore + type_=StreamedChatResponse, # type: ignore object_=json.loads(_sse.data), ), ) @@ -858,9 +891,9 @@ async def main() -> None: if _response.status_code == 400: raise BadRequestError( typing.cast( - typing.Optional[typing.Any], + BadRequestErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=BadRequestErrorBody, # type: ignore object_=_response.json(), ), ) @@ -868,9 +901,9 @@ async def main() -> None: if _response.status_code == 401: raise UnauthorizedError( typing.cast( - typing.Optional[typing.Any], + UnauthorizedErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=UnauthorizedErrorBody, # type: ignore object_=_response.json(), ), ) @@ -878,9 +911,9 @@ async def main() -> None: if _response.status_code == 403: raise ForbiddenError( typing.cast( - typing.Optional[typing.Any], + ForbiddenErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ForbiddenErrorBody, # type: ignore object_=_response.json(), ), ) @@ -888,9 +921,9 @@ async def main() -> None: if _response.status_code == 404: raise NotFoundError( typing.cast( - typing.Optional[typing.Any], + NotFoundErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=NotFoundErrorBody, # type: ignore 
object_=_response.json(), ), ) @@ -928,9 +961,9 @@ async def main() -> None: if _response.status_code == 500: raise InternalServerError( typing.cast( - typing.Optional[typing.Any], + InternalServerErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=InternalServerErrorBody, # type: ignore object_=_response.json(), ), ) @@ -948,9 +981,9 @@ async def main() -> None: if _response.status_code == 503: raise ServiceUnavailableError( typing.cast( - typing.Optional[typing.Any], + ServiceUnavailableErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ServiceUnavailableErrorBody, # type: ignore object_=_response.json(), ), ) @@ -975,9 +1008,10 @@ async def chat( *, model: str, messages: ChatMessages, - tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - citation_mode: typing.Optional[V2ChatRequestCitationMode] = OMIT, - response_format: typing.Optional[ResponseFormat2] = OMIT, + tools: typing.Optional[typing.Sequence[Tool]] = OMIT, + documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT, + citation_options: typing.Optional[CitationOptions] = OMIT, + response_format: typing.Optional[ResponseFormat] = OMIT, safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, @@ -989,7 +1023,7 @@ async def chat( p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> NonStreamedChatResponse2: + ) -> NonStreamedChatResponse: """ Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. @@ -1000,18 +1034,19 @@ async def chat( messages : ChatMessages - tools : typing.Optional[typing.Sequence[Tool2]] + tools : typing.Optional[typing.Sequence[Tool]] A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - citation_mode : typing.Optional[V2ChatRequestCitationMode] - Defaults to `"accurate"`. - Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] + A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata. - response_format : typing.Optional[ResponseFormat2] + citation_options : typing.Optional[CitationOptions] + + response_format : typing.Optional[ResponseFormat] safety_mode : typing.Optional[V2ChatRequestSafetyMode] Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. 
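For consumers that dispatch on stream events, the deleted `StreamedChatResponse2` union further down in this patch discriminated on a `type` field ("message-start", "content-delta", and so on). Assuming the regenerated `StreamedChatResponse` keeps those same discriminants, which this patch does not show directly, a dispatch loop sketches as:

```python
# Sketch: assumes the new StreamedChatResponse union keeps the `type`
# discriminants of the StreamedChatResponse2 variants it replaces.
from cohere import Client
from cohere.v2.v2 import UserMessage

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

for event in client.v2.chat_stream(
    model="command-r-plus",  # placeholder model id
    messages=[UserMessage(content="hello")],
):
    if event.type == "content-delta":
        print(event.delta)  # incremental message content
    elif event.type == "message-end":
        print("done")
```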
@@ -1075,7 +1110,7 @@ async def chat( Returns ------- - NonStreamedChatResponse2 + NonStreamedChatResponse Examples @@ -1083,7 +1118,7 @@ async def chat( import asyncio from cohere import AsyncClient - from cohere.v2 import ChatMessage2_Tool + from cohere.v2.v2 import UserMessage client = AsyncClient( client_name="YOUR_CLIENT_NAME", @@ -1095,9 +1130,8 @@ async def main() -> None: await client.v2.chat( model="model", messages=[ - ChatMessage2_Tool( - tool_call_id="messages", - tool_content=["messages"], + UserMessage( + content="content", ) ], ) @@ -1114,11 +1148,16 @@ async def main() -> None: object_=messages, annotation=ChatMessages, direction="write" ), "tools": convert_and_respect_annotation_metadata( - object_=tools, annotation=typing.Sequence[Tool2], direction="write" + object_=tools, annotation=typing.Sequence[Tool], direction="write" + ), + "documents": convert_and_respect_annotation_metadata( + object_=documents, annotation=typing.Sequence[V2ChatRequestDocumentsItem], direction="write" + ), + "citation_options": convert_and_respect_annotation_metadata( + object_=citation_options, annotation=CitationOptions, direction="write" ), - "citation_mode": citation_mode, "response_format": convert_and_respect_annotation_metadata( - object_=response_format, annotation=ResponseFormat2, direction="write" + object_=response_format, annotation=ResponseFormat, direction="write" ), "safety_mode": safety_mode, "max_tokens": max_tokens, @@ -1138,18 +1177,18 @@ async def main() -> None: try: if 200 <= _response.status_code < 300: return typing.cast( - NonStreamedChatResponse2, + NonStreamedChatResponse, construct_type( - type_=NonStreamedChatResponse2, # type: ignore + type_=NonStreamedChatResponse, # type: ignore object_=_response.json(), ), ) if _response.status_code == 400: raise BadRequestError( typing.cast( - typing.Optional[typing.Any], + BadRequestErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=BadRequestErrorBody, # type: ignore object_=_response.json(), ), ) @@ -1157,9 +1196,9 @@ async def main() -> None: if _response.status_code == 401: raise UnauthorizedError( typing.cast( - typing.Optional[typing.Any], + UnauthorizedErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=UnauthorizedErrorBody, # type: ignore object_=_response.json(), ), ) @@ -1167,9 +1206,9 @@ async def main() -> None: if _response.status_code == 403: raise ForbiddenError( typing.cast( - typing.Optional[typing.Any], + ForbiddenErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=ForbiddenErrorBody, # type: ignore object_=_response.json(), ), ) @@ -1177,9 +1216,9 @@ async def main() -> None: if _response.status_code == 404: raise NotFoundError( typing.cast( - typing.Optional[typing.Any], + NotFoundErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=NotFoundErrorBody, # type: ignore object_=_response.json(), ), ) @@ -1217,9 +1256,9 @@ async def main() -> None: if _response.status_code == 500: raise InternalServerError( typing.cast( - typing.Optional[typing.Any], + InternalServerErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + type_=InternalServerErrorBody, # type: ignore object_=_response.json(), ), ) @@ -1237,9 +1276,9 @@ async def main() -> None: if _response.status_code == 503: raise ServiceUnavailableError( typing.cast( - typing.Optional[typing.Any], + ServiceUnavailableErrorBody, construct_type( - type_=typing.Optional[typing.Any], # type: ignore + 
type_=ServiceUnavailableErrorBody, # type: ignore object_=_response.json(), ), ) diff --git a/src/cohere/v2/errors/__init__.py b/src/cohere/v2/errors/__init__.py new file mode 100644 index 000000000..c48c1d74a --- /dev/null +++ b/src/cohere/v2/errors/__init__.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +from .bad_request_error import BadRequestError +from .client_closed_request_error import ClientClosedRequestError +from .forbidden_error import ForbiddenError +from .gateway_timeout_error import GatewayTimeoutError +from .internal_server_error import InternalServerError +from .not_found_error import NotFoundError +from .not_implemented_error import NotImplementedError +from .service_unavailable_error import ServiceUnavailableError +from .too_many_requests_error import TooManyRequestsError +from .unauthorized_error import UnauthorizedError +from .unprocessable_entity_error import UnprocessableEntityError + +__all__ = [ + "BadRequestError", + "ClientClosedRequestError", + "ForbiddenError", + "GatewayTimeoutError", + "InternalServerError", + "NotFoundError", + "NotImplementedError", + "ServiceUnavailableError", + "TooManyRequestsError", + "UnauthorizedError", + "UnprocessableEntityError", +] diff --git a/src/cohere/v2/errors/bad_request_error.py b/src/cohere/v2/errors/bad_request_error.py new file mode 100644 index 000000000..b4897e0fc --- /dev/null +++ b/src/cohere/v2/errors/bad_request_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.bad_request_error_body import BadRequestErrorBody + + +class BadRequestError(ApiError): + def __init__(self, body: BadRequestErrorBody): + super().__init__(status_code=400, body=body) diff --git a/src/cohere/v2/errors/client_closed_request_error.py b/src/cohere/v2/errors/client_closed_request_error.py new file mode 100644 index 000000000..2e08bde1c --- /dev/null +++ b/src/cohere/v2/errors/client_closed_request_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody + + +class ClientClosedRequestError(ApiError): + def __init__(self, body: ClientClosedRequestErrorBody): + super().__init__(status_code=499, body=body) diff --git a/src/cohere/v2/errors/forbidden_error.py b/src/cohere/v2/errors/forbidden_error.py new file mode 100644 index 000000000..fa99df422 --- /dev/null +++ b/src/cohere/v2/errors/forbidden_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.forbidden_error_body import ForbiddenErrorBody + + +class ForbiddenError(ApiError): + def __init__(self, body: ForbiddenErrorBody): + super().__init__(status_code=403, body=body) diff --git a/src/cohere/v2/errors/gateway_timeout_error.py b/src/cohere/v2/errors/gateway_timeout_error.py new file mode 100644 index 000000000..354bb0488 --- /dev/null +++ b/src/cohere/v2/errors/gateway_timeout_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from ...core.api_error import ApiError +from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody + + +class GatewayTimeoutError(ApiError): + def __init__(self, body: GatewayTimeoutErrorBody): + super().__init__(status_code=504, body=body) diff --git a/src/cohere/v2/errors/internal_server_error.py b/src/cohere/v2/errors/internal_server_error.py new file mode 100644 index 000000000..bb3d0bcfc --- /dev/null +++ b/src/cohere/v2/errors/internal_server_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.internal_server_error_body import InternalServerErrorBody + + +class InternalServerError(ApiError): + def __init__(self, body: InternalServerErrorBody): + super().__init__(status_code=500, body=body) diff --git a/src/cohere/v2/errors/not_found_error.py b/src/cohere/v2/errors/not_found_error.py new file mode 100644 index 000000000..8a96cd54f --- /dev/null +++ b/src/cohere/v2/errors/not_found_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.not_found_error_body import NotFoundErrorBody + + +class NotFoundError(ApiError): + def __init__(self, body: NotFoundErrorBody): + super().__init__(status_code=404, body=body) diff --git a/src/cohere/v2/errors/not_implemented_error.py b/src/cohere/v2/errors/not_implemented_error.py new file mode 100644 index 000000000..e07f0b851 --- /dev/null +++ b/src/cohere/v2/errors/not_implemented_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.not_implemented_error_body import NotImplementedErrorBody + + +class NotImplementedError(ApiError): + def __init__(self, body: NotImplementedErrorBody): + super().__init__(status_code=501, body=body) diff --git a/src/cohere/v2/errors/service_unavailable_error.py b/src/cohere/v2/errors/service_unavailable_error.py new file mode 100644 index 000000000..68101be27 --- /dev/null +++ b/src/cohere/v2/errors/service_unavailable_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.service_unavailable_error_body import ServiceUnavailableErrorBody + + +class ServiceUnavailableError(ApiError): + def __init__(self, body: ServiceUnavailableErrorBody): + super().__init__(status_code=503, body=body) diff --git a/src/cohere/v2/errors/too_many_requests_error.py b/src/cohere/v2/errors/too_many_requests_error.py new file mode 100644 index 000000000..091dffcab --- /dev/null +++ b/src/cohere/v2/errors/too_many_requests_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.too_many_requests_error_body import TooManyRequestsErrorBody + + +class TooManyRequestsError(ApiError): + def __init__(self, body: TooManyRequestsErrorBody): + super().__init__(status_code=429, body=body) diff --git a/src/cohere/v2/errors/unauthorized_error.py b/src/cohere/v2/errors/unauthorized_error.py new file mode 100644 index 000000000..c46044e43 --- /dev/null +++ b/src/cohere/v2/errors/unauthorized_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from ...core.api_error import ApiError +from ..types.unauthorized_error_body import UnauthorizedErrorBody + + +class UnauthorizedError(ApiError): + def __init__(self, body: UnauthorizedErrorBody): + super().__init__(status_code=401, body=body) diff --git a/src/cohere/v2/errors/unprocessable_entity_error.py b/src/cohere/v2/errors/unprocessable_entity_error.py new file mode 100644 index 000000000..061f6bec1 --- /dev/null +++ b/src/cohere/v2/errors/unprocessable_entity_error.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.api_error import ApiError +from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody + + +class UnprocessableEntityError(ApiError): + def __init__(self, body: UnprocessableEntityErrorBody): + super().__init__(status_code=422, body=body) diff --git a/src/cohere/v2/types/__init__.py b/src/cohere/v2/types/__init__.py index 33038791e..389f6605a 100644 --- a/src/cohere/v2/types/__init__.py +++ b/src/cohere/v2/types/__init__.py @@ -1,182 +1,35 @@ # This file was auto-generated by Fern from our API Definition. -from .assistant_message import AssistantMessage -from .assistant_message_content import AssistantMessageContent -from .assistant_message_content_item import AssistantMessageContentItem, AssistantMessageContentItem_Text -from .assistant_message_response import AssistantMessageResponse -from .assistant_message_response_content_item import ( - AssistantMessageResponseContentItem, - AssistantMessageResponseContentItem_Text, -) -from .chat_content_delta_event import ChatContentDeltaEvent -from .chat_content_delta_event_delta import ChatContentDeltaEventDelta -from .chat_content_delta_event_delta_message import ChatContentDeltaEventDeltaMessage -from .chat_content_delta_event_delta_message_content import ChatContentDeltaEventDeltaMessageContent -from .chat_content_end_event import ChatContentEndEvent -from .chat_content_start_event import ChatContentStartEvent -from .chat_content_start_event_delta import ChatContentStartEventDelta -from .chat_content_start_event_delta_message import ChatContentStartEventDeltaMessage -from .chat_content_start_event_delta_message_content import ChatContentStartEventDeltaMessageContent -from .chat_finish_reason import ChatFinishReason -from .chat_message2 import ( - ChatMessage2, - ChatMessage2_Assistant, - ChatMessage2_System, - ChatMessage2_Tool, - ChatMessage2_User, -) -from .chat_message_end_event import ChatMessageEndEvent -from .chat_message_end_event_delta import ChatMessageEndEventDelta -from .chat_message_start_event import ChatMessageStartEvent -from .chat_message_start_event_delta import ChatMessageStartEventDelta -from .chat_message_start_event_delta_message import ChatMessageStartEventDeltaMessage -from .chat_messages import ChatMessages -from .chat_stream_event_type import ChatStreamEventType -from .chat_tool_call_delta_event import ChatToolCallDeltaEvent -from .chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta -from .chat_tool_call_delta_event_delta_tool_call import ChatToolCallDeltaEventDeltaToolCall -from .chat_tool_call_delta_event_delta_tool_call_function import ChatToolCallDeltaEventDeltaToolCallFunction -from .chat_tool_call_end_event import ChatToolCallEndEvent -from .chat_tool_call_start_event import ChatToolCallStartEvent -from .chat_tool_call_start_event_delta import ChatToolCallStartEventDelta -from .chat_tool_call_start_event_delta_tool_call import ChatToolCallStartEventDeltaToolCall -from 
.chat_tool_call_start_event_delta_tool_call_function import ChatToolCallStartEventDeltaToolCallFunction -from .chat_tool_plan_delta_event import ChatToolPlanDeltaEvent -from .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta -from .citation import Citation -from .citation_end_event import CitationEndEvent -from .citation_start_event import CitationStartEvent -from .citation_start_event_delta import CitationStartEventDelta -from .citation_start_event_delta_message import CitationStartEventDeltaMessage -from .content import Content, Content_Text -from .document_source import DocumentSource -from .json_response_format2 import JsonResponseFormat2 -from .non_streamed_chat_response2 import NonStreamedChatResponse2 -from .response_format2 import ResponseFormat2, ResponseFormat2_JsonObject, ResponseFormat2_Text -from .source import Source, Source_Document, Source_Tool -from .streamed_chat_response2 import ( - StreamedChatResponse2, - StreamedChatResponse2_CitationEnd, - StreamedChatResponse2_CitationStart, - StreamedChatResponse2_ContentDelta, - StreamedChatResponse2_ContentEnd, - StreamedChatResponse2_ContentStart, - StreamedChatResponse2_MessageEnd, - StreamedChatResponse2_MessageStart, - StreamedChatResponse2_ToolCallDelta, - StreamedChatResponse2_ToolCallEnd, - StreamedChatResponse2_ToolCallStart, - StreamedChatResponse2_ToolPlanDelta, -) -from .system_message import SystemMessage -from .system_message_content import SystemMessageContent -from .system_message_content_item import SystemMessageContentItem, SystemMessageContentItem_Text -from .text_content import TextContent -from .text_response_format2 import TextResponseFormat2 -from .tool2 import Tool2 -from .tool2function import Tool2Function -from .tool_call2 import ToolCall2 -from .tool_call2function import ToolCall2Function -from .tool_message2 import ToolMessage2 -from .tool_source import ToolSource -from .usage import Usage -from .usage_billed_units import UsageBilledUnits -from .usage_tokens import UsageTokens -from .user_message import UserMessage -from .user_message_content import UserMessageContent -from .v2chat_request_citation_mode import V2ChatRequestCitationMode +from .bad_request_error_body import BadRequestErrorBody +from .client_closed_request_error_body import ClientClosedRequestErrorBody +from .forbidden_error_body import ForbiddenErrorBody +from .gateway_timeout_error_body import GatewayTimeoutErrorBody +from .internal_server_error_body import InternalServerErrorBody +from .not_found_error_body import NotFoundErrorBody +from .not_implemented_error_body import NotImplementedErrorBody +from .service_unavailable_error_body import ServiceUnavailableErrorBody +from .too_many_requests_error_body import TooManyRequestsErrorBody +from .unauthorized_error_body import UnauthorizedErrorBody +from .unprocessable_entity_error_body import UnprocessableEntityErrorBody +from .v2chat_request_documents_item import V2ChatRequestDocumentsItem from .v2chat_request_safety_mode import V2ChatRequestSafetyMode -from .v2chat_stream_request_citation_mode import V2ChatStreamRequestCitationMode +from .v2chat_stream_request_documents_item import V2ChatStreamRequestDocumentsItem from .v2chat_stream_request_safety_mode import V2ChatStreamRequestSafetyMode __all__ = [ - "AssistantMessage", - "AssistantMessageContent", - "AssistantMessageContentItem", - "AssistantMessageContentItem_Text", - "AssistantMessageResponse", - "AssistantMessageResponseContentItem", - "AssistantMessageResponseContentItem_Text", - "ChatContentDeltaEvent", - 
"ChatContentDeltaEventDelta", - "ChatContentDeltaEventDeltaMessage", - "ChatContentDeltaEventDeltaMessageContent", - "ChatContentEndEvent", - "ChatContentStartEvent", - "ChatContentStartEventDelta", - "ChatContentStartEventDeltaMessage", - "ChatContentStartEventDeltaMessageContent", - "ChatFinishReason", - "ChatMessage2", - "ChatMessage2_Assistant", - "ChatMessage2_System", - "ChatMessage2_Tool", - "ChatMessage2_User", - "ChatMessageEndEvent", - "ChatMessageEndEventDelta", - "ChatMessageStartEvent", - "ChatMessageStartEventDelta", - "ChatMessageStartEventDeltaMessage", - "ChatMessages", - "ChatStreamEventType", - "ChatToolCallDeltaEvent", - "ChatToolCallDeltaEventDelta", - "ChatToolCallDeltaEventDeltaToolCall", - "ChatToolCallDeltaEventDeltaToolCallFunction", - "ChatToolCallEndEvent", - "ChatToolCallStartEvent", - "ChatToolCallStartEventDelta", - "ChatToolCallStartEventDeltaToolCall", - "ChatToolCallStartEventDeltaToolCallFunction", - "ChatToolPlanDeltaEvent", - "ChatToolPlanDeltaEventDelta", - "Citation", - "CitationEndEvent", - "CitationStartEvent", - "CitationStartEventDelta", - "CitationStartEventDeltaMessage", - "Content", - "Content_Text", - "DocumentSource", - "JsonResponseFormat2", - "NonStreamedChatResponse2", - "ResponseFormat2", - "ResponseFormat2_JsonObject", - "ResponseFormat2_Text", - "Source", - "Source_Document", - "Source_Tool", - "StreamedChatResponse2", - "StreamedChatResponse2_CitationEnd", - "StreamedChatResponse2_CitationStart", - "StreamedChatResponse2_ContentDelta", - "StreamedChatResponse2_ContentEnd", - "StreamedChatResponse2_ContentStart", - "StreamedChatResponse2_MessageEnd", - "StreamedChatResponse2_MessageStart", - "StreamedChatResponse2_ToolCallDelta", - "StreamedChatResponse2_ToolCallEnd", - "StreamedChatResponse2_ToolCallStart", - "StreamedChatResponse2_ToolPlanDelta", - "SystemMessage", - "SystemMessageContent", - "SystemMessageContentItem", - "SystemMessageContentItem_Text", - "TextContent", - "TextResponseFormat2", - "Tool2", - "Tool2Function", - "ToolCall2", - "ToolCall2Function", - "ToolMessage2", - "ToolSource", - "Usage", - "UsageBilledUnits", - "UsageTokens", - "UserMessage", - "UserMessageContent", - "V2ChatRequestCitationMode", + "BadRequestErrorBody", + "ClientClosedRequestErrorBody", + "ForbiddenErrorBody", + "GatewayTimeoutErrorBody", + "InternalServerErrorBody", + "NotFoundErrorBody", + "NotImplementedErrorBody", + "ServiceUnavailableErrorBody", + "TooManyRequestsErrorBody", + "UnauthorizedErrorBody", + "UnprocessableEntityErrorBody", + "V2ChatRequestDocumentsItem", "V2ChatRequestSafetyMode", - "V2ChatStreamRequestCitationMode", + "V2ChatStreamRequestDocumentsItem", "V2ChatStreamRequestSafetyMode", ] diff --git a/src/cohere/v2/types/assistant_message_content.py b/src/cohere/v2/types/assistant_message_content.py deleted file mode 100644 index e18ea5f47..000000000 --- a/src/cohere/v2/types/assistant_message_content.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing -from .assistant_message_content_item import AssistantMessageContentItem - -AssistantMessageContent = typing.Union[str, typing.List[AssistantMessageContentItem]] diff --git a/src/cohere/v2/types/assistant_message_content_item.py b/src/cohere/v2/types/assistant_message_content_item.py deleted file mode 100644 index 7f2936c96..000000000 --- a/src/cohere/v2/types/assistant_message_content_item.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class AssistantMessageContentItem_Text(UncheckedBaseModel): - type: typing.Literal["text"] = "text" - text: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -AssistantMessageContentItem = typing_extensions.Annotated[ - AssistantMessageContentItem_Text, UnionMetadata(discriminant="type") -] diff --git a/src/cohere/v2/types/assistant_message_response_content_item.py b/src/cohere/v2/types/assistant_message_response_content_item.py deleted file mode 100644 index 2c49973f5..000000000 --- a/src/cohere/v2/types/assistant_message_response_content_item.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class AssistantMessageResponseContentItem_Text(UncheckedBaseModel): - type: typing.Literal["text"] = "text" - text: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -AssistantMessageResponseContentItem = typing_extensions.Annotated[ - AssistantMessageResponseContentItem_Text, UnionMetadata(discriminant="type") -] diff --git a/src/cohere/v2/types/bad_request_error_body.py b/src/cohere/v2/types/bad_request_error_body.py new file mode 100644 index 000000000..2bc0b65da --- /dev/null +++ b/src/cohere/v2/types/bad_request_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class BadRequestErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/chat_message2.py b/src/cohere/v2/types/chat_message2.py deleted file mode 100644 index 0abc57ff4..000000000 --- a/src/cohere/v2/types/chat_message2.py +++ /dev/null @@ -1,98 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from .user_message_content import UserMessageContent -from ...types.chat_document import ChatDocument -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -from .tool_call2 import ToolCall2 -from .assistant_message_content import AssistantMessageContent -from .citation import Citation -from .system_message_content import SystemMessageContent -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class ChatMessage2_User(UncheckedBaseModel): - """ - Represents a single message in the chat history from a given role. - """ - - role: typing.Literal["user"] = "user" - content: UserMessageContent - documents: typing.Optional[typing.List[ChatDocument]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class ChatMessage2_Assistant(UncheckedBaseModel): - """ - Represents a single message in the chat history from a given role. - """ - - role: typing.Literal["assistant"] = "assistant" - tool_calls: typing.Optional[typing.List[ToolCall2]] = None - tool_plan: typing.Optional[str] = None - content: typing.Optional[AssistantMessageContent] = None - citations: typing.Optional[typing.List[Citation]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class ChatMessage2_System(UncheckedBaseModel): - """ - Represents a single message in the chat history from a given role. - """ - - role: typing.Literal["system"] = "system" - content: SystemMessageContent - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class ChatMessage2_Tool(UncheckedBaseModel): - """ - Represents a single message in the chat history from a given role. - """ - - role: typing.Literal["tool"] = "tool" - tool_call_id: str - tool_content: typing.List[str] - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -ChatMessage2 = typing_extensions.Annotated[ - typing.Union[ChatMessage2_User, ChatMessage2_Assistant, ChatMessage2_System, ChatMessage2_Tool], - UnionMetadata(discriminant="role"), -] diff --git a/src/cohere/v2/types/chat_messages.py b/src/cohere/v2/types/chat_messages.py deleted file mode 100644 index 5daf64301..000000000 --- a/src/cohere/v2/types/chat_messages.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing -from .chat_message2 import ChatMessage2 - -ChatMessages = typing.List[ChatMessage2] diff --git a/src/cohere/types/client_closed_request_error_body.py b/src/cohere/v2/types/client_closed_request_error_body.py similarity index 80% rename from src/cohere/types/client_closed_request_error_body.py rename to src/cohere/v2/types/client_closed_request_error_body.py index ad4bd2c0e..f178bf8bb 100644 --- a/src/cohere/types/client_closed_request_error_body.py +++ b/src/cohere/v2/types/client_closed_request_error_body.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/content.py b/src/cohere/v2/types/content.py deleted file mode 100644 index 597aa8348..000000000 --- a/src/cohere/v2/types/content.py +++ /dev/null @@ -1,30 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class Content_Text(UncheckedBaseModel): - """ - A Content block which contains information about the content type and the content itself. - """ - - type: typing.Literal["text"] = "text" - text: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -Content = typing_extensions.Annotated[Content_Text, UnionMetadata(discriminant="type")] diff --git a/src/cohere/v2/types/forbidden_error_body.py b/src/cohere/v2/types/forbidden_error_body.py new file mode 100644 index 000000000..8f4a1b1f4 --- /dev/null +++ b/src/cohere/v2/types/forbidden_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class ForbiddenErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/types/gateway_timeout_error_body.py b/src/cohere/v2/types/gateway_timeout_error_body.py similarity index 80% rename from src/cohere/types/gateway_timeout_error_body.py rename to src/cohere/v2/types/gateway_timeout_error_body.py index 03c315fd9..880893f4b 100644 --- a/src/cohere/types/gateway_timeout_error_body.py +++ b/src/cohere/v2/types/gateway_timeout_error_body.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. 
-from ..core.unchecked_base_model import UncheckedBaseModel +from ...core.unchecked_base_model import UncheckedBaseModel import typing -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/internal_server_error_body.py b/src/cohere/v2/types/internal_server_error_body.py new file mode 100644 index 000000000..fed8a0864 --- /dev/null +++ b/src/cohere/v2/types/internal_server_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class InternalServerErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/text_response_format2.py b/src/cohere/v2/types/not_found_error_body.py similarity index 85% rename from src/cohere/v2/types/text_response_format2.py rename to src/cohere/v2/types/not_found_error_body.py index 3a6866d58..4e5ebfc83 100644 --- a/src/cohere/v2/types/text_response_format2.py +++ b/src/cohere/v2/types/not_found_error_body.py @@ -1,12 +1,14 @@ # This file was auto-generated by Fern from our API Definition. from ...core.unchecked_base_model import UncheckedBaseModel -from ...core.pydantic_utilities import IS_PYDANTIC_V2 import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic -class TextResponseFormat2(UncheckedBaseModel): +class NotFoundErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 else: diff --git a/src/cohere/v2/types/not_implemented_error_body.py b/src/cohere/v2/types/not_implemented_error_body.py new file mode 100644 index 000000000..c2d212986 --- /dev/null +++ b/src/cohere/v2/types/not_implemented_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class NotImplementedErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/response_format2.py b/src/cohere/v2/types/response_format2.py deleted file mode 100644 index 6d3316b3c..000000000 --- a/src/cohere/v2/types/response_format2.py +++ /dev/null @@ -1,63 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class ResponseFormat2_Text(UncheckedBaseModel): - """ - Configuration for forcing the model output to adhere to the specified format. 
Supported on [Command R](https://docs.cohere.com/docs/command-r), [Command R+](https://docs.cohere.com/docs/command-r-plus) and newer models. - - The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. - - A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure. - - **Note**: When using `{ "type": "json_object" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _"Generate a JSON ..."_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length. - **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided). - """ - - type: typing.Literal["text"] = "text" - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class ResponseFormat2_JsonObject(UncheckedBaseModel): - """ - Configuration for forcing the model output to adhere to the specified format. Supported on [Command R](https://docs.cohere.com/docs/command-r), [Command R+](https://docs.cohere.com/docs/command-r-plus) and newer models. - - The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. - - A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure. - - **Note**: When using `{ "type": "json_object" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _"Generate a JSON ..."_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length. - **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided). - """ - - type: typing.Literal["json_object"] = "json_object" - json_schema: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -ResponseFormat2 = typing_extensions.Annotated[ - typing.Union[ResponseFormat2_Text, ResponseFormat2_JsonObject], UnionMetadata(discriminant="type") -] diff --git a/src/cohere/v2/types/service_unavailable_error_body.py b/src/cohere/v2/types/service_unavailable_error_body.py new file mode 100644 index 000000000..979f16896 --- /dev/null +++ b/src/cohere/v2/types/service_unavailable_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class ServiceUnavailableErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/source.py b/src/cohere/v2/types/source.py deleted file mode 100644 index a96fc9e6c..000000000 --- a/src/cohere/v2/types/source.py +++ /dev/null @@ -1,50 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class Source_Tool(UncheckedBaseModel): - """ - A source object containing information about the source of the data cited. - """ - - type: typing.Literal["tool"] = "tool" - id: typing.Optional[str] = None - tool_output: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class Source_Document(UncheckedBaseModel): - """ - A source object containing information about the source of the data cited. - """ - - type: typing.Literal["document"] = "document" - id: typing.Optional[str] = None - document: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -Source = typing_extensions.Annotated[typing.Union[Source_Tool, Source_Document], UnionMetadata(discriminant="type")] diff --git a/src/cohere/v2/types/streamed_chat_response2.py b/src/cohere/v2/types/streamed_chat_response2.py deleted file mode 100644 index ab8c49249..000000000 --- a/src/cohere/v2/types/streamed_chat_response2.py +++ /dev/null @@ -1,240 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from .chat_message_start_event_delta import ChatMessageStartEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -from .chat_content_start_event_delta import ChatContentStartEventDelta -from .chat_content_delta_event_delta import ChatContentDeltaEventDelta -from .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta -from .chat_tool_call_start_event_delta import ChatToolCallStartEventDelta -from .chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta -from .citation_start_event_delta import CitationStartEventDelta -from .chat_message_end_event_delta import ChatMessageEndEventDelta -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class StreamedChatResponse2_MessageStart(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["message-start"] = "message-start" - id: typing.Optional[str] = None - delta: typing.Optional[ChatMessageStartEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ContentStart(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["content-start"] = "content-start" - index: typing.Optional[int] = None - delta: typing.Optional[ChatContentStartEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ContentDelta(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["content-delta"] = "content-delta" - index: typing.Optional[int] = None - delta: typing.Optional[ChatContentDeltaEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ContentEnd(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["content-end"] = "content-end" - index: typing.Optional[int] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ToolPlanDelta(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). 
- """ - - type: typing.Literal["tool-plan-delta"] = "tool-plan-delta" - delta: typing.Optional[ChatToolPlanDeltaEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ToolCallStart(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["tool-call-start"] = "tool-call-start" - index: typing.Optional[int] = None - delta: typing.Optional[ChatToolCallStartEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ToolCallDelta(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["tool-call-delta"] = "tool-call-delta" - index: typing.Optional[int] = None - delta: typing.Optional[ChatToolCallDeltaEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_ToolCallEnd(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["tool-call-end"] = "tool-call-end" - index: typing.Optional[int] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_CitationStart(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["citation-start"] = "citation-start" - index: typing.Optional[int] = None - delta: typing.Optional[CitationStartEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_CitationEnd(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). - """ - - type: typing.Literal["citation-end"] = "citation-end" - index: typing.Optional[int] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -class StreamedChatResponse2_MessageEnd(UncheckedBaseModel): - """ - StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). 
- """ - - type: typing.Literal["message-end"] = "message-end" - id: typing.Optional[str] = None - delta: typing.Optional[ChatMessageEndEventDelta] = None - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -StreamedChatResponse2 = typing_extensions.Annotated[ - typing.Union[ - StreamedChatResponse2_MessageStart, - StreamedChatResponse2_ContentStart, - StreamedChatResponse2_ContentDelta, - StreamedChatResponse2_ContentEnd, - StreamedChatResponse2_ToolPlanDelta, - StreamedChatResponse2_ToolCallStart, - StreamedChatResponse2_ToolCallDelta, - StreamedChatResponse2_ToolCallEnd, - StreamedChatResponse2_CitationStart, - StreamedChatResponse2_CitationEnd, - StreamedChatResponse2_MessageEnd, - ], - UnionMetadata(discriminant="type"), -] diff --git a/src/cohere/v2/types/system_message_content.py b/src/cohere/v2/types/system_message_content.py deleted file mode 100644 index d842337e4..000000000 --- a/src/cohere/v2/types/system_message_content.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing -from .system_message_content_item import SystemMessageContentItem - -SystemMessageContent = typing.Union[str, typing.List[SystemMessageContentItem]] diff --git a/src/cohere/v2/types/system_message_content_item.py b/src/cohere/v2/types/system_message_content_item.py deleted file mode 100644 index ed95a8a90..000000000 --- a/src/cohere/v2/types/system_message_content_item.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -from __future__ import annotations -from ...core.unchecked_base_model import UncheckedBaseModel -import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -import pydantic -import typing_extensions -from ...core.unchecked_base_model import UnionMetadata - - -class SystemMessageContentItem_Text(UncheckedBaseModel): - type: typing.Literal["text"] = "text" - text: str - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -SystemMessageContentItem = typing_extensions.Annotated[ - SystemMessageContentItem_Text, UnionMetadata(discriminant="type") -] diff --git a/src/cohere/v2/types/too_many_requests_error_body.py b/src/cohere/v2/types/too_many_requests_error_body.py new file mode 100644 index 000000000..99f763209 --- /dev/null +++ b/src/cohere/v2/types/too_many_requests_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class TooManyRequestsErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/unauthorized_error_body.py b/src/cohere/v2/types/unauthorized_error_body.py new file mode 100644 index 000000000..10e7055d8 --- /dev/null +++ b/src/cohere/v2/types/unauthorized_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class UnauthorizedErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/unprocessable_entity_error_body.py b/src/cohere/v2/types/unprocessable_entity_error_body.py new file mode 100644 index 000000000..34c3adea2 --- /dev/null +++ b/src/cohere/v2/types/unprocessable_entity_error_body.py @@ -0,0 +1,19 @@ +# This file was auto-generated by Fern from our API Definition. + +from ...core.unchecked_base_model import UncheckedBaseModel +import typing +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class UnprocessableEntityErrorBody(UncheckedBaseModel): + data: typing.Optional[str] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/user_message_content.py b/src/cohere/v2/types/user_message_content.py deleted file mode 100644 index e4e3c7ebb..000000000 --- a/src/cohere/v2/types/user_message_content.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing -from .content import Content - -UserMessageContent = typing.Union[str, typing.List[Content]] diff --git a/src/cohere/v2/types/v2chat_request_citation_mode.py b/src/cohere/v2/types/v2chat_request_citation_mode.py deleted file mode 100644 index 9b5dc7d5a..000000000 --- a/src/cohere/v2/types/v2chat_request_citation_mode.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -V2ChatRequestCitationMode = typing.Union[typing.Literal["FAST", "ACCURATE", "OFF"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_request_documents_item.py b/src/cohere/v2/types/v2chat_request_documents_item.py new file mode 100644 index 000000000..ecf50c80b --- /dev/null +++ b/src/cohere/v2/types/v2chat_request_documents_item.py @@ -0,0 +1,6 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing +from ..v2.types.document import Document + +V2ChatRequestDocumentsItem = typing.Union[str, Document] diff --git a/src/cohere/v2/types/v2chat_stream_request_citation_mode.py b/src/cohere/v2/types/v2chat_stream_request_citation_mode.py deleted file mode 100644 index 2e07c9ebe..000000000 --- a/src/cohere/v2/types/v2chat_stream_request_citation_mode.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -V2ChatStreamRequestCitationMode = typing.Union[typing.Literal["FAST", "ACCURATE", "OFF"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_stream_request_documents_item.py b/src/cohere/v2/types/v2chat_stream_request_documents_item.py new file mode 100644 index 000000000..9968261bb --- /dev/null +++ b/src/cohere/v2/types/v2chat_stream_request_documents_item.py @@ -0,0 +1,6 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from ..v2.types.document import Document + +V2ChatStreamRequestDocumentsItem = typing.Union[str, Document] diff --git a/src/cohere/v2/v2/__init__.py b/src/cohere/v2/v2/__init__.py new file mode 100644 index 000000000..44f867319 --- /dev/null +++ b/src/cohere/v2/v2/__init__.py @@ -0,0 +1,137 @@ +# This file was auto-generated by Fern from our API Definition. + +from .types import ( + AssistantMessage, + AssistantMessageContent, + AssistantMessageResponse, + ChatContentDeltaEvent, + ChatContentDeltaEventDelta, + ChatContentDeltaEventDeltaMessage, + ChatContentDeltaEventDeltaMessageContent, + ChatContentEndEvent, + ChatContentStartEvent, + ChatContentStartEventDelta, + ChatContentStartEventDeltaMessage, + ChatContentStartEventDeltaMessageContent, + ChatFinishReason, + ChatMessage, + ChatMessageEndEvent, + ChatMessageEndEventDelta, + ChatMessageStartEvent, + ChatMessageStartEventDelta, + ChatMessageStartEventDeltaMessage, + ChatMessages, + ChatStreamEventType, + ChatStreamEventTypeType, + ChatToolCallDeltaEvent, + ChatToolCallDeltaEventDelta, + ChatToolCallDeltaEventDeltaToolCall, + ChatToolCallDeltaEventDeltaToolCallFunction, + ChatToolCallEndEvent, + ChatToolCallStartEvent, + ChatToolCallStartEventDelta, + ChatToolCallStartEventDeltaToolCall, + ChatToolCallStartEventDeltaToolCallFunction, + ChatToolPlanDeltaEvent, + ChatToolPlanDeltaEventDelta, + Citation, + CitationEndEvent, + CitationOptions, + CitationOptionsMode, + CitationStartEvent, + CitationStartEventDelta, + CitationStartEventDeltaMessage, + Document, + DocumentContent, + DocumentSource, + JsonResponseFormat, + NonStreamedChatResponse, + ResponseFormat, + Source, + StreamedChatResponse, + SystemMessage, + SystemMessageContent, + TextContent, + TextResponseFormat, + Tool, + ToolCall, + ToolCallFunction, + ToolContent, + ToolFunction, + ToolMessage, + ToolMessageToolContent, + ToolSource, + Usage, + UsageBilledUnits, + UsageTokens, + UserMessage, + UserMessageContent, +) + +__all__ = [ + "AssistantMessage", + "AssistantMessageContent", + "AssistantMessageResponse", + "ChatContentDeltaEvent", + "ChatContentDeltaEventDelta", + "ChatContentDeltaEventDeltaMessage", + "ChatContentDeltaEventDeltaMessageContent", + "ChatContentEndEvent", + "ChatContentStartEvent", + "ChatContentStartEventDelta", + "ChatContentStartEventDeltaMessage", + "ChatContentStartEventDeltaMessageContent", + "ChatFinishReason", + "ChatMessage", + "ChatMessageEndEvent", + "ChatMessageEndEventDelta", + "ChatMessageStartEvent", + "ChatMessageStartEventDelta", + "ChatMessageStartEventDeltaMessage", + "ChatMessages", + "ChatStreamEventType", + 
"ChatStreamEventTypeType", + "ChatToolCallDeltaEvent", + "ChatToolCallDeltaEventDelta", + "ChatToolCallDeltaEventDeltaToolCall", + "ChatToolCallDeltaEventDeltaToolCallFunction", + "ChatToolCallEndEvent", + "ChatToolCallStartEvent", + "ChatToolCallStartEventDelta", + "ChatToolCallStartEventDeltaToolCall", + "ChatToolCallStartEventDeltaToolCallFunction", + "ChatToolPlanDeltaEvent", + "ChatToolPlanDeltaEventDelta", + "Citation", + "CitationEndEvent", + "CitationOptions", + "CitationOptionsMode", + "CitationStartEvent", + "CitationStartEventDelta", + "CitationStartEventDeltaMessage", + "Document", + "DocumentContent", + "DocumentSource", + "JsonResponseFormat", + "NonStreamedChatResponse", + "ResponseFormat", + "Source", + "StreamedChatResponse", + "SystemMessage", + "SystemMessageContent", + "TextContent", + "TextResponseFormat", + "Tool", + "ToolCall", + "ToolCallFunction", + "ToolContent", + "ToolFunction", + "ToolMessage", + "ToolMessageToolContent", + "ToolSource", + "Usage", + "UsageBilledUnits", + "UsageTokens", + "UserMessage", + "UserMessageContent", +] diff --git a/src/cohere/v2/v2/types/__init__.py b/src/cohere/v2/v2/types/__init__.py new file mode 100644 index 000000000..c48c96fcd --- /dev/null +++ b/src/cohere/v2/v2/types/__init__.py @@ -0,0 +1,135 @@ +# This file was auto-generated by Fern from our API Definition. + +from .assistant_message import AssistantMessage +from .assistant_message_content import AssistantMessageContent +from .assistant_message_response import AssistantMessageResponse +from .chat_content_delta_event import ChatContentDeltaEvent +from .chat_content_delta_event_delta import ChatContentDeltaEventDelta +from .chat_content_delta_event_delta_message import ChatContentDeltaEventDeltaMessage +from .chat_content_delta_event_delta_message_content import ChatContentDeltaEventDeltaMessageContent +from .chat_content_end_event import ChatContentEndEvent +from .chat_content_start_event import ChatContentStartEvent +from .chat_content_start_event_delta import ChatContentStartEventDelta +from .chat_content_start_event_delta_message import ChatContentStartEventDeltaMessage +from .chat_content_start_event_delta_message_content import ChatContentStartEventDeltaMessageContent +from .chat_finish_reason import ChatFinishReason +from .chat_message import ChatMessage +from .chat_message_end_event import ChatMessageEndEvent +from .chat_message_end_event_delta import ChatMessageEndEventDelta +from .chat_message_start_event import ChatMessageStartEvent +from .chat_message_start_event_delta import ChatMessageStartEventDelta +from .chat_message_start_event_delta_message import ChatMessageStartEventDeltaMessage +from .chat_messages import ChatMessages +from .chat_stream_event_type import ChatStreamEventType +from .chat_stream_event_type_type import ChatStreamEventTypeType +from .chat_tool_call_delta_event import ChatToolCallDeltaEvent +from .chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta +from .chat_tool_call_delta_event_delta_tool_call import ChatToolCallDeltaEventDeltaToolCall +from .chat_tool_call_delta_event_delta_tool_call_function import ChatToolCallDeltaEventDeltaToolCallFunction +from .chat_tool_call_end_event import ChatToolCallEndEvent +from .chat_tool_call_start_event import ChatToolCallStartEvent +from .chat_tool_call_start_event_delta import ChatToolCallStartEventDelta +from .chat_tool_call_start_event_delta_tool_call import ChatToolCallStartEventDeltaToolCall +from .chat_tool_call_start_event_delta_tool_call_function import 
ChatToolCallStartEventDeltaToolCallFunction +from .chat_tool_plan_delta_event import ChatToolPlanDeltaEvent +from .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta +from .citation import Citation +from .citation_end_event import CitationEndEvent +from .citation_options import CitationOptions +from .citation_options_mode import CitationOptionsMode +from .citation_start_event import CitationStartEvent +from .citation_start_event_delta import CitationStartEventDelta +from .citation_start_event_delta_message import CitationStartEventDeltaMessage +from .document import Document +from .document_content import DocumentContent +from .document_source import DocumentSource +from .json_response_format import JsonResponseFormat +from .non_streamed_chat_response import NonStreamedChatResponse +from .response_format import ResponseFormat +from .source import Source +from .streamed_chat_response import StreamedChatResponse +from .system_message import SystemMessage +from .system_message_content import SystemMessageContent +from .text_content import TextContent +from .text_response_format import TextResponseFormat +from .tool import Tool +from .tool_call import ToolCall +from .tool_call_function import ToolCallFunction +from .tool_content import ToolContent +from .tool_function import ToolFunction +from .tool_message import ToolMessage +from .tool_message_tool_content import ToolMessageToolContent +from .tool_source import ToolSource +from .usage import Usage +from .usage_billed_units import UsageBilledUnits +from .usage_tokens import UsageTokens +from .user_message import UserMessage +from .user_message_content import UserMessageContent + +__all__ = [ + "AssistantMessage", + "AssistantMessageContent", + "AssistantMessageResponse", + "ChatContentDeltaEvent", + "ChatContentDeltaEventDelta", + "ChatContentDeltaEventDeltaMessage", + "ChatContentDeltaEventDeltaMessageContent", + "ChatContentEndEvent", + "ChatContentStartEvent", + "ChatContentStartEventDelta", + "ChatContentStartEventDeltaMessage", + "ChatContentStartEventDeltaMessageContent", + "ChatFinishReason", + "ChatMessage", + "ChatMessageEndEvent", + "ChatMessageEndEventDelta", + "ChatMessageStartEvent", + "ChatMessageStartEventDelta", + "ChatMessageStartEventDeltaMessage", + "ChatMessages", + "ChatStreamEventType", + "ChatStreamEventTypeType", + "ChatToolCallDeltaEvent", + "ChatToolCallDeltaEventDelta", + "ChatToolCallDeltaEventDeltaToolCall", + "ChatToolCallDeltaEventDeltaToolCallFunction", + "ChatToolCallEndEvent", + "ChatToolCallStartEvent", + "ChatToolCallStartEventDelta", + "ChatToolCallStartEventDeltaToolCall", + "ChatToolCallStartEventDeltaToolCallFunction", + "ChatToolPlanDeltaEvent", + "ChatToolPlanDeltaEventDelta", + "Citation", + "CitationEndEvent", + "CitationOptions", + "CitationOptionsMode", + "CitationStartEvent", + "CitationStartEventDelta", + "CitationStartEventDeltaMessage", + "Document", + "DocumentContent", + "DocumentSource", + "JsonResponseFormat", + "NonStreamedChatResponse", + "ResponseFormat", + "Source", + "StreamedChatResponse", + "SystemMessage", + "SystemMessageContent", + "TextContent", + "TextResponseFormat", + "Tool", + "ToolCall", + "ToolCallFunction", + "ToolContent", + "ToolFunction", + "ToolMessage", + "ToolMessageToolContent", + "ToolSource", + "Usage", + "UsageBilledUnits", + "UsageTokens", + "UserMessage", + "UserMessageContent", +] diff --git a/src/cohere/v2/types/assistant_message.py b/src/cohere/v2/v2/types/assistant_message.py similarity index 75% rename from 
src/cohere/v2/types/assistant_message.py rename to src/cohere/v2/v2/types/assistant_message.py index e299a2158..3f2f30001 100644 --- a/src/cohere/v2/types/assistant_message.py +++ b/src/cohere/v2/v2/types/assistant_message.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from .tool_call2 import ToolCall2 +from .tool_call import ToolCall from .assistant_message_content import AssistantMessageContent from .citation import Citation -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic @@ -14,7 +14,8 @@ class AssistantMessage(UncheckedBaseModel): A message from the assistant role can contain text and tool call information. """ - tool_calls: typing.Optional[typing.List[ToolCall2]] = None + role: typing.Literal["assistant"] = "assistant" + tool_calls: typing.Optional[typing.List[ToolCall]] = None tool_plan: typing.Optional[str] = None content: typing.Optional[AssistantMessageContent] = None citations: typing.Optional[typing.List[Citation]] = None diff --git a/src/cohere/v2/v2/types/assistant_message_content.py b/src/cohere/v2/v2/types/assistant_message_content.py new file mode 100644 index 000000000..ce197254f --- /dev/null +++ b/src/cohere/v2/v2/types/assistant_message_content.py @@ -0,0 +1,6 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .text_content import TextContent + +AssistantMessageContent = typing.Union[str, typing.List[TextContent]] diff --git a/src/cohere/v2/types/assistant_message_response.py b/src/cohere/v2/v2/types/assistant_message_response.py similarity index 65% rename from src/cohere/v2/types/assistant_message_response.py rename to src/cohere/v2/v2/types/assistant_message_response.py index b0cf9e170..6532bb572 100644 --- a/src/cohere/v2/types/assistant_message_response.py +++ b/src/cohere/v2/v2/types/assistant_message_response.py @@ -1,11 +1,11 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from .tool_call2 import ToolCall2 -from .assistant_message_response_content_item import AssistantMessageResponseContentItem +from .tool_call import ToolCall +from .text_content import TextContent from .citation import Citation -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic @@ -15,9 +15,9 @@ class AssistantMessageResponse(UncheckedBaseModel): """ role: typing.Literal["assistant"] = "assistant" - tool_calls: typing.Optional[typing.List[ToolCall2]] = None + tool_calls: typing.Optional[typing.List[ToolCall]] = None tool_plan: typing.Optional[str] = None - content: typing.Optional[typing.List[AssistantMessageResponseContentItem]] = None + content: typing.Optional[typing.List[TextContent]] = None citations: typing.Optional[typing.List[Citation]] = None if IS_PYDANTIC_V2: diff --git a/src/cohere/v2/types/chat_content_delta_event.py b/src/cohere/v2/v2/types/chat_content_delta_event.py similarity index 93% rename from src/cohere/v2/types/chat_content_delta_event.py rename to src/cohere/v2/v2/types/chat_content_delta_event.py index b33952a06..648eba92f 100644 --- a/src/cohere/v2/types/chat_content_delta_event.py +++ b/src/cohere/v2/v2/types/chat_content_delta_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .chat_content_delta_event_delta import ChatContentDeltaEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_delta_event_delta.py b/src/cohere/v2/v2/types/chat_content_delta_event_delta.py similarity index 83% rename from src/cohere/v2/types/chat_content_delta_event_delta.py rename to src/cohere/v2/v2/types/chat_content_delta_event_delta.py index 8c868844c..ac790b6c0 100644 --- a/src/cohere/v2/types/chat_content_delta_event_delta.py +++ b/src/cohere/v2/v2/types/chat_content_delta_event_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_content_delta_event_delta_message import ChatContentDeltaEventDeltaMessage -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_delta_event_delta_message.py b/src/cohere/v2/v2/types/chat_content_delta_event_delta_message.py similarity index 84% rename from src/cohere/v2/types/chat_content_delta_event_delta_message.py rename to src/cohere/v2/v2/types/chat_content_delta_event_delta_message.py index 5bf3a1999..85e435bbe 100644 --- a/src/cohere/v2/types/chat_content_delta_event_delta_message.py +++ b/src/cohere/v2/v2/types/chat_content_delta_event_delta_message.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_content_delta_event_delta_message_content import ChatContentDeltaEventDeltaMessageContent -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_delta_event_delta_message_content.py b/src/cohere/v2/v2/types/chat_content_delta_event_delta_message_content.py similarity index 81% rename from src/cohere/v2/types/chat_content_delta_event_delta_message_content.py rename to src/cohere/v2/v2/types/chat_content_delta_event_delta_message_content.py index 9425f36eb..9b7c614fa 100644 --- a/src/cohere/v2/types/chat_content_delta_event_delta_message_content.py +++ b/src/cohere/v2/v2/types/chat_content_delta_event_delta_message_content.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_end_event.py b/src/cohere/v2/v2/types/chat_content_end_event.py similarity index 91% rename from src/cohere/v2/types/chat_content_end_event.py rename to src/cohere/v2/v2/types/chat_content_end_event.py index f3337173d..b06e5d406 100644 --- a/src/cohere/v2/types/chat_content_end_event.py +++ b/src/cohere/v2/v2/types/chat_content_end_event.py @@ -2,7 +2,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_start_event.py b/src/cohere/v2/v2/types/chat_content_start_event.py similarity index 93% rename from src/cohere/v2/types/chat_content_start_event.py rename to src/cohere/v2/v2/types/chat_content_start_event.py index df6372716..8ad0a6b8c 100644 --- a/src/cohere/v2/types/chat_content_start_event.py +++ b/src/cohere/v2/v2/types/chat_content_start_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .chat_content_start_event_delta import ChatContentStartEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_start_event_delta.py b/src/cohere/v2/v2/types/chat_content_start_event_delta.py similarity index 83% rename from src/cohere/v2/types/chat_content_start_event_delta.py rename to src/cohere/v2/v2/types/chat_content_start_event_delta.py index 4cd5d191e..0a603152a 100644 --- a/src/cohere/v2/types/chat_content_start_event_delta.py +++ b/src/cohere/v2/v2/types/chat_content_start_event_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_content_start_event_delta_message import ChatContentStartEventDeltaMessage -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_start_event_delta_message.py b/src/cohere/v2/v2/types/chat_content_start_event_delta_message.py similarity index 84% rename from src/cohere/v2/types/chat_content_start_event_delta_message.py rename to src/cohere/v2/v2/types/chat_content_start_event_delta_message.py index 6b182da31..720c3593a 100644 --- a/src/cohere/v2/types/chat_content_start_event_delta_message.py +++ b/src/cohere/v2/v2/types/chat_content_start_event_delta_message.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_content_start_event_delta_message_content import ChatContentStartEventDeltaMessageContent -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_content_start_event_delta_message_content.py b/src/cohere/v2/v2/types/chat_content_start_event_delta_message_content.py similarity index 82% rename from src/cohere/v2/types/chat_content_start_event_delta_message_content.py rename to src/cohere/v2/v2/types/chat_content_start_event_delta_message_content.py index e55f8de83..d515fa13d 100644 --- a/src/cohere/v2/types/chat_content_start_event_delta_message_content.py +++ b/src/cohere/v2/v2/types/chat_content_start_event_delta_message_content.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_finish_reason.py b/src/cohere/v2/v2/types/chat_finish_reason.py similarity index 100% rename from src/cohere/v2/types/chat_finish_reason.py rename to src/cohere/v2/v2/types/chat_finish_reason.py diff --git a/src/cohere/v2/v2/types/chat_message.py b/src/cohere/v2/v2/types/chat_message.py new file mode 100644 index 000000000..7995443d0 --- /dev/null +++ b/src/cohere/v2/v2/types/chat_message.py @@ -0,0 +1,9 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing +from .user_message import UserMessage +from .assistant_message import AssistantMessage +from .system_message import SystemMessage +from .tool_message import ToolMessage + +ChatMessage = typing.Union[UserMessage, AssistantMessage, SystemMessage, ToolMessage] diff --git a/src/cohere/v2/types/chat_message_end_event.py b/src/cohere/v2/v2/types/chat_message_end_event.py similarity index 93% rename from src/cohere/v2/types/chat_message_end_event.py rename to src/cohere/v2/v2/types/chat_message_end_event.py index f0d038163..ac006a659 100644 --- a/src/cohere/v2/types/chat_message_end_event.py +++ b/src/cohere/v2/v2/types/chat_message_end_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .chat_message_end_event_delta import ChatMessageEndEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_message_end_event_delta.py b/src/cohere/v2/v2/types/chat_message_end_event_delta.py similarity index 84% rename from src/cohere/v2/types/chat_message_end_event_delta.py rename to src/cohere/v2/v2/types/chat_message_end_event_delta.py index ee91b93cb..ea7c9b53a 100644 --- a/src/cohere/v2/types/chat_message_end_event_delta.py +++ b/src/cohere/v2/v2/types/chat_message_end_event_delta.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_finish_reason import ChatFinishReason from .usage import Usage -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_message_start_event.py b/src/cohere/v2/v2/types/chat_message_start_event.py similarity index 93% rename from src/cohere/v2/types/chat_message_start_event.py rename to src/cohere/v2/v2/types/chat_message_start_event.py index fdfc2a8d2..b546072ee 100644 --- a/src/cohere/v2/types/chat_message_start_event.py +++ b/src/cohere/v2/v2/types/chat_message_start_event.py @@ -4,7 +4,7 @@ import typing import pydantic from .chat_message_start_event_delta import ChatMessageStartEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class ChatMessageStartEvent(ChatStreamEventType): diff --git a/src/cohere/v2/types/chat_message_start_event_delta.py b/src/cohere/v2/v2/types/chat_message_start_event_delta.py similarity index 83% rename from src/cohere/v2/types/chat_message_start_event_delta.py rename to src/cohere/v2/v2/types/chat_message_start_event_delta.py index 1a34986ed..5161a88db 100644 --- a/src/cohere/v2/types/chat_message_start_event_delta.py +++ b/src/cohere/v2/v2/types/chat_message_start_event_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_message_start_event_delta_message import ChatMessageStartEventDeltaMessage -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_message_start_event_delta_message.py b/src/cohere/v2/v2/types/chat_message_start_event_delta_message.py similarity index 83% rename from src/cohere/v2/types/chat_message_start_event_delta_message.py rename to src/cohere/v2/v2/types/chat_message_start_event_delta_message.py index 24cedb102..28e387ff7 100644 --- a/src/cohere/v2/types/chat_message_start_event_delta_message.py +++ b/src/cohere/v2/v2/types/chat_message_start_event_delta_message.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class ChatMessageStartEventDeltaMessage(UncheckedBaseModel): diff --git a/src/cohere/types/embed_job_truncate.py b/src/cohere/v2/v2/types/chat_messages.py similarity index 50% rename from src/cohere/types/embed_job_truncate.py rename to src/cohere/v2/v2/types/chat_messages.py index ec7fb3769..d91e6ee31 100644 --- a/src/cohere/types/embed_job_truncate.py +++ b/src/cohere/v2/v2/types/chat_messages.py @@ -1,5 +1,6 @@ # This file was auto-generated by Fern from our API Definition. import typing +from .chat_message import ChatMessage -EmbedJobTruncate = typing.Union[typing.Literal["START", "END"], typing.Any] +ChatMessages = typing.List[ChatMessage] diff --git a/src/cohere/types/create_embed_job_response.py b/src/cohere/v2/v2/types/chat_stream_event_type.py similarity index 58% rename from src/cohere/types/create_embed_job_response.py rename to src/cohere/v2/v2/types/chat_stream_event_type.py index 135c64646..9e544b2c2 100644 --- a/src/cohere/types/create_embed_job_response.py +++ b/src/cohere/v2/v2/types/chat_stream_event_type.py @@ -1,19 +1,18 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel +from .chat_stream_event_type_type import ChatStreamEventTypeType +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import typing -from .api_meta import ApiMeta -from ..core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic -class CreateEmbedJobResponse(UncheckedBaseModel): +class ChatStreamEventType(UncheckedBaseModel): """ - Response from creating an embed job. + The streamed event types """ - job_id: str - meta: typing.Optional[ApiMeta] = None + type: ChatStreamEventTypeType if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/v2/v2/types/chat_stream_event_type_type.py b/src/cohere/v2/v2/types/chat_stream_event_type_type.py new file mode 100644 index 000000000..fbdab97f6 --- /dev/null +++ b/src/cohere/v2/v2/types/chat_stream_event_type_type.py @@ -0,0 +1,20 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +ChatStreamEventTypeType = typing.Union[ + typing.Literal[ + "message-start", + "content-start", + "content-delta", + "content-end", + "tool-call-start", + "tool-call-delta", + "tool-call-end", + "tool-plan-delta", + "citation-start", + "citation-end", + "message-end", + ], + typing.Any, +] diff --git a/src/cohere/v2/types/chat_tool_call_delta_event.py b/src/cohere/v2/v2/types/chat_tool_call_delta_event.py similarity index 93% rename from src/cohere/v2/types/chat_tool_call_delta_event.py rename to src/cohere/v2/v2/types/chat_tool_call_delta_event.py index 84afb7677..a32819ea6 100644 --- a/src/cohere/v2/types/chat_tool_call_delta_event.py +++ b/src/cohere/v2/v2/types/chat_tool_call_delta_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_delta_event_delta.py b/src/cohere/v2/v2/types/chat_tool_call_delta_event_delta.py similarity index 84% rename from src/cohere/v2/types/chat_tool_call_delta_event_delta.py rename to src/cohere/v2/v2/types/chat_tool_call_delta_event_delta.py index 47df23b3c..60a2d5387 100644 --- a/src/cohere/v2/types/chat_tool_call_delta_event_delta.py +++ b/src/cohere/v2/v2/types/chat_tool_call_delta_event_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_tool_call_delta_event_delta_tool_call import ChatToolCallDeltaEventDeltaToolCall -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_delta_event_delta_tool_call.py b/src/cohere/v2/v2/types/chat_tool_call_delta_event_delta_tool_call.py similarity index 84% rename from src/cohere/v2/types/chat_tool_call_delta_event_delta_tool_call.py rename to src/cohere/v2/v2/types/chat_tool_call_delta_event_delta_tool_call.py index f53f2a5cb..6c812ba70 100644 --- a/src/cohere/v2/types/chat_tool_call_delta_event_delta_tool_call.py +++ b/src/cohere/v2/v2/types/chat_tool_call_delta_event_delta_tool_call.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_tool_call_delta_event_delta_tool_call_function import ChatToolCallDeltaEventDeltaToolCallFunction -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_delta_event_delta_tool_call_function.py b/src/cohere/v2/v2/types/chat_tool_call_delta_event_delta_tool_call_function.py similarity index 81% rename from src/cohere/v2/types/chat_tool_call_delta_event_delta_tool_call_function.py rename to src/cohere/v2/v2/types/chat_tool_call_delta_event_delta_tool_call_function.py index 2ec3c2d69..206a9e7f5 100644 --- a/src/cohere/v2/types/chat_tool_call_delta_event_delta_tool_call_function.py +++ b/src/cohere/v2/v2/types/chat_tool_call_delta_event_delta_tool_call_function.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_end_event.py b/src/cohere/v2/v2/types/chat_tool_call_end_event.py similarity index 91% rename from src/cohere/v2/types/chat_tool_call_end_event.py rename to src/cohere/v2/v2/types/chat_tool_call_end_event.py index dfa3bef27..4778b83ab 100644 --- a/src/cohere/v2/types/chat_tool_call_end_event.py +++ b/src/cohere/v2/v2/types/chat_tool_call_end_event.py @@ -2,7 +2,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_start_event.py b/src/cohere/v2/v2/types/chat_tool_call_start_event.py similarity index 93% rename from src/cohere/v2/types/chat_tool_call_start_event.py rename to src/cohere/v2/v2/types/chat_tool_call_start_event.py index e241b3c33..be7d8482a 100644 --- a/src/cohere/v2/types/chat_tool_call_start_event.py +++ b/src/cohere/v2/v2/types/chat_tool_call_start_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .chat_tool_call_start_event_delta import ChatToolCallStartEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_start_event_delta.py b/src/cohere/v2/v2/types/chat_tool_call_start_event_delta.py similarity index 84% rename from src/cohere/v2/types/chat_tool_call_start_event_delta.py rename to src/cohere/v2/v2/types/chat_tool_call_start_event_delta.py index bce86be8a..b855bf139 100644 --- a/src/cohere/v2/types/chat_tool_call_start_event_delta.py +++ b/src/cohere/v2/v2/types/chat_tool_call_start_event_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_tool_call_start_event_delta_tool_call import ChatToolCallStartEventDeltaToolCall -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_start_event_delta_tool_call.py b/src/cohere/v2/v2/types/chat_tool_call_start_event_delta_tool_call.py similarity index 86% rename from src/cohere/v2/types/chat_tool_call_start_event_delta_tool_call.py rename to src/cohere/v2/v2/types/chat_tool_call_start_event_delta_tool_call.py index 3d100b023..f95424c90 100644 --- a/src/cohere/v2/types/chat_tool_call_start_event_delta_tool_call.py +++ b/src/cohere/v2/v2/types/chat_tool_call_start_event_delta_tool_call.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .chat_tool_call_start_event_delta_tool_call_function import ChatToolCallStartEventDeltaToolCallFunction -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_call_start_event_delta_tool_call_function.py b/src/cohere/v2/v2/types/chat_tool_call_start_event_delta_tool_call_function.py similarity index 82% rename from src/cohere/v2/types/chat_tool_call_start_event_delta_tool_call_function.py rename to src/cohere/v2/v2/types/chat_tool_call_start_event_delta_tool_call_function.py index c4d88cf75..8b6379fa4 100644 --- a/src/cohere/v2/types/chat_tool_call_start_event_delta_tool_call_function.py +++ b/src/cohere/v2/v2/types/chat_tool_call_start_event_delta_tool_call_function.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_plan_delta_event.py b/src/cohere/v2/v2/types/chat_tool_plan_delta_event.py similarity index 92% rename from src/cohere/v2/types/chat_tool_plan_delta_event.py rename to src/cohere/v2/v2/types/chat_tool_plan_delta_event.py index dee9a0ec8..a667e548c 100644 --- a/src/cohere/v2/types/chat_tool_plan_delta_event.py +++ b/src/cohere/v2/v2/types/chat_tool_plan_delta_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/chat_tool_plan_delta_event_delta.py b/src/cohere/v2/v2/types/chat_tool_plan_delta_event_delta.py similarity index 80% rename from src/cohere/v2/types/chat_tool_plan_delta_event_delta.py rename to src/cohere/v2/v2/types/chat_tool_plan_delta_event_delta.py index 1aa872220..84b830882 100644 --- a/src/cohere/v2/types/chat_tool_plan_delta_event_delta.py +++ b/src/cohere/v2/v2/types/chat_tool_plan_delta_event_delta.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/citation.py b/src/cohere/v2/v2/types/citation.py similarity index 85% rename from src/cohere/v2/types/citation.py rename to src/cohere/v2/v2/types/citation.py index 140f28677..5cde1738e 100644 --- a/src/cohere/v2/types/citation.py +++ b/src/cohere/v2/v2/types/citation.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .source import Source -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/citation_end_event.py b/src/cohere/v2/v2/types/citation_end_event.py similarity index 91% rename from src/cohere/v2/types/citation_end_event.py rename to src/cohere/v2/v2/types/citation_end_event.py index 9c9fd5c47..bec92090c 100644 --- a/src/cohere/v2/types/citation_end_event.py +++ b/src/cohere/v2/v2/types/citation_end_event.py @@ -2,7 +2,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/v2/types/citation_options.py b/src/cohere/v2/v2/types/citation_options.py new file mode 100644 index 000000000..0eefe682b --- /dev/null +++ b/src/cohere/v2/v2/types/citation_options.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +from ....core.unchecked_base_model import UncheckedBaseModel +import typing +from .citation_options_mode import CitationOptionsMode +import pydantic +from ....core.pydantic_utilities import IS_PYDANTIC_V2 + + +class CitationOptions(UncheckedBaseModel): + """ + Options for controlling citation generation. + """ + + mode: typing.Optional[CitationOptionsMode] = pydantic.Field(default=None) + """ + Defaults to `"accurate"`. + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/v2/types/citation_options_mode.py b/src/cohere/v2/v2/types/citation_options_mode.py new file mode 100644 index 000000000..ddfdf67f2 --- /dev/null +++ b/src/cohere/v2/v2/types/citation_options_mode.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +CitationOptionsMode = typing.Union[typing.Literal["FAST", "ACCURATE", "OFF"], typing.Any] diff --git a/src/cohere/v2/types/citation_start_event.py b/src/cohere/v2/v2/types/citation_start_event.py similarity index 93% rename from src/cohere/v2/types/citation_start_event.py rename to src/cohere/v2/v2/types/citation_start_event.py index 2052f5ebf..96497c344 100644 --- a/src/cohere/v2/types/citation_start_event.py +++ b/src/cohere/v2/v2/types/citation_start_event.py @@ -3,7 +3,7 @@ from .chat_stream_event_type import ChatStreamEventType import typing from .citation_start_event_delta import CitationStartEventDelta -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/citation_start_event_delta.py b/src/cohere/v2/v2/types/citation_start_event_delta.py similarity index 83% rename from src/cohere/v2/types/citation_start_event_delta.py rename to src/cohere/v2/v2/types/citation_start_event_delta.py index f59e78003..afeeb1e04 100644 --- a/src/cohere/v2/types/citation_start_event_delta.py +++ b/src/cohere/v2/v2/types/citation_start_event_delta.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .citation_start_event_delta_message import CitationStartEventDeltaMessage -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/citation_start_event_delta_message.py b/src/cohere/v2/v2/types/citation_start_event_delta_message.py similarity index 81% rename from src/cohere/v2/types/citation_start_event_delta_message.py rename to src/cohere/v2/v2/types/citation_start_event_delta_message.py index b0741909c..2ceb72e01 100644 --- a/src/cohere/v2/types/citation_start_event_delta_message.py +++ b/src/cohere/v2/v2/types/citation_start_event_delta_message.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .citation import Citation -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/v2/types/document.py b/src/cohere/v2/v2/types/document.py new file mode 100644 index 000000000..527fe8fe7 --- /dev/null +++ b/src/cohere/v2/v2/types/document.py @@ -0,0 +1,33 @@ +# This file was auto-generated by Fern from our API Definition. + +from ....core.unchecked_base_model import UncheckedBaseModel +import typing +import pydantic +from ....core.pydantic_utilities import IS_PYDANTIC_V2 + + +class Document(UncheckedBaseModel): + """ + Relevant information that could be used by the model to generate a more accurate reply. + The content of each document is generally short (should be under 300 words). Metadata should be used to provide additional information; both the key name and the value will be + passed to the model. + """ + + data: typing.Dict[str, str] = pydantic.Field() + """ + A relevant document that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. + """ + + id: typing.Optional[str] = pydantic.Field(default=None) + """ + Unique identifier for this document which will be referenced in citations.
If not provided, an ID will be automatically generated. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/v2/types/document_content.py b/src/cohere/v2/v2/types/document_content.py new file mode 100644 index 000000000..d9a8c8558 --- /dev/null +++ b/src/cohere/v2/v2/types/document_content.py @@ -0,0 +1,25 @@ +# This file was auto-generated by Fern from our API Definition. + +from ....core.unchecked_base_model import UncheckedBaseModel +import typing +from .document import Document +from ....core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic + + +class DocumentContent(UncheckedBaseModel): + """ + Document content. + """ + + type: typing.Literal["document"] = "document" + document: Document + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/document_source.py b/src/cohere/v2/v2/types/document_source.py similarity index 86% rename from src/cohere/v2/types/document_source.py rename to src/cohere/v2/v2/types/document_source.py index a04be859e..6cba6c9a8 100644 --- a/src/cohere/v2/types/document_source.py +++ b/src/cohere/v2/v2/types/document_source.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class DocumentSource(UncheckedBaseModel): diff --git a/src/cohere/v2/types/json_response_format2.py b/src/cohere/v2/v2/types/json_response_format.py similarity index 82% rename from src/cohere/v2/types/json_response_format2.py rename to src/cohere/v2/v2/types/json_response_format.py index c0a45d1ee..f663294a1 100644 --- a/src/cohere/v2/types/json_response_format2.py +++ b/src/cohere/v2/v2/types/json_response_format.py @@ -1,12 +1,13 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 -class JsonResponseFormat2(UncheckedBaseModel): +class JsonResponseFormat(UncheckedBaseModel): + type: typing.Literal["json_object"] = "json_object" json_schema: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = pydantic.Field(default=None) """ [BETA] A JSON schema object that the output will adhere to. There are some restrictions we have on the schema; refer to [our guide](/docs/structured-outputs-json#schema-constraints) for more information.
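The hunk above gives the renamed JsonResponseFormat an explicit `type: typing.Literal["json_object"]` discriminator; together with the matching `type: typing.Literal["text"]` added to TextResponseFormat later in this patch, this lets the ResponseFormat union (also added further down) resolve on the `type` field alone. A minimal sketch of how the two variants might be constructed and told apart, assuming the new src/cohere/v2/v2 module layout from this patch; the describe() helper is illustrative and not part of the generated SDK:

from cohere.v2.v2.types.json_response_format import JsonResponseFormat
from cohere.v2.v2.types.response_format import ResponseFormat
from cohere.v2.v2.types.text_response_format import TextResponseFormat

# Each variant defaults its own `type` literal, so only variant-specific
# fields need to be supplied at construction time.
json_fmt: ResponseFormat = JsonResponseFormat(
    json_schema={"type": "object", "properties": {"answer": {"type": "string"}}}
)
text_fmt: ResponseFormat = TextResponseFormat()


def describe(fmt: ResponseFormat) -> str:
    # Dispatch on the `type` discriminator shared by both variants.
    if fmt.type == "json_object":
        return f"JSON output constrained by schema: {fmt.json_schema}"
    return "free-form text output"


print(describe(json_fmt))  # JSON output constrained by schema: {...}
print(describe(text_fmt))  # free-form text output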
diff --git a/src/cohere/v2/types/non_streamed_chat_response2.py b/src/cohere/v2/v2/types/non_streamed_chat_response.py similarity index 85% rename from src/cohere/v2/types/non_streamed_chat_response2.py rename to src/cohere/v2/v2/types/non_streamed_chat_response.py index a0f2a8db6..2197da4c4 100644 --- a/src/cohere/v2/types/non_streamed_chat_response2.py +++ b/src/cohere/v2/v2/types/non_streamed_chat_response.py @@ -1,15 +1,15 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import pydantic from .chat_finish_reason import ChatFinishReason import typing from .assistant_message_response import AssistantMessageResponse from .usage import Usage -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 -class NonStreamedChatResponse2(UncheckedBaseModel): +class NonStreamedChatResponse(UncheckedBaseModel): id: str = pydantic.Field() """ Unique identifier for the generated reply. Useful for submitting feedback. diff --git a/src/cohere/v2/v2/types/response_format.py b/src/cohere/v2/v2/types/response_format.py new file mode 100644 index 000000000..e56c42c1f --- /dev/null +++ b/src/cohere/v2/v2/types/response_format.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .text_response_format import TextResponseFormat +from .json_response_format import JsonResponseFormat + +ResponseFormat = typing.Union[TextResponseFormat, JsonResponseFormat] diff --git a/src/cohere/v2/v2/types/source.py b/src/cohere/v2/v2/types/source.py new file mode 100644 index 000000000..68af9c6b9 --- /dev/null +++ b/src/cohere/v2/v2/types/source.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .tool_source import ToolSource +from .document_source import DocumentSource + +Source = typing.Union[ToolSource, DocumentSource] diff --git a/src/cohere/v2/v2/types/streamed_chat_response.py b/src/cohere/v2/v2/types/streamed_chat_response.py new file mode 100644 index 000000000..fd4a37396 --- /dev/null +++ b/src/cohere/v2/v2/types/streamed_chat_response.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. 
+
+import typing
+from .chat_message_start_event import ChatMessageStartEvent
+from .chat_content_start_event import ChatContentStartEvent
+from .chat_content_delta_event import ChatContentDeltaEvent
+from .chat_content_end_event import ChatContentEndEvent
+from .chat_tool_plan_delta_event import ChatToolPlanDeltaEvent
+from .chat_tool_call_start_event import ChatToolCallStartEvent
+from .chat_tool_call_delta_event import ChatToolCallDeltaEvent
+from .chat_tool_call_end_event import ChatToolCallEndEvent
+from .citation_start_event import CitationStartEvent
+from .citation_end_event import CitationEndEvent
+from .chat_message_end_event import ChatMessageEndEvent
+
+StreamedChatResponse = typing.Union[
+    ChatMessageStartEvent,
+    ChatContentStartEvent,
+    ChatContentDeltaEvent,
+    ChatContentEndEvent,
+    ChatToolPlanDeltaEvent,
+    ChatToolCallStartEvent,
+    ChatToolCallDeltaEvent,
+    ChatToolCallEndEvent,
+    CitationStartEvent,
+    CitationEndEvent,
+    ChatMessageEndEvent,
+]
diff --git a/src/cohere/v2/types/system_message.py b/src/cohere/v2/v2/types/system_message.py
similarity index 77%
rename from src/cohere/v2/types/system_message.py
rename to src/cohere/v2/v2/types/system_message.py
index 23fce20ee..7e51a2c5f 100644
--- a/src/cohere/v2/types/system_message.py
+++ b/src/cohere/v2/v2/types/system_message.py
@@ -1,9 +1,9 @@
 # This file was auto-generated by Fern from our API Definition.
 
-from ...core.unchecked_base_model import UncheckedBaseModel
-from .system_message_content import SystemMessageContent
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ....core.unchecked_base_model import UncheckedBaseModel
 import typing
+from .system_message_content import SystemMessageContent
+from ....core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
 
 
@@ -12,6 +12,7 @@ class SystemMessage(UncheckedBaseModel):
     A message from the system.
     """
 
+    role: typing.Literal["system"] = "system"
     content: SystemMessageContent
 
     if IS_PYDANTIC_V2:
diff --git a/src/cohere/v2/v2/types/system_message_content.py b/src/cohere/v2/v2/types/system_message_content.py
new file mode 100644
index 000000000..e92f4230a
--- /dev/null
+++ b/src/cohere/v2/v2/types/system_message_content.py
@@ -0,0 +1,6 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from .text_content import TextContent
+
+SystemMessageContent = typing.Union[str, typing.List[TextContent]]
diff --git a/src/cohere/v2/types/text_content.py b/src/cohere/v2/v2/types/text_content.py
similarity index 75%
rename from src/cohere/v2/types/text_content.py
rename to src/cohere/v2/v2/types/text_content.py
index 4e3ade261..7df9c6079 100644
--- a/src/cohere/v2/types/text_content.py
+++ b/src/cohere/v2/v2/types/text_content.py
@@ -1,8 +1,8 @@
 # This file was auto-generated by Fern from our API Definition.
 
-from ...core.unchecked_base_model import UncheckedBaseModel
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ....core.unchecked_base_model import UncheckedBaseModel
 import typing
+from ....core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
 
 
@@ -11,6 +11,7 @@ class TextContent(UncheckedBaseModel):
     Text content of the message.
""" + type: typing.Literal["text"] = "text" text: str if IS_PYDANTIC_V2: diff --git a/src/cohere/types/text_response_format.py b/src/cohere/v2/v2/types/text_response_format.py similarity index 73% rename from src/cohere/types/text_response_format.py rename to src/cohere/v2/v2/types/text_response_format.py index a7bad64f0..6abbd5700 100644 --- a/src/cohere/types/text_response_format.py +++ b/src/cohere/v2/v2/types/text_response_format.py @@ -1,12 +1,14 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.unchecked_base_model import UncheckedBaseModel import typing +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic class TextResponseFormat(UncheckedBaseModel): + type: typing.Literal["text"] = "text" + if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 else: diff --git a/src/cohere/v2/types/tool2.py b/src/cohere/v2/v2/types/tool.py similarity index 65% rename from src/cohere/v2/types/tool2.py rename to src/cohere/v2/v2/types/tool.py index 9891642b8..6f67349ad 100644 --- a/src/cohere/v2/types/tool2.py +++ b/src/cohere/v2/v2/types/tool.py @@ -1,15 +1,15 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from .tool2function import Tool2Function +from .tool_function import ToolFunction import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 -class Tool2(UncheckedBaseModel): +class Tool(UncheckedBaseModel): type: typing.Optional[typing.Literal["function"]] = None - function: typing.Optional[Tool2Function] = pydantic.Field(default=None) + function: typing.Optional[ToolFunction] = pydantic.Field(default=None) """ The function to be executed. """ diff --git a/src/cohere/v2/types/tool_call2.py b/src/cohere/v2/v2/types/tool_call.py similarity index 67% rename from src/cohere/v2/types/tool_call2.py rename to src/cohere/v2/v2/types/tool_call.py index 2f3e0cd5a..94f2bcf63 100644 --- a/src/cohere/v2/types/tool_call2.py +++ b/src/cohere/v2/v2/types/tool_call.py @@ -1,20 +1,20 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from .tool_call2function import ToolCall2Function -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from .tool_call_function import ToolCallFunction +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic -class ToolCall2(UncheckedBaseModel): +class ToolCall(UncheckedBaseModel): """ A array of tool calls to be made. 
""" id: typing.Optional[str] = None type: typing.Optional[typing.Literal["function"]] = None - function: typing.Optional[ToolCall2Function] = None + function: typing.Optional[ToolCallFunction] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/types/get_connector_response.py b/src/cohere/v2/v2/types/tool_call_function.py similarity index 61% rename from src/cohere/types/get_connector_response.py rename to src/cohere/v2/v2/types/tool_call_function.py index 66c3b3a1e..56bc402ad 100644 --- a/src/cohere/types/get_connector_response.py +++ b/src/cohere/v2/v2/types/tool_call_function.py @@ -1,14 +1,14 @@ # This file was auto-generated by Fern from our API Definition. -from ..core.unchecked_base_model import UncheckedBaseModel -from .connector import Connector -from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.unchecked_base_model import UncheckedBaseModel import typing +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic -class GetConnectorResponse(UncheckedBaseModel): - connector: Connector +class ToolCallFunction(UncheckedBaseModel): + name: typing.Optional[str] = None + arguments: typing.Optional[str] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/v2/v2/types/tool_content.py b/src/cohere/v2/v2/types/tool_content.py new file mode 100644 index 000000000..f3a96dd1b --- /dev/null +++ b/src/cohere/v2/v2/types/tool_content.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .text_content import TextContent +from .document_content import DocumentContent + +ToolContent = typing.Union[TextContent, DocumentContent] diff --git a/src/cohere/v2/types/tool2function.py b/src/cohere/v2/v2/types/tool_function.py similarity index 84% rename from src/cohere/v2/types/tool2function.py rename to src/cohere/v2/v2/types/tool_function.py index cdc535826..7f39e492c 100644 --- a/src/cohere/v2/types/tool2function.py +++ b/src/cohere/v2/v2/types/tool_function.py @@ -1,12 +1,12 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 -class Tool2Function(UncheckedBaseModel): +class ToolFunction(UncheckedBaseModel): """ The function to be executed. """ diff --git a/src/cohere/v2/types/tool_message2.py b/src/cohere/v2/v2/types/tool_message.py similarity index 56% rename from src/cohere/v2/types/tool_message2.py rename to src/cohere/v2/v2/types/tool_message.py index 515226d72..1aaf0e37b 100644 --- a/src/cohere/v2/types/tool_message2.py +++ b/src/cohere/v2/v2/types/tool_message.py @@ -1,24 +1,26 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel -import pydantic +from ....core.unchecked_base_model import UncheckedBaseModel import typing -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic +from .tool_message_tool_content import ToolMessageToolContent +from ....core.pydantic_utilities import IS_PYDANTIC_V2 -class ToolMessage2(UncheckedBaseModel): +class ToolMessage(UncheckedBaseModel): """ A message from the system. 
""" + role: typing.Literal["tool"] = "tool" tool_call_id: str = pydantic.Field() """ The id of the associated tool call that has provided the given content """ - tool_content: typing.List[str] = pydantic.Field() + tool_content: ToolMessageToolContent = pydantic.Field() """ - A list of outputs from a tool. The content should formatted as a JSON object string + A single or list of outputs from a tool. The content should formatted as a JSON object string, or a list of tool content blocks """ if IS_PYDANTIC_V2: diff --git a/src/cohere/v2/v2/types/tool_message_tool_content.py b/src/cohere/v2/v2/types/tool_message_tool_content.py new file mode 100644 index 000000000..23b659ad6 --- /dev/null +++ b/src/cohere/v2/v2/types/tool_message_tool_content.py @@ -0,0 +1,6 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .tool_content import ToolContent + +ToolMessageToolContent = typing.Union[str, typing.List[ToolContent]] diff --git a/src/cohere/v2/types/tool_source.py b/src/cohere/v2/v2/types/tool_source.py similarity index 84% rename from src/cohere/v2/types/tool_source.py rename to src/cohere/v2/v2/types/tool_source.py index 772229531..410aa7514 100644 --- a/src/cohere/v2/types/tool_source.py +++ b/src/cohere/v2/v2/types/tool_source.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class ToolSource(UncheckedBaseModel): diff --git a/src/cohere/v2/types/usage.py b/src/cohere/v2/v2/types/usage.py similarity index 84% rename from src/cohere/v2/types/usage.py rename to src/cohere/v2/v2/types/usage.py index e99efbdfa..4dfd766ae 100644 --- a/src/cohere/v2/types/usage.py +++ b/src/cohere/v2/v2/types/usage.py @@ -1,10 +1,10 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing from .usage_billed_units import UsageBilledUnits from .usage_tokens import UsageTokens -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 import pydantic diff --git a/src/cohere/v2/types/usage_billed_units.py b/src/cohere/v2/v2/types/usage_billed_units.py similarity index 89% rename from src/cohere/v2/types/usage_billed_units.py rename to src/cohere/v2/v2/types/usage_billed_units.py index abe2925b9..ecaacf222 100644 --- a/src/cohere/v2/types/usage_billed_units.py +++ b/src/cohere/v2/v2/types/usage_billed_units.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. -from ...core.unchecked_base_model import UncheckedBaseModel +from ....core.unchecked_base_model import UncheckedBaseModel import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ....core.pydantic_utilities import IS_PYDANTIC_V2 class UsageBilledUnits(UncheckedBaseModel): diff --git a/src/cohere/v2/types/usage_tokens.py b/src/cohere/v2/v2/types/usage_tokens.py similarity index 85% rename from src/cohere/v2/types/usage_tokens.py rename to src/cohere/v2/v2/types/usage_tokens.py index f10e583b6..e54837486 100644 --- a/src/cohere/v2/types/usage_tokens.py +++ b/src/cohere/v2/v2/types/usage_tokens.py @@ -1,9 +1,9 @@ # This file was auto-generated by Fern from our API Definition. 
-from ...core.unchecked_base_model import UncheckedBaseModel
+from ....core.unchecked_base_model import UncheckedBaseModel
 import typing
 import pydantic
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ....core.pydantic_utilities import IS_PYDANTIC_V2
 
 
 class UsageTokens(UncheckedBaseModel):
diff --git a/src/cohere/v2/types/user_message.py b/src/cohere/v2/v2/types/user_message.py
similarity index 69%
rename from src/cohere/v2/types/user_message.py
rename to src/cohere/v2/v2/types/user_message.py
index f73c47177..41df98da0 100644
--- a/src/cohere/v2/types/user_message.py
+++ b/src/cohere/v2/v2/types/user_message.py
@@ -1,11 +1,10 @@
 # This file was auto-generated by Fern from our API Definition.
 
-from ...core.unchecked_base_model import UncheckedBaseModel
+from ....core.unchecked_base_model import UncheckedBaseModel
+import typing
 from .user_message_content import UserMessageContent
 import pydantic
-import typing
-from ...types.chat_document import ChatDocument
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ....core.pydantic_utilities import IS_PYDANTIC_V2
 
 
 class UserMessage(UncheckedBaseModel):
@@ -13,17 +12,13 @@ class UserMessage(UncheckedBaseModel):
     A message from the user.
     """
 
+    role: typing.Literal["user"] = "user"
     content: UserMessageContent = pydantic.Field()
     """
     The content of the message. This can be a string or a list of content blocks. If a string is provided, it will be treated as a text content block.
     """
 
-    documents: typing.Optional[typing.List[ChatDocument]] = pydantic.Field(default=None)
-    """
-    Documents seen by the model when generating the reply.
-    """
-
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:
diff --git a/src/cohere/v2/v2/types/user_message_content.py b/src/cohere/v2/v2/types/user_message_content.py
new file mode 100644
index 000000000..11dc17813
--- /dev/null
+++ b/src/cohere/v2/v2/types/user_message_content.py
@@ -0,0 +1,6 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from ....types.components_schemas_text_content import ComponentsSchemasTextContent
+
+UserMessageContent = typing.Union[str, typing.List[ComponentsSchemasTextContent]]
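[Editor's note: illustrative sketch, not part of the patch.] With the role literals and content unions above in place, the reshaped v2 message models compose as follows. A hedged sketch, assuming the module paths mirror the file locations in this diff and that the models are importable as written; the tool-call id is hypothetical:

    from cohere.v2.v2.types.text_content import TextContent
    from cohere.v2.v2.types.tool_message import ToolMessage
    from cohere.v2.v2.types.user_message import UserMessage

    # `role` discriminators default to their literals, and UserMessageContent
    # accepts a bare string in place of a list of text content blocks.
    user_msg = UserMessage(content="What is the weather in Toronto?")

    # ToolMessageToolContent now accepts a list of content blocks as well as
    # a JSON-encoded string.
    tool_msg = ToolMessage(
        tool_call_id="call_0",
        tool_content=[TextContent(text='{"temperature_c": 21}')],
    )

    assert user_msg.role == "user"
    assert tool_msg.role == "tool"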