From 09d77ef359ee66b481dd7a63dd31357344a591eb Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:47:39 +0000 Subject: [PATCH] SDK regeneration --- poetry.lock | 53 +-- pyproject.toml | 2 +- reference.md | 411 ++++++++++-------- src/cohere/__init__.py | 53 ++- src/cohere/base_client.py | 358 ++++++++------- src/cohere/core/client_wrapper.py | 2 +- src/cohere/types/__init__.py | 18 + .../chat_request_connectors_search_options.py | 1 + src/cohere/types/chat_request_safety_mode.py | 5 + ...tream_request_connectors_search_options.py | 1 + .../types/chat_stream_request_safety_mode.py | 5 + .../citation_end_event.py} | 11 +- src/cohere/types/citation_start_event.py | 27 ++ .../types/citation_start_event_delta.py | 22 + .../citation_start_event_delta_message.py | 22 + src/cohere/types/json_response_format.py | 2 +- src/cohere/types/json_response_format2.py | 38 ++ src/cohere/types/response_format.py | 4 +- src/cohere/types/response_format2.py | 65 +++ src/cohere/v2/__init__.py | 32 +- src/cohere/v2/client.py | 365 ++++++++-------- src/cohere/v2/types/__init__.py | 34 +- src/cohere/v2/types/assistant_message.py | 4 +- .../v2/types/assistant_message_content.py | 7 + .../types/assistant_message_content_item.py | 30 ++ .../v2/types/assistant_message_response.py | 32 ++ ...assistant_message_response_content_item.py | 30 ++ src/cohere/v2/types/chat_message2.py | 30 +- src/cohere/v2/types/citation.py | 4 +- src/cohere/v2/types/content.py | 21 +- .../v2/types/non_streamed_chat_response2.py | 4 +- .../v2/types/streamed_chat_response2.py | 40 ++ src/cohere/v2/types/tool2.py | 5 +- src/cohere/v2/types/tool2function.py | 16 +- src/cohere/v2/types/user_message.py | 12 +- .../v2/types/v2chat_request_citation_mode.py | 2 +- .../types/v2chat_request_response_format.py | 29 -- .../v2/types/v2chat_request_tool_choice.py | 5 - .../types/v2chat_request_truncation_mode.py | 5 - .../v2chat_stream_request_citation_mode.py | 2 +- .../v2chat_stream_request_response_format.py | 29 -- .../v2chat_stream_request_tool_choice.py | 5 - .../v2chat_stream_request_truncation_mode.py | 5 - 43 files changed, 1121 insertions(+), 727 deletions(-) create mode 100644 src/cohere/types/chat_request_safety_mode.py create mode 100644 src/cohere/types/chat_stream_request_safety_mode.py rename src/cohere/{v2/types/document_content.py => types/citation_end_event.py} (58%) create mode 100644 src/cohere/types/citation_start_event.py create mode 100644 src/cohere/types/citation_start_event_delta.py create mode 100644 src/cohere/types/citation_start_event_delta_message.py create mode 100644 src/cohere/types/json_response_format2.py create mode 100644 src/cohere/types/response_format2.py create mode 100644 src/cohere/v2/types/assistant_message_content.py create mode 100644 src/cohere/v2/types/assistant_message_content_item.py create mode 100644 src/cohere/v2/types/assistant_message_response.py create mode 100644 src/cohere/v2/types/assistant_message_response_content_item.py delete mode 100644 src/cohere/v2/types/v2chat_request_response_format.py delete mode 100644 src/cohere/v2/types/v2chat_request_tool_choice.py delete mode 100644 src/cohere/v2/types/v2chat_request_truncation_mode.py delete mode 100644 src/cohere/v2/types/v2chat_stream_request_response_format.py delete mode 100644 src/cohere/v2/types/v2chat_stream_request_tool_choice.py delete mode 100644 src/cohere/v2/types/v2chat_stream_request_truncation_mode.py diff --git a/poetry.lock b/poetry.lock index 74ed3aac1..8acffdb8e 
100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,17 +38,17 @@ trio = ["trio (>=0.23)"] [[package]] name = "boto3" -version = "1.34.161" +version = "1.35.9" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.161-py3-none-any.whl", hash = "sha256:4ef285334a0edc3047e27a04caf00f7742e32c0f03a361101e768014ac5709dd"}, - {file = "boto3-1.34.161.tar.gz", hash = "sha256:a872d8fdb3203c1eb0b12fa9e9d879e6f7fd02983a485f02189e6d5914ccd834"}, + {file = "boto3-1.35.9-py3-none-any.whl", hash = "sha256:1ee9c52d83e8f4902300e985d62688cf31ca8fc47a80534b4295350ebc418e46"}, + {file = "boto3-1.35.9.tar.gz", hash = "sha256:07e0f335d801765999da67325455ea8219c1a6d7f06bdaad0975ee505276bcbe"}, ] [package.dependencies] -botocore = ">=1.34.161,<1.35.0" +botocore = ">=1.35.9,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -57,13 +57,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.161" +version = "1.35.9" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.161-py3-none-any.whl", hash = "sha256:6c606d2da6f62fde06880aff1190566af208875c29938b6b68741e607817975a"}, - {file = "botocore-1.34.161.tar.gz", hash = "sha256:16381bfb786142099abf170ce734b95a402a3a7f8e4016358712ac333c5568b2"}, + {file = "botocore-1.35.9-py3-none-any.whl", hash = "sha256:92962460e4f35d139a23bca28149722030143257ee2916de442243c2464a7434"}, + {file = "botocore-1.35.9.tar.gz", hash = "sha256:9e44572fd2401b89dd58bf8b71ac2c36d5b0437f8cbf40de83302c499965fb54"}, ] [package.dependencies] @@ -79,13 +79,13 @@ crt = ["awscrt (==0.21.2)"] [[package]] name = "certifi" -version = "2024.7.4" +version = "2024.8.30" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, - {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] [[package]] @@ -347,13 +347,13 @@ trio = ["trio (>=0.22.0,<0.26.0)"] [[package]] name = "httpx" -version = "0.27.0" +version = "0.27.2" description = "The next generation HTTP client." 
optional = false python-versions = ">=3.8" files = [ - {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, - {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, ] [package.dependencies] @@ -368,6 +368,7 @@ brotli = ["brotli", "brotlicffi"] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] [[package]] name = "httpx-sse" @@ -382,13 +383,13 @@ files = [ [[package]] name = "huggingface-hub" -version = "0.24.5" +version = "0.24.6" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.24.5-py3-none-any.whl", hash = "sha256:d93fb63b1f1a919a22ce91a14518974e81fc4610bf344dfe7572343ce8d3aced"}, - {file = "huggingface_hub-0.24.5.tar.gz", hash = "sha256:7b45d6744dd53ce9cbf9880957de00e9d10a9ae837f1c9b7255fc8fa4e8264f3"}, + {file = "huggingface_hub-0.24.6-py3-none-any.whl", hash = "sha256:a990f3232aa985fe749bc9474060cbad75e8b2f115f6665a9fda5b9c97818970"}, + {file = "huggingface_hub-0.24.6.tar.gz", hash = "sha256:cc2579e761d070713eaa9c323e3debe39d5b464ae3a7261c39a9195b27bb8000"}, ] [package.dependencies] @@ -416,13 +417,13 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t [[package]] name = "idna" -version = "3.7" +version = "3.8" description = "Internationalized Domain Names in Applications (IDNA)" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, + {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"}, + {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"}, ] [[package]] @@ -993,13 +994,13 @@ telegram = ["requests"] [[package]] name = "types-python-dateutil" -version = "2.9.0.20240316" +version = "2.9.0.20240821" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" files = [ - {file = "types-python-dateutil-2.9.0.20240316.tar.gz", hash = "sha256:5d2f2e240b86905e40944dd787db6da9263f0deabef1076ddaed797351ec0202"}, - {file = "types_python_dateutil-2.9.0.20240316-py3-none-any.whl", hash = "sha256:6b8cb66d960771ce5ff974e9dd45e38facb81718cc1e208b10b1baccbfdbee3b"}, + {file = "types-python-dateutil-2.9.0.20240821.tar.gz", hash = "sha256:9649d1dcb6fef1046fb18bebe9ea2aa0028b160918518c34589a46045f6ebd98"}, + {file = "types_python_dateutil-2.9.0.20240821-py3-none-any.whl", hash = "sha256:f5889fcb4e63ed4aaa379b44f93c32593d50b9a94c9a60a0c854d8cc3511cd57"}, ] [[package]] @@ -1054,13 +1055,13 @@ files = [ [[package]] name = "urllib3" -version = "1.26.19" +version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"}, - {file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"}, + {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, + {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] [package.extras] diff --git a/pyproject.toml b/pyproject.toml index 341641830..dfffa3b8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cohere" -version = "5.8.1" +version = "5.9.0" description = "" readme = "README.md" authors = [] diff --git a/reference.md b/reference.md index 9659f1120..ce8e683f6 100644 --- a/reference.md +++ b/reference.md @@ -109,6 +109,7 @@ response = client.chat_stream( ], force_single_step=True, response_format=ResponseFormat_Text(), + safety_mode="CONTEXTUAL", ) for chunk in response: yield chunk @@ -130,6 +131,7 @@ for chunk in response: **message:** `str` Text input for the model to respond to. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -141,9 +143,10 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **model:** `typing.Optional[str]` -Defaults to `command-r-plus`. +Defaults to `command-r-plus-08-2024`. The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + Compatible Deployments: Cohere Platform, Private Deployments @@ -158,6 +161,7 @@ Compatible Deployments: Cohere Platform, Private Deployments When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -174,6 +178,7 @@ A list of previous messages between the user and the model, giving the model con Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -188,6 +193,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D An alternative to `chat_history`. Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. 
+ Compatible Deployments: Cohere Platform @@ -208,7 +214,10 @@ With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `d With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. -Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + +Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -222,6 +231,7 @@ Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sag Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + Compatible Deployments: Cohere Platform @@ -236,6 +246,7 @@ Compatible Deployments: Cohere Platform Defaults to `false`. When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -250,10 +261,12 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. Example: -`[ +``` +[ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, -]` +] +``` Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. @@ -264,6 +277,7 @@ An `id` field (string) can be optionally supplied to identify the document in th An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -278,6 +292,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -294,6 +309,7 @@ Defaults to `0.3`. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. 
Randomness can be further maximized by increasing the value of the `p` parameter. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -306,6 +322,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **max_tokens:** `typing.Optional[int]` The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -320,6 +337,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. Input will be truncated according to the `prompt_truncation` parameter. + Compatible Deployments: Cohere Platform @@ -333,6 +351,7 @@ Compatible Deployments: Cohere Platform Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -346,6 +365,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -361,6 +381,7 @@ If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -373,6 +394,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **stop_sequences:** `typing.Optional[typing.Sequence[str]]` A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -387,6 +409,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -401,6 +424,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. 
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -414,6 +438,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D When enabled, the user's prompt will be sent to the model without any pre-processing. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -436,6 +461,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -468,6 +494,7 @@ tool_results = [ ] ``` **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -487,6 +514,24 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **response_format:** `typing.Optional[ResponseFormat]` + + + +
+
+ +**safety_mode:** `typing.Optional[ChatStreamRequestSafetyMode]` + +Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. +When `NONE` is specified, the safety instruction will be omitted. + +Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + +**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + +
@@ -562,6 +607,7 @@ client.chat( **message:** `str` Text input for the model to respond to. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -573,9 +619,10 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **model:** `typing.Optional[str]` -Defaults to `command-r-plus`. +Defaults to `command-r-plus-08-2024`. The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + Compatible Deployments: Cohere Platform, Private Deployments @@ -590,6 +637,7 @@ Compatible Deployments: Cohere Platform, Private Deployments When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -606,6 +654,7 @@ A list of previous messages between the user and the model, giving the model con Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -620,6 +669,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D An alternative to `chat_history`. Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + Compatible Deployments: Cohere Platform @@ -640,7 +690,10 @@ With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `d With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. -Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + +Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -654,6 +707,7 @@ Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sag Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). 
+ Compatible Deployments: Cohere Platform @@ -668,6 +722,7 @@ Compatible Deployments: Cohere Platform Defaults to `false`. When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -682,10 +737,12 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. Example: -`[ +``` +[ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, -]` +] +``` Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. @@ -696,6 +753,7 @@ An `id` field (string) can be optionally supplied to identify the document in th An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -710,6 +768,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -726,6 +785,7 @@ Defaults to `0.3`. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. Randomness can be further maximized by increasing the value of the `p` parameter. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -738,6 +798,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **max_tokens:** `typing.Optional[int]` The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -752,6 +813,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. Input will be truncated according to the `prompt_truncation` parameter. + Compatible Deployments: Cohere Platform @@ -765,6 +827,7 @@ Compatible Deployments: Cohere Platform Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. 
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -778,6 +841,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -793,6 +857,7 @@ If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -805,6 +870,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **stop_sequences:** `typing.Optional[typing.Sequence[str]]` A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -819,6 +885,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -833,6 +900,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -846,6 +914,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D When enabled, the user's prompt will be sent to the model without any pre-processing. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -868,6 +937,7 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -900,6 +970,7 @@ tool_results = [ ] ``` **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. 
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -919,6 +990,24 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D **response_format:** `typing.Optional[ResponseFormat]` + + + +
+
+ +**safety_mode:** `typing.Optional[ChatRequestSafetyMode]` + +Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. +When `NONE` is specified, the safety instruction will be omitted. + +Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + +**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + +Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + +
@@ -949,10 +1038,9 @@ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private D
-> 🚧 Warning -> -> This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + +This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates realistic text conditioned on a given input.
@@ -1223,10 +1311,9 @@ If `ALL` is selected, the token likelihoods will be provided both for the prompt
-> 🚧 Warning -> -> This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + +This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates realistic text conditioned on a given input.
@@ -1507,6 +1594,7 @@ client = Client( ) client.embed( texts=["string"], + images=["string"], model="string", input_type="search_document", embedding_types=["float"], @@ -1642,14 +1730,8 @@ client = Client( token="YOUR_TOKEN", ) client.rerank( - model="rerank-english-v3.0", - query="What is the capital of the United States?", - documents=[ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", - ], + query="query", + documents=["documents"], ) ``` @@ -1772,7 +1854,6 @@ Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) tra
```python -from cohere import ClassifyExample from cohere.client import Client client = Client( @@ -1780,49 +1861,7 @@ client = Client( token="YOUR_TOKEN", ) client.classify( - inputs=["Confirm your email address", "hey i need u to send some $"], - examples=[ - ClassifyExample( - text="Dermatologists don't like her!", - label="Spam", - ), - ClassifyExample( - text="Hello, open to this?", - label="Spam", - ), - ClassifyExample( - text="I need help please wire me $1000 right now", - label="Spam", - ), - ClassifyExample( - text="Nice to know you ;)", - label="Spam", - ), - ClassifyExample( - text="Please help me?", - label="Spam", - ), - ClassifyExample( - text="Your parcel will be delivered today", - label="Not spam", - ), - ClassifyExample( - text="Review changes to our Terms and Conditions", - label="Not spam", - ), - ClassifyExample( - text="Weekly sync notes", - label="Not spam", - ), - ClassifyExample( - text="Re: Follow up from today’s meeting", - label="Not spam", - ), - ClassifyExample( - text="Pre-read for tomorrow", - label="Not spam", - ), - ], + inputs=["inputs"], ) ``` @@ -1914,10 +1953,9 @@ If `NONE` is selected, when the input exceeds the maximum input token length an
-> 🚧 Warning -> -> This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + +This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates a summary in English for a given text.
@@ -1940,7 +1978,7 @@ client = Client( token="YOUR_TOKEN", ) client.summarize( - text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 Β°C or 35 Β°F). It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used fo\r all variants. Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.', + text="text", ) ``` @@ -2139,8 +2177,8 @@ client = Client( token="YOUR_TOKEN", ) client.detokenize( - tokens=[10104, 12221, 1315, 34, 1420, 69], - model="command", + tokens=[1], + model="model", ) ``` @@ -2251,6 +2289,20 @@ client.check_api_key()
+#### πŸ“ Description + +
+
+ +
+
Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG, follow our Text Generation guides. 
+
+
+
#### 🔌 Usage
@@ -2260,17 +2312,7 @@ client.check_api_key()
```python -from cohere import ( - ChatMessage2_Assistant, - Citation, - Source_Tool, - TextContent, - Tool2, - Tool2Function, - ToolCall2, - ToolCall2Function, - V2ChatStreamRequestResponseFormat, -) +from cohere import ChatMessage2_User, ResponseFormat2_Text, Tool2, Tool2Function from cohere.client import Client client = Client( @@ -2280,35 +2322,9 @@ client = Client( response = client.v2.chat_stream( model="string", messages=[ - ChatMessage2_Assistant( - tool_calls=[ - ToolCall2( - id="string", - function=ToolCall2Function( - name="string", - arguments="string", - ), - ) - ], - tool_plan="string", - content=[ - TextContent( - text="string", - ) - ], - citations=[ - Citation( - start="string", - end="string", - text="string", - sources=[ - Source_Tool( - id="string", - tool_output={"string": {"key": "value"}}, - ) - ], - ) - ], + ChatMessage2_User( + content="string", + documents=[{"string": {"key": "value"}}], ) ], tools=[ @@ -2320,21 +2336,16 @@ response = client.v2.chat_stream( ), ) ], - tool_choice="AUTO", citation_mode="FAST", - truncation_mode="OFF", - response_format=V2ChatStreamRequestResponseFormat( - schema={"string": {"key": "value"}}, - ), + response_format=ResponseFormat2_Text(), max_tokens=1, stop_sequences=["string"], - max_input_tokens=1, temperature=1.1, seed=1, frequency_penalty=1.1, presence_penalty=1.1, - k=1, - p=1, + k=1.1, + p=1.1, return_prompt=True, ) for chunk in response: @@ -2354,7 +2365,7 @@ for chunk in response:
-**model:** `str` — The model to use for the chat. +**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
@@ -2371,14 +2382,11 @@ for chunk in response:
**tools:** `typing.Optional[typing.Sequence[Tool2]]` - -
-
-
-
+A list of available tools (functions) that the model may suggest invoking before producing a text response. + +When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. -**tool_choice:** `typing.Optional[V2ChatStreamRequestToolChoice]`
@@ -2387,14 +2395,10 @@ for chunk in response:
**citation_mode:** `typing.Optional[V2ChatStreamRequestCitationMode]` - -
-
-
-
+Defaults to `"accurate"`. +Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. -**truncation_mode:** `typing.Optional[V2ChatStreamRequestTruncationMode]`
@@ -2402,7 +2406,7 @@ for chunk in response:
-**response_format:** `typing.Optional[V2ChatStreamRequestResponseFormat]` +**response_format:** `typing.Optional[ResponseFormat2]`
@@ -2410,7 +2414,8 @@ for chunk in response:
-**max_tokens:** `typing.Optional[int]` — The maximum number of tokens to generate. +**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + 
@@ -2418,7 +2423,8 @@ for chunk in response:
-**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of strings that the model will stop generating at. +**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + 
@@ -2426,15 +2432,14 @@ for chunk in response:
-**max_input_tokens:** `typing.Optional[int]` — The maximum number of tokens to feed into the model. - 
-
+**temperature:** `typing.Optional[float]` -
-
+Defaults to `0.3`. + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + +Randomness can be further maximized by increasing the value of the `p` parameter. -**temperature:** `typing.Optional[float]` β€” The temperature of the model.
@@ -2443,6 +2448,12 @@ for chunk in response:
**seed:** `typing.Optional[int]` + +If specified, the backend will make a best effort to sample tokens +deterministically, such that repeated requests with the same +seed and parameters should return the same result. However, +determinism cannot be totally guaranteed. +
@@ -2450,7 +2461,11 @@ for chunk in response:
-**frequency_penalty:** `typing.Optional[float]` — The frequency penalty of the model. +**frequency_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + 
@@ -2458,7 +2473,11 @@ for chunk in response:
-**presence_penalty:** `typing.Optional[float]` — The presence penalty of the model. +**presence_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + 
@@ -2466,7 +2485,11 @@ for chunk in response:
-**k:** `typing.Optional[int]` +**k:** `typing.Optional[float]` + +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. +
@@ -2474,7 +2497,11 @@ for chunk in response:
-**p:** `typing.Optional[int]` +**p:** `typing.Optional[float]` + +Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. +Defaults to `0.75`. min value of `0.01`, max value of `0.99`. +
@@ -2506,6 +2533,20 @@ for chunk in response:
+#### πŸ“ Description + +
+
+ +
+
Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG, follow our Text Generation guides. 
+
+
+
#### 🔌 Usage
@@ -2540,7 +2581,7 @@ client.v2.chat(
-**model:** `str` — The model to use for the chat. +**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
@@ -2557,14 +2598,11 @@ client.v2.chat(
**tools:** `typing.Optional[typing.Sequence[Tool2]]` - -
-
-
-
+A list of available tools (functions) that the model may suggest invoking before producing a text response. + +When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. -**tool_choice:** `typing.Optional[V2ChatRequestToolChoice]`
@@ -2573,14 +2611,10 @@ client.v2.chat(
**citation_mode:** `typing.Optional[V2ChatRequestCitationMode]` - -
-
-
-
+Defaults to `"accurate"`. +Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. -**truncation_mode:** `typing.Optional[V2ChatRequestTruncationMode]`
@@ -2588,7 +2622,7 @@ client.v2.chat(
-**response_format:** `typing.Optional[V2ChatRequestResponseFormat]` +**response_format:** `typing.Optional[ResponseFormat2]`
@@ -2596,7 +2630,8 @@ client.v2.chat(
-**max_tokens:** `typing.Optional[int]` — The maximum number of tokens to generate. +**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + 
@@ -2604,7 +2639,8 @@ client.v2.chat(
-**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of strings that the model will stop generating at. +**stop_sequences:** `typing.Optional[typing.Sequence[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + 
@@ -2612,15 +2648,14 @@ client.v2.chat(
-**max_input_tokens:** `typing.Optional[int]` — The maximum number of tokens to feed into the model. - 
-
+**temperature:** `typing.Optional[float]` -
-
+Defaults to `0.3`. + +A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + +Randomness can be further maximized by increasing the value of the `p` parameter. -**temperature:** `typing.Optional[float]` β€” The temperature of the model.
@@ -2629,6 +2664,12 @@ client.v2.chat(
**seed:** `typing.Optional[int]` + +If specified, the backend will make a best effort to sample tokens +deterministically, such that repeated requests with the same +seed and parameters should return the same result. However, +determinism cannot be totally guaranteed. +
@@ -2636,7 +2677,11 @@ client.v2.chat(
-**frequency_penalty:** `typing.Optional[float]` — The frequency penalty of the model. +**frequency_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + 
@@ -2644,7 +2689,11 @@ client.v2.chat(
-**presence_penalty:** `typing.Optional[float]` — The presence penalty of the model. +**presence_penalty:** `typing.Optional[float]` + +Defaults to `0.0`, min value of `0.0`, max value of `1.0`. +Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + 
@@ -2652,7 +2701,11 @@ client.v2.chat(
-**k:** `typing.Optional[int]` +**k:** `typing.Optional[float]` + +Ensures only the top `k` most likely tokens are considered for generation at each step. +Defaults to `0`, min value of `0`, max value of `500`. +
@@ -2660,7 +2713,11 @@ client.v2.chat(
-**p:** `typing.Optional[int]` +**p:** `typing.Optional[float]` + +Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. +Defaults to `0.75`. min value of `0.01`, max value of `0.99`. +
diff --git a/src/cohere/__init__.py b/src/cohere/__init__.py index 269aba570..05c600132 100644 --- a/src/cohere/__init__.py +++ b/src/cohere/__init__.py @@ -15,6 +15,7 @@ ChatRequestCitationQuality, ChatRequestConnectorsSearchOptions, ChatRequestPromptTruncation, + ChatRequestSafetyMode, ChatSearchQueriesGenerationEvent, ChatSearchQuery, ChatSearchResult, @@ -26,11 +27,16 @@ ChatStreamRequestCitationQuality, ChatStreamRequestConnectorsSearchOptions, ChatStreamRequestPromptTruncation, + ChatStreamRequestSafetyMode, ChatStreamStartEvent, ChatTextGenerationEvent, ChatToolCallsChunkEvent, ChatToolCallsGenerationEvent, CheckApiKeyResponse, + CitationEndEvent, + CitationStartEvent, + CitationStartEventDelta, + CitationStartEventDeltaMessage, ClassifyDataMetrics, ClassifyExample, ClassifyRequestTruncate, @@ -85,6 +91,7 @@ GetConnectorResponse, GetModelResponse, JsonResponseFormat, + JsonResponseFormat2, LabelMetric, ListConnectorsResponse, ListEmbedJobResponse, @@ -108,6 +115,9 @@ RerankResponseResultsItemDocument, RerankerDataMetrics, ResponseFormat, + ResponseFormat2, + ResponseFormat2_JsonObject, + ResponseFormat2_Text, ResponseFormat_JsonObject, ResponseFormat_Text, SingleGeneration, @@ -153,7 +163,6 @@ ) from . import connectors, datasets, embed_jobs, finetuning, models, v2 from .aws_client import AwsClient -from .client_v2 import AsyncClientV2, ClientV2 from .bedrock_client import BedrockClient from .client import AsyncClient, Client from .datasets import ( @@ -168,6 +177,12 @@ from .sagemaker_client import SagemakerClient from .v2 import ( AssistantMessage, + AssistantMessageContent, + AssistantMessageContentItem, + AssistantMessageContentItem_Text, + AssistantMessageResponse, + AssistantMessageResponseContentItem, + AssistantMessageResponseContentItem_Text, ChatContentDeltaEvent, ChatContentDeltaEventDelta, ChatContentDeltaEventDeltaMessage, @@ -203,15 +218,15 @@ ChatToolPlanDeltaEventDelta, Citation, Content, - Content_Document, Content_Text, - DocumentContent, DocumentSource, NonStreamedChatResponse2, Source, Source_Document, Source_Tool, StreamedChatResponse2, + StreamedChatResponse2_CitationEnd, + StreamedChatResponse2_CitationStart, StreamedChatResponse2_ContentDelta, StreamedChatResponse2_ContentEnd, StreamedChatResponse2_ContentStart, @@ -241,13 +256,7 @@ UserMessage, UserMessageContent, V2ChatRequestCitationMode, - V2ChatRequestResponseFormat, - V2ChatRequestToolChoice, - V2ChatRequestTruncationMode, V2ChatStreamRequestCitationMode, - V2ChatStreamRequestResponseFormat, - V2ChatStreamRequestToolChoice, - V2ChatStreamRequestTruncationMode, ) from .version import __version__ @@ -257,6 +266,12 @@ "ApiMetaBilledUnits", "ApiMetaTokens", "AssistantMessage", + "AssistantMessageContent", + "AssistantMessageContentItem", + "AssistantMessageContentItem_Text", + "AssistantMessageResponse", + "AssistantMessageResponseContentItem", + "AssistantMessageResponseContentItem_Text", "AsyncClient", "AuthTokenType", "AwsClient", @@ -292,6 +307,7 @@ "ChatRequestCitationQuality", "ChatRequestConnectorsSearchOptions", "ChatRequestPromptTruncation", + "ChatRequestSafetyMode", "ChatSearchQueriesGenerationEvent", "ChatSearchQuery", "ChatSearchResult", @@ -304,6 +320,7 @@ "ChatStreamRequestCitationQuality", "ChatStreamRequestConnectorsSearchOptions", "ChatStreamRequestPromptTruncation", + "ChatStreamRequestSafetyMode", "ChatStreamStartEvent", "ChatTextGenerationEvent", "ChatToolCallDeltaEvent", @@ -321,6 +338,10 @@ "ChatToolPlanDeltaEventDelta", "CheckApiKeyResponse", "Citation", + "CitationEndEvent", + 
"CitationStartEvent", + "CitationStartEventDelta", + "CitationStartEventDeltaMessage", "ClassifyDataMetrics", "ClassifyExample", "ClassifyRequestTruncate", @@ -337,7 +358,6 @@ "ConnectorAuthStatus", "ConnectorOAuth", "Content", - "Content_Document", "Content_Text", "CreateConnectorOAuth", "CreateConnectorResponse", @@ -355,7 +375,6 @@ "DatasetsListResponse", "DeleteConnectorResponse", "DetokenizeResponse", - "DocumentContent", "DocumentSource", "EmbedByTypeResponse", "EmbedByTypeResponseEmbeddings", @@ -392,6 +411,7 @@ "GetModelResponse", "InternalServerError", "JsonResponseFormat", + "JsonResponseFormat2", "LabelMetric", "ListConnectorsResponse", "ListEmbedJobResponse", @@ -418,6 +438,9 @@ "RerankResponseResultsItemDocument", "RerankerDataMetrics", "ResponseFormat", + "ResponseFormat2", + "ResponseFormat2_JsonObject", + "ResponseFormat2_Text", "ResponseFormat_JsonObject", "ResponseFormat_Text", "SagemakerClient", @@ -430,6 +453,8 @@ "Source_Tool", "StreamedChatResponse", "StreamedChatResponse2", + "StreamedChatResponse2_CitationEnd", + "StreamedChatResponse2_CitationStart", "StreamedChatResponse2_ContentDelta", "StreamedChatResponse2_ContentEnd", "StreamedChatResponse2_ContentStart", @@ -485,13 +510,7 @@ "UserMessage", "UserMessageContent", "V2ChatRequestCitationMode", - "V2ChatRequestResponseFormat", - "V2ChatRequestToolChoice", - "V2ChatRequestTruncationMode", "V2ChatStreamRequestCitationMode", - "V2ChatStreamRequestResponseFormat", - "V2ChatStreamRequestToolChoice", - "V2ChatStreamRequestTruncationMode", "__version__", "connectors", "datasets", diff --git a/src/cohere/base_client.py b/src/cohere/base_client.py index c9ac9a6f8..d70f463d0 100644 --- a/src/cohere/base_client.py +++ b/src/cohere/base_client.py @@ -32,8 +32,10 @@ from .types.chat_document import ChatDocument from .types.chat_request_citation_quality import ChatRequestCitationQuality from .types.chat_request_prompt_truncation import ChatRequestPromptTruncation +from .types.chat_request_safety_mode import ChatRequestSafetyMode from .types.chat_stream_request_citation_quality import ChatStreamRequestCitationQuality from .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation +from .types.chat_stream_request_safety_mode import ChatStreamRequestSafetyMode from .types.check_api_key_response import CheckApiKeyResponse from .types.classify_example import ClassifyExample from .types.classify_request_truncate import ClassifyRequestTruncate @@ -172,6 +174,7 @@ def chat_stream( tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, force_single_step: typing.Optional[bool] = OMIT, response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> typing.Iterator[StreamedChatResponse]: """ @@ -182,13 +185,15 @@ def chat_stream( ---------- message : str Text input for the model to respond to. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments model : typing.Optional[str] - Defaults to `command-r-plus`. + Defaults to `command-r-plus-08-2024`. The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + Compatible Deployments: Cohere Platform, Private Deployments @@ -196,6 +201,7 @@ def chat_stream( When specified, the default Cohere preamble will be replaced with the provided one. 
Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -205,6 +211,7 @@ def chat_stream( Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -212,6 +219,7 @@ def chat_stream( An alternative to `chat_history`. Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + Compatible Deployments: Cohere Platform @@ -225,13 +233,17 @@ def chat_stream( With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments connectors : typing.Optional[typing.Sequence[ChatConnector]] Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + Compatible Deployments: Cohere Platform @@ -239,6 +251,7 @@ def chat_stream( Defaults to `false`. When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -246,10 +259,12 @@ def chat_stream( A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. Example: - `[ + ``` + [ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ]` + ] + ``` Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. 
@@ -260,6 +275,7 @@ def chat_stream( An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -267,6 +283,7 @@ def chat_stream( Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -276,11 +293,13 @@ def chat_stream( A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. Randomness can be further maximized by increasing the value of the `p` parameter. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments max_tokens : typing.Optional[int] The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -288,18 +307,21 @@ def chat_stream( The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. Input will be truncated according to the `prompt_truncation` parameter. + Compatible Deployments: Cohere Platform k : typing.Optional[int] Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments p : typing.Optional[float] Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -308,11 +330,13 @@ def chat_stream( deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments stop_sequences : typing.Optional[typing.Sequence[str]] A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -320,6 +344,7 @@ def chat_stream( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. 
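Taken together, the sampling controls documented above compose as in this illustrative sketch (the values are examples, not recommendations):

```python
# Illustrative values for the sampling controls documented above.
from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

response = client.chat(
    message="Suggest a name for a pet penguin.",
    temperature=0.3,          # lower values give less random generations
    k=50,                     # only the 50 most likely tokens are considered
    p=0.75,                   # nucleus sampling; `p` acts after `k`
    seed=42,                  # best-effort determinism for repeated requests
    stop_sequences=["\n\n"],  # stop at the first blank line
    frequency_penalty=0.2,    # discourage repeating earlier tokens
)
print(response.text)
```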
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -327,12 +352,14 @@ def chat_stream( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments raw_prompting : typing.Optional[bool] When enabled, the user's prompt will be sent to the model without any pre-processing. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -343,6 +370,7 @@ def chat_stream( A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -368,6 +396,7 @@ def chat_stream( ] ``` **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -376,6 +405,17 @@ def chat_stream( response_format : typing.Optional[ResponseFormat] + safety_mode : typing.Optional[ChatStreamRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -468,6 +508,7 @@ def chat_stream( ], force_single_step=True, response_format=ResponseFormat_Text(), + safety_mode="CONTEXTUAL", ) for chunk in response: yield chunk @@ -501,6 +542,7 @@ def chat_stream( "tool_results": tool_results, "force_single_step": force_single_step, "response_format": response_format, + "safety_mode": safety_mode, "stream": True, }, request_options=request_options, @@ -594,6 +636,7 @@ def chat( tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, force_single_step: typing.Optional[bool] = OMIT, response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> NonStreamedChatResponse: """ @@ -604,13 +647,15 @@ def chat( ---------- message : str Text input for the model to respond to. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments model : typing.Optional[str] - Defaults to `command-r-plus`. + Defaults to `command-r-plus-08-2024`. The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. 
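Since `safety_mode` is new in this regeneration, a blocking-call counterpart to the `chat_stream` example above may help; the model name is the new default documented above:

```python
# Non-streaming counterpart of the chat_stream safety_mode example above.
from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

response = client.chat(
    message="Tell me about penguins.",
    model="command-r-plus-08-2024",  # safety modes require the 08-2024 models or newer
    safety_mode="CONTEXTUAL",        # other documented values: "STRICT", "NONE"
)
print(response.text)
```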
+ Compatible Deployments: Cohere Platform, Private Deployments @@ -618,6 +663,7 @@ def chat( When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -627,6 +673,7 @@ def chat( Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -634,6 +681,7 @@ def chat( An alternative to `chat_history`. Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + Compatible Deployments: Cohere Platform @@ -647,13 +695,17 @@ def chat( With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments connectors : typing.Optional[typing.Sequence[ChatConnector]] Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + Compatible Deployments: Cohere Platform @@ -661,6 +713,7 @@ def chat( Defaults to `false`. When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -668,10 +721,12 @@ def chat( A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. Example: - `[ + ``` + [ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ]` + ] + ``` Keys and values from each document will be serialized to a string and passed to the model. 
The resulting generation will include citations that reference some of these documents. @@ -682,6 +737,7 @@ def chat( An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -689,6 +745,7 @@ def chat( Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -698,11 +755,13 @@ def chat( A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. Randomness can be further maximized by increasing the value of the `p` parameter. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments max_tokens : typing.Optional[int] The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -710,18 +769,21 @@ def chat( The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. Input will be truncated according to the `prompt_truncation` parameter. + Compatible Deployments: Cohere Platform k : typing.Optional[int] Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments p : typing.Optional[float] Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -730,11 +792,13 @@ def chat( deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments stop_sequences : typing.Optional[typing.Sequence[str]] A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -742,6 +806,7 @@ def chat( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. 
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -749,12 +814,14 @@ def chat( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments raw_prompting : typing.Optional[bool] When enabled, the user's prompt will be sent to the model without any pre-processing. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -765,6 +832,7 @@ def chat( A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -790,6 +858,7 @@ def chat( ] ``` **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -798,6 +867,17 @@ def chat( response_format : typing.Optional[ResponseFormat] + safety_mode : typing.Optional[ChatRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -849,6 +929,7 @@ def chat( "tool_results": tool_results, "force_single_step": force_single_step, "response_format": response_format, + "safety_mode": safety_mode, "stream": False, }, request_options=request_options, @@ -928,10 +1009,9 @@ def generate_stream( request_options: typing.Optional[RequestOptions] = None ) -> typing.Iterator[GenerateStreamedResponse]: """ - > 🚧 Warning - > - > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates realistic text conditioned on a given input. Parameters @@ -1166,10 +1246,9 @@ def generate( request_options: typing.Optional[RequestOptions] = None ) -> Generation: """ - > 🚧 Warning - > - > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. 
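A hypothetical before/after for the migration this warning points to; `generate()`'s `prompt` parameter and the shape of `Generation` are not shown in this patch and are assumed from the pre-existing v1 API:

```python
# Hypothetical migration sketch; generate()'s parameters are assumed
# from the pre-existing v1 API rather than shown in this patch.
from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

# Legacy endpoint (no longer maintained):
generation = client.generate(prompt="Write a haiku about penguins.")
print(generation.generations[0].text)

# Chat API replacement:
response = client.chat(message="Write a haiku about penguins.")
print(response.text)
```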
+ Generates realistic text conditioned on a given input. Parameters @@ -1431,6 +1510,7 @@ def embed( ) client.embed( texts=["string"], + images=["string"], model="string", input_type="search_document", embedding_types=["float"], @@ -1563,14 +1643,8 @@ def rerank( token="YOUR_TOKEN", ) client.rerank( - model="rerank-english-v3.0", - query="What is the capital of the United States?", - documents=[ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", - ], + query="query", + documents=["documents"], ) """ _response = self._client_wrapper.httpx_client.request( @@ -1686,7 +1760,6 @@ def classify( Examples -------- - from cohere import ClassifyExample from cohere.client import Client client = Client( @@ -1694,49 +1767,7 @@ def classify( token="YOUR_TOKEN", ) client.classify( - inputs=["Confirm your email address", "hey i need u to send some $"], - examples=[ - ClassifyExample( - text="Dermatologists don't like her!", - label="Spam", - ), - ClassifyExample( - text="Hello, open to this?", - label="Spam", - ), - ClassifyExample( - text="I need help please wire me $1000 right now", - label="Spam", - ), - ClassifyExample( - text="Nice to know you ;)", - label="Spam", - ), - ClassifyExample( - text="Please help me?", - label="Spam", - ), - ClassifyExample( - text="Your parcel will be delivered today", - label="Not spam", - ), - ClassifyExample( - text="Review changes to our Terms and Conditions", - label="Not spam", - ), - ClassifyExample( - text="Weekly sync notes", - label="Not spam", - ), - ClassifyExample( - text="Re: Follow up from today’s meeting", - label="Not spam", - ), - ClassifyExample( - text="Pre-read for tomorrow", - label="Not spam", - ), - ], + inputs=["inputs"], ) """ _response = self._client_wrapper.httpx_client.request( @@ -1811,10 +1842,9 @@ def summarize( request_options: typing.Optional[RequestOptions] = None ) -> SummarizeResponse: """ - > 🚧 Warning - > - > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates a summary in English for a given text. Parameters @@ -1857,7 +1887,7 @@ def summarize( token="YOUR_TOKEN", ) client.summarize( - text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 Β°C or 35 Β°F). 
It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used fo\r all variants. Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.', + text="text", ) """ _response = self._client_wrapper.httpx_client.request( @@ -2052,8 +2082,8 @@ def detokenize( token="YOUR_TOKEN", ) client.detokenize( - tokens=[10104, 12221, 1315, 34, 1420, 69], - model="command", + tokens=[1], + model="model", ) """ _response = self._client_wrapper.httpx_client.request( @@ -2294,6 +2324,7 @@ async def chat_stream( tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, force_single_step: typing.Optional[bool] = OMIT, response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> typing.AsyncIterator[StreamedChatResponse]: """ @@ -2304,13 +2335,15 @@ async def chat_stream( ---------- message : str Text input for the model to respond to. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments model : typing.Optional[str] - Defaults to `command-r-plus`. + Defaults to `command-r-plus-08-2024`. The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + Compatible Deployments: Cohere Platform, Private Deployments @@ -2318,6 +2351,7 @@ async def chat_stream( When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2327,6 +2361,7 @@ async def chat_stream( Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2334,6 +2369,7 @@ async def chat_stream( An alternative to `chat_history`. Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. 
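A sketch of resuming a persisted conversation by sharing one ID across calls, shown with the sync client for brevity; the ID below is just an arbitrary non-empty string:

```python
# Two turns sharing one conversation_id, so the second call can refer
# back to the first without resending chat_history.
import uuid

from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
conversation_id = str(uuid.uuid4())  # any non-empty string works

client.chat(
    message="My favourite bird is the emperor penguin.",
    conversation_id=conversation_id,
)
followup = client.chat(
    message="How tall does it grow?",
    conversation_id=conversation_id,
)
print(followup.text)
```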
+ Compatible Deployments: Cohere Platform @@ -2347,13 +2383,17 @@ async def chat_stream( With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments connectors : typing.Optional[typing.Sequence[ChatConnector]] Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + Compatible Deployments: Cohere Platform @@ -2361,6 +2401,7 @@ async def chat_stream( Defaults to `false`. When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2368,10 +2409,12 @@ async def chat_stream( A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. Example: - `[ + ``` + [ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ]` + ] + ``` Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. @@ -2382,6 +2425,7 @@ async def chat_stream( An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2389,6 +2433,7 @@ async def chat_stream( Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2398,11 +2443,13 @@ async def chat_stream( A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. Randomness can be further maximized by increasing the value of the `p` parameter. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments max_tokens : typing.Optional[int] The maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2410,18 +2457,21 @@ async def chat_stream( The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. Input will be truncated according to the `prompt_truncation` parameter. + Compatible Deployments: Cohere Platform k : typing.Optional[int] Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments p : typing.Optional[float] Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2430,11 +2480,13 @@ async def chat_stream( deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments stop_sequences : typing.Optional[typing.Sequence[str]] A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2442,6 +2494,7 @@ async def chat_stream( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2449,12 +2502,14 @@ async def chat_stream( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments raw_prompting : typing.Optional[bool] When enabled, the user's prompt will be sent to the model without any pre-processing. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2465,6 +2520,7 @@ async def chat_stream( A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2490,6 +2546,7 @@ async def chat_stream( ] ``` **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. 
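The two-step flow described above might look like the following sketch. `Tool` and `ToolParameterDefinitionsValue` are existing v1 types whose exact fields are not shown in this patch, so treat those shapes as assumptions; the sales report output is fabricated for illustration:

```python
# Sketch of the tools -> tool_results round trip described above.
# Field shapes for Tool/ToolParameterDefinitionsValue are assumptions.
from cohere import Tool, ToolParameterDefinitionsValue
from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")

tools = [
    Tool(
        name="daily_sales_report",
        description="Fetches the sales report for a given day.",
        parameter_definitions={
            "day": ToolParameterDefinitionsValue(
                description="The day, in YYYY-MM-DD format.",
                type="str",
                required=True,
            )
        },
    )
]

# Step 1: the model suggests tool calls; `text` is "" at this stage.
first = client.chat(message="What were total sales on 2023-09-29?", tools=tools)

# Step 2: execute the calls ourselves and send the outputs back.
# Per the note above, this second call is not added to chat_history.
tool_results = [
    {"call": call, "outputs": [{"report": "Sales were 10,000 USD."}]}  # illustrative output
    for call in (first.tool_calls or [])
]
second = client.chat(
    message="What were total sales on 2023-09-29?",
    tools=tools,
    tool_results=tool_results,
)
print(second.text)
```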
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2498,6 +2555,17 @@ async def chat_stream( response_format : typing.Optional[ResponseFormat] + safety_mode : typing.Optional[ChatStreamRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -2595,6 +2663,7 @@ async def main() -> None: ], force_single_step=True, response_format=ResponseFormat_Text(), + safety_mode="CONTEXTUAL", ) async for chunk in response: yield chunk @@ -2631,6 +2700,7 @@ async def main() -> None: "tool_results": tool_results, "force_single_step": force_single_step, "response_format": response_format, + "safety_mode": safety_mode, "stream": True, }, request_options=request_options, @@ -2724,6 +2794,7 @@ async def chat( tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT, force_single_step: typing.Optional[bool] = OMIT, response_format: typing.Optional[ResponseFormat] = OMIT, + safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> NonStreamedChatResponse: """ @@ -2734,13 +2805,15 @@ async def chat( ---------- message : str Text input for the model to respond to. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments model : typing.Optional[str] - Defaults to `command-r-plus`. + Defaults to `command-r-plus-08-2024`. The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. + Compatible Deployments: Cohere Platform, Private Deployments @@ -2748,6 +2821,7 @@ async def chat( When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role. The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2757,6 +2831,7 @@ async def chat( Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content. The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2764,6 +2839,7 @@ async def chat( An alternative to `chat_history`. 
Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string. + Compatible Deployments: Cohere Platform @@ -2777,13 +2853,17 @@ async def chat( With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - Compatible Deployments: Cohere Platform Only AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments + + Compatible Deployments: + - AUTO: Cohere Platform Only + - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments connectors : typing.Optional[typing.Sequence[ChatConnector]] Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one. When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG). + Compatible Deployments: Cohere Platform @@ -2791,6 +2871,7 @@ async def chat( Defaults to `false`. When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2798,10 +2879,12 @@ async def chat( A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary. Example: - `[ + ``` + [ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." }, { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." }, - ]` + ] + ``` Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents. @@ -2812,6 +2895,7 @@ async def chat( An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model. See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2819,6 +2903,7 @@ async def chat( Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2828,11 +2913,13 @@ async def chat( A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. Randomness can be further maximized by increasing the value of the `p` parameter. 
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments max_tokens : typing.Optional[int] The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2840,18 +2927,21 @@ async def chat( The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer. Input will be truncated according to the `prompt_truncation` parameter. + Compatible Deployments: Cohere Platform k : typing.Optional[int] Ensures only the top `k` most likely tokens are considered for generation at each step. Defaults to `0`, min value of `0`, max value of `500`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments p : typing.Optional[float] Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. Defaults to `0.75`. min value of `0.01`, max value of `0.99`. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2860,11 +2950,13 @@ async def chat( deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments stop_sequences : typing.Optional[typing.Sequence[str]] A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2872,6 +2964,7 @@ async def chat( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2879,12 +2972,14 @@ async def chat( Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments raw_prompting : typing.Optional[bool] When enabled, the user's prompt will be sent to the model without any pre-processing. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2895,6 +2990,7 @@ async def chat( A list of available tools (functions) that the model may suggest invoking before producing a text response. When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. 
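The async client mirrors these parameters one-for-one; a minimal sketch following the asyncio pattern used by the other examples in this patch:

```python
# Minimal AsyncClient sketch; parameters mirror the sync chat() above.
import asyncio

from cohere.client import AsyncClient

client = AsyncClient(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")


async def main() -> None:
    response = await client.chat(
        message="Tell me about penguins.",
        safety_mode="CONTEXTUAL",
    )
    print(response.text)


asyncio.run(main())
```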
+ Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2920,6 +3016,7 @@ async def chat( ] ``` **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments @@ -2928,6 +3025,17 @@ async def chat( response_format : typing.Optional[ResponseFormat] + safety_mode : typing.Optional[ChatRequestSafetyMode] + Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`. + When `NONE` is specified, the safety instruction will be omitted. + + Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. + + **Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer. + + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments + + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -2987,6 +3095,7 @@ async def main() -> None: "tool_results": tool_results, "force_single_step": force_single_step, "response_format": response_format, + "safety_mode": safety_mode, "stream": False, }, request_options=request_options, @@ -3066,10 +3175,9 @@ async def generate_stream( request_options: typing.Optional[RequestOptions] = None ) -> typing.AsyncIterator[GenerateStreamedResponse]: """ - > 🚧 Warning - > - > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates realistic text conditioned on a given input. Parameters @@ -3312,10 +3420,9 @@ async def generate( request_options: typing.Optional[RequestOptions] = None ) -> Generation: """ - > 🚧 Warning - > - > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates realistic text conditioned on a given input. Parameters @@ -3590,6 +3697,7 @@ async def embed( async def main() -> None: await client.embed( texts=["string"], + images=["string"], model="string", input_type="search_document", embedding_types=["float"], @@ -3730,14 +3838,8 @@ async def rerank( async def main() -> None: await client.rerank( - model="rerank-english-v3.0", - query="What is the capital of the United States?", - documents=[ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.", - ], + query="query", + documents=["documents"], ) @@ -3858,7 +3960,6 @@ async def classify( -------- import asyncio - from cohere import ClassifyExample from cohere.client import AsyncClient client = AsyncClient( @@ -3869,49 +3970,7 @@ async def classify( async def main() -> None: await client.classify( - inputs=["Confirm your email address", "hey i need u to send some $"], - examples=[ - ClassifyExample( - text="Dermatologists don't like her!", - label="Spam", - ), - ClassifyExample( - text="Hello, open to this?", - label="Spam", - ), - ClassifyExample( - text="I need help please wire me $1000 right now", - label="Spam", - ), - ClassifyExample( - text="Nice to know you ;)", - label="Spam", - ), - ClassifyExample( - text="Please help me?", - label="Spam", - ), - ClassifyExample( - text="Your parcel will be delivered today", - label="Not spam", - ), - ClassifyExample( - text="Review changes to our Terms and Conditions", - label="Not spam", - ), - ClassifyExample( - text="Weekly sync notes", - label="Not spam", - ), - ClassifyExample( - text="Re: Follow up from today’s meeting", - label="Not spam", - ), - ClassifyExample( - text="Pre-read for tomorrow", - label="Not spam", - ), - ], + inputs=["inputs"], ) @@ -3989,10 +4048,9 @@ async def summarize( request_options: typing.Optional[RequestOptions] = None ) -> SummarizeResponse: """ - > 🚧 Warning - > - > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. - + + This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API. + Generates a summary in English for a given text. Parameters @@ -4040,7 +4098,7 @@ async def summarize( async def main() -> None: await client.summarize( - text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 Β°C or 35 Β°F). It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used fo\r all variants. 
Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.', + text="text", ) @@ -4251,8 +4309,8 @@ async def detokenize( async def main() -> None: await client.detokenize( - tokens=[10104, 12221, 1315, 34, 1420, 69], - model="command", + tokens=[1], + model="model", ) diff --git a/src/cohere/core/client_wrapper.py b/src/cohere/core/client_wrapper.py index 91bf8d83b..34bcb59e6 100644 --- a/src/cohere/core/client_wrapper.py +++ b/src/cohere/core/client_wrapper.py @@ -25,7 +25,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "cohere", - "X-Fern-SDK-Version": "5.8.1", + "X-Fern-SDK-Version": "5.9.0", } if self._client_name is not None: headers["X-Client-Name"] = self._client_name diff --git a/src/cohere/types/__init__.py b/src/cohere/types/__init__.py index b15bf979c..8725658cc 100644 --- a/src/cohere/types/__init__.py +++ b/src/cohere/types/__init__.py @@ -14,6 +14,7 @@ from .chat_request_citation_quality import ChatRequestCitationQuality from .chat_request_connectors_search_options import ChatRequestConnectorsSearchOptions from .chat_request_prompt_truncation import ChatRequestPromptTruncation +from .chat_request_safety_mode import ChatRequestSafetyMode from .chat_search_queries_generation_event import ChatSearchQueriesGenerationEvent from .chat_search_query import ChatSearchQuery from .chat_search_result import ChatSearchResult @@ -25,11 +26,16 @@ from .chat_stream_request_citation_quality import ChatStreamRequestCitationQuality from .chat_stream_request_connectors_search_options import ChatStreamRequestConnectorsSearchOptions from .chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation +from .chat_stream_request_safety_mode import ChatStreamRequestSafetyMode from .chat_stream_start_event import ChatStreamStartEvent from .chat_text_generation_event import ChatTextGenerationEvent from .chat_tool_calls_chunk_event import ChatToolCallsChunkEvent from .chat_tool_calls_generation_event import ChatToolCallsGenerationEvent from .check_api_key_response import CheckApiKeyResponse +from .citation_end_event import CitationEndEvent +from .citation_start_event import CitationStartEvent +from .citation_start_event_delta import CitationStartEventDelta +from .citation_start_event_delta_message import CitationStartEventDeltaMessage from .classify_data_metrics import ClassifyDataMetrics from .classify_example import ClassifyExample from .classify_request_truncate import ClassifyRequestTruncate @@ -86,6 +92,7 @@ from .get_connector_response import GetConnectorResponse from .get_model_response import GetModelResponse from .json_response_format import JsonResponseFormat +from .json_response_format2 import JsonResponseFormat2 from .label_metric import LabelMetric from .list_connectors_response import ListConnectorsResponse from .list_embed_job_response import ListEmbedJobResponse @@ -105,6 +112,7 @@ from .rerank_response_results_item_document import RerankResponseResultsItemDocument from .reranker_data_metrics import RerankerDataMetrics from .response_format import ResponseFormat, ResponseFormat_JsonObject, ResponseFormat_Text +from .response_format2 import ResponseFormat2, ResponseFormat2_JsonObject, ResponseFormat2_Text from .single_generation import SingleGeneration from .single_generation_in_stream import 
SingleGenerationInStream from .single_generation_token_likelihoods_item import SingleGenerationTokenLikelihoodsItem @@ -150,6 +158,7 @@ "ChatRequestCitationQuality", "ChatRequestConnectorsSearchOptions", "ChatRequestPromptTruncation", + "ChatRequestSafetyMode", "ChatSearchQueriesGenerationEvent", "ChatSearchQuery", "ChatSearchResult", @@ -161,11 +170,16 @@ "ChatStreamRequestCitationQuality", "ChatStreamRequestConnectorsSearchOptions", "ChatStreamRequestPromptTruncation", + "ChatStreamRequestSafetyMode", "ChatStreamStartEvent", "ChatTextGenerationEvent", "ChatToolCallsChunkEvent", "ChatToolCallsGenerationEvent", "CheckApiKeyResponse", + "CitationEndEvent", + "CitationStartEvent", + "CitationStartEventDelta", + "CitationStartEventDeltaMessage", "ClassifyDataMetrics", "ClassifyExample", "ClassifyRequestTruncate", @@ -220,6 +234,7 @@ "GetConnectorResponse", "GetModelResponse", "JsonResponseFormat", + "JsonResponseFormat2", "LabelMetric", "ListConnectorsResponse", "ListEmbedJobResponse", @@ -243,6 +258,9 @@ "RerankResponseResultsItemDocument", "RerankerDataMetrics", "ResponseFormat", + "ResponseFormat2", + "ResponseFormat2_JsonObject", + "ResponseFormat2_Text", "ResponseFormat_JsonObject", "ResponseFormat_Text", "SingleGeneration", diff --git a/src/cohere/types/chat_request_connectors_search_options.py b/src/cohere/types/chat_request_connectors_search_options.py index 8de5ff0ef..3bf2e015f 100644 --- a/src/cohere/types/chat_request_connectors_search_options.py +++ b/src/cohere/types/chat_request_connectors_search_options.py @@ -19,6 +19,7 @@ class ChatRequestConnectorsSearchOptions(UncheckedBaseModel): deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments """ diff --git a/src/cohere/types/chat_request_safety_mode.py b/src/cohere/types/chat_request_safety_mode.py new file mode 100644 index 000000000..8e2e7c08f --- /dev/null +++ b/src/cohere/types/chat_request_safety_mode.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +ChatRequestSafetyMode = typing.Union[typing.Literal["CONTEXTUAL", "STRICT", "NONE"], typing.Any] diff --git a/src/cohere/types/chat_stream_request_connectors_search_options.py b/src/cohere/types/chat_stream_request_connectors_search_options.py index a1e9031c0..90ac04bff 100644 --- a/src/cohere/types/chat_stream_request_connectors_search_options.py +++ b/src/cohere/types/chat_stream_request_connectors_search_options.py @@ -19,6 +19,7 @@ class ChatStreamRequestConnectorsSearchOptions(UncheckedBaseModel): deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments """ diff --git a/src/cohere/types/chat_stream_request_safety_mode.py b/src/cohere/types/chat_stream_request_safety_mode.py new file mode 100644 index 000000000..83a6f2d56 --- /dev/null +++ b/src/cohere/types/chat_stream_request_safety_mode.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +ChatStreamRequestSafetyMode = typing.Union[typing.Literal["CONTEXTUAL", "STRICT", "NONE"], typing.Any] diff --git a/src/cohere/v2/types/document_content.py b/src/cohere/types/citation_end_event.py similarity index 58% rename from src/cohere/v2/types/document_content.py rename to src/cohere/types/citation_end_event.py index b50a5e0d7..1f202554b 100644 --- a/src/cohere/v2/types/document_content.py +++ b/src/cohere/types/citation_end_event.py @@ -4,17 +4,16 @@ import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -from ...core.unchecked_base_model import UncheckedBaseModel +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..v2.types.chat_stream_event_type import ChatStreamEventType -class DocumentContent(UncheckedBaseModel): +class CitationEndEvent(ChatStreamEventType): """ - Content block of the message that contains information about documents. + A streamed event which signifies a citation has finished streaming. """ - id: str - document: typing.Dict[str, typing.Any] + index: typing.Optional[int] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/types/citation_start_event.py b/src/cohere/types/citation_start_event.py new file mode 100644 index 000000000..8512258c6 --- /dev/null +++ b/src/cohere/types/citation_start_event.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic + +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..v2.types.chat_stream_event_type import ChatStreamEventType +from .citation_start_event_delta import CitationStartEventDelta + + +class CitationStartEvent(ChatStreamEventType): + """ + A streamed event which signifies a citation has been created. + """ + + index: typing.Optional[int] = None + delta: typing.Optional[CitationStartEventDelta] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/types/citation_start_event_delta.py b/src/cohere/types/citation_start_event_delta.py new file mode 100644 index 000000000..3e77d36cc --- /dev/null +++ b/src/cohere/types/citation_start_event_delta.py @@ -0,0 +1,22 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic + +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .citation_start_event_delta_message import CitationStartEventDeltaMessage + + +class CitationStartEventDelta(UncheckedBaseModel): + message: typing.Optional[CitationStartEventDeltaMessage] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/types/citation_start_event_delta_message.py b/src/cohere/types/citation_start_event_delta_message.py new file mode 100644 index 000000000..0f755a4ba --- /dev/null +++ b/src/cohere/types/citation_start_event_delta_message.py @@ -0,0 +1,22 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic + +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from ..v2.types.citation import Citation + + +class CitationStartEventDeltaMessage(UncheckedBaseModel): + citations: typing.Optional[Citation] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/types/json_response_format.py b/src/cohere/types/json_response_format.py index eb5da691d..cd8d91ad3 100644 --- a/src/cohere/types/json_response_format.py +++ b/src/cohere/types/json_response_format.py @@ -11,7 +11,7 @@ class JsonResponseFormat(UncheckedBaseModel): schema_: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(alias="schema", default=None) """ - [BETA] A JSON schema object that the output will adhere to. There are some restrictions we have on the schema, refer to [our guide](/docs/structured-outputs-json#schema-constraints) for more information. + A JSON schema object that the output will adhere to. There are some restrictions we have on the schema, refer to [our guide](/docs/structured-outputs-json#schema-constraints) for more information. Example (required name and age object): ```json diff --git a/src/cohere/types/json_response_format2.py b/src/cohere/types/json_response_format2.py new file mode 100644 index 000000000..d6d24856f --- /dev/null +++ b/src/cohere/types/json_response_format2.py @@ -0,0 +1,38 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic + +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class JsonResponseFormat2(UncheckedBaseModel): + json_schema: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + [BETA] A JSON schema object that the output will adhere to. There are some restrictions we have on the schema, refer to [our guide](/docs/structured-outputs-json#schema-constraints) for more information. + Example (required name and age object): + + ```json + { + "type": "object", + "properties": { + "name": { "type": "string" }, + "age": { "type": "integer" } + }, + "required": ["name", "age"] + } + ``` + + **Note**: This field must not be specified when the `type` is set to `"text"`. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/types/response_format.py b/src/cohere/types/response_format.py index 3b1b75d7e..c7c77f63e 100644 --- a/src/cohere/types/response_format.py +++ b/src/cohere/types/response_format.py @@ -13,7 +13,7 @@ class ResponseFormat_Text(UncheckedBaseModel): """ - Configuration for forcing the model output to adhere to the specified format. Supported on [Command R](https://docs.cohere.com/docs/command-r), [Command R+](https://docs.cohere.com/docs/command-r-plus) and newer models. + Configuration for forcing the model output to adhere to the specified format. Supported on [Command R 03-2024](https://docs.cohere.com/docs/command-r), [Command R+ 04-2024](https://docs.cohere.com/docs/command-r-plus) and newer models. 
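A minimal construction sketch for the new `JsonResponseFormat2` model, reusing the name/age schema from its own docstring; note the field is a plain `json_schema` here, where `JsonResponseFormat` uses `schema_` aliased to `schema`:

```python
from cohere.types.json_response_format2 import JsonResponseFormat2

# The required name/age object from the docstring above.
response_format = JsonResponseFormat2(
    json_schema={
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
        "required": ["name", "age"],
    }
)
```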
The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. @@ -37,7 +37,7 @@ class Config: class ResponseFormat_JsonObject(UncheckedBaseModel): """ - Configuration for forcing the model output to adhere to the specified format. Supported on [Command R](https://docs.cohere.com/docs/command-r), [Command R+](https://docs.cohere.com/docs/command-r-plus) and newer models. + Configuration for forcing the model output to adhere to the specified format. Supported on [Command R 03-2024](https://docs.cohere.com/docs/command-r), [Command R+ 04-2024](https://docs.cohere.com/docs/command-r-plus) and newer models. The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. diff --git a/src/cohere/types/response_format2.py b/src/cohere/types/response_format2.py new file mode 100644 index 000000000..b0fee7a24 --- /dev/null +++ b/src/cohere/types/response_format2.py @@ -0,0 +1,65 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions + +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata + + +class ResponseFormat2_Text(UncheckedBaseModel): + """ + Configuration for forcing the model output to adhere to the specified format. Supported on [Command R](https://docs.cohere.com/docs/command-r), [Command R+](https://docs.cohere.com/docs/command-r-plus) and newer models. + + The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. + + A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure. + + **Note**: When using `{ "type": "json_object" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _"Generate a JSON ..."_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length. + **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided). + """ + + type: typing.Literal["text"] = "text" + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class ResponseFormat2_JsonObject(UncheckedBaseModel): + """ + Configuration for forcing the model output to adhere to the specified format. Supported on [Command R](https://docs.cohere.com/docs/command-r), [Command R+](https://docs.cohere.com/docs/command-r-plus) and newer models. + + The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ "type": "json_object" }`. + + A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure. + + **Note**: When using `{ "type": "json_object" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _"Generate a JSON ..."_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length. + **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided). 
+ """ + + json_schema: typing.Optional[typing.Dict[str, typing.Any]] = None + type: typing.Literal["json_object"] = "json_object" + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +ResponseFormat2 = typing_extensions.Annotated[ + typing.Union[ResponseFormat2_Text, ResponseFormat2_JsonObject], UnionMetadata(discriminant="type") +] diff --git a/src/cohere/v2/__init__.py b/src/cohere/v2/__init__.py index e5565ae7d..f8842bb14 100644 --- a/src/cohere/v2/__init__.py +++ b/src/cohere/v2/__init__.py @@ -2,6 +2,12 @@ from .types import ( AssistantMessage, + AssistantMessageContent, + AssistantMessageContentItem, + AssistantMessageContentItem_Text, + AssistantMessageResponse, + AssistantMessageResponseContentItem, + AssistantMessageResponseContentItem_Text, ChatContentDeltaEvent, ChatContentDeltaEventDelta, ChatContentDeltaEventDeltaMessage, @@ -37,15 +43,15 @@ ChatToolPlanDeltaEventDelta, Citation, Content, - Content_Document, Content_Text, - DocumentContent, DocumentSource, NonStreamedChatResponse2, Source, Source_Document, Source_Tool, StreamedChatResponse2, + StreamedChatResponse2_CitationEnd, + StreamedChatResponse2_CitationStart, StreamedChatResponse2_ContentDelta, StreamedChatResponse2_ContentEnd, StreamedChatResponse2_ContentStart, @@ -75,17 +81,17 @@ UserMessage, UserMessageContent, V2ChatRequestCitationMode, - V2ChatRequestResponseFormat, - V2ChatRequestToolChoice, - V2ChatRequestTruncationMode, V2ChatStreamRequestCitationMode, - V2ChatStreamRequestResponseFormat, - V2ChatStreamRequestToolChoice, - V2ChatStreamRequestTruncationMode, ) __all__ = [ "AssistantMessage", + "AssistantMessageContent", + "AssistantMessageContentItem", + "AssistantMessageContentItem_Text", + "AssistantMessageResponse", + "AssistantMessageResponseContentItem", + "AssistantMessageResponseContentItem_Text", "ChatContentDeltaEvent", "ChatContentDeltaEventDelta", "ChatContentDeltaEventDeltaMessage", @@ -121,15 +127,15 @@ "ChatToolPlanDeltaEventDelta", "Citation", "Content", - "Content_Document", "Content_Text", - "DocumentContent", "DocumentSource", "NonStreamedChatResponse2", "Source", "Source_Document", "Source_Tool", "StreamedChatResponse2", + "StreamedChatResponse2_CitationEnd", + "StreamedChatResponse2_CitationStart", "StreamedChatResponse2_ContentDelta", "StreamedChatResponse2_ContentEnd", "StreamedChatResponse2_ContentStart", @@ -159,11 +165,5 @@ "UserMessage", "UserMessageContent", "V2ChatRequestCitationMode", - "V2ChatRequestResponseFormat", - "V2ChatRequestToolChoice", - "V2ChatRequestTruncationMode", "V2ChatStreamRequestCitationMode", - "V2ChatStreamRequestResponseFormat", - "V2ChatStreamRequestToolChoice", - "V2ChatStreamRequestTruncationMode", ] diff --git a/src/cohere/v2/client.py b/src/cohere/v2/client.py index c5f3fca70..63620be69 100644 --- a/src/cohere/v2/client.py +++ b/src/cohere/v2/client.py @@ -24,6 +24,7 @@ from ..types.client_closed_request_error_body import ClientClosedRequestErrorBody from ..types.gateway_timeout_error_body import GatewayTimeoutErrorBody from ..types.not_implemented_error_body import NotImplementedErrorBody +from ..types.response_format2 import ResponseFormat2 from ..types.too_many_requests_error_body import TooManyRequestsErrorBody from ..types.unprocessable_entity_error_body import UnprocessableEntityErrorBody from .types.chat_messages import ChatMessages @@ -31,13 +32,7 @@ 
from .types.streamed_chat_response2 import StreamedChatResponse2 from .types.tool2 import Tool2 from .types.v2chat_request_citation_mode import V2ChatRequestCitationMode -from .types.v2chat_request_response_format import V2ChatRequestResponseFormat -from .types.v2chat_request_tool_choice import V2ChatRequestToolChoice -from .types.v2chat_request_truncation_mode import V2ChatRequestTruncationMode from .types.v2chat_stream_request_citation_mode import V2ChatStreamRequestCitationMode -from .types.v2chat_stream_request_response_format import V2ChatStreamRequestResponseFormat -from .types.v2chat_stream_request_tool_choice import V2ChatStreamRequestToolChoice -from .types.v2chat_stream_request_truncation_mode import V2ChatStreamRequestTruncationMode # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -53,63 +48,84 @@ def chat_stream( model: str, messages: ChatMessages, tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT, citation_mode: typing.Optional[V2ChatStreamRequestCitationMode] = OMIT, - truncation_mode: typing.Optional[V2ChatStreamRequestTruncationMode] = OMIT, - response_format: typing.Optional[V2ChatStreamRequestResponseFormat] = OMIT, + response_format: typing.Optional[ResponseFormat2] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, temperature: typing.Optional[float] = OMIT, seed: typing.Optional[int] = OMIT, frequency_penalty: typing.Optional[float] = OMIT, presence_penalty: typing.Optional[float] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[int] = OMIT, + k: typing.Optional[float] = OMIT, + p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> typing.Iterator[StreamedChatResponse2]: """ + Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. + Parameters ---------- model : str - The model to use for the chat. + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. messages : ChatMessages tools : typing.Optional[typing.Sequence[Tool2]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. + + When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - tool_choice : typing.Optional[V2ChatStreamRequestToolChoice] citation_mode : typing.Optional[V2ChatStreamRequestCitationMode] + Defaults to `"accurate"`. + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - truncation_mode : typing.Optional[V2ChatStreamRequestTruncationMode] - response_format : typing.Optional[V2ChatStreamRequestResponseFormat] + response_format : typing.Optional[ResponseFormat2] max_tokens : typing.Optional[int] - The maximum number of tokens to generate. + The maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations. + stop_sequences : typing.Optional[typing.Sequence[str]] - A list of strings that the model will stop generating at. + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - max_input_tokens : typing.Optional[int] - The maximum number of tokens to feed into the model. temperature : typing.Optional[float] - The temperature of the model. + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + frequency_penalty : typing.Optional[float] - The frequency penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + presence_penalty : typing.Optional[float] - The presence penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + - k : typing.Optional[int] + k : typing.Optional[float] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - p : typing.Optional[int] return_prompt : typing.Optional[bool] Whether to return the prompt in the response.
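The ranges documented above are concrete, so a short sketch with plausible values may help; the model name and credentials below are placeholders, not part of this patch:

```python
from cohere import ChatMessage2_User
from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
response = client.v2.chat_stream(
    model="command-r-plus",  # placeholder; any compatible model or fine-tune ID
    messages=[ChatMessage2_User(content="Write a haiku about the sea.")],
    temperature=0.3,  # the documented default
    k=40.0,  # within [0, 500]; filters candidates before p
    p=0.9,  # within [0.01, 0.99]; applied after k
    max_tokens=100,
)
for chunk in response:
    print(chunk)
```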
@@ -124,17 +140,7 @@ def chat_stream( Examples -------- - from cohere import ( - ChatMessage2_Assistant, - Citation, - Source_Tool, - TextContent, - Tool2, - Tool2Function, - ToolCall2, - ToolCall2Function, - V2ChatStreamRequestResponseFormat, - ) + from cohere import ChatMessage2_User, ResponseFormat2_Text, Tool2, Tool2Function from cohere.client import Client client = Client( @@ -144,35 +150,9 @@ def chat_stream( response = client.v2.chat_stream( model="string", messages=[ - ChatMessage2_Assistant( - tool_calls=[ - ToolCall2( - id="string", - function=ToolCall2Function( - name="string", - arguments="string", - ), - ) - ], - tool_plan="string", - content=[ - TextContent( - text="string", - ) - ], - citations=[ - Citation( - start="string", - end="string", - text="string", - sources=[ - Source_Tool( - id="string", - tool_output={"string": {"key": "value"}}, - ) - ], - ) - ], + ChatMessage2_User( + content="string", + documents=[{"string": {"key": "value"}}], ) ], tools=[ @@ -184,21 +164,16 @@ def chat_stream( ), ) ], - tool_choice="AUTO", citation_mode="FAST", - truncation_mode="OFF", - response_format=V2ChatStreamRequestResponseFormat( - schema={"string": {"key": "value"}}, - ), + response_format=ResponseFormat2_Text(), max_tokens=1, stop_sequences=["string"], - max_input_tokens=1, temperature=1.1, seed=1, frequency_penalty=1.1, presence_penalty=1.1, - k=1, - p=1, + k=1.1, + p=1.1, return_prompt=True, ) for chunk in response: @@ -211,13 +186,10 @@ def chat_stream( "model": model, "messages": messages, "tools": tools, - "tool_choice": tool_choice, "citation_mode": citation_mode, - "truncation_mode": truncation_mode, "response_format": response_format, "max_tokens": max_tokens, "stop_sequences": stop_sequences, - "max_input_tokens": max_input_tokens, "temperature": temperature, "seed": seed, "frequency_penalty": frequency_penalty, @@ -295,63 +267,84 @@ def chat( model: str, messages: ChatMessages, tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT, citation_mode: typing.Optional[V2ChatRequestCitationMode] = OMIT, - truncation_mode: typing.Optional[V2ChatRequestTruncationMode] = OMIT, - response_format: typing.Optional[V2ChatRequestResponseFormat] = OMIT, + response_format: typing.Optional[ResponseFormat2] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, temperature: typing.Optional[float] = OMIT, seed: typing.Optional[int] = OMIT, frequency_penalty: typing.Optional[float] = OMIT, presence_penalty: typing.Optional[float] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[int] = OMIT, + k: typing.Optional[float] = OMIT, + p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> NonStreamedChatResponse2: """ + Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. + Parameters ---------- model : str - The model to use for the chat. + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. messages : ChatMessages tools : typing.Optional[typing.Sequence[Tool2]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. 
+ + When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - tool_choice : typing.Optional[V2ChatRequestToolChoice] citation_mode : typing.Optional[V2ChatRequestCitationMode] + Defaults to `"accurate"`. + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - truncation_mode : typing.Optional[V2ChatRequestTruncationMode] - response_format : typing.Optional[V2ChatRequestResponseFormat] + response_format : typing.Optional[ResponseFormat2] max_tokens : typing.Optional[int] - The maximum number of tokens to generate. + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + stop_sequences : typing.Optional[typing.Sequence[str]] - A list of strings that the model will stop generating at. + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - max_input_tokens : typing.Optional[int] - The maximum number of tokens to feed into the model. temperature : typing.Optional[float] - The temperature of the model. + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + frequency_penalty : typing.Optional[float] - The frequency penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + presence_penalty : typing.Optional[float] - The presence penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + + k : typing.Optional[float] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + - k : typing.Optional[int] + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - p : typing.Optional[int] return_prompt : typing.Optional[bool] Whether to return the prompt in the response.
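Putting the tool parameters together in a non-streaming call; the tool name, schema, model, and credentials below are illustrative only:

```python
from cohere import ChatMessage2_User, Tool2, Tool2Function
from cohere.client import Client

client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
response = client.v2.chat(
    model="command-r-plus",  # placeholder
    messages=[ChatMessage2_User(content="What's the weather in Toronto?")],
    tools=[
        Tool2(
            type="function",
            function=Tool2Function(
                name="get_weather",  # illustrative tool
                description="Gets the current weather for a city.",
                parameters={
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            ),
        )
    ],
)
# Per the docs above: with tools and no tool results, text content is empty
# and tool_calls carries the suggested invocations.
if response.message and response.message.tool_calls:
    for call in response.message.tool_calls:
        print(call.function)
```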
@@ -384,13 +377,10 @@ def chat( "model": model, "messages": messages, "tools": tools, - "tool_choice": tool_choice, "citation_mode": citation_mode, - "truncation_mode": truncation_mode, "response_format": response_format, "max_tokens": max_tokens, "stop_sequences": stop_sequences, - "max_input_tokens": max_input_tokens, "temperature": temperature, "seed": seed, "frequency_penalty": frequency_penalty, @@ -466,63 +456,84 @@ async def chat_stream( model: str, messages: ChatMessages, tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT, citation_mode: typing.Optional[V2ChatStreamRequestCitationMode] = OMIT, - truncation_mode: typing.Optional[V2ChatStreamRequestTruncationMode] = OMIT, - response_format: typing.Optional[V2ChatStreamRequestResponseFormat] = OMIT, + response_format: typing.Optional[ResponseFormat2] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, temperature: typing.Optional[float] = OMIT, seed: typing.Optional[int] = OMIT, frequency_penalty: typing.Optional[float] = OMIT, presence_penalty: typing.Optional[float] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[int] = OMIT, + k: typing.Optional[float] = OMIT, + p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> typing.AsyncIterator[StreamedChatResponse2]: """ + Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. + Parameters ---------- model : str - The model to use for the chat. + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. messages : ChatMessages tools : typing.Optional[typing.Sequence[Tool2]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. + + When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - tool_choice : typing.Optional[V2ChatStreamRequestToolChoice] citation_mode : typing.Optional[V2ChatStreamRequestCitationMode] + Defaults to `"accurate"`. + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - truncation_mode : typing.Optional[V2ChatStreamRequestTruncationMode] - response_format : typing.Optional[V2ChatStreamRequestResponseFormat] + response_format : typing.Optional[ResponseFormat2] max_tokens : typing.Optional[int] - The maximum number of tokens to generate. + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + stop_sequences : typing.Optional[typing.Sequence[str]] - A list of strings that the model will stop generating at. + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. 
- max_input_tokens : typing.Optional[int] - The maximum number of tokens to feed into the model. temperature : typing.Optional[float] - The temperature of the model. + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + frequency_penalty : typing.Optional[float] - The frequency penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + presence_penalty : typing.Optional[float] - The presence penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + - k : typing.Optional[int] + k : typing.Optional[float] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + + + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`, min value of `0.01`, max value of `0.99`. - p : typing.Optional[int] return_prompt : typing.Optional[bool] Whether to return the prompt in the response.
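Since every streamed variant carries the `type` discriminant, the new citation events can be handled by simple string dispatch in the consumer loop; a sketch (the helper name is made up):

```python
import typing

from cohere.v2.types.streamed_chat_response2 import StreamedChatResponse2


async def print_citation_spans(
    stream: typing.AsyncIterator[StreamedChatResponse2],
) -> None:
    async for chunk in stream:
        if chunk.type == "citation-start" and chunk.delta and chunk.delta.message:
            # delta.message.citations holds the Citation payload
            # (start/end offsets, cited text, and sources).
            print("citation opened:", chunk.delta.message.citations)
        elif chunk.type == "citation-end":
            print("citation closed at index", chunk.index)
```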
@@ -539,17 +550,7 @@ async def chat_stream( -------- import asyncio - from cohere import ( - ChatMessage2_Assistant, - Citation, - Source_Tool, - TextContent, - Tool2, - Tool2Function, - ToolCall2, - ToolCall2Function, - V2ChatStreamRequestResponseFormat, - ) + from cohere import ChatMessage2_User, ResponseFormat2_Text, Tool2, Tool2Function from cohere.client import AsyncClient client = AsyncClient( @@ -562,35 +563,9 @@ async def main() -> None: response = await client.v2.chat_stream( model="string", messages=[ - ChatMessage2_Assistant( - tool_calls=[ - ToolCall2( - id="string", - function=ToolCall2Function( - name="string", - arguments="string", - ), - ) - ], - tool_plan="string", - content=[ - TextContent( - text="string", - ) - ], - citations=[ - Citation( - start="string", - end="string", - text="string", - sources=[ - Source_Tool( - id="string", - tool_output={"string": {"key": "value"}}, - ) - ], - ) - ], + ChatMessage2_User( + content="string", + documents=[{"string": {"key": "value"}}], ) ], tools=[ @@ -602,21 +577,16 @@ async def main() -> None: ), ) ], - tool_choice="AUTO", citation_mode="FAST", - truncation_mode="OFF", - response_format=V2ChatStreamRequestResponseFormat( - schema={"string": {"key": "value"}}, - ), + response_format=ResponseFormat2_Text(), max_tokens=1, stop_sequences=["string"], - max_input_tokens=1, temperature=1.1, seed=1, frequency_penalty=1.1, presence_penalty=1.1, - k=1, - p=1, + k=1.1, + p=1.1, return_prompt=True, ) async for chunk in response: @@ -632,13 +602,10 @@ async def main() -> None: "model": model, "messages": messages, "tools": tools, - "tool_choice": tool_choice, "citation_mode": citation_mode, - "truncation_mode": truncation_mode, "response_format": response_format, "max_tokens": max_tokens, "stop_sequences": stop_sequences, - "max_input_tokens": max_input_tokens, "temperature": temperature, "seed": seed, "frequency_penalty": frequency_penalty, @@ -716,63 +683,84 @@ async def chat( model: str, messages: ChatMessages, tools: typing.Optional[typing.Sequence[Tool2]] = OMIT, - tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT, citation_mode: typing.Optional[V2ChatRequestCitationMode] = OMIT, - truncation_mode: typing.Optional[V2ChatRequestTruncationMode] = OMIT, - response_format: typing.Optional[V2ChatRequestResponseFormat] = OMIT, + response_format: typing.Optional[ResponseFormat2] = OMIT, max_tokens: typing.Optional[int] = OMIT, stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT, - max_input_tokens: typing.Optional[int] = OMIT, temperature: typing.Optional[float] = OMIT, seed: typing.Optional[int] = OMIT, frequency_penalty: typing.Optional[float] = OMIT, presence_penalty: typing.Optional[float] = OMIT, - k: typing.Optional[int] = OMIT, - p: typing.Optional[int] = OMIT, + k: typing.Optional[float] = OMIT, + p: typing.Optional[float] = OMIT, return_prompt: typing.Optional[bool] = OMIT, request_options: typing.Optional[RequestOptions] = None ) -> NonStreamedChatResponse2: """ + Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides. + Parameters ---------- model : str - The model to use for the chat. + The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model. 
messages : ChatMessages tools : typing.Optional[typing.Sequence[Tool2]] + A list of available tools (functions) that the model may suggest invoking before producing a text response. + + When `tools` is passed (without `tool_results`), the `text` content in the response will be empty and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty. - tool_choice : typing.Optional[V2ChatRequestToolChoice] citation_mode : typing.Optional[V2ChatRequestCitationMode] + Defaults to `"accurate"`. + Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. - truncation_mode : typing.Optional[V2ChatRequestTruncationMode] - response_format : typing.Optional[V2ChatRequestResponseFormat] + response_format : typing.Optional[ResponseFormat2] max_tokens : typing.Optional[int] - The maximum number of tokens to generate. + The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations. + stop_sequences : typing.Optional[typing.Sequence[str]] - A list of strings that the model will stop generating at. + A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. - max_input_tokens : typing.Optional[int] - The maximum number of tokens to feed into the model. temperature : typing.Optional[float] - The temperature of the model. + Defaults to `0.3`. + + A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations. + + Randomness can be further maximized by increasing the value of the `p` parameter. + seed : typing.Optional[int] + If specified, the backend will make a best effort to sample tokens + deterministically, such that repeated requests with the same + seed and parameters should return the same result. However, + determinism cannot be totally guaranteed. + frequency_penalty : typing.Optional[float] - The frequency penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation. + presence_penalty : typing.Optional[float] - The presence penalty of the model. + Defaults to `0.0`, min value of `0.0`, max value of `1.0`. + Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. + + + k : typing.Optional[float] + Ensures only the top `k` most likely tokens are considered for generation at each step. + Defaults to `0`, min value of `0`, max value of `500`. + - k : typing.Optional[int] + p : typing.Optional[float] + Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`. + Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
- p : typing.Optional[int] return_prompt : typing.Optional[bool] Whether to return the prompt in the response. @@ -813,13 +801,10 @@ async def main() -> None: "model": model, "messages": messages, "tools": tools, - "tool_choice": tool_choice, "citation_mode": citation_mode, - "truncation_mode": truncation_mode, "response_format": response_format, "max_tokens": max_tokens, "stop_sequences": stop_sequences, - "max_input_tokens": max_input_tokens, "temperature": temperature, "seed": seed, "frequency_penalty": frequency_penalty, diff --git a/src/cohere/v2/types/__init__.py b/src/cohere/v2/types/__init__.py index adcfa0ae6..20f7a5f28 100644 --- a/src/cohere/v2/types/__init__.py +++ b/src/cohere/v2/types/__init__.py @@ -1,6 +1,13 @@ # This file was auto-generated by Fern from our API Definition. from .assistant_message import AssistantMessage +from .assistant_message_content import AssistantMessageContent +from .assistant_message_content_item import AssistantMessageContentItem, AssistantMessageContentItem_Text +from .assistant_message_response import AssistantMessageResponse +from .assistant_message_response_content_item import ( + AssistantMessageResponseContentItem, + AssistantMessageResponseContentItem_Text, +) from .chat_content_delta_event import ChatContentDeltaEvent from .chat_content_delta_event_delta import ChatContentDeltaEventDelta from .chat_content_delta_event_delta_message import ChatContentDeltaEventDeltaMessage @@ -37,13 +44,14 @@ from .chat_tool_plan_delta_event import ChatToolPlanDeltaEvent from .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta from .citation import Citation -from .content import Content, Content_Document, Content_Text -from .document_content import DocumentContent +from .content import Content, Content_Text from .document_source import DocumentSource from .non_streamed_chat_response2 import NonStreamedChatResponse2 from .source import Source, Source_Document, Source_Tool from .streamed_chat_response2 import ( StreamedChatResponse2, + StreamedChatResponse2_CitationEnd, + StreamedChatResponse2_CitationStart, StreamedChatResponse2_ContentDelta, StreamedChatResponse2_ContentEnd, StreamedChatResponse2_ContentStart, @@ -72,16 +80,16 @@ from .user_message import UserMessage from .user_message_content import UserMessageContent from .v2chat_request_citation_mode import V2ChatRequestCitationMode -from .v2chat_request_response_format import V2ChatRequestResponseFormat -from .v2chat_request_tool_choice import V2ChatRequestToolChoice -from .v2chat_request_truncation_mode import V2ChatRequestTruncationMode from .v2chat_stream_request_citation_mode import V2ChatStreamRequestCitationMode -from .v2chat_stream_request_response_format import V2ChatStreamRequestResponseFormat -from .v2chat_stream_request_tool_choice import V2ChatStreamRequestToolChoice -from .v2chat_stream_request_truncation_mode import V2ChatStreamRequestTruncationMode __all__ = [ "AssistantMessage", + "AssistantMessageContent", + "AssistantMessageContentItem", + "AssistantMessageContentItem_Text", + "AssistantMessageResponse", + "AssistantMessageResponseContentItem", + "AssistantMessageResponseContentItem_Text", "ChatContentDeltaEvent", "ChatContentDeltaEventDelta", "ChatContentDeltaEventDeltaMessage", @@ -117,15 +125,15 @@ "ChatToolPlanDeltaEventDelta", "Citation", "Content", - "Content_Document", "Content_Text", - "DocumentContent", "DocumentSource", "NonStreamedChatResponse2", "Source", "Source_Document", "Source_Tool", "StreamedChatResponse2", + "StreamedChatResponse2_CitationEnd", + 
"StreamedChatResponse2_CitationStart", "StreamedChatResponse2_ContentDelta", "StreamedChatResponse2_ContentEnd", "StreamedChatResponse2_ContentStart", @@ -155,11 +163,5 @@ "UserMessage", "UserMessageContent", "V2ChatRequestCitationMode", - "V2ChatRequestResponseFormat", - "V2ChatRequestToolChoice", - "V2ChatRequestTruncationMode", "V2ChatStreamRequestCitationMode", - "V2ChatStreamRequestResponseFormat", - "V2ChatStreamRequestToolChoice", - "V2ChatStreamRequestTruncationMode", ] diff --git a/src/cohere/v2/types/assistant_message.py b/src/cohere/v2/types/assistant_message.py index 90d11b7ce..3e36c59be 100644 --- a/src/cohere/v2/types/assistant_message.py +++ b/src/cohere/v2/types/assistant_message.py @@ -6,8 +6,8 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel +from .assistant_message_content import AssistantMessageContent from .citation import Citation -from .text_content import TextContent from .tool_call2 import ToolCall2 @@ -18,7 +18,7 @@ class AssistantMessage(UncheckedBaseModel): tool_calls: typing.Optional[typing.List[ToolCall2]] = None tool_plan: typing.Optional[str] = None - content: typing.Optional[typing.List[TextContent]] = None + content: typing.Optional[AssistantMessageContent] = None citations: typing.Optional[typing.List[Citation]] = None if IS_PYDANTIC_V2: diff --git a/src/cohere/v2/types/assistant_message_content.py b/src/cohere/v2/types/assistant_message_content.py new file mode 100644 index 000000000..20fc90cd5 --- /dev/null +++ b/src/cohere/v2/types/assistant_message_content.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from .assistant_message_content_item import AssistantMessageContentItem + +AssistantMessageContent = typing.Union[str, typing.List[AssistantMessageContentItem]] diff --git a/src/cohere/v2/types/assistant_message_content_item.py b/src/cohere/v2/types/assistant_message_content_item.py new file mode 100644 index 000000000..ed1456255 --- /dev/null +++ b/src/cohere/v2/types/assistant_message_content_item.py @@ -0,0 +1,30 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions + +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.unchecked_base_model import UncheckedBaseModel, UnionMetadata + + +class AssistantMessageContentItem_Text(UncheckedBaseModel): + text: str + type: typing.Literal["text"] = "text" + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +AssistantMessageContentItem = typing_extensions.Annotated[ + AssistantMessageContentItem_Text, UnionMetadata(discriminant="type") +] diff --git a/src/cohere/v2/types/assistant_message_response.py b/src/cohere/v2/types/assistant_message_response.py new file mode 100644 index 000000000..d143e1353 --- /dev/null +++ b/src/cohere/v2/types/assistant_message_response.py @@ -0,0 +1,32 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic + +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.unchecked_base_model import UncheckedBaseModel +from .assistant_message_response_content_item import AssistantMessageResponseContentItem +from .citation import Citation +from .tool_call2 import ToolCall2 + + +class AssistantMessageResponse(UncheckedBaseModel): + """ + A message from the assistant role can contain text and tool call information. + """ + + role: typing.Literal["assistant"] = "assistant" + tool_calls: typing.Optional[typing.List[ToolCall2]] = None + tool_plan: typing.Optional[str] = None + content: typing.Optional[typing.List[AssistantMessageResponseContentItem]] = None + citations: typing.Optional[typing.List[Citation]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/assistant_message_response_content_item.py b/src/cohere/v2/types/assistant_message_response_content_item.py new file mode 100644 index 000000000..8bcd745f0 --- /dev/null +++ b/src/cohere/v2/types/assistant_message_response_content_item.py @@ -0,0 +1,30 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions + +from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.unchecked_base_model import UncheckedBaseModel, UnionMetadata + + +class AssistantMessageResponseContentItem_Text(UncheckedBaseModel): + text: str + type: typing.Literal["text"] = "text" + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +AssistantMessageResponseContentItem = typing_extensions.Annotated[ + AssistantMessageResponseContentItem_Text, UnionMetadata(discriminant="type") +] diff --git a/src/cohere/v2/types/chat_message2.py b/src/cohere/v2/types/chat_message2.py index 93529b40c..7c11d2b56 100644 --- a/src/cohere/v2/types/chat_message2.py +++ b/src/cohere/v2/types/chat_message2.py @@ -9,24 +9,23 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel, UnionMetadata +from ...types.chat_document import ChatDocument +from .assistant_message_content import AssistantMessageContent from .citation import Citation from .system_message_content import SystemMessageContent -from .text_content import TextContent from .tool_call2 import ToolCall2 from .tool_message2tool_content_item import ToolMessage2ToolContentItem from .user_message_content import UserMessageContent -class ChatMessage2_Assistant(UncheckedBaseModel): +class ChatMessage2_User(UncheckedBaseModel): """ Represents a single message in the chat history from a given role. 
""" - tool_calls: typing.Optional[typing.List[ToolCall2]] = None - tool_plan: typing.Optional[str] = None - content: typing.Optional[typing.List[TextContent]] = None - citations: typing.Optional[typing.List[Citation]] = None - role: typing.Literal["assistant"] = "assistant" + content: UserMessageContent + documents: typing.Optional[typing.List[ChatDocument]] = None + role: typing.Literal["user"] = "user" if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 @@ -38,13 +37,16 @@ class Config: extra = pydantic.Extra.allow -class ChatMessage2_System(UncheckedBaseModel): +class ChatMessage2_Assistant(UncheckedBaseModel): """ Represents a single message in the chat history from a given role. """ - content: SystemMessageContent - role: typing.Literal["system"] = "system" + tool_calls: typing.Optional[typing.List[ToolCall2]] = None + tool_plan: typing.Optional[str] = None + content: typing.Optional[AssistantMessageContent] = None + citations: typing.Optional[typing.List[Citation]] = None + role: typing.Literal["assistant"] = "assistant" if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 @@ -56,13 +58,13 @@ class Config: extra = pydantic.Extra.allow -class ChatMessage2_User(UncheckedBaseModel): +class ChatMessage2_System(UncheckedBaseModel): """ Represents a single message in the chat history from a given role. """ - content: UserMessageContent - role: typing.Literal["user"] = "user" + content: SystemMessageContent + role: typing.Literal["system"] = "system" if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 @@ -94,6 +96,6 @@ class Config: ChatMessage2 = typing_extensions.Annotated[ - typing.Union[ChatMessage2_Assistant, ChatMessage2_System, ChatMessage2_User, ChatMessage2_Tool], + typing.Union[ChatMessage2_User, ChatMessage2_Assistant, ChatMessage2_System, ChatMessage2_Tool], UnionMetadata(discriminant="role"), ] diff --git a/src/cohere/v2/types/citation.py b/src/cohere/v2/types/citation.py index e0b7e61a4..6fe5e5aad 100644 --- a/src/cohere/v2/types/citation.py +++ b/src/cohere/v2/types/citation.py @@ -14,8 +14,8 @@ class Citation(UncheckedBaseModel): Citation information containing sources and the text cited. """ - start: typing.Optional[str] = None - end: typing.Optional[str] = None + start: typing.Optional[int] = None + end: typing.Optional[int] = None text: typing.Optional[str] = None sources: typing.Optional[typing.List[Source]] = None diff --git a/src/cohere/v2/types/content.py b/src/cohere/v2/types/content.py index 281637fb5..655c75021 100644 --- a/src/cohere/v2/types/content.py +++ b/src/cohere/v2/types/content.py @@ -29,23 +29,4 @@ class Config: extra = pydantic.Extra.allow -class Content_Document(UncheckedBaseModel): - """ - A Content block which contains information about the content type and the content itself. 
- """ - - id: str - document: typing.Dict[str, typing.Any] - type: typing.Literal["document"] = "document" - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow - - -Content = typing_extensions.Annotated[typing.Union[Content_Text, Content_Document], UnionMetadata(discriminant="type")] +Content = typing_extensions.Annotated[Content_Text, UnionMetadata(discriminant="type")] diff --git a/src/cohere/v2/types/non_streamed_chat_response2.py b/src/cohere/v2/types/non_streamed_chat_response2.py index 303549dc4..de8799a02 100644 --- a/src/cohere/v2/types/non_streamed_chat_response2.py +++ b/src/cohere/v2/types/non_streamed_chat_response2.py @@ -6,7 +6,7 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel -from .assistant_message import AssistantMessage +from .assistant_message_response import AssistantMessageResponse from .chat_finish_reason import ChatFinishReason from .usage import Usage @@ -23,7 +23,7 @@ class NonStreamedChatResponse2(UncheckedBaseModel): The prompt that was used. Only present when `return_prompt` in the request is set to true. """ - message: typing.Optional[AssistantMessage] = None + message: typing.Optional[AssistantMessageResponse] = None usage: typing.Optional[Usage] = None if IS_PYDANTIC_V2: diff --git a/src/cohere/v2/types/streamed_chat_response2.py b/src/cohere/v2/types/streamed_chat_response2.py index 76232e2f7..212211154 100644 --- a/src/cohere/v2/types/streamed_chat_response2.py +++ b/src/cohere/v2/types/streamed_chat_response2.py @@ -9,6 +9,7 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel, UnionMetadata +from ...types.citation_start_event_delta import CitationStartEventDelta from .chat_content_delta_event_delta import ChatContentDeltaEventDelta from .chat_content_start_event_delta import ChatContentStartEventDelta from .chat_message_end_event_delta import ChatMessageEndEventDelta @@ -167,6 +168,43 @@ class Config: extra = pydantic.Extra.allow +class StreamedChatResponse2_CitationStart(UncheckedBaseModel): + """ + StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). + """ + + index: typing.Optional[int] = None + delta: typing.Optional[CitationStartEventDelta] = None + type: typing.Literal["citation-start"] = "citation-start" + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class StreamedChatResponse2_CitationEnd(UncheckedBaseModel): + """ + StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). + """ + + index: typing.Optional[int] = None + type: typing.Literal["citation-end"] = "citation-end" + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + class StreamedChatResponse2_MessageEnd(UncheckedBaseModel): """ StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request). 
@@ -196,6 +234,8 @@ class Config: StreamedChatResponse2_ToolCallStart, StreamedChatResponse2_ToolCallDelta, StreamedChatResponse2_ToolCallEnd, + StreamedChatResponse2_CitationStart, + StreamedChatResponse2_CitationEnd, StreamedChatResponse2_MessageEnd, ], UnionMetadata(discriminant="type"), diff --git a/src/cohere/v2/types/tool2.py b/src/cohere/v2/types/tool2.py index 97a2d49c6..720602983 100644 --- a/src/cohere/v2/types/tool2.py +++ b/src/cohere/v2/types/tool2.py @@ -11,7 +11,10 @@ class Tool2(UncheckedBaseModel): type: typing.Optional[typing.Literal["function"]] = None - function: typing.Optional[Tool2Function] = None + function: typing.Optional[Tool2Function] = pydantic.Field(default=None) + """ + The function to be executed. + """ if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/v2/types/tool2function.py b/src/cohere/v2/types/tool2function.py index 328c4ff96..ef81e03b6 100644 --- a/src/cohere/v2/types/tool2function.py +++ b/src/cohere/v2/types/tool2function.py @@ -9,8 +9,20 @@ class Tool2Function(UncheckedBaseModel): - name: typing.Optional[str] = None - description: typing.Optional[str] = None + """ + The function to be executed. + """ + + name: typing.Optional[str] = pydantic.Field(default=None) + """ + The name of the function. + """ + + description: typing.Optional[str] = pydantic.Field(default=None) + """ + The description of the function. + """ + parameters: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) """ The parameters of the function as a JSON schema. diff --git a/src/cohere/v2/types/user_message.py b/src/cohere/v2/types/user_message.py index 40cdbb540..3c0b41439 100644 --- a/src/cohere/v2/types/user_message.py +++ b/src/cohere/v2/types/user_message.py @@ -6,6 +6,7 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel +from ...types.chat_document import ChatDocument from .user_message_content import UserMessageContent @@ -14,7 +15,16 @@ class UserMessage(UncheckedBaseModel): A message from the user. """ - content: UserMessageContent + content: UserMessageContent = pydantic.Field() + """ + The content of the message. This can be a string or a list of content blocks. + If a string is provided, it will be treated as a text content block. + """ + + documents: typing.Optional[typing.List[ChatDocument]] = pydantic.Field(default=None) + """ + Documents seen by the model when generating the reply. + """ if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/src/cohere/v2/types/v2chat_request_citation_mode.py b/src/cohere/v2/types/v2chat_request_citation_mode.py index 4acfc4ea3..9b5dc7d5a 100644 --- a/src/cohere/v2/types/v2chat_request_citation_mode.py +++ b/src/cohere/v2/types/v2chat_request_citation_mode.py @@ -2,4 +2,4 @@ import typing -V2ChatRequestCitationMode = typing.Union[typing.Literal["FAST", "ACCURATE"], typing.Any] +V2ChatRequestCitationMode = typing.Union[typing.Literal["FAST", "ACCURATE", "OFF"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_request_response_format.py b/src/cohere/v2/types/v2chat_request_response_format.py deleted file mode 100644 index a3fd4508f..000000000 --- a/src/cohere/v2/types/v2chat_request_response_format.py +++ /dev/null @@ -1,29 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
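The new `documents` field rides along with the user turn; a sketch using the free-form dict shape from the request examples earlier in this patch (the keys are illustrative):

```python
from cohere.v2.types.user_message import UserMessage

# Document dicts the model can ground its reply on.
message = UserMessage(
    content="Summarize the attached note.",
    documents=[{"id": "doc-1", "text": "Offsite moved to Thursday."}],
)
```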
- -import typing - -import pydantic - -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -from ...core.unchecked_base_model import UncheckedBaseModel - - -class V2ChatRequestResponseFormat(UncheckedBaseModel): - type: typing.Optional[typing.Literal["json_object"]] = pydantic.Field(default=None) - """ - When set to JSON, the output will be parse-able valid JSON (or run out of context). - """ - - schema_: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(alias="schema", default=None) - """ - A JSON schema object that the output will adhere to. Refer to https://json-schema.org/ for reference about schemas. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/v2chat_request_tool_choice.py b/src/cohere/v2/types/v2chat_request_tool_choice.py deleted file mode 100644 index e87f63c60..000000000 --- a/src/cohere/v2/types/v2chat_request_tool_choice.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -V2ChatRequestToolChoice = typing.Union[typing.Literal["AUTO", "NONE", "ANY"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_request_truncation_mode.py b/src/cohere/v2/types/v2chat_request_truncation_mode.py deleted file mode 100644 index 2711da011..000000000 --- a/src/cohere/v2/types/v2chat_request_truncation_mode.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -V2ChatRequestTruncationMode = typing.Union[typing.Literal["OFF", "AUTO", "AUTO_PRESERVE_ORDER"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_stream_request_citation_mode.py b/src/cohere/v2/types/v2chat_stream_request_citation_mode.py index a910488f1..2e07c9ebe 100644 --- a/src/cohere/v2/types/v2chat_stream_request_citation_mode.py +++ b/src/cohere/v2/types/v2chat_stream_request_citation_mode.py @@ -2,4 +2,4 @@ import typing -V2ChatStreamRequestCitationMode = typing.Union[typing.Literal["FAST", "ACCURATE"], typing.Any] +V2ChatStreamRequestCitationMode = typing.Union[typing.Literal["FAST", "ACCURATE", "OFF"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_stream_request_response_format.py b/src/cohere/v2/types/v2chat_stream_request_response_format.py deleted file mode 100644 index b02e38fd4..000000000 --- a/src/cohere/v2/types/v2chat_stream_request_response_format.py +++ /dev/null @@ -1,29 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -import pydantic - -from ...core.pydantic_utilities import IS_PYDANTIC_V2 -from ...core.unchecked_base_model import UncheckedBaseModel - - -class V2ChatStreamRequestResponseFormat(UncheckedBaseModel): - type: typing.Optional[typing.Literal["json_object"]] = pydantic.Field(default=None) - """ - When set to JSON, the output will be parse-able valid JSON (or run out of context). - """ - - schema_: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(alias="schema", default=None) - """ - A JSON schema object that the output will adhere to. Refer to https://json-schema.org/ for reference about schemas. 
- """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/src/cohere/v2/types/v2chat_stream_request_tool_choice.py b/src/cohere/v2/types/v2chat_stream_request_tool_choice.py deleted file mode 100644 index 156c0e62c..000000000 --- a/src/cohere/v2/types/v2chat_stream_request_tool_choice.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -V2ChatStreamRequestToolChoice = typing.Union[typing.Literal["AUTO", "NONE", "ANY"], typing.Any] diff --git a/src/cohere/v2/types/v2chat_stream_request_truncation_mode.py b/src/cohere/v2/types/v2chat_stream_request_truncation_mode.py deleted file mode 100644 index 6cf1bd5d5..000000000 --- a/src/cohere/v2/types/v2chat_stream_request_truncation_mode.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -V2ChatStreamRequestTruncationMode = typing.Union[typing.Literal["OFF", "AUTO", "AUTO_PRESERVE_ORDER"], typing.Any]