Merge branch 'main' into vision_peft
pbontrager committed Nov 11, 2024
2 parents 6be3747 + e1caa9f commit 83820cf
Showing 190 changed files with 6,565 additions and 1,456 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/regression_test.yaml
@@ -26,6 +26,8 @@ jobs:
python-version: ['3.11']
torch-version: ["stable", "nightly"]
fail-fast: false
env:
PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
steps:
- name: Check out repo
uses: actions/checkout@v3
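
Note: ``expandable_segments:True`` tells PyTorch's CUDA caching allocator to grow existing memory segments rather than reserving new fixed-size blocks, which usually reduces fragmentation-related out-of-memory failures in long-running test jobs. The same setting can be applied outside CI; the snippet below is a minimal sketch (not part of this commit) showing one way to set it from Python before any CUDA allocation happens.

.. code-block:: python

    # Minimal sketch: configure the CUDA caching allocator before any CUDA work.
    # The environment variable must be set before the first CUDA allocation.
    import os

    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    import torch

    if torch.cuda.is_available():
        x = torch.zeros(1024, device="cuda")  # allocator now uses expandable segments
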
1 change: 1 addition & 0 deletions README.md
@@ -44,6 +44,7 @@ torchtune currently supports the following models.
| [Code-Llama2](https://ai.meta.com/blog/code-llama-large-language-model-coding/) | 7B, 13B, 70B [[models](torchtune/models/code_llama2/_model_builders.py), [configs](recipes/configs/code_llama2/)] |
| [Mistral](https://huggingface.co/mistralai) | 7B [[models](torchtune/models/mistral/_model_builders.py), [configs](recipes/configs/mistral/)] |
| [Gemma](https://huggingface.co/collections/google/gemma-release-65d5efbccdbb8c4202ec078b) | 2B, 7B [[models](torchtune/models/gemma/_model_builders.py), [configs](recipes/configs/gemma/)] |
| [Gemma2](https://huggingface.co/docs/transformers/main/en/model_doc/gemma2) | 2B, 9B, 27B [[models](torchtune/models/gemma2/_model_builders.py), [configs](recipes/configs/gemma2/)] |
| [Microsoft Phi3](https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3) | Mini [[models](torchtune/models/phi3/), [configs](recipes/configs/phi3/)]
| [Qwen2](https://qwenlm.github.io/blog/qwen2/) | 0.5B, 1.5B, 7B [[models](torchtune/models/qwen2/), [configs](recipes/configs/qwen2/)]

1 change: 1 addition & 0 deletions docs/source/api_ref_datasets.rst
@@ -37,6 +37,7 @@ Image + Text datasets

multimodal.llava_instruct_dataset
multimodal.the_cauldron_dataset
multimodal.vqa_dataset

.. _dataset_builders:

80 changes: 76 additions & 4 deletions docs/source/api_ref_models.rst
@@ -208,6 +208,47 @@ To download the CodeLlama-7B model:
code_llama2.lora_code_llama2_70b
code_llama2.qlora_code_llama2_70b

qwen-2.5
--------

Models of size 0.5B, 1.5B, 3B, 7B, 14B, 32B, 72B from the `Qwen2.5 family <https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e>`_.

To download the Qwen2.5 1.5B model, for example:

.. code-block:: bash

    tune download Qwen/Qwen2.5-1.5B-Instruct --output-dir /tmp/Qwen2_5-1_5B-Instruct --ignore-patterns None

.. autosummary::
:toctree: generated/
:nosignatures:

qwen2_5.qwen2_5_0_5b
qwen2_5.lora_qwen2_5_0_5b
qwen2_5.qwen2_5_1_5b_base
qwen2_5.qwen2_5_1_5b_instruct
qwen2_5.lora_qwen2_5_1_5b_base
qwen2_5.lora_qwen2_5_1_5b_instruct
qwen2_5.qwen2_5_3b
qwen2_5.lora_qwen2_5_3b
qwen2_5.qwen2_5_7b_base
qwen2_5.qwen2_5_7b_instruct
qwen2_5.lora_qwen2_5_7b_base
qwen2_5.lora_qwen2_5_7b_instruct
qwen2_5.qwen2_5_14b_base
qwen2_5.qwen2_5_14b_instruct
qwen2_5.lora_qwen2_5_14b_base
qwen2_5.lora_qwen2_5_14b_instruct
qwen2_5.qwen2_5_32b_base
qwen2_5.qwen2_5_32b_instruct
qwen2_5.lora_qwen2_5_32b_base
qwen2_5.lora_qwen2_5_32b_instruct
qwen2_5.qwen2_5_72b_base
qwen2_5.qwen2_5_72b_instruct
qwen2_5.lora_qwen2_5_72b_base
qwen2_5.lora_qwen2_5_72b_instruct
qwen2_5.qwen2_5_tokenizer
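
Once weights are downloaded with ``tune download``, the builders listed above construct the model in Python. The snippet below is an illustrative sketch only (not part of this commit), assuming the usual torchtune convention that base builders take no required arguments and LoRA builders accept ``lora_attn_modules``, ``lora_rank``, and ``lora_alpha``; check the generated API pages for the exact signatures.

.. code-block:: python

    # Sketch: build a Qwen2.5 1.5B instruct model and a LoRA variant.
    # Builder signatures are assumed to follow the standard torchtune pattern.
    from torchtune.models.qwen2_5 import (
        lora_qwen2_5_1_5b_instruct,
        qwen2_5_1_5b_instruct,
    )

    model = qwen2_5_1_5b_instruct()  # full model; load a checkpoint before use

    lora_model = lora_qwen2_5_1_5b_instruct(
        lora_attn_modules=["q_proj", "v_proj"],  # attention projections that get LoRA adapters
        lora_rank=8,
        lora_alpha=16,
    )
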

qwen-2
------

@@ -225,12 +266,12 @@ To download the Qwen2 1.5B model, for example:

qwen2.qwen2
qwen2.lora_qwen2
- qwen2.qwen2_7b
qwen2.qwen2_0_5b
- qwen2.qwen2_1_5b
- qwen2.lora_qwen2_7b
qwen2.lora_qwen2_0_5b
+ qwen2.qwen2_1_5b
qwen2.lora_qwen2_1_5b
+ qwen2.qwen2_7b
+ qwen2.lora_qwen2_7b
qwen2.qwen2_tokenizer

phi-3
@@ -320,8 +361,39 @@ To download the Gemma 7B model:
gemma.gemma_tokenizer


gemma2
------

Models of size 2B, 9B, 27B from the `Gemma family <https://blog.google/technology/developers/gemma-open-models/>`_.

Important: You need to request access on `Hugging Face <https://huggingface.co/google/gemma-2-2b>`__ to use this model.

To download the Gemma2 2B, 9B, or 27B models:

.. code-block:: bash

    tune download google/gemma-2-<MODEL_SIZE>b --ignore-patterns "gemma-2-<MODEL_SIZE>b.gguf" --hf-token <HF_TOKEN>

.. autosummary::
:toctree: generated/
:nosignatures:

gemma2.gemma2
gemma2.lora_gemma2
gemma2.gemma2_2b
gemma2.lora_gemma2_2b
gemma2.qlora_gemma2_2b
gemma2.gemma2_9b
gemma2.lora_gemma2_9b
gemma2.qlora_gemma2_9b
gemma2.gemma2_27b
gemma2.lora_gemma2_27b
gemma2.qlora_gemma2_27b
gemma.gemma_tokenizer
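
Note that Gemma2 reuses the existing ``gemma.gemma_tokenizer``, as listed above. As a rough sketch (not part of this commit, and assuming the usual torchtune builder conventions, including the ``path`` argument name for the tokenizer), building the 2B model, a QLoRA variant, and the shared tokenizer looks roughly like this:

.. code-block:: python

    # Sketch: Gemma2 2B, a QLoRA variant, and the Gemma SentencePiece tokenizer.
    # Argument names are assumptions based on the other torchtune model families.
    from torchtune.models.gemma import gemma_tokenizer
    from torchtune.models.gemma2 import gemma2_2b, qlora_gemma2_2b

    base_model = gemma2_2b()

    qlora_model = qlora_gemma2_2b(
        lora_attn_modules=["q_proj", "k_proj", "v_proj"],
        lora_rank=16,
        lora_alpha=32,
    )

    # Path below is illustrative; point it at the downloaded tokenizer.model file.
    tokenizer = gemma_tokenizer(path="/tmp/gemma-2-2b/tokenizer.model")
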

clip
----

Vision components to support multimodality using `CLIP encoder <https://arxiv.org/abs/2103.00020>`_.

1 change: 1 addition & 0 deletions docs/source/api_ref_modules.rst
@@ -75,6 +75,7 @@ PEFT Components
peft.AdapterModule
peft.get_adapter_params
peft.set_trainable_params
peft.get_adapter_state_dict
peft.validate_missing_and_unexpected_for_lora
peft.validate_state_dict_for_lora
peft.disable_adapter
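
``peft.get_adapter_state_dict`` joins the existing adapter helpers. The snippet below is a rough sketch (not part of this commit) of how these utilities typically fit together in a LoRA finetune, assuming the documented signatures; checkpoint-saving details are omitted.

.. code-block:: python

    # Sketch: mark only the LoRA adapter weights as trainable, then extract just
    # those weights for a lightweight adapter-only checkpoint.
    from torchtune.models.llama2 import lora_llama2_7b
    from torchtune.modules.peft import (
        get_adapter_params,
        get_adapter_state_dict,
        set_trainable_params,
    )

    model = lora_llama2_7b(lora_attn_modules=["q_proj", "v_proj"])

    adapter_params = get_adapter_params(model)   # adapter parameters only
    set_trainable_params(model, adapter_params)  # freeze everything else

    # ... training loop elided ...

    adapter_sd = get_adapter_state_dict(model.state_dict())  # adapter-only weights to save
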
1 change: 1 addition & 0 deletions docs/source/api_ref_training.rst
@@ -56,6 +56,7 @@ Utilities for enabling and working with distributed training.
get_world_size_and_rank
get_full_finetune_fsdp_wrap_policy
lora_fsdp_wrap_policy
gather_cpu_state_dict

.. _ac_label:

2 changes: 1 addition & 1 deletion docs/source/deep_dives/checkpointer.rst
@@ -443,7 +443,7 @@ For this section we'll use the Llama2 13B model in HF format.
checkpoint_dir=checkpoint_dir,
checkpoint_files=pytorch_files,
output_dir=checkpoint_dir,
- model_type=ModelType.LLAMA2
+ model_type="LLAMA2"
)
torchtune_sd = checkpointer.load_checkpoint()
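
With this change the deep dive passes the model type as a plain string rather than the ``ModelType`` enum. For context, a condensed sketch of the surrounding example is shown below; the checkpointer class name and file paths are assumptions (they mirror the deep dive, not this diff), while the keyword arguments match the snippet above.

.. code-block:: python

    # Condensed sketch of the checkpointer deep-dive example.
    # Class name and paths are illustrative assumptions.
    from torchtune.training import FullModelHFCheckpointer

    checkpoint_dir = "/tmp/Llama-2-13b-hf"  # illustrative path
    pytorch_files = [
        "pytorch_model-00001-of-00003.bin",
        "pytorch_model-00002-of-00003.bin",
        "pytorch_model-00003-of-00003.bin",
    ]

    checkpointer = FullModelHFCheckpointer(
        checkpoint_dir=checkpoint_dir,
        checkpoint_files=pytorch_files,
        output_dir=checkpoint_dir,
        model_type="LLAMA2",  # plain string instead of ModelType.LLAMA2
    )
    torchtune_sd = checkpointer.load_checkpoint()
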