diff --git a/README.md b/README.md
index 2585ef05c..d92290062 100644
--- a/README.md
+++ b/README.md
@@ -32,10 +32,6 @@ Choose your path:
 - **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
 - **Amazon SageMaker**: [Blog](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
 
-Recent activities:
-
-- **2024/10/18-2024/11/30**: Build a personal tour guide bot using PAI+LLaMA Factory. [[website]](https://developer.aliyun.com/topic/llamafactory2)
-
 > [!NOTE]
 > Except for the above links, all other websites are unauthorized third-party websites. Please carefully use them.
 
@@ -206,7 +202,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
 | [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
-| [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
+| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
 | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3](https://huggingface.co/microsoft) | 4B/14B | phi |
 | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
@@ -215,7 +211,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
-| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/12B/35B/115B | telechat2 |
+| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
 | [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
 | [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
diff --git a/README_zh.md b/README_zh.md
index 5db914f9d..4d7ed1290 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -33,10 +33,6 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 - **PAI-DSW**:[Llama3 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
 - **Amazon SageMaker**:[博客](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
 
-近期活动:
-
-- **2024/10/18-2024/11/30**:使用 PAI+LLaMA Factory 构建个性化导游机器人。[[活动页面]](https://developer.aliyun.com/topic/llamafactory2)
-
 > [!NOTE]
 > 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。
 
@@ -207,15 +203,16 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
 | [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
-| [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
+| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
 | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
-| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
+| [Phi-3](https://huggingface.co/microsoft) | 4B/14B | phi |
+| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
 | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
 | [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
-| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/12B/35B/115B | telechat2 |
+| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
 | [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
 | [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index f777ac118..73f620963 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1159,7 +1159,6 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
     default_system=(
         "你是中国电信星辰语义大模型,英文名是TeleChat,你是由中电信人工智能科技有限公司和中国电信人工智能研究院(TeleAI)研发的人工智能助手。"
     ),
-    replace_jinja_template=False,
 )
 
 
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 483f2ec2e..d36258367 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -105,7 +105,7 @@ def register_model_group(
 ) -> None:
     for name, path in models.items():
         SUPPORTED_MODELS[name] = path
-        if template is not None and any(suffix in name for suffix in ("-Chat", "-Instruct")):
+        if template is not None and (any(suffix in name for suffix in ("-Chat", "-Instruct")) or vision):
             DEFAULT_TEMPLATE[name] = template
         if vision:
             VISION_MODELS.add(name)
@@ -848,10 +848,18 @@ register_model_group(
 
 register_model_group(
     models={
+        "Llama-3.2-11B-Vision": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-11B-Vision",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-11B-Vision",
+        },
         "Llama-3.2-11B-Vision-Instruct": {
             DownloadSource.DEFAULT: "meta-llama/Llama-3.2-11B-Vision-Instruct",
             DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-11B-Vision-Instruct",
         },
+        "Llama-3.2-90B-Vision": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-90B-Vision",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-90B-Vision",
+        },
         "Llama-3.2-90B-Vision-Instruct": {
             DownloadSource.DEFAULT: "meta-llama/Llama-3.2-90B-Vision-Instruct",
             DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-90B-Vision-Instruct",
@@ -1175,23 +1183,23 @@ register_model_group(
 
 register_model_group(
     models={
-        "PaliGemma-3B-pt-224-Chat": {
+        "PaliGemma-3B-pt-224": {
             DownloadSource.DEFAULT: "google/paligemma-3b-pt-224",
             DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-224",
         },
-        "PaliGemma-3B-pt-448-Chat": {
+        "PaliGemma-3B-pt-448": {
             DownloadSource.DEFAULT: "google/paligemma-3b-pt-448",
             DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-448",
         },
-        "PaliGemma-3B-pt-896-Chat": {
+        "PaliGemma-3B-pt-896": {
             DownloadSource.DEFAULT: "google/paligemma-3b-pt-896",
             DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-pt-896",
         },
-        "PaliGemma-3B-mix-224-Chat": {
+        "PaliGemma-3B-mix-224": {
             DownloadSource.DEFAULT: "google/paligemma-3b-mix-224",
             DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-mix-224",
         },
-        "PaliGemma-3B-mix-448-Chat": {
+        "PaliGemma-3B-mix-448": {
             DownloadSource.DEFAULT: "google/paligemma-3b-mix-448",
             DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma-3b-mix-448",
         },
@@ -1201,6 +1209,43 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "PaliGemma2-3B-pt-224": {
+            DownloadSource.DEFAULT: "google/paligemma2-3b-pt-224",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-3b-pt-224",
+        },
+        "PaliGemma2-3B-pt-448": {
+            DownloadSource.DEFAULT: "google/paligemma2-3b-pt-448",
+        },
+        "PaliGemma2-3B-pt-896": {
+            DownloadSource.DEFAULT: "google/paligemma2-3b-pt-896",
+        },
+        "PaliGemma2-10B-pt-224": {
+            DownloadSource.DEFAULT: "google/paligemma2-10b-pt-224",
+        },
+        "PaliGemma2-10B-pt-448": {
+            DownloadSource.DEFAULT: "google/paligemma2-10b-pt-448",
+        },
+        "PaliGemma2-10B-pt-896": {
+            DownloadSource.DEFAULT: "google/paligemma2-10b-pt-896",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/paligemma2-10b-pt-896",
+        },
+        "PaliGemma2-28B-pt-224": {
+            DownloadSource.DEFAULT: "google/paligemma2-28b-pt-224",
+        },
+        "PaliGemma2-28B-pt-448": {
+            DownloadSource.DEFAULT: "google/paligemma2-28b-pt-448",
+        },
+        "PaliGemma2-28B-pt-896": {
+            DownloadSource.DEFAULT: "google/paligemma2-28b-pt-896",
+        },
+    },
+    template="paligemma",
+    vision=True,
+)
+
+
 register_model_group(
     models={
         "Phi-1.5-1.3B": {
@@ -1255,7 +1300,7 @@ register_model_group(
 
 register_model_group(
     models={
-        "Pixtral-12B-Chat": {
+        "Pixtral-12B-Instruct": {
             DownloadSource.DEFAULT: "mistral-community/pixtral-12b",
             DownloadSource.MODELSCOPE: "AI-ModelScope/pixtral-12b",
         }
@@ -1958,10 +2003,13 @@ register_model_group(
             DownloadSource.OPENMIND: "TeleAI/TeleChat-7B-pt",
         },
         "TeleChat-12B-Chat": {
-            DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B",
-            DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B",
+            DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2",
+            DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2",
             DownloadSource.OPENMIND: "TeleAI/TeleChat-12B-pt",
-        }
+        },
+        "TeleChat-52B-Chat": {
+            DownloadSource.DEFAULT: "Tele-AI/TeleChat-52B",
+        },
     },
     template="telechat",
 )
@@ -1977,13 +2025,8 @@ register_model_group(
             DownloadSource.DEFAULT: "Tele-AI/TeleChat2-7B",
             DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-7B",
         },
-        "TeleChat2-12B-Chat": {
-            DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2",
-            DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2",
-        },
         "TeleChat2-35B-Chat": {
-            DownloadSource.DEFAULT: "Tele-AI/TeleChat2-35B",
-            DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-35B",
+            DownloadSource.MODELSCOPE: "TeleAI/TeleChat2-35B-Nov",
         },
         "TeleChat2-115B-Chat": {
             DownloadSource.DEFAULT: "Tele-AI/TeleChat2-115B",
diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py
index 7e2d5bb98..45b847b4c 100644
--- a/src/llamafactory/webui/locales.py
+++ b/src/llamafactory/webui/locales.py
@@ -30,15 +30,19 @@
     "model_name": {
         "en": {
             "label": "Model name",
+            "info": "Input the name prefix to search for the model.",
        },
         "ru": {
             "label": "Название модели",
+            "info": "Введите префикс имени для поиска модели.",
         },
         "zh": {
             "label": "模型名称",
+            "info": "输入首单词以检索模型。",
         },
         "ko": {
             "label": "모델 이름",
+            "info": "모델을 검색하기 위해 이름 접두어를 입력하세요.",
         },
     },
     "model_path": {