From 6ce9bb743f21c4d17c177da7477a5212a9a1f0f9 Mon Sep 17 00:00:00 2001
From: Vincent Chen
Date: Thu, 21 Nov 2024 13:49:31 -0800
Subject: [PATCH] Bump ubuntu 22.04 + torch 2.5.1 (#1666)

Signed-off-by: dependabot[bot]
Co-authored-by: Chuck Tang
Co-authored-by: v-chen_data
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Charles Tang
---
 .github/workflows/docker.yaml  |  6 ------
 .github/workflows/pr-cpu.yaml  |  4 ++--
 .github/workflows/pr-gpu.yaml  | 12 ++++++------
 .github/workflows/release.yaml |  4 ++--
 README.md                      | 12 ++++++------
 setup.py                       |  2 +-
 6 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index b548f34234..39043ef92a 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -17,12 +17,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: "2.4.0_cu124"
-            base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
-            dep_groups: "[all]"
-          - name: "2.4.0_cu124_aws"
-            base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
-            dep_groups: "[all]"
           - name: "2.5.1_cu124"
             base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04
             dep_groups: "[all]"
diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
index 056b070143..c500df9cd7 100644
--- a/.github/workflows/pr-cpu.yaml
+++ b/.github/workflows/pr-cpu.yaml
@@ -21,9 +21,9 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: "cpu-2.4.0"
+          - name: "cpu-2.5.1"
             pip_deps: "[all-cpu]"
-            container: mosaicml/pytorch:2.4.0_cpu-python3.11-ubuntu20.04
+            container: mosaicml/pytorch:2.5.1_cpu-python3.11-ubuntu22.04
             markers: "not gpu"
             pytest_command: "coverage run -m pytest"
     steps:
diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
index 5b91d54442..cdfc4b7b64 100644
--- a/.github/workflows/pr-gpu.yaml
+++ b/.github/workflows/pr-gpu.yaml
@@ -22,8 +22,8 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: "gpu-2.4.0-1"
-            container: mosaicml/llm-foundry:2.4.0_cu124-latest
+          - name: "gpu-2.5.1-1"
+            container: mosaicml/llm-foundry:2.5.1_cu124-latest
             markers: "gpu"
             pip_deps: "[all]"
             pytest_command: "coverage run -m pytest"
@@ -51,8 +51,8 @@ jobs:
      fail-fast: false
       matrix:
         include:
-          - name: "gpu-2.4.0-2"
-            container: mosaicml/llm-foundry:2.4.0_cu124-latest
+          - name: "gpu-2.5.1-2"
+            container: mosaicml/llm-foundry:2.5.1_cu124-latest
             markers: "gpu"
             pip_deps: "[all]"
             pytest_command: "coverage run -m pytest"
@@ -80,8 +80,8 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: "gpu-2.4.0-4"
-            container: mosaicml/llm-foundry:2.4.0_cu124-latest
+          - name: "gpu-2.5.1-4"
+            container: mosaicml/llm-foundry:2.5.1_cu124-latest
             markers: "gpu"
             pip_deps: "[all]"
             pytest_command: "coverage run -m pytest"
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 15c83035e0..19bb050eb7 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -93,7 +93,7 @@ jobs:
           ${{ env.AWS_DOCKER_TAG }}
           ${{ env.AWS_LATEST_TAG }}
         build-args: |
-          BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
+          BASE_IMAGE=mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04-aws
           BRANCH_NAME=${{ env.BRANCH_NAME }}
           DEP_GROUPS=[all]
           KEEP_FOUNDRY=true
@@ -108,7 +108,7 @@ jobs:
           ${{ env.DOCKER_TAG }}
           ${{ env.LATEST_TAG }}
         build-args: |
-          BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
+          BASE_IMAGE=mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04
           BRANCH_NAME=${{ env.BRANCH_NAME }}
           DEP_GROUPS=[all]
           KEEP_FOUNDRY=true
diff --git a/README.md b/README.md
index bc4eff48fd..b62e4c2bca 100644
--- a/README.md
+++ b/README.md
@@ -113,8 +113,8 @@ If you have success/failure using LLM Foundry on other systems, please let us kn
 
 | Device         | Torch Version | Cuda Version | Status                       |
 | -------------- | ------------- | ------------ | ---------------------------- |
-| A100-40GB/80GB | 2.4.0         | 12.4         | :white_check_mark: Supported |
-| H100-80GB      | 2.4.0         | 12.4         | :white_check_mark: Supported |
+| A100-40GB/80GB | 2.5.1         | 12.4         | :white_check_mark: Supported |
+| H100-80GB      | 2.5.1         | 12.4         | :white_check_mark: Supported |
 
 ## MosaicML Docker Images
 We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories.
@@ -122,15 +122,15 @@ We highly recommend using our prebuilt Docker images. You can find them here: ht
 The `mosaicml/pytorch` images are pinned to specific PyTorch and CUDA versions, and are stable and rarely updated.
 
 The `mosaicml/llm-foundry` images are built with new tags upon every commit to the `main` branch.
-You can select a specific commit hash such as `mosaicml/llm-foundry:2.4.0_cu124-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.4.0_cu124-latest`.
+You can select a specific commit hash such as `mosaicml/llm-foundry:2.5.1_cu124-9867a7b` or take the latest one using `mosaicml/llm-foundry:2.5.1_cu124-latest`.
 
 **Please Note:** The `mosaicml/llm-foundry` images do not come with the `llm-foundry` package preinstalled, just the dependencies. You will still need to `pip install llm-foundry` either from PyPi or from source.
 
 | Docker Image                                            | Torch Version | Cuda Version      | LLM Foundry dependencies installed? |
 | ------------------------------------------------------- | ------------- | ----------------- | ----------------------------------- |
-| `mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04`   | 2.4.0         | 12.4 (Infiniband) | No                                  |
-| `mosaicml/llm-foundry:2.4.0_cu124-latest`               | 2.4.0         | 12.4 (Infiniband) | Yes                                 |
-| `mosaicml/llm-foundry:2.4.0_cu124_aws-latest`           | 2.4.0         | 12.4 (EFA)        | Yes                                 |
+| `mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04`   | 2.5.1         | 12.4 (Infiniband) | No                                  |
+| `mosaicml/llm-foundry:2.5.1_cu124-latest`               | 2.5.1         | 12.4 (Infiniband) | Yes                                 |
+| `mosaicml/llm-foundry:2.5.1_cu124_aws-latest`           | 2.5.1         | 12.4 (EFA)        | Yes                                 |
 
 # Installation
 
diff --git a/setup.py b/setup.py
index 9c54fdc232..b401f4d7b8 100644
--- a/setup.py
+++ b/setup.py
@@ -57,7 +57,7 @@
     'accelerate>=0.25,<1.2',  # for HF inference `device_map`
     'transformers>=4.43.2,<4.47',
     'mosaicml-streaming>=0.9.0,<0.10',
-    'torch>=2.4.0,<2.5.2',
+    'torch>=2.5.1,<2.5.2',
     'datasets>=2.20.0,<2.21',
     'fsspec==2023.6.0',  # newer version results in a bug in datasets that duplicates data
     'sentencepiece==0.2.0',
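
As a follow-up sanity check, here is a minimal sketch (not part of the patch; it assumes the `packaging` package is importable alongside torch) that verifies an environment built from these changes matches the new pin `torch>=2.5.1,<2.5.2` in setup.py. It can be run, for example, inside `mosaicml/llm-foundry:2.5.1_cu124-latest` after the `pip install llm-foundry` step the README still requires.

# Sanity-check sketch; assumes `packaging` is available in the environment.
# Confirms the installed torch satisfies the new pin 'torch>=2.5.1,<2.5.2' from setup.py.
import torch
from packaging.version import Version

installed = Version(torch.__version__.split("+")[0])  # drop local build tag, e.g. "+cu124"
assert Version("2.5.1") <= installed < Version("2.5.2"), (
    f"expected torch>=2.5.1,<2.5.2, found {torch.__version__}"
)
print(f"torch {torch.__version__} satisfies the pin; CUDA available: {torch.cuda.is_available()}")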