Skip to content

Commit

Permalink
update gpt-neox image to gpt-neox v2.0 and combine with gpt-neox build
Browse files Browse the repository at this point in the history
  • Loading branch information
liamcli committed Apr 27, 2023
1 parent c84f857 commit 86be527
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,11 @@ build-deepspeed-gpu: build-gpu-cuda-113-base
-t $(NGC_REGISTRY)/$(GPU_DEEPSPEED_ENVIRONMENT_NAME)-$(VERSION) \
.

# This builds deepspeed environment off of a patched version of EleutherAI's fork of DeepSpeed
# that we need for gpt-neox support.
# This builds the environment for GPT-NeoX using EleutherAI's fork of DeepSpeed and our fork of the gpt-neox repo.
# We disable Docker BuildKit (DOCKER_BUILDKIT=0) because building the DeepSpeed ops requires access to the NVIDIA runtime during the build.
# See https://github.com/NVIDIA/nvidia-container-runtime/issues/153.
.PHONY: build-gpt-neox-deepspeed-gpu
build-gpt-neox-deepspeed-gpu: export DOCKER_BUILDKIT=0
build-gpt-neox-deepspeed-gpu: build-gpu-cuda-113-base
docker build -f Dockerfile-default-gpu \
--build-arg BASE_IMAGE="$(DOCKERHUB_REGISTRY)/$(GPU_CUDA_113_BASE_NAME)-$(SHORT_GIT_HASH)" \
Expand All @@ -223,7 +225,7 @@ build-gpt-neox-deepspeed-gpu: build-gpu-cuda-113-base
--build-arg TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0" \
--build-arg APEX_GIT="https://github.com/determined-ai/apex.git@3caf0f40c92e92b40051d3afff8568a24b8be28d" \
--build-arg DET_BUILD_NCCL="" \
--build-arg DEEPSPEED_PIP="git+https://github.com/determined-ai/deepspeed.git@eleuther_dai" \
--build-arg DEEPSPEED_PIP="git+https://github.com/EleutherAI/DeeperSpeed.git@0a237296f760efd4f58eb3c32b6cdc429a39041a#egg=deepspeed" \
-t $(DOCKERHUB_REGISTRY)/$(GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME)-$(SHORT_GIT_HASH) \
-t $(DOCKERHUB_REGISTRY)/$(GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME)-$(VERSION) \
-t $(NGC_REGISTRY)/$(GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME)-$(SHORT_GIT_HASH) \
Expand Down
21 changes: 21 additions & 0 deletions dockerfile_scripts/install_deepspeed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,24 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y pdsh libaio-dev
# Install a pinned triton release ahead of the DeepSpeed install below.
python -m pip install triton==1.0.0
# Install DeepSpeed from the pip spec in $DEEPSPEED_PIP. `--no-binary deepspeed`
# makes pip build the deepspeed package from source rather than use a wheel.
# NOTE(review): DS_BUILD_OPS=1 is presumably DeepSpeed's switch for compiling
# its ops at install time -- confirm against the DeepSpeed install docs.
DS_BUILD_OPS=1 python -m pip install $DEEPSPEED_PIP --no-binary deepspeed
# Run DeepSpeed's env_report module after the install.
# NOTE(review): presumably prints which ops were built/are compatible -- confirm.
python -m deepspeed.env_report

# GPT-NeoX extras. Runs only when the DeepSpeed pip spec in $DEEPSPEED_PIP
# contains "EleutherAI" (e.g. the EleutherAI/DeeperSpeed fork). Installs the
# gpt-neox checkout at /gpt-neox, its Python dependencies, and sample data.
if [[ "$DEEPSPEED_PIP" == *"EleutherAI"* ]]; then
    # This is a dependency of gpt-neox.
    # Run non-interactively, matching the earlier apt-get call in this script.
    DEBIAN_FRONTEND=noninteractive apt-get install -y mpich

    # Need this to avoid `AttributeError: module 'distutils' has no attribute 'version'`
    # when importing tensorboard. See https://github.com/pytorch/pytorch/issues/69894.
    # Use `python -m pip` (as the DeepSpeed install does) so the pin lands in the
    # same interpreter that will import tensorboard.
    python -m pip install setuptools==59.5.0

    # Install gpt-neox and dependencies.
    # Clone to an explicit absolute path: every later step refers to /gpt-neox,
    # so a cwd-relative clone would only work when this script runs from `/`.
    git clone -b determined2 https://github.com/determined-ai/gpt-neox.git /gpt-neox
    python /gpt-neox/megatron/fused_kernels/setup.py install

    # Exclude DeeperSpeed reinstall since the version in requirements is not pinned.
    # Filter into a temporary requirements file and install it with `-r` instead of
    # word-splitting the grep output onto the command line, so that comments,
    # environment markers and option lines are parsed by pip as intended.
    neox_requirements="$(mktemp)"
    grep -ivE "DeeperSpeed" /gpt-neox/requirements/requirements.txt > "$neox_requirements"
    python -m pip install -r "$neox_requirements"
    rm -f "$neox_requirements"
    python -m pip install -r /gpt-neox/requirements/requirements-flashattention.txt

    # Download sample data and move it to the /gpt-neox/data directory.
    gsutil cp -r gs://determined-ai-public-datasets/text_data /gpt-neox && mv /gpt-neox/text_data /gpt-neox/data

    # Modify permissions to enable example to run in nonroot mode.
    chmod -R 777 /gpt-neox
    chmod -R 777 /tmp
fi

0 comments on commit 86be527

Please sign in to comment.