forked from allenai/allennlp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
kubernetes-dev-environment.yaml
59 lines (59 loc) · 2.07 KB
/
kubernetes-dev-environment.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
---
# This is a configuration file for provisioning a GPU machine on Kubernetes.
apiVersion: batch/v1
kind: Job
metadata:
  name: allennlp-dev-env  # TODO: make this unique by appending your username
  namespace: allennlp
  labels:
    contact: ""  # TODO: enter your username
spec:
  template:
    metadata:
      labels:
        # Quoted: a bare `yes` would parse as a YAML 1.1 boolean, and
        # Kubernetes label values must be strings.
        autoscaler-task: "yes"
    spec:
      restartPolicy: Never
      containers:
        - name: allennlp-dev-env
          image: allennlp/allennlp:latest
          # Keep stdin/tty open so you can `kubectl attach`/`exec` into the
          # shell started by `command` below.
          stdin: true
          tty: true
          resources:
            requests:
              # p2.xlarge has ~55GB free memory, ~4 cores of CPU. We request most of that.
              cpu: 3000m
              memory: 50Gi
            # "limits" specify the max your container will be allowed to use.
            limits:
              # Set this to the number of GPUs you want to use. Note that if you set this higher
              # than the maximum available on our GPU instances, your job will never get scheduled.
              # Note that this can ONLY appear in "limits" - "requests" should not have GPU
              # resources.
              # See https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/ for more info.
              alpha.kubernetes.io/nvidia-gpu: 1
              # Set the limit slightly higher than the request for CPU.
              cpu: 3500m
              memory: 50Gi
          volumeMounts:
            - name: nvidia-driver
              mountPath: /usr/local/nvidia
              readOnly: true
            - name: nfs
              mountPath: /net/efs/aristo
              readOnly: true
          command:
            - /bin/sh
      volumes:
        # Mount in the GPU driver from the host machine. This guarantees compatibility.
        - name: nvidia-driver
          hostPath:
            path: /var/lib/nvidia-docker/volumes/nvidia_driver/367.57
        # Mount the efs drive
        - name: nfs
          hostPath:
            path: /net/efs/aristo
      tolerations:
        # This is required to run on our GPU machines. Do not remove.
        - key: "GpuOnly"
          operator: "Equal"
          value: "true"