From e1502a80fb3243edf8678d879621fafc31507bf2 Mon Sep 17 00:00:00 2001
From: Eric Curtin
Date: Fri, 26 Jul 2024 12:38:16 +0100
Subject: [PATCH] Cleanup

Signed-off-by: Eric Curtin
---
 .clang-format | 2 -
 README.md | 82 ++-
 ci.sh | 9 +-
 .../ramalama/39-nvidia/Containerfile | 32 -
 container-images/ramalama/39/Containerfile | 36 --
 .../ramalama/41-amd/Containerfile | 39 --
 .../ramalama/latest-amd/Containerfile | 39 --
 .../ramalama/latest-nvidia/Containerfile | 32 -
 hf-db/granite | 2 -
 hf-db/granite:3b | 2 -
 hf-db/merlinite | 2 -
 hf-db/merlinite:7b | 2 -
 hf-db/mistral | 2 -
 hf-db/mistral:7b | 2 -
 install.sh | 2 +-
 ramalama | 559 +++++-------------
 ramalama.py | 157 -----
 17 files changed, 242 insertions(+), 759 deletions(-)
 delete mode 100644 .clang-format
 delete mode 100644 container-images/ramalama/39-nvidia/Containerfile
 delete mode 100644 container-images/ramalama/39/Containerfile
 delete mode 100644 container-images/ramalama/41-amd/Containerfile
 delete mode 100644 container-images/ramalama/latest-amd/Containerfile
 delete mode 100644 container-images/ramalama/latest-nvidia/Containerfile
 delete mode 100644 hf-db/granite
 delete mode 100644 hf-db/granite:3b
 delete mode 100644 hf-db/merlinite
 delete mode 100644 hf-db/merlinite:7b
 delete mode 100644 hf-db/mistral
 delete mode 100644 hf-db/mistral:7b
 delete mode 100755 ramalama.py

diff --git a/.clang-format b/.clang-format
deleted file mode 100644
index ad9ec73..0000000
--- a/.clang-format
+++ /dev/null
@@ -1,2 +0,0 @@
-BasedOnStyle: chromium
-
diff --git a/README.md b/README.md
index 6632110..c61a8d0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,81 @@
-# Project archived
+# Project undergoing rewrite
 
-New version at https://github.com/containers/ramalama
+The project is undergoing a complete rewrite in python3; the dependency on containers will be removed. It will run natively on macOS and Linux at a minimum and will support pulling, running and serving models from the following types of repos: huggingface, oci, ollama.
+
+You can still play around with the POC though :)
+
+# ramalama
+
+The goal of ramalama is to make AI even more boring.
+
+## Install
+
+Install ramalama by running this one-liner:
+
+```
+curl -fsSL https://raw.githubusercontent.com/containers/ramalama/main/install.sh | sudo bash
+```
+
+## Usage
+
+### Pulling Models
+
+You can pull a model using the `pull` command. By default, it pulls from the ollama registry.
+
+```
+$ ramalama pull granite-code
+```
+
+## Diagram
+
+```
++----------------+
+|                |
+|  ramalama run  |
+|                |
++-------+--------+
+        |
+        v
++----------------+    +-----------------------+    +------------------+
+|                |    | Pull runtime layer    |    | Pull model layer |
+| Auto-detect    +--->| for llama.cpp         +--->| i.e. granite     |
+| hardware type  |    | (CPU, Vulkan, AMD,    |    |                  |
+|                |    |  Nvidia, Intel,       |    +------------------+
++----------------+    |  Apple Silicon, etc.) |    | Repo options:    |
+                      +-----------------------+    +-+-------+------+-+
+                                                      |       |      |
+                                                      v       v      v
+                                                +---------+ +------+ +----------+
+                                                | Hugging | | quay | | Ollama   |
+                                                | Face    | |      | | Registry |
+                                                +-------+-+ +---+--+ +-+--------+
+                                                        |      |      |
+                                                        v      v      v
+                                                     +------------------+
+                                                     | Start container  |
+                                                     | with llama.cpp   |
+                                                     | and granite      |
+                                                     | model            |
+                                                     +------------------+
+```
+
+## In development
+
+Regard this as an alpha: everything is under development, so expect breaking changes. Luckily it's easy to reset everything and reinstall:
+
+```
+rm -rf /var/lib/ramalama # only required if running as root user
+rm -rf $HOME/.local/share/ramalama
+```
+
+and install again.
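+
+These two directories are the ramalama store. After a pull, the model blob is kept there content-addressed by its sha256 digest, with a human-readable symlink pointing at it. Roughly, for the example pull above (layout as implemented by the python3 pull code; digest shortened; subject to change as the rewrite evolves):
+
+```
+$ readlink ~/.local/share/ramalama/models/ollama/granite-code:latest
+../../repos/ollama/blobs/sha256:…
+```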
+ +## Credit where credit is due + +For the vast majority of AI/LLM software we use, under the covers the heavy lifting is being done by: + +https://github.com/ggerganov/llama.cpp + +so if you like this tool, give llama.cpp repo a :star:, and hey, give us a :star: too while you are at it. + +![image](https://github.com/user-attachments/assets/d7a91662-5903-4117-ad41-2b193a852ea1) diff --git a/ci.sh b/ci.sh index aa25d49..60973c9 100755 --- a/ci.sh +++ b/ci.sh @@ -7,13 +7,12 @@ main() { curl -fsSL https://raw.githubusercontent.com/containers/ramalama/main/install.sh | sudo bash set +o pipefail - ramalama -h | grep Usage: + ./ramalama.py -h | grep Usage: set -o pipefail - ramalama pull granite - ramalama list | grep granite - ramalama rm granite - shellcheck "$(command -v ramalama)" + ramalama.py pull granite-code +# ramalama list | grep granite-code +# ramalama rm granite-code } main diff --git a/container-images/ramalama/39-nvidia/Containerfile b/container-images/ramalama/39-nvidia/Containerfile deleted file mode 100644 index a864395..0000000 --- a/container-images/ramalama/39-nvidia/Containerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM fedora:39 - -RUN mkdir -p /models -RUN dnf install -y git jq procps-ng vim clblast-devel vulkan-headers \ - vulkan-loader-devel glslc glslang 'dnf-command(config-manager)' \ - python3-pip cmake gcc-c++ && \ - dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo \ - dnf install -y cuda && \ - dnf clean all && \ - rm -rf /var/cache/*dnf* - -RUN pip install -U "huggingface_hub[cli]" - -ENV LLAMA_CCACHE=0 -ENV LLAMA_CURL=1 -ENV LLAMA_VULKAN=1 -ENV GGML_CUDA=1 - -RUN git clone -b ramlama https://github.com/ericcurtin/llama.cpp.git && \ - cd llama.cpp && \ - cmake -B build -DLLAMA_CCACHE=0 -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 \ - -DGGML_CUDA=1 && \ - cmake --build build --config Release -j $(nproc) && \ - cd build/bin && \ - for file in *; do \ - if [ -f "$file" ] && [ -x "$file" ]; then \ - echo "$file" && \ - mv "$file" /usr/bin/llama-"$file"; \ - fi; \ - done; \ - cd / && \ - rm -rf llama.cpp diff --git a/container-images/ramalama/39/Containerfile b/container-images/ramalama/39/Containerfile deleted file mode 100644 index 63bc165..0000000 --- a/container-images/ramalama/39/Containerfile +++ /dev/null @@ -1,36 +0,0 @@ -FROM fedora:39 - -RUN mkdir -p /models -RUN dnf install -y git jq procps-ng vim clblast-devel vulkan-headers \ - vulkan-loader-devel glslc glslang python3-pip dnf-plugins-core \ - python3-dnf-plugin-versionlock cmake gcc-c++ libcurl-devel && \ - if [ "$(uname -m)" = "aarch64" ]; then \ - dnf copr enable -y slp/mesa-krunkit && \ - dnf install -y mesa-libOpenCL-23.3.5-102.aarch64 \ - mesa-vulkan-drivers-23.3.5-102.aarch64 && \ - dnf versionlock mesa-libOpenCL-23.3.5-102.aarch64 \ - mesa-vulkan-drivers-23.3.5-102.aarch64; \ - fi && \ - dnf install -y vulkan-tools && \ - dnf clean all && \ - rm -rf /var/cache/*dnf* - -RUN pip install -U "huggingface_hub[cli]" - -ENV LLAMA_CCACHE=0 -ENV LLAMA_CURL=1 -ENV LLAMA_VULKAN=1 - -RUN git clone -b ramlama https://github.com/ericcurtin/llama.cpp.git && \ - cd llama.cpp && \ - cmake -B build -DLLAMA_CCACHE=0 -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \ - cmake --build build --config Release -j $(nproc) && \ - cd build/bin && \ - for file in *; do \ - if [ -f "$file" ] && [ -x "$file" ]; then \ - echo "$file" && \ - mv "$file" /usr/bin/llama-"$file"; \ - fi; \ - done; \ - cd / && \ - rm -rf llama.cpp diff --git a/container-images/ramalama/41-amd/Containerfile 
b/container-images/ramalama/41-amd/Containerfile deleted file mode 100644 index c3c0352..0000000 --- a/container-images/ramalama/41-amd/Containerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM fedora:41 - -RUN mkdir -p /models -RUN if [ "$(uname -m)" != "aarch64" ]; then \ - dnf install -y rocminfo rocm-opencl rocm-clinfo rocm-hip hipblas \ - hipblas-devel; \ - fi; \ - \ - dnf install -y git jq procps-ng vim clblast-devel vulkan-headers \ - vulkan-loader-devel glslc glslang 'dnf5-command(builddep)' \ - python3-pip && \ - dnf builddep -y llama-cpp && \ - dnf clean all && \ - rm -rf /var/cache/*dnf* && \ - rm -rf /usr/lib64/rocm/gfx8 /usr/lib64/rocm/gfx9 /usr/lib64/rocm/gfx10 \ - /usr/lib64/rocm/gfx11 /usr/lib64/librocsparse.so.1.0 \ - /usr/lib64/librocblas.so.4.1 - -RUN pip install -U "huggingface_hub[cli]" - -ENV LLAMA_CCACHE=0 -ENV LLAMA_CURL=1 -ENV LLAMA_VULKAN=1 -ENV GGML_HIPBLAS=1 - -RUN git clone -b ramlama https://github.com/ericcurtin/llama.cpp.git && \ - cd llama.cpp && \ - cmake -B build -DLLAMA_CCACHE=0 -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 \ - -DGGML_HIPBLAS=1 && \ - cmake --build build --config Release -j $(nproc) && \ - cd build/bin && \ - for file in *; do \ - if [ -f "$file" ] && [ -x "$file" ]; then \ - echo "$file" && \ - mv "$file" /usr/bin/llama-"$file"; \ - fi; \ - done; \ - cd / && \ - rm -rf llama.cpp diff --git a/container-images/ramalama/latest-amd/Containerfile b/container-images/ramalama/latest-amd/Containerfile deleted file mode 100644 index c3c0352..0000000 --- a/container-images/ramalama/latest-amd/Containerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM fedora:41 - -RUN mkdir -p /models -RUN if [ "$(uname -m)" != "aarch64" ]; then \ - dnf install -y rocminfo rocm-opencl rocm-clinfo rocm-hip hipblas \ - hipblas-devel; \ - fi; \ - \ - dnf install -y git jq procps-ng vim clblast-devel vulkan-headers \ - vulkan-loader-devel glslc glslang 'dnf5-command(builddep)' \ - python3-pip && \ - dnf builddep -y llama-cpp && \ - dnf clean all && \ - rm -rf /var/cache/*dnf* && \ - rm -rf /usr/lib64/rocm/gfx8 /usr/lib64/rocm/gfx9 /usr/lib64/rocm/gfx10 \ - /usr/lib64/rocm/gfx11 /usr/lib64/librocsparse.so.1.0 \ - /usr/lib64/librocblas.so.4.1 - -RUN pip install -U "huggingface_hub[cli]" - -ENV LLAMA_CCACHE=0 -ENV LLAMA_CURL=1 -ENV LLAMA_VULKAN=1 -ENV GGML_HIPBLAS=1 - -RUN git clone -b ramlama https://github.com/ericcurtin/llama.cpp.git && \ - cd llama.cpp && \ - cmake -B build -DLLAMA_CCACHE=0 -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 \ - -DGGML_HIPBLAS=1 && \ - cmake --build build --config Release -j $(nproc) && \ - cd build/bin && \ - for file in *; do \ - if [ -f "$file" ] && [ -x "$file" ]; then \ - echo "$file" && \ - mv "$file" /usr/bin/llama-"$file"; \ - fi; \ - done; \ - cd / && \ - rm -rf llama.cpp diff --git a/container-images/ramalama/latest-nvidia/Containerfile b/container-images/ramalama/latest-nvidia/Containerfile deleted file mode 100644 index a864395..0000000 --- a/container-images/ramalama/latest-nvidia/Containerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM fedora:39 - -RUN mkdir -p /models -RUN dnf install -y git jq procps-ng vim clblast-devel vulkan-headers \ - vulkan-loader-devel glslc glslang 'dnf-command(config-manager)' \ - python3-pip cmake gcc-c++ && \ - dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo \ - dnf install -y cuda && \ - dnf clean all && \ - rm -rf /var/cache/*dnf* - -RUN pip install -U "huggingface_hub[cli]" - -ENV LLAMA_CCACHE=0 -ENV LLAMA_CURL=1 -ENV LLAMA_VULKAN=1 -ENV GGML_CUDA=1 - -RUN git clone -b 
ramlama https://github.com/ericcurtin/llama.cpp.git && \ - cd llama.cpp && \ - cmake -B build -DLLAMA_CCACHE=0 -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 \ - -DGGML_CUDA=1 && \ - cmake --build build --config Release -j $(nproc) && \ - cd build/bin && \ - for file in *; do \ - if [ -f "$file" ] && [ -x "$file" ]; then \ - echo "$file" && \ - mv "$file" /usr/bin/llama-"$file"; \ - fi; \ - done; \ - cd / && \ - rm -rf llama.cpp diff --git a/hf-db/granite b/hf-db/granite deleted file mode 100644 index dbbc481..0000000 --- a/hf-db/granite +++ /dev/null @@ -1,2 +0,0 @@ -hf-repo ibm-granite/granite-3b-code-instruct-GGUF -model granite-3b-code-instruct.Q4_K_M.gguf diff --git a/hf-db/granite:3b b/hf-db/granite:3b deleted file mode 100644 index dbbc481..0000000 --- a/hf-db/granite:3b +++ /dev/null @@ -1,2 +0,0 @@ -hf-repo ibm-granite/granite-3b-code-instruct-GGUF -model granite-3b-code-instruct.Q4_K_M.gguf diff --git a/hf-db/merlinite b/hf-db/merlinite deleted file mode 100644 index 4d0609d..0000000 --- a/hf-db/merlinite +++ /dev/null @@ -1,2 +0,0 @@ -hf-repo instructlab/merlinite-7b-pt-GGUF -model merlinite-7b-pt-Q4_K_M.gguf diff --git a/hf-db/merlinite:7b b/hf-db/merlinite:7b deleted file mode 100644 index 4d0609d..0000000 --- a/hf-db/merlinite:7b +++ /dev/null @@ -1,2 +0,0 @@ -hf-repo instructlab/merlinite-7b-pt-GGUF -model merlinite-7b-pt-Q4_K_M.gguf diff --git a/hf-db/mistral b/hf-db/mistral deleted file mode 100644 index 6186c03..0000000 --- a/hf-db/mistral +++ /dev/null @@ -1,2 +0,0 @@ -hf-repo TheBloke/Mistral-7B-Instruct-v0.1-GGUF -model mistral-7b-instruct-v0.1.Q2_K.gguf diff --git a/hf-db/mistral:7b b/hf-db/mistral:7b deleted file mode 100644 index 6186c03..0000000 --- a/hf-db/mistral:7b +++ /dev/null @@ -1,2 +0,0 @@ -hf-repo TheBloke/Mistral-7B-Instruct-v0.1-GGUF -model mistral-7b-instruct-v0.1.Q2_K.gguf diff --git a/install.sh b/install.sh index 0d529b7..9a02588 100755 --- a/install.sh +++ b/install.sh @@ -62,7 +62,7 @@ main() { TMP="$(mktemp -d)" trap cleanup EXIT local from="ramalama" - local url="raw.githubusercontent.com/containers/ramalama/s/$from" + local url="raw.githubusercontent.com/containers/ramalama/main/$from" local from="$TMP/$from" download install -D -m755 "$from" "$bindir/" diff --git a/ramalama b/ramalama index a472c82..108d775 100755 --- a/ramalama +++ b/ramalama @@ -1,402 +1,157 @@ -#!/bin/bash - -available() { - command -v "$1" > /dev/null -} - -select_container_manager() { - if available podman; then - conman_bin="podman" - return 0 - elif available docker; then - conman_bin="docker" - return 0 - fi - - conman_bin="podman" -} - -exists() { - [ -e "$1" ] || [ -L "$1" ] -} - -image_available() { - exists "$llm_store/models/$model_name" -} - -wcurl() { - local wcurl_cmd=("curl" "--globoff" "--location" "--proto-default" "https") - wcurl_cmd+=("--remote-time" "--retry" "10" "--retry-max-time" "10" "$url") - "${wcurl_cmd[@]}" -} - -hf_download() { - local conman_hf_download=("${conman_run[@]}" "quay.io/ramalama/ramalama:latest") - conman_hf_download+=("huggingface-cli" "download" "$hf_repo" "$model") - conman_hf_download+=("--cache-dir" "/models/repo/hf/.cache") - conman_hf_download+=("--local-dir" "/models/repo/hf") - "${conman_hf_download[@]}" -} - -hf_download_with_trim() { - hf_download | sed "s#^/models/##g" | tr -d '\r' -} - -check_if_in_hf_db() { - local host="raw.githubusercontent.com" - local url="https://$host/containers/ramalama/main/hf-db/$model_name" - local image_data - if ! 
image_available && image_data="$(wcurl 2>&1)"; then - local hf_repo - hf_repo="$(echo "$image_data" | sed -ne "s/^hf-repo\s//pg" | xargs)" - local model - model="$(echo "$image_data" | sed -ne "s/^model\s//pg" | xargs)" - hf_download - local sym_target - sym_target=$(hf_download_with_trim) - "${conman_run[@]}" "quay.io/ramalama/ramalama:latest" "ln" "-s" "$sym_target" "/models/$model_name" - fi -} - -get_model() { - if image_available; then - echo "/models/$model_name" - fi -} - -get_dangling_images() { - "${conman[@]}" images --filter "dangling=true" -q --no-trunc -} - -rm_dir() { - xargs dirname -} - -get_model_dir() { - "${conman_run[@]}" "$model_name" readlink -f "$model" | rm_dir | rm_dir -} - -add_dri() { - if [ -e "/dev/dri" ]; then - conman_run+=("--device" "/dev/dri") - fi -} - -run_prep() { - vol="-v$llm_store/models:/models:z" - conman_run=("${conman[@]}" "run" "--rm" "-it") - conman_run+=("--security-opt=label=disable" "-v$HOME:$HOME" "-v/tmp:/tmp") - conman_run+=("$vol") - - if [ -e "/proc/driver/nvidia/gpus" ] || available nvidia-smi; then - conman_run+=("--gpus=all" "--device" "nvidia.com/gpu=all") - elif [ -e "/dev/kfd" ]; then - for i in /sys/bus/pci/devices/*/mem_info_vram_total; do - # AMD GPU needs more than 512M VRAM - if [ "$(< "$i")" -gt "600000000" ]; then - conman_run+=("--device" "/dev/kfd") - add_dri - ngl="true" - break - fi - done - elif [ "$(uname -m)" = "aarch64" ]; then # Don't do this on x86_64, slow perf - add_dri - ngl="true" - fi -} - -rm_cli() { - shift - local model_name="$1" - - # To be completed, only delete the directory once all associated images, 3b, - # latest, etc. are removed - if false; then - local dir_to_rm - dir_to_rm=$(get_model_dir) - "${conman_run[@]}" "$model_name" rm -rf "$dir_to_rm" || true - fi - - local sym_target - sym_target=$(readlink "$llm_store/models/$model_name") - rm -f "$llm_store/models/$sym_target" - rm -f "$llm_store/models/$model_name" -} - -build_cli() { - shift - local model_name="$1" - - run_prep - exec "${conman[@]}" build "$vol" -t "$model_name" . 
-} - -serve_cli() { - shift - if [ "$#" -lt 1 ]; then - serve_usage - fi - - local dryrun="false" - while [ $# -gt 0 ]; do - case $1 in - -d|--dryrun) - dryrun="true" - shift - ;; - -*) - serve_usage - ;; - *) - local model_name="$1" - shift # past argument - ;; - esac - done - - run_prep - check_if_in_hf_db - local model - model="$(get_model)" - conman_run+=("-p" "${RAMALAMA_HOST:-8080}:8080" "quay.io/ramalama/ramalama:latest") - conman_run+=("llama-server" "-m" "$model") - if $dryrun; then - echo "${conman_run[@]}" - return 0 - fi - - exec "${conman_run[@]}" -} - -get_llm_store() { - if [ "$EUID" -eq 0 ]; then - llm_store="/var/lib/ramalama/storage" - return 0 - fi - - llm_store="$HOME/.local/share/ramalama/storage" -} - -pull_cli() { - shift - local model_name="$1" - - run_prep - check_if_in_hf_db -} - -serve_usage() { - echo "Usage:" - echo " $(basename "$0") serve MODEL" - echo - echo "Aliases:" - echo " serve, start" - echo - echo "Environment Variables:" - echo " RAMALAMA_HOST The host:port to bind to (default \"0.0.0.0:8080\")" - - return 1 -} - -run_usage() { - echo "Usage:" - echo " $(basename "$0") run MODEL" - - return 1 -} - -run_cli() { - shift - if [ "$#" -lt 1 ]; then - run_usage - fi - - local dryrun="false" - while [ $# -gt 0 ]; do - case $1 in - -d|--dryrun) - dryrun="true" - shift - ;; - -*) - run_usage - ;; - *) - local model_name="$1" - shift # past argument - ;; - esac - done - - run_prep - check_if_in_hf_db - local model - model="$(get_model)" - conman_run+=("quay.io/ramalama/ramalama:latest" "llama-main" "-m" "$model" "--log-disable") - conman_run+=("--instruct") - if $ngl; then - conman_run+=("-ngl" "999") - fi - - if $dryrun; then - echo "${conman_run[@]}" - return 0 - fi - - exec "${conman_run[@]}" -} - -conman_cli() { - conman=("$1" "--root" "$llm_store") - shift - exec "${conman[@]}" "$@" -} - -usage() { - echo "Usage:" - echo " $(basename "$0") COMMAND" - echo - echo "Commands:" - echo " run MODEL Run a model" - echo " pull MODEL Pull a model" - echo " serve MODEL Serve a model" - echo " list List models" - echo " rm MODEL Remove a model" - - return 1 -} - -mkdirs() { - local repo_base="$llm_store/models/repo" - mkdir -p "$repo_base/hf" "$repo_base/ollama" "$repo_base/oci" -} - -human_duration() { - local d=$1 - if (( d < 1 )); then - echo -n "Less than a second" - elif (( d == 1 )); then - echo -n "1 second" - elif (( d < 60 )); then - echo -n "$d seconds" - elif (( d < 120 )); then - echo -n "1 minute" - elif (( d < 3600 )); then - echo -n "$(( d / 60 )) minutes" - elif (( d < 7200 )); then - echo -n "1 hour" - elif (( d < 86400 )); then - echo -n "$(( d / 3600 )) hours" - elif (( d < 172800 )); then - echo -n "1 day" - elif (( d < 604800 )); then - echo -n "$(( d / 86400 )) days" - elif (( d < 1209600 )); then - echo -n "1 week" - elif (( d < 2419200 )); then - echo -n "$(( d / 604800 )) weeks" - elif (( d < 4838400 )); then - echo -n "1 month" - elif (( d < 31536000 )); then - echo -n "$(( d / 2419200 )) months" - elif (( d < 63072000 )); then - echo -n "1 year" - else - echo -n "$(( d / 31536000 )) years" - fi -} - -list_files_by_modification() { - ls -t -} - -list_cli() { - printf "%-16s %-16s %-16s\n" "NAME" "MODIFIED" "SIZE" - cd "$llm_store/models/" - for i in $(list_files_by_modification); do - if [ -L "$i" ]; then - file_info=$(stat -c'%n %Z' "$i") - local name - name=$(echo "$file_info" | cut -d ' ' -f 1) - local file_epoch - file_epoch=$(echo "$file_info" | cut -d ' ' -f 2) - local diff=$((EPOCHSECONDS - file_epoch)) - local modified - 
modified="$(human_duration $diff) ago" - local size - size=$(readlink "$i" | xargs du -h | awk '{print $1"\n"}') - printf "%-16s %-16s %-16s\n" "$name" "$modified" "$size" - fi - done - - cd - > /dev/null -} - -build() { - local from="$sym_target" - local to="/$model_name" - cd "$llm_store/models" - local containerfile - containerfile=$(mktemp) - echo "FROM scratch -COPY $from $to" > "$containerfile" - "${conman[@]}" build -t "$model_name" -f "$containerfile" . - rm "$containerfile" - cd - > /dev/null -} - -push_cli() { - shift - local model_name="$1" - local to_location="$2" - local sym_target - sym_target=$(readlink "$llm_store/models/$model_name") - build - "${conman[@]}" push "$model_name" "$to_location" -} - -init_rama() { - select_container_manager - get_llm_store - mkdirs -} - -main() { - set -eu -o pipefail - - local conman_bin - local llm_store - init_rama - - local conman=("$conman_bin" "--root" "$llm_store") - local conman_run - local vol - local ngl="false" - if [ "$#" -lt 1 ]; then - usage - fi - - if [ "$1" = "run" ]; then - run_cli "$@" - elif [ "$1" = "pull" ]; then - pull_cli "$@" - elif [ "$1" = "push" ]; then - push_cli "$@" - elif [ "$1" = "serve" ] || [ "$1" = "start" ]; then - serve_cli "$@" - elif [ "$1" = "podman" ] || [ "$1" = "docker" ]; then - conman_cli "$@" - elif [ "$1" = "list" ] || [ "$1" = "ls" ]; then - list_cli "$@" - elif [ "$1" = "rm" ]; then - rm_cli "$@" - elif [ "$1" = "build" ]; then - build_cli "$@" - else - usage - fi -} - -main "$@" - +#!/usr/bin/python3 + +import os +import sys +import subprocess +import json +import hashlib + + +def verify_checksum(filename): + """ + Verifies if the SHA-256 checksum of a file matches the checksum provided in the filename. + + Args: + filename (str): The filename containing the checksum prefix (e.g., "sha256:") + + Returns: + bool: True if the checksum matches, False otherwise. 
+ """ + + if not os.path.exists(filename): + return False + + # Check if the filename starts with "sha256:" + fn_base = os.path.basename(filename) + if not fn_base.startswith("sha256:"): + raise ValueError(f"Filename does not start with 'sha256:': {fn_base}") + + # Extract the expected checksum from the filename + expected_checksum = fn_base.split(":")[1] + if len(expected_checksum) != 64: + raise ValueError("Invalid checksum length in filename") + + # Calculate the SHA-256 checksum of the file contents + sha256_hash = hashlib.sha256() + with open(filename, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + + # Get the calculated checksum + calculated_checksum = sha256_hash.hexdigest() + + # Compare the checksums + return calculated_checksum == expected_checksum + + +def run_command(args): + try: + subprocess.run(args, check=True) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) + + +def run_curl_command(args, filename): + if not verify_checksum(filename): + run_command(args) + + +def pull_ollama_manifest(ramalama_store, manifests, accept, registry_head, model_tag): + os.makedirs(os.path.dirname(manifests), exist_ok=True) + os.makedirs(os.path.join(ramalama_store, "blobs"), exist_ok=True) + curl_command = [ + "curl", "-f", "-s", "--header", accept, + "-o", manifests, + f"{registry_head}/manifests/{model_tag}" + ] + run_command(curl_command) + + +def pull_ollama_config_blob(ramalama_store, accept, registry_head, manifest_data): + cfg_hash = manifest_data["config"]["digest"] + config_blob_path = os.path.join(ramalama_store, "blobs", cfg_hash) + curl_command = [ + "curl", "-f", "-s", "-L", "-C", "-", "--header", accept, + "-o", config_blob_path, + f"{registry_head}/blobs/{cfg_hash}" + ] + run_curl_command(curl_command, config_blob_path) + + +def pull_ollama_blob(ramalama_store, layer_digest, accept, registry_head, ramalama_models, model_name, model_tag, symlink_path): + layer_blob_path = os.path.join(ramalama_store, "blobs", layer_digest) + curl_command = ["curl", "-f", "-L", "-C", "-", "--progress-bar", "--header", + accept, "-o", layer_blob_path, f"{registry_head}/blobs/{layer_digest}"] + run_curl_command(curl_command, layer_blob_path) + os.makedirs(ramalama_models, exist_ok=True) + relative_target_path = os.path.relpath( + layer_blob_path, start=os.path.dirname(symlink_path)) + run_command(["ln", "-sf", relative_target_path, symlink_path]) + + +def pull_cli(ramalama_store, ramalama_models, model): + registry_scheme = "https" + registry = "registry.ollama.ai" + model = "library/" + model + accept = "Accept: application/vnd.docker.distribution.manifest.v2+json" + if ':' in model: + model_name, model_tag = model.split(':', 1) + else: + model_name = model + model_tag = "latest" + + model_base = os.path.basename(model_name) + symlink_path = os.path.join(ramalama_models, f"{model_base}:{model_tag}") + if os.path.exists(symlink_path): + return + + manifests = os.path.join(ramalama_store, "manifests", + registry, model_name, model_tag) + registry_head = f"{registry_scheme}://{registry}/v2/{model_name}" + pull_ollama_manifest(ramalama_store, manifests, + accept, registry_head, model_tag) + with open(manifests, 'r') as f: + manifest_data = json.load(f) + + pull_ollama_config_blob(ramalama_store, accept, + registry_head, manifest_data) + for layer in manifest_data["layers"]: + layer_digest = layer["digest"] + if layer["mediaType"] != 'application/vnd.ollama.image.model': + continue + + pull_ollama_blob(ramalama_store, layer_digest, 
accept, + registry_head, ramalama_models, model_name, model_tag, symlink_path) + + +def usage(): + print("Usage:") + print(f" {os.path.basename(__file__)} COMMAND") + print() + print("Commands:") + print(" pull MODEL Pull a model") + sys.exit(1) + + +def get_ramalama_store(): + if os.geteuid() == 0: + return "/var/lib/ramalama" + + return os.path.expanduser("~/.local/share/ramalama") + + +def main(): + if len(sys.argv) < 2: + usage() + + ramalama_store = get_ramalama_store() + command = sys.argv[1] + if command == "pull" and len(sys.argv) > 2: + pull_cli(ramalama_store + "/repos/ollama", + ramalama_store + "/models/ollama", sys.argv[2]) + else: + usage() + + +if __name__ == "__main__": + main() diff --git a/ramalama.py b/ramalama.py deleted file mode 100755 index 108d775..0000000 --- a/ramalama.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/python3 - -import os -import sys -import subprocess -import json -import hashlib - - -def verify_checksum(filename): - """ - Verifies if the SHA-256 checksum of a file matches the checksum provided in the filename. - - Args: - filename (str): The filename containing the checksum prefix (e.g., "sha256:") - - Returns: - bool: True if the checksum matches, False otherwise. - """ - - if not os.path.exists(filename): - return False - - # Check if the filename starts with "sha256:" - fn_base = os.path.basename(filename) - if not fn_base.startswith("sha256:"): - raise ValueError(f"Filename does not start with 'sha256:': {fn_base}") - - # Extract the expected checksum from the filename - expected_checksum = fn_base.split(":")[1] - if len(expected_checksum) != 64: - raise ValueError("Invalid checksum length in filename") - - # Calculate the SHA-256 checksum of the file contents - sha256_hash = hashlib.sha256() - with open(filename, "rb") as f: - for byte_block in iter(lambda: f.read(4096), b""): - sha256_hash.update(byte_block) - - # Get the calculated checksum - calculated_checksum = sha256_hash.hexdigest() - - # Compare the checksums - return calculated_checksum == expected_checksum - - -def run_command(args): - try: - subprocess.run(args, check=True) - except subprocess.CalledProcessError as e: - sys.exit(e.returncode) - - -def run_curl_command(args, filename): - if not verify_checksum(filename): - run_command(args) - - -def pull_ollama_manifest(ramalama_store, manifests, accept, registry_head, model_tag): - os.makedirs(os.path.dirname(manifests), exist_ok=True) - os.makedirs(os.path.join(ramalama_store, "blobs"), exist_ok=True) - curl_command = [ - "curl", "-f", "-s", "--header", accept, - "-o", manifests, - f"{registry_head}/manifests/{model_tag}" - ] - run_command(curl_command) - - -def pull_ollama_config_blob(ramalama_store, accept, registry_head, manifest_data): - cfg_hash = manifest_data["config"]["digest"] - config_blob_path = os.path.join(ramalama_store, "blobs", cfg_hash) - curl_command = [ - "curl", "-f", "-s", "-L", "-C", "-", "--header", accept, - "-o", config_blob_path, - f"{registry_head}/blobs/{cfg_hash}" - ] - run_curl_command(curl_command, config_blob_path) - - -def pull_ollama_blob(ramalama_store, layer_digest, accept, registry_head, ramalama_models, model_name, model_tag, symlink_path): - layer_blob_path = os.path.join(ramalama_store, "blobs", layer_digest) - curl_command = ["curl", "-f", "-L", "-C", "-", "--progress-bar", "--header", - accept, "-o", layer_blob_path, f"{registry_head}/blobs/{layer_digest}"] - run_curl_command(curl_command, layer_blob_path) - os.makedirs(ramalama_models, exist_ok=True) - relative_target_path = 
os.path.relpath( - layer_blob_path, start=os.path.dirname(symlink_path)) - run_command(["ln", "-sf", relative_target_path, symlink_path]) - - -def pull_cli(ramalama_store, ramalama_models, model): - registry_scheme = "https" - registry = "registry.ollama.ai" - model = "library/" + model - accept = "Accept: application/vnd.docker.distribution.manifest.v2+json" - if ':' in model: - model_name, model_tag = model.split(':', 1) - else: - model_name = model - model_tag = "latest" - - model_base = os.path.basename(model_name) - symlink_path = os.path.join(ramalama_models, f"{model_base}:{model_tag}") - if os.path.exists(symlink_path): - return - - manifests = os.path.join(ramalama_store, "manifests", - registry, model_name, model_tag) - registry_head = f"{registry_scheme}://{registry}/v2/{model_name}" - pull_ollama_manifest(ramalama_store, manifests, - accept, registry_head, model_tag) - with open(manifests, 'r') as f: - manifest_data = json.load(f) - - pull_ollama_config_blob(ramalama_store, accept, - registry_head, manifest_data) - for layer in manifest_data["layers"]: - layer_digest = layer["digest"] - if layer["mediaType"] != 'application/vnd.ollama.image.model': - continue - - pull_ollama_blob(ramalama_store, layer_digest, accept, - registry_head, ramalama_models, model_name, model_tag, symlink_path) - - -def usage(): - print("Usage:") - print(f" {os.path.basename(__file__)} COMMAND") - print() - print("Commands:") - print(" pull MODEL Pull a model") - sys.exit(1) - - -def get_ramalama_store(): - if os.geteuid() == 0: - return "/var/lib/ramalama" - - return os.path.expanduser("~/.local/share/ramalama") - - -def main(): - if len(sys.argv) < 2: - usage() - - ramalama_store = get_ramalama_store() - command = sys.argv[1] - if command == "pull" and len(sys.argv) > 2: - pull_cli(ramalama_store + "/repos/ollama", - ramalama_store + "/models/ollama", sys.argv[2]) - else: - usage() - - -if __name__ == "__main__": - main()
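
For reference, a rough shell-level sketch of what the new `pull` path above does for `ramalama pull granite-code`. The script itself shells out to curl with similar flags; `jq` is used here only for illustration and is not a dependency of the script, and the digest value is taken from the downloaded manifest rather than hard-coded:

```
# fetch the manifest for library/granite-code:latest from the ollama registry
curl -fsL -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
  -o manifest.json \
  "https://registry.ollama.ai/v2/library/granite-code/manifests/latest"

# pick out the model weights layer and download it (resumable), named after its digest
digest=$(jq -r '.layers[] | select(.mediaType == "application/vnd.ollama.image.model") | .digest' manifest.json)
curl -fL -C - --progress-bar \
  -o "$digest" \
  "https://registry.ollama.ai/v2/library/granite-code/blobs/$digest"

# the blob is content-addressed, so its checksum can be checked against its own name,
# which is what verify_checksum() does before re-downloading an existing blob
echo "${digest#sha256:}  $digest" | sha256sum -c -
```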