Commit

Merge pull request #181 from rhatdan/origin
Add ability to generate a quadlet file from service
ericcurtin authored Sep 24, 2024
2 parents fac9fdf + cd20804 commit ab488f1
Showing 5 changed files with 92 additions and 6 deletions.
29 changes: 29 additions & 0 deletions docs/ramalama-serve.1.md
@@ -21,6 +21,9 @@ Use the `ramalama stop` command to stop the container running the served ramalama
#### **--help**, **-h**
show this help message and exit

#### **--generate** ['quadlet']
Generate the specified configuration format for running the AI Model as a service

#### **--name**, **-n**
Name of the container to run the Model in.

@@ -43,6 +46,32 @@ CONTAINER ID IMAGE COMMAND CREATED
3f64927f11a5 quay.io/ramalama/ramalama:latest /usr/bin/ramalama... 17 seconds ago Up 17 seconds 0.0.0.0:8082->8082/tcp ramalama_YMPQvJxN97
```

Generate a quadlet for running the AI Model service
```
$ ramalama serve --generate=quadlet granite
[Unit]
Description=RamaLama granite AI Model Service
After=local-fs.target
[Container]
Device=+/dev/dri
Device=+/dev/kfd
Environment=RAMALAMA_TRANSPORT=HuggingFace
Exec=llama-server --port 8080 -m /home/dwalsh/.local/share/ramalama/models/huggingface/instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf
Image=quay.io/ramalama/ramalama:latest
Label=RAMALAMA container
Name=ramalama_YcTTynYeJ6
SecurityLabelDisable=true
Volume=/home/dwalsh/ramalama/ramalama:/usr/bin/ramalama/ramalama:ro
Volume=./ramalama.py:/var/lib/ramalama:ro
PublishPort=8080
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
```
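The generated unit is a Podman quadlet `.container` file printed to standard output. As an illustrative sketch (the destination path and the file name `granite.container` are examples only, not something this command creates), it could be installed for a user session and started with systemd:

```
$ ramalama serve --generate=quadlet granite > ~/.config/containers/systemd/granite.container
$ systemctl --user daemon-reload
$ systemctl --user start granite.service
```

Quadlet derives the service name from the file name, so `granite.container` is started as `granite.service`.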

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**, **[ramalama-stop(1)](ramalama-stop.1.md)**

10 changes: 5 additions & 5 deletions ramalama/cli.py
@@ -13,7 +13,7 @@
import time

from ramalama.huggingface import Huggingface
from ramalama.common import in_container, container_manager, exec_cmd, run_cmd, default_image
from ramalama.common import in_container, container_manager, exec_cmd, run_cmd, default_image, find_working_directory
from ramalama.oci import OCI
from ramalama.ollama import Ollama
from ramalama.shortnames import Shortnames
@@ -344,6 +344,7 @@ def serve_parser(subparsers):
        "-n", "--name", dest="name", default=_name(), help="name of container in which the Model will be run"
    )
    parser.add_argument("-p", "--port", default="8080", help="port for AI Model server to listen on")
    parser.add_argument("--generate", choices=["quadlet"], help="generate the specified configuration format for running the AI Model as a service")
    parser.add_argument("MODEL")  # positional argument
    parser.set_defaults(func=serve_cli)

@@ -432,11 +433,10 @@ def get_store():
    return os.path.expanduser("~/.local/share/ramalama")


def find_working_directory():
    return os.path.dirname(__file__)


def run_container(args):
    # When a configuration file is being generated, the Model is not run, so no container is started.
    if hasattr(args, "generate") and args.generate:
        return False

    if args.nocontainer:
        if hasattr(args, "name") and args.name:
            raise IndexError("--nocontainer and --name options conflict. --name requires a container.")
4 changes: 4 additions & 0 deletions ramalama/common.py
@@ -52,6 +52,10 @@ def run_cmd(args):
    return subprocess.run(args, check=True, stdout=subprocess.PIPE)


def find_working_directory():
    return os.path.dirname(__file__)


def run_curl_cmd(args, filename):
    if not verify_checksum(filename):
        try:
45 changes: 44 additions & 1 deletion ramalama/model.py
@@ -1,6 +1,6 @@
import os
import sys
from ramalama.common import container_manager, exec_cmd
from ramalama.common import container_manager, exec_cmd, find_working_directory, default_image


class Model:
@@ -106,4 +106,47 @@ def serve(self, args):
        if args.runtime == "vllm":
            exec_args = ["vllm", "serve", "--port", args.port, symlink_path]

        if args.generate == "quadlet":
            return self.quadlet(args, exec_args)

        exec_cmd(exec_args)

    def quadlet(self, args, exec_args):
        # Print a Podman quadlet .container unit that runs this AI Model as a service.
        port_string = ""
        if hasattr(args, "port"):
            port_string = f"PublishPort={args.port}"

        name_string = ""
        if hasattr(args, "name") and args.name != "":
            name_string = f"Name={args.name}"

        print("""
[Unit]
Description=RamaLama %s AI Model Service
After=local-fs.target
[Container]
Device=+/dev/dri
Device=+/dev/kfd
Environment=RAMALAMA_TRANSPORT=%s
Exec=%s
Image=%s
Label=RAMALAMA container
%s
SecurityLabelDisable=true
Volume=%s:/usr/bin/ramalama/ramalama:ro
Volume=%s:/var/lib/ramalama:ro
%s
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
""" % (args.UNRESOLVED_MODEL,
       self.type,
       " ".join(exec_args),
       default_image(),
       name_string,
       find_working_directory(),
       sys.argv[0],
       port_string))
10 changes: 10 additions & 0 deletions test/system/040-serve.bats
@@ -91,4 +91,14 @@ verify_begin="podman run --rm -it --label \"RAMALAMA container\" --security-opt=
    is "$output" "Error: specifying --all and container name, ${name}, not allowed" "list correct"
}

@test "ramalama serve --generate=quadlet" {
    model=tiny
    name=c_$(safename)

    run_ramalama serve --name=${name} --port 1234 --generate=quadlet ${model}
    is "$output" ".*PublishPort=1234" "PublishPort should match"
    is "$output" ".*Name=${name}" "Quadlet should have name field"
    is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
}

# vim: filetype=sh
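The new check can be exercised on its own with bats; this sketch assumes bats-core is installed and the command is run from the repository root (the project may also drive its system tests through its own test target):

```
$ bats test/system/040-serve.bats
```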
