diff --git a/docs/ramalama-serve.1.md b/docs/ramalama-serve.1.md
index 50b8b886..e0ddc7ef 100644
--- a/docs/ramalama-serve.1.md
+++ b/docs/ramalama-serve.1.md
@@ -21,6 +21,9 @@ Use the `ramalama stop` command to stop the container running the served ramalam
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--generate** ['quadlet']
+Generate specified configuration format for running the AI Model as a service
+
 #### **--name**, **-n**
 Name of the container to run the Model in.
 
@@ -43,6 +46,32 @@ CONTAINER ID  IMAGE                             COMMAND               CREATED
 3f64927f11a5  quay.io/ramalama/ramalama:latest  /usr/bin/ramalama...  17 seconds ago  Up 17 seconds  0.0.0.0:8082->8082/tcp  ramalama_YMPQvJxN97
 ```
 
+Generate a quadlet for running the AI Model service
+```
+$ ramalama serve --generate=quadlet granite
+
+[Unit]
+Description=RamaLama granite AI Model Service
+After=local-fs.target
+
+[Container]
+Device=+/dev/dri
+Device=+/dev/kfd
+Environment=RAMALAMA_TRANSPORT=HuggingFace
+Exec=llama-server --port 8080 -m /home/dwalsh/.local/share/ramalama/models/huggingface/instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf
+Image=quay.io/ramalama/ramalama:latest
+Label=RAMALAMA container
+Name=ramalama_YcTTynYeJ6
+SecurityLabelDisable=true
+Volume=/home/dwalsh/ramalama/ramalama:/usr/bin/ramalama/ramalama:ro
+Volume=./ramalama.py:/var/lib/ramalama:ro
+PublishPort=8080
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
+```
+
 ## SEE ALSO
 **[ramalama(1)](ramalama.1.md)**, **[ramalama-stop(1)](ramalama-stop.1.md)**
 
diff --git a/ramalama/cli.py b/ramalama/cli.py
index 2c54c09a..e63ed788 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -13,7 +13,7 @@
 import time
 
 from ramalama.huggingface import Huggingface
-from ramalama.common import in_container, container_manager, exec_cmd, run_cmd, default_image
+from ramalama.common import in_container, container_manager, exec_cmd, run_cmd, default_image, find_working_directory
 from ramalama.oci import OCI
 from ramalama.ollama import Ollama
 from ramalama.shortnames import Shortnames
@@ -344,6 +344,7 @@ def serve_parser(subparsers):
         "-n", "--name", dest="name", default=_name(), help="name of container in which the Model will be run"
     )
     parser.add_argument("-p", "--port", default="8080", help="port for AI Model server to listen on")
+    parser.add_argument("--generate", choices=["quadlet"], help="generate specified configuration format for running the AI Model as a service")
     parser.add_argument("MODEL")  # positional argument
     parser.set_defaults(func=serve_cli)
 
@@ -432,11 +433,10 @@ def get_store():
     return os.path.expanduser("~/.local/share/ramalama")
 
 
-def find_working_directory():
-    return os.path.dirname(__file__)
-
-
 def run_container(args):
+    if hasattr(args, "generate") and args.generate:
+        return False
+
     if args.nocontainer:
         if hasattr(args, "name") and args.name:
             raise IndexError("--nocontainer and --name options conflict. --name requires a container.")
diff --git a/ramalama/common.py b/ramalama/common.py
index ac982aa0..2a1985eb 100644
--- a/ramalama/common.py
+++ b/ramalama/common.py
@@ -52,6 +52,10 @@ def run_cmd(args):
     return subprocess.run(args, check=True, stdout=subprocess.PIPE)
 
 
+def find_working_directory():
+    return os.path.dirname(__file__)
+
+
 def run_curl_cmd(args, filename):
     if not verify_checksum(filename):
         try:
diff --git a/ramalama/model.py b/ramalama/model.py
index e94d40c4..5e888051 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -1,6 +1,6 @@
 import os
 import sys
-from ramalama.common import container_manager, exec_cmd
+from ramalama.common import container_manager, exec_cmd, find_working_directory, default_image
 
 
 class Model:
@@ -106,4 +106,47 @@ def serve(self, args):
         if args.runtime == "vllm":
             exec_args = ["vllm", "serve", "--port", args.port, symlink_path]
 
+        if args.generate == "quadlet":
+            return self.quadlet(args, exec_args)
+
         exec_cmd(exec_args)
+
+
+    def quadlet(self, args, exec_args):
+        port_string = ""
+        if hasattr(args, "port"):
+            port_string = f"PublishPort={args.port}"
+
+        name_string = ""
+        if hasattr(args, "name") and args.name != "":
+            name_string = f"Name={args.name}"
+
+        print("""
+[Unit]
+Description=RamaLama %s AI Model Service
+After=local-fs.target
+
+[Container]
+Device=+/dev/dri
+Device=+/dev/kfd
+Environment=RAMALAMA_TRANSPORT=%s
+Exec=%s
+Image=%s
+Label=RAMALAMA container
+%s
+SecurityLabelDisable=true
+Volume=%s:/usr/bin/ramalama/ramalama:ro
+Volume=%s:/var/lib/ramalama:ro
+%s
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
+""" % (args.UNRESOLVED_MODEL,
+       self.type,
+       " ".join(exec_args),
+       default_image(),
+       name_string,
+       find_working_directory(),
+       sys.argv[0],
+       port_string))
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index d8eaab0a..a82d5160 100644
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -91,4 +91,14 @@ verify_begin="podman run --rm -it --label \"RAMALAMA container\" --security-opt=
     is "$output" "Error: specifying --all and container name, ${name}, not allowed" "list correct"
 }
 
+@test "ramalama serve --generate=quadlet" {
+    model=tiny
+    name=c_$(safename)
+
+    run_ramalama serve --name=${name} --port 1234 --generate=quadlet ${model}
+    is "$output" ".*PublishPort=1234" "PublishPort should match"
+    is "$output" ".*Name=${name}" "Quadlet should have name field"
+    is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
+}
+
 # vim: filetype=sh
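
A possible way to consume the generated unit, assuming it is saved under the hypothetical file name `granite.container` (Quadlet derives the service name from the file name, so this sketch yields `granite.service`):

```
$ ramalama serve --generate=quadlet granite > ~/.config/containers/systemd/granite.container
$ systemctl --user daemon-reload          # Quadlet translates granite.container into granite.service
$ systemctl --user start granite.service  # starts the served model as a rootless systemd service
```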