Commit

Merge pull request #181 from rhatdan/origin
Add ability to generate a quadlet file from service
ericcurtin authored Sep 24, 2024
2 parents fac9fdf + cd20804 commit ab488f1
Showing 5 changed files with 92 additions and 6 deletions.
29 changes: 29 additions & 0 deletions docs/ramalama-serve.1.md
@@ -21,6 +21,9 @@ Use the `ramalama stop` command to stop the container running the served ramalama
#### **--help**, **-h**
show this help message and exit

#### **--generate** ['quadlet']
Generate the specified configuration format for running the AI Model as a service

#### **--name**, **-n**
Name of the container to run the Model in.

@@ -43,6 +46,32 @@ CONTAINER ID IMAGE COMMAND CREATED
3f64927f11a5 quay.io/ramalama/ramalama:latest /usr/bin/ramalama... 17 seconds ago Up 17 seconds 0.0.0.0:8082->8082/tcp ramalama_YMPQvJxN97
```

Generate a quadlet for running the AI Model service
```
$ ramalama serve --generate=quadlet granite
[Unit]
Description=RamaLama granite AI Model Service
After=local-fs.target
[Container]
Device=+/dev/dri
Device=+/dev/kfd
Environment=RAMALAMA_TRANSPORT=HuggingFace
Exec=llama-server --port 8080 -m /home/dwalsh/.local/share/ramalama/models/huggingface/instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf
Image=quay.io/ramalama/ramalama:latest
Label=RAMALAMA container
Name=ramalama_YcTTynYeJ6
SecurityLabelDisable=true
Volume=/home/dwalsh/ramalama/ramalama:/usr/bin/ramalama/ramalama:ro
Volume=./ramalama.py:/var/lib/ramalama:ro
PublishPort=8080
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
```
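The generated unit is a Podman quadlet `.container` file printed to standard output. As an illustrative sketch (the destination path and the file name `granite.container` are examples only, not something this command creates), it could be installed for a user session and started with systemd:

```
$ ramalama serve --generate=quadlet granite > ~/.config/containers/systemd/granite.container
$ systemctl --user daemon-reload
$ systemctl --user start granite.service
```

Quadlet derives the service name from the file name, so `granite.container` is started as `granite.service`.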

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**, **[ramalama-stop(1)](ramalama-stop.1.md)**

10 changes: 5 additions & 5 deletions ramalama/cli.py
@@ -13,7 +13,7 @@
import time

from ramalama.huggingface import Huggingface
from ramalama.common import in_container, container_manager, exec_cmd, run_cmd, default_image
from ramalama.common import in_container, container_manager, exec_cmd, run_cmd, default_image, find_working_directory
from ramalama.oci import OCI
from ramalama.ollama import Ollama
from ramalama.shortnames import Shortnames
@@ -344,6 +344,7 @@ def serve_parser(subparsers):
        "-n", "--name", dest="name", default=_name(), help="name of container in which the Model will be run"
    )
    parser.add_argument("-p", "--port", default="8080", help="port for AI Model server to listen on")
    parser.add_argument("--generate", choices=["quadlet"], help="generate the specified configuration format for running the AI Model as a service")
    parser.add_argument("MODEL")  # positional argument
    parser.set_defaults(func=serve_cli)

@@ -432,11 +433,10 @@ def get_store():
    return os.path.expanduser("~/.local/share/ramalama")


def find_working_directory():
    return os.path.dirname(__file__)


def run_container(args):
    # When a configuration file is being generated, the Model is not run, so no container is started.
    if hasattr(args, "generate") and args.generate:
        return False

    if args.nocontainer:
        if hasattr(args, "name") and args.name:
            raise IndexError("--nocontainer and --name options conflict. --name requires a container.")
4 changes: 4 additions & 0 deletions ramalama/common.py
@@ -52,6 +52,10 @@ def run_cmd(args):
    return subprocess.run(args, check=True, stdout=subprocess.PIPE)


def find_working_directory():
    return os.path.dirname(__file__)


def run_curl_cmd(args, filename):
    if not verify_checksum(filename):
        try:
45 changes: 44 additions & 1 deletion ramalama/model.py
@@ -1,6 +1,6 @@
import os
import sys
from ramalama.common import container_manager, exec_cmd
from ramalama.common import container_manager, exec_cmd, find_working_directory, default_image


class Model:
@@ -106,4 +106,47 @@ def serve(self, args):
        if args.runtime == "vllm":
            exec_args = ["vllm", "serve", "--port", args.port, symlink_path]

        if args.generate == "quadlet":
            return self.quadlet(args, exec_args)

        exec_cmd(exec_args)

    def quadlet(self, args, exec_args):
        # Print a Podman quadlet .container unit that runs this AI Model as a service.
        port_string = ""
        if hasattr(args, "port"):
            port_string = f"PublishPort={args.port}"

        name_string = ""
        if hasattr(args, "name") and args.name != "":
            name_string = f"Name={args.name}"

        print("""
[Unit]
Description=RamaLama %s AI Model Service
After=local-fs.target
[Container]
Device=+/dev/dri
Device=+/dev/kfd
Environment=RAMALAMA_TRANSPORT=%s
Exec=%s
Image=%s
Label=RAMALAMA container
%s
SecurityLabelDisable=true
Volume=%s:/usr/bin/ramalama/ramalama:ro
Volume=%s:/var/lib/ramalama:ro
%s
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
""" % (args.UNRESOLVED_MODEL,
       self.type,
       " ".join(exec_args),
       default_image(),
       name_string,
       find_working_directory(),
       sys.argv[0],
       port_string))
10 changes: 10 additions & 0 deletions test/system/040-serve.bats
@@ -91,4 +91,14 @@ verify_begin="podman run --rm -it --label \"RAMALAMA container\" --security-opt=
    is "$output" "Error: specifying --all and container name, ${name}, not allowed" "list correct"
}

@test "ramalama serve --generate=quadlet" {
    model=tiny
    name=c_$(safename)

    run_ramalama serve --name=${name} --port 1234 --generate=quadlet ${model}
    is "$output" ".*PublishPort=1234" "PublishPort should match"
    is "$output" ".*Name=${name}" "Quadlet should have name field"
    is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
}

# vim: filetype=sh
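The new check can be exercised on its own with bats; this sketch assumes bats-core is installed and the command is run from the repository root (the project may also drive its system tests through its own test target):

```
$ bats test/system/040-serve.bats
```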
