Add --host=0.0.0.0 if running llama.cpp serve within a container
Turn on some testing of --nocontainer serve and run, at least with
dryrun.

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
rhatdan committed Nov 12, 2024
1 parent 76afd2d commit ebea61d
Showing 5 changed files with 62 additions and 38 deletions.
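
A note on the motivation: a server that binds to 127.0.0.1 inside a container is unreachable through the container's published ports, so the in-container default has to be 0.0.0.0. A rough illustration of the failure mode (the image name and model path here are hypothetical placeholders, not values from this commit):

    # Bound to loopback inside the container: the published port cannot reach it.
    podman run --rm -p 8080:8080 $IMAGE llama-server --host 127.0.0.1 --port 8080 -m $MODEL
    curl http://localhost:8080/health    # fails from the host

    # Bound to all interfaces: the port mapping works.
    podman run --rm -p 8080:8080 $IMAGE llama-server --host 0.0.0.0 --port 8080 -m $MODEL
    curl http://localhost:8080/health    # responds once the model has loaded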
3 changes: 3 additions & 0 deletions docs/ramalama-serve.1.md

@@ -38,6 +38,9 @@ Generate specified configuration format for running the AI Model as a service
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--host**="0.0.0.0"
+ip address to listen
+
 #### **--name**, **-n**
 Name of the container to run the Model in.
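
The flag documented above also lets a host-side server be restricted to loopback; a sketch, reusing the `tiny` model name that appears in the tests below:

    ramalama --nocontainer serve --host 127.0.0.1 tiny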
1 change: 1 addition & 0 deletions ramalama/cli.py

@@ -614,6 +614,7 @@ def serve_parser(subparsers):
     parser = subparsers.add_parser("serve", help="serve REST API on specified AI Model")
     parser.add_argument("--authfile", help="path of the authentication file")
     parser.add_argument("-d", "--detach", action="store_true", dest="detach", help="run the container in detached mode")
+    parser.add_argument("--host", default=config.get('host', "0.0.0.0"), help="ip address to listen")
     parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
     parser.add_argument(
         "-p", "--port", default=config.get('port', "8080"), help="port for AI Model server to listen on"
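
As the default=config.get('host', "0.0.0.0") fallback above implies, an explicit --host beats a host key from the user's configuration, which in turn beats the built-in 0.0.0.0. A sketch of the resulting behavior:

    ramalama serve tiny                     # no flag, no config key: listens on 0.0.0.0
    ramalama serve --host 127.0.0.1 tiny    # the flag overrides config and default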
12 changes: 7 additions & 5 deletions ramalama/model.py

@@ -267,9 +267,6 @@ def run(self, args):
         if not args.container:
             exec_model_path = model_path
 
-        # if args.container:
-        #     model_path = mnt_file
-
         exec_args = ["llama-cli", "-m", exec_model_path, "--in-prefix", "", "--in-suffix", ""]
 
         if not args.debug:
@@ -289,6 +286,9 @@ def run(self, args):
         try:
             if self.exec_model_in_container(model_path, exec_args, args):
                 return
+            if args.dryrun:
+                dry_run(exec_args)
+                return
             exec_cmd(exec_args, args.debug, debug=args.debug)
         except FileNotFoundError as e:
             if in_container():
@@ -317,8 +317,7 @@ def serve(self, args):
         else:
             if args.gpu:
                 exec_args.extend(self.gpu_args())
-        if in_container():
-            exec_args.extend(["--host", "0.0.0.0"])
+        exec_args.extend(["--host", args.host])
 
         if args.generate == "quadlet":
             return self.quadlet(model_path, args, exec_args)
@@ -332,6 +331,9 @@ def serve(self, args):
         try:
             if self.exec_model_in_container(model_path, exec_args, args):
                 return
+            if args.dryrun:
+                dry_run(exec_args)
+                return
             exec_cmd(exec_args, debug=args.debug)
         except FileNotFoundError as e:
             if in_container():
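
The two args.dryrun checks above only fire on the --nocontainer path, since exec_model_in_container handles the containerized case and returns truthy first. Assuming dry_run simply prints the assembled argv, the output should match the expectation the tests below encode:

    ramalama --nocontainer --dryrun run tiny
    # prints roughly: llama-cli -m /path/to/model --in-prefix --in-suffix --no-display-prompt -p ...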
40 changes: 23 additions & 17 deletions test/system/030-run.bats

@@ -3,31 +3,37 @@
 load helpers
 
 @test "ramalama --dryrun run basic output" {
-    skip_if_nocontainer
-
     model=tiny
     image=m_$(safename)
 
-    run_ramalama info
-    conman=$(jq .Engine <<< $output | tr -d '"' )
-    verify_begin="${conman} run --rm -i --label RAMALAMA --security-opt=label=disable --name"
+    if is_container; then
+        run_ramalama info
+        conman=$(jq .Engine <<< $output | tr -d '"' )
+        verify_begin="${conman} run --rm -i --label RAMALAMA --security-opt=label=disable --name"
 
-    run_ramalama --dryrun run ${model}
-    is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
-    is "$output" ".*${model}" "verify model name"
+        run_ramalama --dryrun run ${model}
+        is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
+        is "$output" ".*${model}" "verify model name"
 
-    run_ramalama --dryrun run --name foobar ${model}
-    is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
-    is "$output" ".*${model}" "verify model name"
+        run_ramalama --dryrun run --name foobar ${model}
+        is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
+        is "$output" ".*${model}" "verify model name"
 
-    run_ramalama --dryrun run --name foobar ${model}
-    is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
+        run_ramalama --dryrun run --name foobar ${model}
+        is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
 
-    run_ramalama 1 --nocontainer run --name foobar tiny
-    is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+        run_ramalama 1 --nocontainer run --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
 
-    RAMALAMA_IMAGE=${image} run_ramalama --dryrun run ${model}
-    is "$output" ".*${image} /bin/sh -c" "verify image name"
+        RAMALAMA_IMAGE=${image} run_ramalama --dryrun run ${model}
+        is "$output" ".*${image} /bin/sh -c" "verify image name"
+    else
+        run_ramalama --dryrun run ${model}
+        is "$output" 'llama-cli -m /path/to/model --in-prefix --in-suffix --no-display-prompt -p.*' "dryrun correct"
+
+        run_ramalama 1 run --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+    fi
 }
 
 @test "ramalama run tiny with prompt" {
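
The rewritten test can be run directly with bats(1); a sketch, noting that whether the container or --nocontainer branch executes depends on the helpers the suite loads:

    bats test/system/030-run.bats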
44 changes: 28 additions & 16 deletions test/system/040-serve.bats

@@ -7,24 +7,36 @@ load setup_suite
 
 verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name"
 
 @test "ramalama --dryrun serve basic output" {
-    skip_if_nocontainer
-
     model=m_$(safename)
 
-    run_ramalama --dryrun serve ${model}
-    is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
-    is "$output" ".*${model}" "verify model name"
-
-    run_ramalama --dryrun serve --name foobar ${model}
-    is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
-    is "$output" ".*${model}" "verify model name"
-
-    run_ramalama 1 serve --name foobar MODEL
-    is "$output" ".*Error: failed to pull .*MODEL" "dryrun correct with --name"
-
-    run_ramalama 1 --nocontainer serve --name foobar tiny
-    is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
-    run_ramalama stop --all
+    if is_container; then
+        run_ramalama --dryrun serve ${model}
+        is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
+        is "$output" ".*${model}" "verify model name"
+
+        run_ramalama --dryrun serve --name foobar ${model}
+        is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
+        assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container"
+        is "$output" ".*${model}" "verify model name"
+
+        run_ramalama --dryrun serve --host 127.1.2.3 --name foobar ${model}
+        assert "$output" =~ ".*--host 127.1.2.3" "verify --host is modified when run within container"
+        is "$output" ".*${model}" "verify model name"
+
+        run_ramalama 1 --nocontainer serve --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+        run_ramalama stop --all
+    else
+        run_ramalama --dryrun serve ${model}
+        assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
+        run_ramalama --dryrun serve --host 127.0.0.1 ${model}
+        assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
+        run_ramalama 1 --nocontainer serve --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+    fi
+
+    run_ramalama 1 serve MODEL
+    is "$output" ".*Error: failed to pull .*MODEL" "failed to pull model"
 }
 
 @test "ramalama --detach serve" {
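
With the --host change in place, a served model should be reachable from outside the container through the published port. A hedged end-to-end sketch, combining the -d/--detach and --name flags from cli.py above with llama.cpp's /health endpoint and the default port 8080:

    ramalama serve -d --name mymodel tiny
    curl http://127.0.0.1:8080/health    # 200 once the model is loaded
    ramalama stop mymodel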
