feature(worker): Allow configuring inference job timeout and ollama keep alive. Fixes #389 #224
hoarder-app · Sep 15, 2024 · f263f9e · f263f9e
1 parent 3b8cebb
commit f263f9e
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 19 deletions.
diff --git a/apps/workers/inference.ts b/apps/workers/inference.ts
@@ -103,6 +103,7 @@ class OllamaInferenceClient implements InferenceClient {
       model: model,
       format: "json",
       stream: true,
+      keep_alive: serverConfig.inference.ollamaKeepAlive,
       messages: [
         { role: "user", content: prompt, images: image ? [image] : undefined },
       ],

diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
@@ -81,7 +81,7 @@ export class OpenAiWorker {
       {
         concurrency: 1,
         pollIntervalMs: 1000,
-        timeoutSecs: 30,
+        timeoutSecs: serverConfig.inference.jobTimeoutSec,
       },
     );
 

diff --git a/docs/docs/03-configuration.md b/docs/docs/03-configuration.md
@@ -2,16 +2,16 @@
 
 The app is mainly configured by environment variables. All the used environment variables are listed in [packages/shared/config.ts](https://github.com/hoarder-app/hoarder/blob/main/packages/shared/config.ts). The most important ones are:
 
-| Name                      | Required                              | Default   | Description                                                                                                                                    |
-| ------------------------- | ------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| DATA_DIR                  | Yes                                   | Not set   | The path for the persistent data directory. This is where the db and the uploaded assets live.                                                 |
-| NEXTAUTH_URL              | Yes                                   | Not set   | Should point to the address of your server. The app will function without it, but will redirect you to wrong addresses on signout for example. |
-| NEXTAUTH_SECRET           | Yes                                   | Not set   | Random string used to sign the JWT tokens. Generate one with `openssl rand -base64 36`.                                                        |
-| MEILI_ADDR                | No                                    | Not set   | The address of meilisearch. If not set, Search will be disabled. E.g. (`http://meilisearch:7700`)                                              |
-| MEILI_MASTER_KEY          | Only in Prod and if search is enabled | Not set   | The master key configured for meilisearch. Not needed in development environment. Generate one with `openssl rand -base64 36`                  |
-| DISABLE_SIGNUPS           | No                                    | false     | If enabled, no new signups will be allowed and the signup button will be disabled in the UI                                                    |
-| MAX_ASSET_SIZE_MB         | No                                    | 4         | Sets the maximum allowed asset size (in MB) to be uploaded                                                                                     |
-| DISABLE_NEW_RELEASE_CHECK | No                                    | false     | If set to true, latest release check will be disabled in the admin panel.                                                                      |
+| Name                      | Required                              | Default | Description                                                                                                                                    |
+| ------------------------- | ------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
+| DATA_DIR                  | Yes                                   | Not set | The path for the persistent data directory. This is where the db and the uploaded assets live.                                                 |
+| NEXTAUTH_URL              | Yes                                   | Not set | Should point to the address of your server. The app will function without it, but will redirect you to wrong addresses on signout for example. |
+| NEXTAUTH_SECRET           | Yes                                   | Not set | Random string used to sign the JWT tokens. Generate one with `openssl rand -base64 36`.                                                        |
+| MEILI_ADDR                | No                                    | Not set | The address of meilisearch. If not set, Search will be disabled. E.g. (`http://meilisearch:7700`)                                              |
+| MEILI_MASTER_KEY          | Only in Prod and if search is enabled | Not set | The master key configured for meilisearch. Not needed in development environment. Generate one with `openssl rand -base64 36`                  |
+| DISABLE_SIGNUPS           | No                                    | false   | If enabled, no new signups will be allowed and the signup button will be disabled in the UI                                                    |
+| MAX_ASSET_SIZE_MB         | No                                    | 4       | Sets the maximum allowed asset size (in MB) to be uploaded                                                                                     |
+| DISABLE_NEW_RELEASE_CHECK | No                                    | false   | If set to true, latest release check will be disabled in the admin panel.                                                                      |
 
 ## Inference Configs (For automatic tagging)
 
@@ -23,14 +23,16 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 - Running local models is a recent addition and not as battle tested as using OpenAI, so proceed with care (and potentially expect a bunch of inference failures).
   :::
 
-| Name                  | Required | Default            | Description                                                                                                                                                                                     |
-| --------------------- | -------- | ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| OPENAI_API_KEY        | No       | Not set            | The OpenAI key used for automatic tagging. More on that in [here](/openai).                                                                                                                     |
-| OPENAI_BASE_URL       | No       | Not set            | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. |
-| OLLAMA_BASE_URL       | No       | Not set            | If you want to use ollama for local inference, set the address of ollama API here.                                                                                                              |
-| INFERENCE_TEXT_MODEL  | No       | gpt-4o-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama.                                                                                     |
-| INFERENCE_IMAGE_MODEL | No       | gpt-4o-mini  | The model to use for image inference. You'll need to change this to some other model if you're using ollama and that model needs to support vision APIs (e.g. llava).                           |
-| INFERENCE_LANG        | No       | english            | The language in which the tags will be generated.                                                                                                                                               |
+| Name                      | Required | Default     | Description                                                                                                                                                                                     |
+| ------------------------- | -------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| OPENAI_API_KEY            | No       | Not set     | The OpenAI key used for automatic tagging. More on that in [here](/openai).                                                                                                                     |
+| OPENAI_BASE_URL           | No       | Not set     | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. |
+| OLLAMA_BASE_URL           | No       | Not set     | If you want to use ollama for local inference, set the address of ollama API here.                                                                                                              |
+| OLLAMA_KEEP_ALIVE         | No       | Not set     | Controls how long the model will stay loaded into memory following the request (example value: "5m").                                                                                           |
+| INFERENCE_TEXT_MODEL      | No       | gpt-4o-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama.                                                                                     |
+| INFERENCE_IMAGE_MODEL     | No       | gpt-4o-mini | The model to use for image inference. You'll need to change this to some other model if you're using ollama and that model needs to support vision APIs (e.g. llava).                           |
+| INFERENCE_LANG            | No       | english     | The language in which the tags will be generated.                                                                                                                                               |
+| INFERENCE_JOB_TIMEOUT_SEC | No       | 30          | How long to wait for the inference job to finish before timing out. If you're running ollama without powerful GPUs, you might want to increase the timeout a bit.                               |
 
 ## Crawler Configs
 

diff --git a/packages/shared/config.ts b/packages/shared/config.ts
@@ -13,6 +13,8 @@ const allEnv = z.object({
   OPENAI_API_KEY: z.string().optional(),
   OPENAI_BASE_URL: z.string().url().optional(),
   OLLAMA_BASE_URL: z.string().url().optional(),
+  OLLAMA_KEEP_ALIVE: z.string().optional(),
+  INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
   INFERENCE_TEXT_MODEL: z.string().default("gpt-4o-mini"),
   INFERENCE_IMAGE_MODEL: z.string().default("gpt-4o-mini"),
   CRAWLER_HEADLESS_BROWSER: stringBool("true"),
@@ -47,9 +49,11 @@ const serverConfigSchema = allEnv.transform((val) => {
       disableSignups: val.DISABLE_SIGNUPS,
     },
     inference: {
+      jobTimeoutSec: val.INFERENCE_JOB_TIMEOUT_SEC,
       openAIApiKey: val.OPENAI_API_KEY,
       openAIBaseUrl: val.OPENAI_BASE_URL,
       ollamaBaseUrl: val.OLLAMA_BASE_URL,
+      ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE,
       textModel: val.INFERENCE_TEXT_MODEL,
       imageModel: val.INFERENCE_IMAGE_MODEL,
       inferredTagLang: val.INFERENCE_LANG,