diff --git a/.env.example b/.env.example index 156b5ed..b927624 100644 --- a/.env.example +++ b/.env.example @@ -23,8 +23,8 @@ OWL_PORT=6969 OWL_WORKERS=3 DOCIO_WORKERS=1 DOCIO_DEVICE=cpu -EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 -RERANKER_MODEL=cross-encoder/ms-marco-TinyBERT-L-2 +EMBEDDING_MODEL=BAAI/bge-small-en-v1.5 +RERANKER_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1 OWL_CONCURRENT_ROWS_BATCH_SIZE=5 OWL_CONCURRENT_COLS_BATCH_SIZE=5 -OWL_MAX_WRITE_BATCH_SIZE=1000 \ No newline at end of file +OWL_MAX_WRITE_BATCH_SIZE=1000 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6d8270..aea4711 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,6 +78,9 @@ jobs: # Replace the org with the key in the .env file sed -i "s/$org=.*/$org=$key/g" .env done + sed -i "s:EMBEDDING_MODEL=.*:EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2:g" .env + sed -i "s:RERANKER_MODEL=.*:RERANKER_MODEL=cross-encoder/ms-marco-TinyBERT-L-2:g" .env + echo 'OWL_MODELS_CONFIG=models_ci.json' >> .env env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -91,7 +94,6 @@ jobs: run: | set -e docker compose -p jamai -f docker/compose.cpu.yml --profile minio up --quiet-pull -d --wait - env: COMPOSE_DOCKER_CLI_BUILD: 1 DOCKER_BUILDKIT: 1 @@ -118,6 +120,7 @@ jobs: - name: Inspect owl logs if Python SDK tests failed if: failure() && steps.python_sdk_test_oss.outcome == 'failure' + timeout-minutes: 1 run: docker exec jamai-owl-1 cat /app/api/logs/owl.log - name: Upload Pytest Test Results @@ -139,6 +142,7 @@ jobs: - name: Inspect owl logs if TS/JS SDK tests failed if: failure() && steps.ts_sdk_test_oss.outcome == 'failure' + timeout-minutes: 1 run: docker exec jamai-owl-1 cat /app/api/logs/owl.log - name: Update owl service for S3 test @@ -168,6 +172,7 @@ jobs: - name: Inspect owl logs if Python SDK tests failed if: failure() && steps.python_sdk_test_oss_file.outcome == 'failure' + 
timeout-minutes: 1 run: docker exec jamai-owl-1 cat /app/api/logs/owl.log lance_tests: diff --git a/docker/amd.yml b/docker/amd.yml index 696fcac..81209af 100644 --- a/docker/amd.yml +++ b/docker/amd.yml @@ -5,7 +5,7 @@ services: [ "/bin/sh", "-c", - "(. /app/.venv/bin/activate && HIP_VISIBLE_DEVICES=0 infinity_emb v2 --port 6909 --model-id $${EMBEDDING_MODEL} --model-warmup --device cuda --engine torch --no-bettertransformer --no-compile &);(. /app/.venv/bin/activate && HIP_VISIBLE_DEVICES=1 infinity_emb v2 --port 6919 --model-id $${RERANKER_MODEL} --model-warmup --device cuda --engine torch --no-bettertransformer --no-compile)", + ". /app/.venv/bin/activate && HIP_VISIBLE_DEVICES=0 infinity_emb v2 --port 6909 --model-id $${EMBEDDING_MODEL} --model-id $${RERANKER_MODEL} --model-warmup --device cuda --engine torch --no-bettertransformer --no-compile", ] # # https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html # # instruction to specify the AMD GPU resources diff --git a/docker/compose.cpu.yml b/docker/compose.cpu.yml index fd272ec..a7a1e29 100644 --- a/docker/compose.cpu.yml +++ b/docker/compose.cpu.yml @@ -2,14 +2,7 @@ services: infinity: image: michaelf34/infinity:0.0.70-cpu container_name: jamai_infinity - command: [ - "v2", - "--engine", "torch", - "--port", "6909", - "--model-warmup", - "--model-id", "${EMBEDDING_MODEL}", - "--model-id", "${RERANKER_MODEL}", - ] + command: ["v2", "--engine", "torch", "--port", "6909", "--model-warmup", "--model-id", "${EMBEDDING_MODEL}", "--model-id", "${RERANKER_MODEL}"] healthcheck: test: ["CMD-SHELL", "curl --fail http://localhost:6909/health"] interval: 10s diff --git a/docker/compose.nvidia.yml b/docker/compose.nvidia.yml index bb9f3ae..5424af5 100644 --- a/docker/compose.nvidia.yml +++ b/docker/compose.nvidia.yml @@ -1,4 +1,4 @@ include: - path: - compose.cpu.yml - - nvidia.yml \ No newline at end of file + - nvidia.yml diff --git a/docker/nvidia.yml b/docker/nvidia.yml index 2da8ba5..0c788b2 
100644 --- a/docker/nvidia.yml +++ b/docker/nvidia.yml @@ -1,6 +1,6 @@ services: infinity: - image: michaelf34/infinity:0.0.70 # Use GPU-compatible image, instead of torch cpu-only. + image: michaelf34/infinity:0.0.70 # Use GPU-compatible image, instead of torch cpu-only. deploy: resources: reservations: diff --git a/services/api/src/owl/configs/models.json b/services/api/src/owl/configs/models.json index 7f58cb4..887f7be 100644 --- a/services/api/src/owl/configs/models.json +++ b/services/api/src/owl/configs/models.json @@ -44,24 +44,9 @@ } ], "embed_models": [ - { - "id": "ellm/sentence-transformers/all-MiniLM-L6-v2", - "name": "ELLM MiniLM L6 v2", - "context_length": 512, - "embedding_size": 384, - "languages": ["mul"], - "capabilities": ["embed"], - "deployments": [ - { - "litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2", - "api_base": "http://infinity:6909", - "provider": "ellm" - } - ] - }, { "id": "ellm/BAAI/bge-small-en-v1.5", - "litellm_id": "openai/BAAI/bge-small-en-v1.5", + "name": "ELLM BAAI BGE Small EN v1.5", "context_length": 512, "embedding_size": 1024, "languages": ["mul"], @@ -140,28 +125,14 @@ "rerank_models": [ { "id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1", - "name": "ELLM mxbai-rerank-xsmall v1", + "name": "ELLM MxBAI Rerank XSmall v1", "context_length": 512, "languages": ["en"], "capabilities": ["rerank"], "deployments": [ { "litellm_id": "", - "api_base": "http://infinity:6919", - "provider": "ellm" - } - ] - }, - { - "id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2", - "name": "ELLM TinyBERT L2", - "context_length": 512, - "languages": ["en"], - "capabilities": ["rerank"], - "deployments": [ - { - "litellm_id": "", - "api_base": "http://infinity:6919", + "api_base": "http://infinity:6909", "provider": "ellm" } ] @@ -181,4 +152,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/services/api/src/owl/configs/models_aipc.json b/services/api/src/owl/configs/models_aipc.json index 8b0ace0..c7e8b74 100644 --- 
a/services/api/src/owl/configs/models_aipc.json +++ b/services/api/src/owl/configs/models_aipc.json @@ -129,15 +129,15 @@ ], "embed_models": [ { - "id": "ellm/sentence-transformers/all-MiniLM-L6-v2", - "name": "ELLM MiniLM L6 v2", + "id": "ellm/BAAI/bge-small-en-v1.5", + "name": "ELLM BAAI BGE Small EN v1.5", "context_length": 512, - "embedding_size": 384, + "embedding_size": 384, "languages": ["mul"], "capabilities": ["embed"], "deployments": [ { - "litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2", + "litellm_id": "openai/BAAI/bge-small-en-v1.5", "api_base": "http://infinity:6909", "provider": "ellm" } @@ -209,15 +209,15 @@ ], "rerank_models": [ { - "id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2", - "name": "ELLM TinyBERT L2", + "id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1", + "name": "ELLM MxBAI Rerank XSmall v1", "context_length": 512, "languages": ["en"], "capabilities": ["rerank"], "deployments": [ { "litellm_id": "", - "api_base": "http://infinity:6919", + "api_base": "http://infinity:6909", "provider": "ellm" } ] diff --git a/services/api/src/owl/configs/models_ci.json b/services/api/src/owl/configs/models_ci.json new file mode 100644 index 0000000..d37b069 --- /dev/null +++ b/services/api/src/owl/configs/models_ci.json @@ -0,0 +1,124 @@ +{ + "llm_models": [ + { + "id": "openai/gpt-4o-mini", + "name": "OpenAI GPT-4o Mini", + "context_length": 128000, + "languages": ["mul"], + "capabilities": ["chat", "image"], + "deployments": [ + { + "litellm_id": "", + "api_base": "", + "provider": "openai" + } + ] + }, + { + "id": "anthropic/claude-3-haiku-20240307", + "name": "Anthropic Claude 3 Haiku", + "context_length": 200000, + "languages": ["mul"], + "capabilities": ["chat"], + "deployments": [ + { + "litellm_id": "", + "api_base": "", + "provider": "anthropic" + } + ] + }, + { + "id": "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "name": "Together AI Meta Llama 3.1 (8B)", + "context_length": 130000, + "languages": ["mul"], + 
"capabilities": ["chat"], + "deployments": [ + { + "litellm_id": "", + "api_base": "", + "provider": "together_ai" + } + ] + } + ], + "embed_models": [ + { + "id": "ellm/sentence-transformers/all-MiniLM-L6-v2", + "name": "ELLM MiniLM L6 v2", + "context_length": 512, + "embedding_size": 384, + "languages": ["mul"], + "capabilities": ["embed"], + "deployments": [ + { + "litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2", + "api_base": "http://infinity:6909", + "provider": "ellm" + } + ] + }, + { + "id": "openai/text-embedding-3-small-512", + "name": "OpenAI Text Embedding 3 Small (512-dim)", + "context_length": 8192, + "embedding_size": 512, + "dimensions": 512, + "languages": ["mul"], + "capabilities": ["embed"], + "deployments": [ + { + "litellm_id": "text-embedding-3-small", + "api_base": "", + "provider": "openai" + } + ] + }, + { + "id": "cohere/embed-multilingual-v3.0", + "name": "Cohere Embed Multilingual v3.0", + "context_length": 512, + "embedding_size": 1024, + "languages": ["mul"], + "capabilities": ["embed"], + "deployments": [ + { + "litellm_id": "embed-multilingual-v3.0", + "api_base": "", + "provider": "cohere" + } + ] + } + ], + "rerank_models": [ + { + "id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2", + "name": "ELLM TinyBERT L2", + "context_length": 512, + "languages": ["en"], + "capabilities": ["rerank"], + "deployments": [ + { + "litellm_id": "", + "api_base": "http://infinity:6909", + "provider": "ellm" + } + ] + }, + { + "id": "cohere/rerank-multilingual-v3.0", + "name": "Cohere Rerank Multilingual v3.0", + "context_length": 512, + "languages": ["mul"], + "capabilities": ["rerank"], + "deployments": [ + { + "litellm_id": "", + "api_base": "", + "provider": "cohere" + } + ] + } + ] +} diff --git a/services/api/src/owl/configs/models_ollama.json b/services/api/src/owl/configs/models_ollama.json index afb08e4..6705fc2 100644 --- a/services/api/src/owl/configs/models_ollama.json +++ b/services/api/src/owl/configs/models_ollama.json @@ 
-59,15 +59,15 @@ ], "embed_models": [ { - "id": "ellm/sentence-transformers/all-MiniLM-L6-v2", - "name": "ELLM MiniLM L6 v2", + "id": "ellm/BAAI/bge-small-en-v1.5", + "name": "ELLM BAAI BGE Small EN v1.5", "context_length": 512, - "embedding_size": 384, + "embedding_size": 384, "languages": ["mul"], "capabilities": ["embed"], "deployments": [ { - "litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2", + "litellm_id": "openai/BAAI/bge-small-en-v1.5", "api_base": "http://infinity:6909", "provider": "ellm" } @@ -139,15 +139,15 @@ ], "rerank_models": [ { - "id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2", - "name": "ELLM TinyBERT L2", + "id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1", + "name": "ELLM MxBAI Rerank XSmall v1", "context_length": 512, "languages": ["en"], "capabilities": ["rerank"], "deployments": [ { "litellm_id": "", - "api_base": "http://infinity:6919", + "api_base": "http://infinity:6909", "provider": "ellm" } ] diff --git a/services/app/electron/main.js b/services/app/electron/main.js index b49bf3d..c957d3b 100644 --- a/services/app/electron/main.js +++ b/services/app/electron/main.js @@ -68,10 +68,6 @@ app.whenReady().then(() => { // cmd: [path.resolve('resources/infinity_server/infinity_server.exe'), 'v1', '--host', '127.0.0.1', '--port', '6909', '--model-warmup', '--device', 'cpu', '--model-name-or-path', 'sentence-transformers/all-MiniLM-L6-v2'], // cwd: path.resolve('resources/infinity_server'), // }, - // reranker: { - // cmd: [path.resolve('resources/infinity_server/infinity_server.exe'), 'v1', '--host', '127.0.0.1', '--port', '6919', '--model-warmup', '--device', 'cpu', '--model-name-or-path', 'cross-encoder/ms-marco-TinyBERT-L-2'], - // cwd: path.resolve('resources/infinity_server'), - // }, // ellm_api_server: { // cmd: [path.resolve('resources/ellm_api_server/ellm_api_server.exe'), '--model_path', path.resolve('resources/phi3-mini-directml-int4-awq-block-128'), '--port', '5555'], // cwd: path.resolve('resources'),