Skip to content

Commit

Permalink
fix: add optional graphrag toggle in dockerfile (#377)
Browse files Browse the repository at this point in the history
* fix: toggle graphrag install in Docker build

* fix: update Dockerfile

* fix: remove unused logic in chat_fn

* fix: disable duckduckgo test due to API limit
  • Loading branch information
taprosoft authored Oct 10, 2024
1 parent 3ff6af8 commit 6da9db4
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 132 deletions.
22 changes: 20 additions & 2 deletions .github/workflows/build-push-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,34 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build docker image
- name: Build docker image (amd64)
uses: docker/build-push-action@v6
with:
file: Dockerfile
context: .
push: true
platforms: linux/amd64,linux/arm64
platforms: linux/amd64
tags: |
${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
target: ${{ matrix.target }}
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: |
ENABLE_GRAPHRAG=true
# Build and push the linux/arm64 variant of the image with GraphRAG disabled
# (ENABLE_GRAPHRAG=false), as opposed to the amd64 step which enables it.
# NOTE(review): this step pushes under the same `steps.meta.outputs.tags` as the
# amd64 step. Presumably the later single-platform push replaces the earlier one
# for a given tag instead of producing a multi-arch manifest -- confirm, and
# consider per-arch tag suffixes or a manifest-merge step.
# NOTE(review): both steps also use `type=gha` cache with no explicit scope;
# verify the two architectures do not evict each other's cache entries.
- name: Build docker image (arm64)
uses: docker/build-push-action@v6
with:
file: Dockerfile
context: .
push: true
platforms: linux/arm64
tags: |
${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
target: ${{ matrix.target }}
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: |
ENABLE_GRAPHRAG=false
14 changes: 9 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ RUN apt-get update -qqy && \
curl \
cargo

# Setup args
ARG ENABLE_GRAPHRAG=true

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV ENABLE_GRAPHRAG=${ENABLE_GRAPHRAG}

# Create working directory
WORKDIR /app
Expand All @@ -30,15 +34,19 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR

# Copy contents
COPY . /app
COPY .env.example /app/.env

# Install pip packages
RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
pip install -e "libs/kotaemon" \
&& pip install -e "libs/ktem" \
&& pip install graphrag future \
&& pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

# Install graphrag (and `future`) only when ENABLE_GRAPHRAG is exactly "true";
# any other value skips the install and the step succeeds as a no-op.
RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
if [ "$ENABLE_GRAPHRAG" = "true" ]; then pip install graphrag future; fi

# Clean up
RUN apt-get autoremove \
&& apt-get clean \
Expand Down Expand Up @@ -66,10 +74,6 @@ RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Copy contents
COPY . /app
COPY .env.example /app/.env

# Install additional pip packages
RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
Expand Down
18 changes: 9 additions & 9 deletions libs/kotaemon/tests/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,15 @@ def generate_chat_completion_obj(text):
"Action: wikipedia\n"
"Action Input: Cinnamon AI company\n"
),
(
"The information retrieved from Wikipedia is not "
"about Cinnamon AI company, but about Blue Prism, "
"a British multinational software corporation. "
"I need to try another source to gather information "
"about Cinnamon AI company.\n"
"Action: duckduckgo_search\n"
"Action Input: Cinnamon AI company\n"
),
# (
# "The information retrieved from Wikipedia is not "
# "about Cinnamon AI company, but about Blue Prism, "
# "a British multinational software corporation. "
# "I need to try another source to gather information "
# "about Cinnamon AI company.\n"
# "Action: duckduckgo_search\n"
# "Action Input: Cinnamon AI company\n"
# ),
FINAL_RESPONSE_TEXT,
]
]
Expand Down
105 changes: 0 additions & 105 deletions libs/ktem/ktem/pages/chat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
import asyncio
import csv
import json
import re
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Optional

import gradio as gr
from filelock import FileLock
from ktem.app import BasePage
from ktem.components import reasonings
from ktem.db.models import Conversation, engine
Expand Down Expand Up @@ -269,10 +265,6 @@ def on_register_events(self):
self._suggestion_updated,
self._app.user_id,
],
outputs=[
self.chat_control.conversation,
self.chat_control.conversation,
],
show_progress="hidden",
)

Expand Down Expand Up @@ -372,10 +364,6 @@ def on_register_events(self):
self._suggestion_updated,
self._app.user_id,
],
outputs=[
self.chat_control.conversation,
self.chat_control.conversation,
],
show_progress="hidden",
)

Expand Down Expand Up @@ -995,96 +983,3 @@ def suggest_chat_conv(self, settings, chat_history):
pass

return suggested_ques, updated

def backup_original_info(
    self, chat_history, settings, info_pannel, original_chat_history
):
    """Record the latest chat turn as part of the "original" history.

    Args:
        chat_history: Sequence of chat turns; only the last one is read.
        settings: Passed through unchanged.
        info_pannel: Passed through unchanged.
        original_chat_history: List that receives the latest turn. It is
            extended in place and the same object is returned.

    Returns:
        A ``(original_chat_history, settings, info_pannel)`` tuple.
    """
    # Nothing to back up yet: indexing [-1] on an empty history would raise
    # IndexError, so hand the inputs back untouched instead.
    if chat_history:
        original_chat_history.append(chat_history[-1])
    return original_chat_history, settings, info_pannel

def save_log(
    self,
    conversation_id,
    chat_history,
    settings,
    info_panel,
    original_chat_history,
    original_settings,
    original_info_panel,
    log_dir,
):
    """Append the latest feedback entry of a conversation to an hourly CSV log.

    The conversation is loaded from the database and the last entry of its
    ``data_source["likes"]`` list is read: element ``0`` of that entry is the
    message index and the final element is the feedback value. The message at
    that index is then logged next to its counterpart from the original
    history. Nothing is written when the conversation has no "likes".

    Args:
        conversation_id: Id used to look up the ``Conversation`` row; the
            query expects exactly one match (``.one()``).
        chat_history: Current chat history, indexed by ``message_index[0]``.
        settings: Current settings, written verbatim to the log.
        info_panel: Current evidence/info panel content, written verbatim.
        original_chat_history: History before rewriting, indexed the same way.
        original_settings: Settings recorded for the original answer.
        original_info_panel: Info panel recorded for the original answer.
        log_dir: Directory for the log files; created if missing.

    Returns:
        None.
    """
    log_dir = Path(log_dir)
    # exist_ok avoids the check-then-create race when two requests log at once.
    log_dir.mkdir(parents=True, exist_ok=True)

    lock = FileLock(log_dir / ".lock")
    # File name carries day, month, year and hour, i.e. one log file per hour.
    formatted_date = datetime.now().strftime("%d%m%Y_%H")

    with Session(engine) as session:
        statement = select(Conversation).where(Conversation.id == conversation_id)
        result = session.exec(statement).one()
        # Copy while the session is still open so the data outlives it.
        data_source = deepcopy(result.data_source)

    likes = data_source.get("likes", [])
    if not likes:
        return

    latest_like = likes[-1]
    feedback = latest_like[-1]
    message_index = latest_like[0]

    current_message = chat_history[message_index[0]]
    original_message = original_chat_history[message_index[0]]
    # zip() stops at the shorter message, so only paired items are compared.
    is_original = all(
        current_item == original_item
        for current_item, original_item in zip(current_message, original_message)
    )

    header = [
        "Conversation ID",
        "Message ID",
        "Question",
        "Answer",
        "Chat History",
        "Settings",
        "Evidences",
        "Feedback",
        "Original/ Rewritten",
        "Original Answer",
        "Original Chat History",
        "Original Settings",
        "Original Evidences",
    ]
    row = [
        conversation_id,
        message_index,
        current_message[0],
        current_message[1],
        chat_history,
        settings,
        info_panel,
        feedback,
        is_original,
        original_message[1],
        original_chat_history,
        original_settings,
        original_info_panel,
    ]

    # The file lock serialises the "is the file new?" check with the append.
    with lock:
        log_file = log_dir / f"{formatted_date}_log.csv"
        needs_header = not log_file.is_file()
        # newline="" is what the csv module requires for correct row
        # terminators; utf-8 keeps non-ASCII chat text writable on any platform.
        with open(log_file, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(header)
            writer.writerow(row)
12 changes: 1 addition & 11 deletions libs/ktem/ktem/pages/chat/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,7 @@ def persist_chat_suggestions(
):
"""Update the conversation's chat suggestions"""
if not is_updated:
return (
gr.update(),
conversation_id,
gr.update(visible=False),
)
return

if user_id is None:
gr.Warning("Please sign in first (Settings → User Settings)")
Expand All @@ -353,13 +349,7 @@ def persist_chat_suggestions(
session.add(result)
session.commit()

history = self.load_chat_history(user_id)
gr.Info("Chat suggestions updated.")
return (
gr.update(choices=history),
conversation_id,
gr.update(visible=False),
)

def _on_app_created(self):
"""Reload the conversation once the app is created"""
Expand Down

0 comments on commit 6da9db4

Please sign in to comment.