Skip to content

Commit

Permalink
feat: add lightrag support (#474) bump:patch
Browse files Browse the repository at this point in the history
* feat: add lightrag support

* docs: update README
  • Loading branch information
taprosoft authored Nov 7, 2024
1 parent 908452c commit aeace96
Show file tree
Hide file tree
Showing 6 changed files with 477 additions and 34 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,18 @@ documents and developers who want to build their own RAG pipeline.

<details>

<summary>Setup LIGHTRAG</summary>

- Install LightRAG: `pip install git+https://github.com/HKUDS/LightRAG.git`
- Installing `LightRAG` might introduce version conflicts; see [this issue](https://github.com/Cinnamon/kotaemon/issues/440).
- Quick fix: `pip uninstall hnswlib chroma-hnswlib && pip install chroma-hnswlib`
- Launch Kotaemon with the `USE_LIGHTRAG=true` environment variable set.
- Set your default LLM and embedding models in the Resources settings; LightRAG will pick them up automatically.

</details>

<details>

<summary>Setup MS GRAPHRAG</summary>

- **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:
Expand Down
50 changes: 20 additions & 30 deletions flowsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,41 +287,31 @@
}

# Read the USE_NANO_GRAPHRAG flag through config(); it defaults to False and
# is cast to bool.
USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
# Two-way choice of the graph index class path based on USE_NANO_GRAPHRAG only.
# NOTE(review): GRAPHRAG_INDEX_TYPE is assigned again further down (after
# USE_LIGHTRAG is read), which overrides the value chosen here.
GRAPHRAG_INDEX_TYPE = (
"ktem.index.file.graph.GraphRAGIndex"
if not USE_NANO_GRAPHRAG
else "ktem.index.file.graph.NanoGraphRAGIndex"
)
# Read the USE_LIGHTRAG flag through config(); it defaults to False and is
# cast to bool.
USE_LIGHTRAG = config("USE_LIGHTRAG", default=False, cast=bool)

# Dotted path of the graph index class to use. The flags are checked in order,
# so USE_NANO_GRAPHRAG wins when both flags are set; with neither set the
# GraphRAGIndex path is used.
GRAPHRAG_INDEX_TYPE = (
    "ktem.index.file.graph.NanoGraphRAGIndex"
    if USE_NANO_GRAPHRAG
    else "ktem.index.file.graph.LightRAGIndex"
    if USE_LIGHTRAG
    else "ktem.index.file.graph.GraphRAGIndex"
)

# Index class paths: the file index plus whichever graph index path
# GRAPHRAG_INDEX_TYPE currently holds.
KH_INDEX_TYPES = ["ktem.index.file.FileIndex", GRAPHRAG_INDEX_TYPE]

# Graph index entry chosen from two hard-coded variants: "GraphRAG" when
# USE_NANO_GRAPHRAG is falsy, "NanoGraphRAG" otherwise. Both variants carry the
# same supported_file_types string and private=False.
# NOTE(review): GRAPHRAG_INDEX is assigned again right after this expression,
# which replaces the value built here.
GRAPHRAG_INDEX = (
{
"name": "GraphRAG",
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},
"index_type": "ktem.index.file.graph.GraphRAGIndex",
}
if not USE_NANO_GRAPHRAG
else {
"name": "NanoGraphRAG",
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},
"index_type": "ktem.index.file.graph.NanoGraphRAGIndex",
}
)
# Graph index entry derived from GRAPHRAG_INDEX_TYPE, so the name and the
# index_type always refer to the same class path.
GRAPHRAG_INDEX = {
    # Text after the last "." of the dotted path, with every "Index" removed
    # (e.g. "pkg.mod.FooIndex" -> "Foo").
    "name": GRAPHRAG_INDEX_TYPE.rpartition(".")[2].replace("Index", ""),
    "config": {
        "supported_file_types": (
            ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
            ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
        ),
        "private": False,
    },
    "index_type": GRAPHRAG_INDEX_TYPE,
}

KH_INDICES = [
{
Expand Down
3 changes: 2 additions & 1 deletion libs/ktem/ktem/index/file/graph/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .graph_index import GraphRAGIndex
from .light_graph_index import LightRAGIndex
from .nano_graph_index import NanoGraphRAGIndex

__all__ = ["GraphRAGIndex", "NanoGraphRAGIndex"]
__all__ = ["GraphRAGIndex", "NanoGraphRAGIndex", "LightRAGIndex"]
26 changes: 26 additions & 0 deletions libs/ktem/ktem/index/file/graph/light_graph_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from typing import Any

from ..base import BaseFileIndexRetriever
from .graph_index import GraphRAGIndex
from .lightrag_pipelines import LightRAGIndexingPipeline, LightRAGRetrieverPipeline


class LightRAGIndex(GraphRAGIndex):
    """GraphRAGIndex subclass wired to the LightRAG pipeline classes.

    Indexing is pointed at ``LightRAGIndexingPipeline`` and retrieval at
    ``LightRAGRetrieverPipeline``.
    NOTE(review): the ``_setup_*`` methods are presumably hooks invoked by
    the base class -- confirm against ``GraphRAGIndex``.
    """

    def _setup_indexing_cls(self):
        """Store the LightRAG indexing pipeline class on the instance."""
        self._indexing_pipeline_cls = LightRAGIndexingPipeline

    def _setup_retriever_cls(self):
        """Store the LightRAG retriever pipeline class in a one-item list."""
        self._retriever_pipeline_cls = [LightRAGRetrieverPipeline]

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Create the retriever pipeline for the given selection.

        Args:
            settings: not used by this implementation.
            user_id: not used by this implementation.
            selected: unpacked into exactly three items; only the middle one
                is used and is forwarded as ``file_ids``. The default
                ``None`` cannot be unpacked, so a value must be supplied.

        Returns:
            A one-element list containing a ``LightRAGRetrieverPipeline``
            built with the selected file ids and ``self._resources["Index"]``.
        """
        # The first and last items of the selection are ignored here.
        _head, chosen_file_ids, _tail = selected
        return [
            LightRAGRetrieverPipeline(
                file_ids=chosen_file_ids,
                Index=self._resources["Index"],
            )
        ]
Loading

0 comments on commit aeace96

Please sign in to comment.