Skip to content

Commit

Permalink
Move extractors (#1516)
Browse files Browse the repository at this point in the history
* Consolidate graph embedding and umap

* Consolidate claim extraction

* Consolidate graph extractor

* Move graph utils

* Move summarizers

* Semver

---------

Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
  • Loading branch information
natoverse and AlonsoGuevara authored Dec 19, 2024
1 parent d59b397 commit d0543d1
Show file tree
Hide file tree
Showing 59 changed files with 292 additions and 519 deletions.
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20241213181544864279.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Move extractor code to co-locate with operations."
}
5 changes: 0 additions & 5 deletions graphrag/config/models/claim_extraction_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,7 @@ class ClaimExtractionConfig(LLMConfig):

def resolved_strategy(self, root_dir: str, encoding_model: str | None) -> dict:
"""Get the resolved claim extraction strategy."""
- from graphrag.index.operations.extract_covariates import (
- ExtractClaimsStrategyType,
- )
-
return self.strategy or {
- "type": ExtractClaimsStrategyType.graph_intelligence,
"llm": self.llm.model_dump(),
**self.parallelization.model_dump(),
"extraction_prompt": (Path(root_dir) / self.prompt)
Expand Down
2 changes: 1 addition & 1 deletion graphrag/config/models/embed_graph_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class EmbedGraphConfig(BaseModel):

def resolved_strategy(self) -> dict:
"""Get the resolved node2vec strategy."""
- from graphrag.index.operations.embed_graph import (
+ from graphrag.index.operations.embed_graph.typing import (
EmbedGraphStrategyType,
)

Expand Down
12 changes: 6 additions & 6 deletions graphrag/index/flows/create_final_community_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
)

from graphrag.cache.pipeline_cache import PipelineCache
- from graphrag.index.graph.extractors.community_reports.schemas import (
+ from graphrag.index.operations.summarize_communities import (
+ prepare_community_reports,
+ restore_community_hierarchy,
+ summarize_communities,
+ )
+ from graphrag.index.operations.summarize_communities.community_reports_extractor.schemas import (
CLAIM_DESCRIPTION,
CLAIM_DETAILS,
CLAIM_ID,
Expand All @@ -32,11 +37,6 @@
NODE_ID,
NODE_NAME,
)
- from graphrag.index.operations.summarize_communities import (
- prepare_community_reports,
- restore_community_hierarchy,
- summarize_communities,
- )


async def create_final_community_reports(
Expand Down
2 changes: 1 addition & 1 deletion graphrag/index/flows/create_final_covariates.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)

from graphrag.cache.pipeline_cache import PipelineCache
- from graphrag.index.operations.extract_covariates import (
+ from graphrag.index.operations.extract_covariates.extract_covariates import (
extract_covariates,
)

Expand Down
4 changes: 2 additions & 2 deletions graphrag/index/flows/create_final_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
)

from graphrag.index.operations.create_graph import create_graph
- from graphrag.index.operations.embed_graph import embed_graph
- from graphrag.index.operations.layout_graph import layout_graph
+ from graphrag.index.operations.embed_graph.embed_graph import embed_graph
+ from graphrag.index.operations.layout_graph.layout_graph import layout_graph


def create_final_nodes(
Expand Down
4 changes: 0 additions & 4 deletions graphrag/index/graph/__init__.py

This file was deleted.

8 changes: 0 additions & 8 deletions graphrag/index/graph/embedding/__init__.py

This file was deleted.

17 changes: 0 additions & 17 deletions graphrag/index/graph/extractors/__init__.py

This file was deleted.

8 changes: 0 additions & 8 deletions graphrag/index/graph/extractors/claims/__init__.py

This file was deleted.

24 changes: 0 additions & 24 deletions graphrag/index/graph/extractors/community_reports/__init__.py

This file was deleted.

16 changes: 0 additions & 16 deletions graphrag/index/graph/extractors/graph/__init__.py

This file was deleted.

11 changes: 0 additions & 11 deletions graphrag/index/graph/extractors/summarize/__init__.py

This file was deleted.

9 changes: 0 additions & 9 deletions graphrag/index/graph/utils/__init__.py

This file was deleted.

14 changes: 0 additions & 14 deletions graphrag/index/graph/utils/normalize_node_names.py

This file was deleted.

17 changes: 0 additions & 17 deletions graphrag/index/graph/visualization/__init__.py

This file was deleted.

148 changes: 0 additions & 148 deletions graphrag/index/graph/visualization/compute_umap_positions.py

This file was deleted.

2 changes: 1 addition & 1 deletion graphrag/index/operations/cluster_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import networkx as nx

- from graphrag.index.graph.utils import stable_largest_connected_component
+ from graphrag.index.utils.stable_lcc import stable_largest_connected_component

Communities = list[tuple[int, int, int, list[str]]]

Expand Down
8 changes: 0 additions & 8 deletions graphrag/index/operations/embed_graph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,3 @@
# Licensed under the MIT License

"""The Indexing Engine graph embed package root."""

- from graphrag.index.operations.embed_graph.embed_graph import (
- EmbedGraphStrategyType,
- embed_graph,
- )
- from graphrag.index.operations.embed_graph.typing import NodeEmbeddings
-
- __all__ = ["EmbedGraphStrategyType", "NodeEmbeddings", "embed_graph"]
Loading

0 comments on commit d0543d1

Please sign in to comment.