From 44ac321ebd7c26f820757785e0c3e976555ae999 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Fri, 25 Oct 2024 23:04:31 +0530 Subject: [PATCH] add reference tool call to required cols --- docs/concepts/metrics/available_metrics/agents.md | 7 +++---- src/ragas/metrics/_tool_call_accuracy.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/concepts/metrics/available_metrics/agents.md b/docs/concepts/metrics/available_metrics/agents.md index 5d7bda4b7..3a542bf98 100644 --- a/docs/concepts/metrics/available_metrics/agents.md +++ b/docs/concepts/metrics/available_metrics/agents.md @@ -71,10 +71,9 @@ scorer = TopicAdherenceScore(mode="recall") `ToolCallAccuracy` is a metric that can be used to evaluate the performance of the LLM in identifying and calling the required tools to complete a given task. This metric needs `user_input` and `reference_tool_calls` to evaluate the performance of the LLM in identifying and calling the required tools to complete a given task. The metric is computed by comparing the `reference_tool_calls` with the Tool calls made by the AI. The values range between 0 and 1, with higher values indicating better performance. ```python +from ragas.metrics import ToolCallAccuracy from ragas.dataset_schema import MultiTurnSample from ragas.messages import HumanMessage,AIMessage,ToolMessage,ToolCall -from ragas.metrics import ToolCallAccuracy - sample = [ HumanMessage(content="What's the weather like in New York right now?"), @@ -89,7 +88,7 @@ sample = [ AIMessage(content="75°F is approximately 23.9°C.") ] -sampl2 = MultiTurnSample( +sample = MultiTurnSample( user_input=sample, reference_tool_calls=[ ToolCall(name="weather_check", args={"location": "New York"}), @@ -98,7 +97,7 @@ sampl2 = MultiTurnSample( ) scorer = ToolCallAccuracy() -await metric.multi_turn_ascore(sample) +await scorer.multi_turn_ascore(sample) ``` The tool call sequence specified in `reference_tool_calls` is used as the ideal outcome. 
If the tool calls made by the AI does not the the order or sequence of the `reference_tool_calls`, the metric will return a score of 0. This helps to ensure that the AI is able to identify and call the required tools in the correct order to complete a given task. diff --git a/src/ragas/metrics/_tool_call_accuracy.py b/src/ragas/metrics/_tool_call_accuracy.py index 9b4cf99b2..8fe416d6a 100644 --- a/src/ragas/metrics/_tool_call_accuracy.py +++ b/src/ragas/metrics/_tool_call_accuracy.py @@ -20,7 +20,7 @@ class ToolCallAccuracy(MultiTurnMetric): default_factory=lambda: { MetricType.MULTI_TURN: { "user_input", - "reference", + "reference_tool_calls", } } ) @@ -61,7 +61,7 @@ def is_sequence_aligned( async def _multi_turn_ascore( self, sample: MultiTurnSample, callbacks: Callbacks ) -> float: - assert sample.reference_tool_calls is not None, "Reference is not set" + assert sample.reference_tool_calls is not None, "Reference tool calls are not set" pred_tool_calls = [] for item in sample.user_input: