diff --git a/src/experimental/ragas_experimental/testset/__init__.py b/src/experimental/ragas_experimental/testset/__init__.py index eba4f0617..d33d5046c 100644 --- a/src/experimental/ragas_experimental/testset/__init__.py +++ b/src/experimental/ragas_experimental/testset/__init__.py @@ -1,7 +1,6 @@ -from ragas_experimental.testset.generators import SimpleTestGenerator -from ragas_experimental.testset.generators import QADistribution +from ragas_experimental.testset.generators import QADistribution, SimpleTestGenerator __all__ = [ "SimpleTestGenerator", "QADistribution", -] \ No newline at end of file +] diff --git a/src/experimental/ragas_experimental/testset/extractors/base.py b/src/experimental/ragas_experimental/testset/extractors/base.py index 29ab2dbeb..fd0709d85 100644 --- a/src/experimental/ragas_experimental/testset/extractors/base.py +++ b/src/experimental/ragas_experimental/testset/extractors/base.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from langchain_core.documents import Document as LCDocument - from ragas_experimental.testset.graph import Node diff --git a/src/experimental/ragas_experimental/testset/extractors/llm_based.py b/src/experimental/ragas_experimental/testset/extractors/llm_based.py index 50e083da4..1e8119299 100644 --- a/src/experimental/ragas_experimental/testset/extractors/llm_based.py +++ b/src/experimental/ragas_experimental/testset/extractors/llm_based.py @@ -4,10 +4,6 @@ import numpy as np import tiktoken from langchain_core.documents import Document as LCDocument - -from ragas.llms.base import BaseRagasLLM, llm_factory -from ragas.llms.json_load import json_loader -from ragas.llms.prompt import Prompt from ragas_experimental.testset.extractors.base import Extractor from ragas_experimental.testset.extractors.prompts import ( headline_extractor_prompt, @@ -18,6 +14,10 @@ from ragas_experimental.testset.graph import Node from ragas_experimental.testset.utils import MODEL_MAX_LENGTHS, merge_dicts +from ragas.llms.base import BaseRagasLLM, llm_factory +from ragas.llms.json_load import json_loader +from ragas.llms.prompt import Prompt + @dataclass class LLMbasedExtractor(Extractor): diff --git a/src/experimental/ragas_experimental/testset/extractors/regex_based.py b/src/experimental/ragas_experimental/testset/extractors/regex_based.py index 1c3812fbf..0de662437 100644 --- a/src/experimental/ragas_experimental/testset/extractors/regex_based.py +++ b/src/experimental/ragas_experimental/testset/extractors/regex_based.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from langchain_core.documents import Document as LCDocument - from ragas_experimental.testset.extractors.base import Extractor, Regex from ragas_experimental.testset.graph import Node diff --git a/src/experimental/ragas_experimental/testset/generators/base.py b/src/experimental/ragas_experimental/testset/generators/base.py index 1a9730f5a..ce40adc38 100644 --- a/src/experimental/ragas_experimental/testset/generators/base.py +++ b/src/experimental/ragas_experimental/testset/generators/base.py @@ -48,7 +48,8 @@ def generate( docs: t.Sequence[Document], test_size: int, distribution: QADistribution, - ) -> TestDataset: ... + ) -> TestDataset: + ... def generate_with_langchain_docs( self, diff --git a/src/experimental/ragas_experimental/testset/generators/simple.py b/src/experimental/ragas_experimental/testset/generators/simple.py index edc034475..4d845a675 100644 --- a/src/experimental/ragas_experimental/testset/generators/simple.py +++ b/src/experimental/ragas_experimental/testset/generators/simple.py @@ -11,10 +11,8 @@ summary_extractor, title_extractor, ) -from ragas_experimental.testset.generators import ( - QADistribution, - TestGenerator, -) +from ragas_experimental.testset.generators import QADistribution, TestGenerator +from ragas_experimental.testset.generators.base import TestDataset from ragas_experimental.testset.graph import Node, NodeLevel from ragas_experimental.testset.questions import ( DEFAULT_DISTRIBUTION, @@ -29,11 +27,10 @@ ) from ragas_experimental.testset.splitters import HeadlineSplitter from ragas_experimental.testset.utils import rng -from ragas_experimental.testset.generators.base import TestDataset +from ragas._analytics import TestsetGenerationEvent, track from ragas.embeddings import embedding_factory from ragas.executor import Executor -from ragas._analytics import TestsetGenerationEvent, track from ragas.llms.base import llm_factory from ragas.utils import check_if_sum_is_close diff --git a/src/experimental/ragas_experimental/testset/relationships/relation.py b/src/experimental/ragas_experimental/testset/relationships/relation.py index 0e3580f9b..092e33ee4 100644 --- a/src/experimental/ragas_experimental/testset/relationships/relation.py +++ b/src/experimental/ragas_experimental/testset/relationships/relation.py @@ -29,7 +29,9 @@ def form_relations( properties }} }} - """.format(node_level=node_level.name) + """.format( + node_level=node_level.name + ) results = schema.execute( query, context={"nodes": nodes, "relationships": relationships} ) diff --git a/src/experimental/ragas_experimental/testset/relationships/similarity.py b/src/experimental/ragas_experimental/testset/relationships/similarity.py index 21add01a7..f4ea97c11 100644 --- a/src/experimental/ragas_experimental/testset/relationships/similarity.py +++ b/src/experimental/ragas_experimental/testset/relationships/similarity.py @@ -2,7 +2,6 @@ from dataclasses import dataclass import numpy as np - from ragas_experimental.testset.graph import Node from ragas_experimental.testset.relationships.base import Similarity diff --git a/src/experimental/ragas_experimental/testset/utils.py b/src/experimental/ragas_experimental/testset/utils.py index 7ddccb18e..a96d4be45 100644 --- a/src/experimental/ragas_experimental/testset/utils.py +++ b/src/experimental/ragas_experimental/testset/utils.py @@ -1,7 +1,6 @@ import json import numpy as np - from ragas_experimental.testset.graph import Node, NodeLevel, NodeType, Relationship MODEL_MAX_LENGTHS = { diff --git a/src/ragas/callbacks.py b/src/ragas/callbacks.py index 87e286a51..fb704846c 100644 --- a/src/ragas/callbacks.py +++ b/src/ragas/callbacks.py @@ -12,7 +12,7 @@ def new_group( - name: str, inputs: t.Dict, callbacks: Callbacks, is_async=False + name: str, inputs: t.Dict, callbacks: Callbacks ) -> t.Tuple[CallbackManagerForChainRun, CallbackManagerForChainGroup]: # start evaluation chain if isinstance(callbacks, list): diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py index dc3b73064..099701cf4 100644 --- a/src/ragas/evaluation.py +++ b/src/ragas/evaluation.py @@ -49,7 +49,6 @@ def evaluate( embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None, callbacks: Callbacks = None, in_ci: bool = False, - is_async: bool = True, run_config: t.Optional[RunConfig] = None, raise_exceptions: bool = True, column_map: t.Optional[t.Dict[str, str]] = None, @@ -81,11 +80,6 @@ def evaluate( Whether the evaluation is running in CI or not. If set to True then some metrics will be run to increase the reproducability of the evaluations. This will increase the runtime and cost of evaluations. Default is False. - is_async: bool - Whether to run the evaluation in async mode or not. If set to True then the - evaluation is run by calling the `metric.ascore` method. In case the llm or - embeddings does not support async then the evaluation can be run in sync mode - with `is_async=False`. Default is False. run_config: RunConfig, optional Configuration for runtime settings like timeout and retries. If not provided, default values are used. @@ -206,7 +200,7 @@ def evaluate( # new evaluation chain row_run_managers = [] evaluation_rm, evaluation_group_cm = new_group( - name="ragas evaluation", inputs={}, callbacks=callbacks, is_async=is_async + name="ragas evaluation", inputs={}, callbacks=callbacks ) for i, row in enumerate(dataset): row = t.cast(t.Dict[str, t.Any], row) @@ -214,7 +208,6 @@ def evaluate( name=f"row {i}", inputs=row, callbacks=evaluation_group_cm, - is_async=is_async, ) row_run_managers.append((row_rm, row_group_cm)) [ @@ -222,7 +215,6 @@ def evaluate( metric.ascore, row, row_group_cm, - is_async, name=f"{metric.name}-{i}", thread_timeout=run_config.thread_timeout, ) diff --git a/src/ragas/llms/prompt.py b/src/ragas/llms/prompt.py index bc0a7dd86..95ea28db9 100644 --- a/src/ragas/llms/prompt.py +++ b/src/ragas/llms/prompt.py @@ -1,9 +1,9 @@ from __future__ import annotations +import ast import json import logging import os -import ast import typing as t from langchain_core.messages import BaseMessage, HumanMessage @@ -233,8 +233,8 @@ def get_all_keys(nested_json): example_dict[self.output_key] = json_loader._safe_load(example[-1], llm) if example_dict[self.output_key] == {}: # Extracting the dictionary part using string slicing - dict_str = example[-1].split('(')[0].strip() - example_dict[self.output_key ] = ast.literal_eval(dict_str) + dict_str = example[-1].split("(")[0].strip() + example_dict[self.output_key] = ast.literal_eval(dict_str) else: example_dict[self.output_key] = example[-1] if self.output_type.lower() == "json": diff --git a/src/ragas/metrics/_answer_correctness.py b/src/ragas/metrics/_answer_correctness.py index 2aba80ffa..100e4e751 100644 --- a/src/ragas/metrics/_answer_correctness.py +++ b/src/ragas/metrics/_answer_correctness.py @@ -141,7 +141,6 @@ class AnswerCorrectnessClassification(BaseModel): @dataclass class AnswerCorrectness(MetricWithLLM, MetricWithEmbeddings): - """ Measures answer correctness compared to ground truth as a combination of factuality and semantic similarity. @@ -211,16 +210,14 @@ def _create_statements_prompt(self, question: str, text: str) -> PromptValue: ) return prompt_value - async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> float: + async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: assert self.llm is not None, "LLM must be set" question = row["question"] statements = {} for item in ["answer", "ground_truth"]: p_value = self._create_statements_prompt(question, row[item]) - item_statement = await self.llm.generate( - p_value, callbacks=callbacks, is_async=is_async - ) + item_statement = await self.llm.generate(p_value, callbacks=callbacks) statements[item] = await _statements_output_parser.aparse( item_statement.generations[0][0].text, p_value, @@ -247,9 +244,7 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> fl ground_truth=ground_truth, answer=answer, ) - is_statement_present = await self.llm.generate( - p_value, callbacks=callbacks, is_async=is_async - ) + is_statement_present = await self.llm.generate(p_value, callbacks=callbacks) result_text = is_statement_present.generations[0][0].text answers = await _output_parser.aparse( @@ -268,7 +263,7 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> fl assert self.answer_similarity is not None, "AnswerSimilarity must be set" similarity_score = await self.answer_similarity.ascore( - row, callbacks=callbacks, is_async=is_async + row, callbacks=callbacks ) score = np.average( diff --git a/src/ragas/metrics/_answer_relevance.py b/src/ragas/metrics/_answer_relevance.py index af95aa06d..ee20f7313 100644 --- a/src/ragas/metrics/_answer_relevance.py +++ b/src/ragas/metrics/_answer_relevance.py @@ -145,7 +145,7 @@ def _create_question_gen_prompt(self, row: t.Dict) -> PromptValue: ans, ctx = row["answer"], row["contexts"] return self.question_generation.format(answer=ans, context="\n".join(ctx)) - async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> float: + async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: assert self.llm is not None, "LLM is not set" prompt = self._create_question_gen_prompt(row) @@ -153,7 +153,6 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> fl prompt, n=self.strictness, callbacks=callbacks, - is_async=is_async, ) answers = [ diff --git a/src/ragas/metrics/_answer_similarity.py b/src/ragas/metrics/_answer_similarity.py index ea5513782..65b43793a 100644 --- a/src/ragas/metrics/_answer_similarity.py +++ b/src/ragas/metrics/_answer_similarity.py @@ -49,9 +49,7 @@ def __post_init__(self: t.Self): **self.embeddings.encode_kwargs, } - async def _ascore( - self: t.Self, row: t.Dict, callbacks: Callbacks, is_async: bool - ) -> float: + async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float: assert self.embeddings is not None, "embeddings must be set" ground_truth = t.cast(str, row["ground_truth"]) diff --git a/src/ragas/metrics/_context_entities_recall.py b/src/ragas/metrics/_context_entities_recall.py index ba249107f..64eab7833 100644 --- a/src/ragas/metrics/_context_entities_recall.py +++ b/src/ragas/metrics/_context_entities_recall.py @@ -149,7 +149,6 @@ async def get_entities( self, text: str, callbacks: Callbacks, - is_async: bool, ) -> t.Optional[ContextEntitiesResponse]: assert self.llm is not None, "LLM is not initialized" p_value = self.context_entity_recall_prompt.format( @@ -158,7 +157,6 @@ async def get_entities( result = await self.llm.generate( prompt=p_value, callbacks=callbacks, - is_async=is_async, ) result_text = result.generations[0][0].text @@ -174,15 +172,10 @@ async def _ascore( self, row: Dict, callbacks: Callbacks, - is_async: bool, ) -> float: ground_truth, contexts = row["ground_truth"], row["contexts"] - ground_truth = await self.get_entities( - ground_truth, callbacks=callbacks, is_async=is_async - ) - contexts = await self.get_entities( - "\n".join(contexts), callbacks=callbacks, is_async=is_async - ) + ground_truth = await self.get_entities(ground_truth, callbacks=callbacks) + contexts = await self.get_entities("\n".join(contexts), callbacks=callbacks) if ground_truth is None or contexts is None: return np.nan return self._compute_score(ground_truth.entities, contexts.entities) diff --git a/src/ragas/metrics/_context_precision.py b/src/ragas/metrics/_context_precision.py index 709bb5fca..7f371d1d0 100644 --- a/src/ragas/metrics/_context_precision.py +++ b/src/ragas/metrics/_context_precision.py @@ -151,7 +151,6 @@ async def _ascore( self: t.Self, row: t.Dict, callbacks: Callbacks, - is_async: bool, ) -> float: assert self.llm is not None, "LLM is not set" @@ -161,7 +160,6 @@ async def _ascore( results = await self.llm.generate( hp, callbacks=callbacks, - is_async=is_async, n=self.reproducibility, ) results = [ diff --git a/src/ragas/metrics/_context_recall.py b/src/ragas/metrics/_context_recall.py index c88ccf257..aac71bfab 100644 --- a/src/ragas/metrics/_context_recall.py +++ b/src/ragas/metrics/_context_recall.py @@ -109,7 +109,6 @@ def dicts(self) -> t.List[t.Dict]: @dataclass class ContextRecall(MetricWithLLM): - """ Estimates context recall by estimating TP and FN using annotated answer and retrieved context. @@ -163,13 +162,12 @@ def _compute_score(self, response: t.Any) -> float: return score - async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> float: + async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: assert self.llm is not None, "set LLM before use" p_value = self._create_context_recall_prompt(row) results = await self.llm.generate( p_value, callbacks=callbacks, - is_async=is_async, n=self.reproducibility, ) results = [results.generations[0][i].text for i in range(self.reproducibility)] diff --git a/src/ragas/metrics/_faithfulness.py b/src/ragas/metrics/_faithfulness.py index baeeb3fa4..713c3aa52 100644 --- a/src/ragas/metrics/_faithfulness.py +++ b/src/ragas/metrics/_faithfulness.py @@ -236,9 +236,7 @@ def _compute_score(self, answers: StatementFaithfulnessAnswers): return score - async def _ascore( - self: t.Self, row: t.Dict, callbacks: Callbacks, is_async: bool - ) -> float: + async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float: """ returns the NLI score for each (q, c, a) pair """ @@ -248,7 +246,6 @@ async def _ascore( statements = await self.llm.generate( p_value, callbacks=callbacks, - is_async=is_async, ) statements = await _statements_output_parser.aparse( statements.generations[0][0].text, p_value, self.llm, self.max_retries @@ -266,7 +263,6 @@ async def _ascore( nli_result = await self.llm.generate( p_value, callbacks=callbacks, - is_async=is_async, n=self._reproducibility, ) diff --git a/src/ragas/metrics/_summarization.py b/src/ragas/metrics/_summarization.py index 2e6bd3f7d..ff2691872 100644 --- a/src/ragas/metrics/_summarization.py +++ b/src/ragas/metrics/_summarization.py @@ -164,15 +164,15 @@ def _get_answer_generation_prompt( ) -> PromptValue: return TEXT_GENERATE_ANSWERS.format(summary=summary, questions=questions) - async def _ascore(self, row: Dict, callbacks: Callbacks, is_async: bool) -> float: + async def _ascore(self, row: Dict, callbacks: Callbacks) -> float: # text is the contexts provided # summary is the summary generated by the model # TODO: add support for the query used as well text: str = "\n".join(row["contexts"]) summary: str = row["summary"] - keyphrases = await self._extract_keyphrases(text, callbacks, is_async) - questions = await self._get_questions(text, keyphrases, callbacks, is_async) - answers = await self._get_answers(questions, summary, callbacks, is_async) + keyphrases = await self._extract_keyphrases(text, callbacks) + questions = await self._get_questions(text, keyphrases, callbacks) + answers = await self._get_answers(questions, summary, callbacks) scores = [] qa_score = self._compute_qa_score(answers) @@ -201,15 +201,12 @@ def _compute_conciseness_score(self, text, summary) -> float: """ return 1 - (len(summary) / len(text)) - async def _extract_keyphrases( - self, text: str, callbacks: Callbacks, is_async: bool - ) -> t.List[str]: + async def _extract_keyphrases(self, text: str, callbacks: Callbacks) -> t.List[str]: assert self.llm is not None, "LLM is not initialized" p_value = self._get_extract_keyphrases_prompt(text) result = await self.llm.generate( prompt=p_value, callbacks=callbacks, - is_async=is_async, ) result_text = result.generations[0][0].text answer = await _output_parser_keyphrase_extraction.aparse( @@ -218,14 +215,13 @@ async def _extract_keyphrases( return answer.keyphrases if answer else [] async def _get_questions( - self, text: str, keyphrases: list[str], callbacks: Callbacks, is_async: bool + self, text: str, keyphrases: list[str], callbacks: Callbacks ) -> t.List[str]: assert self.llm is not None, "LLM is not initialized" p_value = self._get_question_generation_prompt(text, keyphrases) result = await self.llm.generate( prompt=p_value, callbacks=callbacks, - is_async=is_async, ) result_text = result.generations[0][0].text @@ -238,14 +234,13 @@ async def _get_questions( return answer.questions async def _get_answers( - self, questions: t.List[str], summary: str, callbacks: Callbacks, is_async: bool + self, questions: t.List[str], summary: str, callbacks: Callbacks ) -> t.List[str]: assert self.llm is not None, "LLM is not initialized" p_value = self._get_answer_generation_prompt(questions, summary) result = await self.llm.generate( prompt=p_value, callbacks=callbacks, - is_async=is_async, ) result_text = result.generations[0][0].text diff --git a/src/ragas/metrics/base.py b/src/ragas/metrics/base.py index 2da4ab995..3860301a5 100644 --- a/src/ragas/metrics/base.py +++ b/src/ragas/metrics/base.py @@ -96,13 +96,9 @@ def save(self, cache_dir: t.Optional[str] = None) -> None: def score(self: t.Self, row: t.Dict, callbacks: Callbacks = None) -> float: callbacks = callbacks or [] - rm, group_cm = new_group( - self.name, inputs=row, callbacks=callbacks, is_async=False - ) + rm, group_cm = new_group(self.name, inputs=row, callbacks=callbacks) try: - score = asyncio.run( - self._ascore(row=row, callbacks=group_cm, is_async=False) - ) + score = asyncio.run(self._ascore(row=row, callbacks=group_cm)) except Exception as e: if not group_cm.ended: rm.on_chain_error(e) @@ -116,16 +112,13 @@ async def ascore( self: t.Self, row: t.Dict, callbacks: Callbacks = None, - is_async: bool = True, thread_timeout: t.Optional[float] = None, ) -> float: callbacks = callbacks or [] - rm, group_cm = new_group( - self.name, inputs=row, callbacks=callbacks, is_async=True - ) + rm, group_cm = new_group(self.name, inputs=row, callbacks=callbacks) try: score = await asyncio.wait_for( - self._ascore(row=row, callbacks=group_cm, is_async=is_async), + self._ascore(row=row, callbacks=group_cm), timeout=thread_timeout, ) except Exception as e: @@ -138,7 +131,7 @@ async def ascore( return score @abstractmethod - async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> float: + async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: ... diff --git a/src/ragas/metrics/critique.py b/src/ragas/metrics/critique.py index c6006f074..007ddf788 100644 --- a/src/ragas/metrics/critique.py +++ b/src/ragas/metrics/critique.py @@ -117,17 +117,13 @@ def _compute_score(self, safe_loaded_responses: t.List[CriticClassification]): return score - async def _ascore( - self: t.Self, row: t.Dict, callbacks: Callbacks, is_async: bool - ) -> float: + async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float: assert self.llm is not None, "set LLM before use" q, c, a = row["question"], row["contexts"], row["answer"] p_value = self.prompt_format(q, a, c) - result = await self.llm.generate( - p_value, callbacks=callbacks, is_async=is_async - ) + result = await self.llm.generate(p_value, callbacks=callbacks) responses = [r.text for r in result.generations[0]] safe_loaded_responses = [ diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py index 0d5c7eb3e..f55ae5502 100644 --- a/src/ragas/testset/prompts.py +++ b/src/ragas/testset/prompts.py @@ -151,23 +151,32 @@ class AnswerFormat(BaseModel): { "context": """Climate change is significantly influenced by human activities, notably the emission of greenhouse gases from burning fossil fuels. The increased greenhouse gas concentration in the atmosphere traps more heat, leading to global warming and changes in weather patterns.""", "question": "How do human activities contribute to climate change?", - "answer": AnswerFormat.parse_obj({ - "answer": "Human activities contribute to climate change primarily through the emission of greenhouse gases from burning fossil fuels. These emissions increase the concentration of greenhouse gases in the atmosphere, which traps more heat and leads to global warming and altered weather patterns.", - "verdict": "1",}).dict(), + "answer": AnswerFormat.parse_obj( + { + "answer": "Human activities contribute to climate change primarily through the emission of greenhouse gases from burning fossil fuels. These emissions increase the concentration of greenhouse gases in the atmosphere, which traps more heat and leads to global warming and altered weather patterns.", + "verdict": "1", + } + ).dict(), }, { "context": """The concept of artificial intelligence (AI) has evolved over time, but it fundamentally refers to machines designed to mimic human cognitive functions. AI can learn, reason, perceive, and, in some instances, react like humans, making it pivotal in fields ranging from healthcare to autonomous vehicles.""", "question": "What are the key capabilities of artificial intelligence?", - "answer": AnswerFormat.parse_obj({ - "answer": "Artificial intelligence is designed to mimic human cognitive functions, with key capabilities including learning, reasoning, perception, and reacting to the environment in a manner similar to humans. These capabilities make AI pivotal in various fields, including healthcare and autonomous driving.", - "verdict": "1",}).dict(), + "answer": AnswerFormat.parse_obj( + { + "answer": "Artificial intelligence is designed to mimic human cognitive functions, with key capabilities including learning, reasoning, perception, and reacting to the environment in a manner similar to humans. These capabilities make AI pivotal in various fields, including healthcare and autonomous driving.", + "verdict": "1", + } + ).dict(), }, { "context": """The novel "Pride and Prejudice" by Jane Austen revolves around the character Elizabeth Bennet and her family. The story is set in the 19th century in rural England and deals with issues of marriage, morality, and misconceptions.""", "question": "What year was 'Pride and Prejudice' published?", - "answer": AnswerFormat.parse_obj({ - "answer": "The answer to given question is not present in context", - "verdict": "-1",}).dict(), + "answer": AnswerFormat.parse_obj( + { + "answer": "The answer to given question is not present in context", + "verdict": "-1", + } + ).dict(), }, ], input_keys=["context", "question"], diff --git a/tests/e2e/test_evaluation_in_jupyter.ipynb b/tests/e2e/test_evaluation_in_jupyter.ipynb index 725685051..32078f43e 100644 --- a/tests/e2e/test_evaluation_in_jupyter.ipynb +++ b/tests/e2e/test_evaluation_in_jupyter.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -23,7 +23,7 @@ "})" ] }, - "execution_count": 1, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -47,151 +47,42 @@ " faithfulness,\n", " context_recall,\n", " context_precision,\n", - ")\n" + ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9b453badd44845918b70e4c53c37c048", + "model_id": "97c098677a074b078639aba1f2d0bd4a", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Evaluating: 0%| | 0/800 [00:00 131\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ssl\u001b[38;5;241m.\u001b[39mSSLWantReadError:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/ssl.py:917\u001b[0m, in \u001b[0;36mSSLObject.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 917\u001b[0m v \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m v\n", - "\u001b[0;31mSSLWantReadError\u001b[0m: The operation did not complete (read) (_ssl.c:2578)", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mCancelledError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:232\u001b[0m, in \u001b[0;36mTask.__step\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 230\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 232\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:754\u001b[0m, in \u001b[0;36mBaseChatModel._agenerate_with_cache\u001b[0;34m(self, messages, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 753\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39msignature(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_agenerate)\u001b[38;5;241m.\u001b[39mparameters\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun_manager\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 754\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_agenerate(\n\u001b[1;32m 755\u001b[0m messages, stop\u001b[38;5;241m=\u001b[39mstop, run_manager\u001b[38;5;241m=\u001b[39mrun_manager, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[1;32m 756\u001b[0m )\n\u001b[1;32m 757\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/langchain_openai/chat_models/base.py:666\u001b[0m, in \u001b[0;36mBaseChatOpenAI._agenerate\u001b[0;34m(self, messages, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 665\u001b[0m params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs}\n\u001b[0;32m--> 666\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39masync_client\u001b[38;5;241m.\u001b[39mcreate(messages\u001b[38;5;241m=\u001b[39mmessage_dicts, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[1;32m 667\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_chat_result(response)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/openai/resources/chat/completions.py:1283\u001b[0m, in \u001b[0;36mAsyncCompletions.create\u001b[0;34m(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, parallel_tool_calls, presence_penalty, response_format, seed, service_tier, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;129m@required_args\u001b[39m([\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m], [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate\u001b[39m(\n\u001b[1;32m 1251\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1281\u001b[0m timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m|\u001b[39m httpx\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m|\u001b[39m NotGiven \u001b[38;5;241m=\u001b[39m NOT_GIVEN,\n\u001b[1;32m 1282\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ChatCompletion \u001b[38;5;241m|\u001b[39m AsyncStream[ChatCompletionChunk]:\n\u001b[0;32m-> 1283\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_post(\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/chat/completions\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1285\u001b[0m body\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mawait\u001b[39;00m async_maybe_transform(\n\u001b[1;32m 1286\u001b[0m {\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m: messages,\n\u001b[1;32m 1288\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m: model,\n\u001b[1;32m 1289\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfrequency_penalty\u001b[39m\u001b[38;5;124m\"\u001b[39m: frequency_penalty,\n\u001b[1;32m 1290\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfunction_call\u001b[39m\u001b[38;5;124m\"\u001b[39m: function_call,\n\u001b[1;32m 1291\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfunctions\u001b[39m\u001b[38;5;124m\"\u001b[39m: functions,\n\u001b[1;32m 1292\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogit_bias\u001b[39m\u001b[38;5;124m\"\u001b[39m: logit_bias,\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogprobs\u001b[39m\u001b[38;5;124m\"\u001b[39m: logprobs,\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m: max_tokens,\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn\u001b[39m\u001b[38;5;124m\"\u001b[39m: n,\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparallel_tool_calls\u001b[39m\u001b[38;5;124m\"\u001b[39m: parallel_tool_calls,\n\u001b[1;32m 1297\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpresence_penalty\u001b[39m\u001b[38;5;124m\"\u001b[39m: presence_penalty,\n\u001b[1;32m 1298\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresponse_format\u001b[39m\u001b[38;5;124m\"\u001b[39m: response_format,\n\u001b[1;32m 1299\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseed\u001b[39m\u001b[38;5;124m\"\u001b[39m: seed,\n\u001b[1;32m 1300\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mservice_tier\u001b[39m\u001b[38;5;124m\"\u001b[39m: service_tier,\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstop\u001b[39m\u001b[38;5;124m\"\u001b[39m: stop,\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m: stream,\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream_options\u001b[39m\u001b[38;5;124m\"\u001b[39m: stream_options,\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtemperature\u001b[39m\u001b[38;5;124m\"\u001b[39m: temperature,\n\u001b[1;32m 1305\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtool_choice\u001b[39m\u001b[38;5;124m\"\u001b[39m: tool_choice,\n\u001b[1;32m 1306\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtools\u001b[39m\u001b[38;5;124m\"\u001b[39m: tools,\n\u001b[1;32m 1307\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop_logprobs\u001b[39m\u001b[38;5;124m\"\u001b[39m: top_logprobs,\n\u001b[1;32m 1308\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop_p\u001b[39m\u001b[38;5;124m\"\u001b[39m: top_p,\n\u001b[1;32m 1309\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muser\u001b[39m\u001b[38;5;124m\"\u001b[39m: user,\n\u001b[1;32m 1310\u001b[0m },\n\u001b[1;32m 1311\u001b[0m completion_create_params\u001b[38;5;241m.\u001b[39mCompletionCreateParams,\n\u001b[1;32m 1312\u001b[0m ),\n\u001b[1;32m 1313\u001b[0m options\u001b[38;5;241m=\u001b[39mmake_request_options(\n\u001b[1;32m 1314\u001b[0m extra_headers\u001b[38;5;241m=\u001b[39mextra_headers, extra_query\u001b[38;5;241m=\u001b[39mextra_query, extra_body\u001b[38;5;241m=\u001b[39mextra_body, timeout\u001b[38;5;241m=\u001b[39mtimeout\n\u001b[1;32m 1315\u001b[0m ),\n\u001b[1;32m 1316\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mChatCompletion,\n\u001b[1;32m 1317\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 1318\u001b[0m stream_cls\u001b[38;5;241m=\u001b[39mAsyncStream[ChatCompletionChunk],\n\u001b[1;32m 1319\u001b[0m )\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/openai/_base_client.py:1805\u001b[0m, in \u001b[0;36mAsyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, files, options, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1802\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1803\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mawait\u001b[39;00m async_to_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1804\u001b[0m )\n\u001b[0;32m-> 1805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest(cast_to, opts, stream\u001b[38;5;241m=\u001b[39mstream, stream_cls\u001b[38;5;241m=\u001b[39mstream_cls)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/openai/_base_client.py:1503\u001b[0m, in \u001b[0;36mAsyncAPIClient.request\u001b[0;34m(self, cast_to, options, stream, stream_cls, remaining_retries)\u001b[0m\n\u001b[1;32m 1494\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrequest\u001b[39m(\n\u001b[1;32m 1495\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1496\u001b[0m cast_to: Type[ResponseT],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1501\u001b[0m remaining_retries: Optional[\u001b[38;5;28mint\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1502\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _AsyncStreamT:\n\u001b[0;32m-> 1503\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_request(\n\u001b[1;32m 1504\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1505\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[1;32m 1506\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream,\n\u001b[1;32m 1507\u001b[0m stream_cls\u001b[38;5;241m=\u001b[39mstream_cls,\n\u001b[1;32m 1508\u001b[0m remaining_retries\u001b[38;5;241m=\u001b[39mremaining_retries,\n\u001b[1;32m 1509\u001b[0m )\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/openai/_base_client.py:1537\u001b[0m, in \u001b[0;36mAsyncAPIClient._request\u001b[0;34m(self, cast_to, options, stream, stream_cls, remaining_retries)\u001b[0m\n\u001b[1;32m 1536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client\u001b[38;5;241m.\u001b[39msend(\n\u001b[1;32m 1538\u001b[0m request,\n\u001b[1;32m 1539\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_stream_response_body(request\u001b[38;5;241m=\u001b[39mrequest),\n\u001b[1;32m 1540\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 1541\u001b[0m )\n\u001b[1;32m 1542\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m httpx\u001b[38;5;241m.\u001b[39mTimeoutException \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpx/_client.py:1617\u001b[0m, in \u001b[0;36mAsyncClient.send\u001b[0;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[1;32m 1615\u001b[0m auth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_request_auth(request, auth)\n\u001b[0;32m-> 1617\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_send_handling_auth(\n\u001b[1;32m 1618\u001b[0m request,\n\u001b[1;32m 1619\u001b[0m auth\u001b[38;5;241m=\u001b[39mauth,\n\u001b[1;32m 1620\u001b[0m follow_redirects\u001b[38;5;241m=\u001b[39mfollow_redirects,\n\u001b[1;32m 1621\u001b[0m history\u001b[38;5;241m=\u001b[39m[],\n\u001b[1;32m 1622\u001b[0m )\n\u001b[1;32m 1623\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpx/_client.py:1645\u001b[0m, in \u001b[0;36mAsyncClient._send_handling_auth\u001b[0;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[1;32m 1644\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m-> 1645\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_send_handling_redirects(\n\u001b[1;32m 1646\u001b[0m request,\n\u001b[1;32m 1647\u001b[0m follow_redirects\u001b[38;5;241m=\u001b[39mfollow_redirects,\n\u001b[1;32m 1648\u001b[0m history\u001b[38;5;241m=\u001b[39mhistory,\n\u001b[1;32m 1649\u001b[0m )\n\u001b[1;32m 1650\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpx/_client.py:1682\u001b[0m, in \u001b[0;36mAsyncClient._send_handling_redirects\u001b[0;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[1;32m 1680\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m hook(request)\n\u001b[0;32m-> 1682\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_send_single_request(request)\n\u001b[1;32m 1683\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpx/_client.py:1719\u001b[0m, in \u001b[0;36mAsyncClient._send_single_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 1718\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m request_context(request\u001b[38;5;241m=\u001b[39mrequest):\n\u001b[0;32m-> 1719\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m transport\u001b[38;5;241m.\u001b[39mhandle_async_request(request)\n\u001b[1;32m 1721\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response\u001b[38;5;241m.\u001b[39mstream, AsyncByteStream)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpx/_transports/default.py:366\u001b[0m, in \u001b[0;36mAsyncHTTPTransport.handle_async_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m--> 366\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pool\u001b[38;5;241m.\u001b[39mhandle_async_request(req)\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp\u001b[38;5;241m.\u001b[39mstream, typing\u001b[38;5;241m.\u001b[39mAsyncIterable)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/connection_pool.py:268\u001b[0m, in \u001b[0;36mAsyncConnectionPool.handle_async_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponse_closed(status)\n\u001b[0;32m--> 268\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/connection_pool.py:251\u001b[0m, in \u001b[0;36mAsyncConnectionPool.handle_async_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 251\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m connection\u001b[38;5;241m.\u001b[39mhandle_async_request(request)\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[1;32m 253\u001b[0m \u001b[38;5;66;03m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[1;32m 254\u001b[0m \u001b[38;5;66;03m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# up as HTTP/1.1.\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/connection.py:103\u001b[0m, in \u001b[0;36mAsyncHTTPConnection.handle_async_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[0;32m--> 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection\u001b[38;5;241m.\u001b[39mhandle_async_request(request)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/http11.py:133\u001b[0m, in \u001b[0;36mAsyncHTTP11Connection.handle_async_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_response_closed()\n\u001b[0;32m--> 133\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/http11.py:111\u001b[0m, in \u001b[0;36mAsyncHTTP11Connection.handle_async_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\n\u001b[1;32m 104\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreceive_response_headers\u001b[39m\u001b[38;5;124m\"\u001b[39m, logger, request, kwargs\n\u001b[1;32m 105\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[1;32m 106\u001b[0m (\n\u001b[1;32m 107\u001b[0m http_version,\n\u001b[1;32m 108\u001b[0m status,\n\u001b[1;32m 109\u001b[0m reason_phrase,\n\u001b[1;32m 110\u001b[0m headers,\n\u001b[0;32m--> 111\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_receive_response_headers(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 112\u001b[0m trace\u001b[38;5;241m.\u001b[39mreturn_value \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 113\u001b[0m http_version,\n\u001b[1;32m 114\u001b[0m status,\n\u001b[1;32m 115\u001b[0m reason_phrase,\n\u001b[1;32m 116\u001b[0m headers,\n\u001b[1;32m 117\u001b[0m )\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/http11.py:176\u001b[0m, in \u001b[0;36mAsyncHTTP11Connection._receive_response_headers\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 176\u001b[0m event \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_receive_event(timeout\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(event, h11\u001b[38;5;241m.\u001b[39mResponse):\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_async/http11.py:212\u001b[0m, in \u001b[0;36mAsyncHTTP11Connection._receive_event\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m event \u001b[38;5;129;01mis\u001b[39;00m h11\u001b[38;5;241m.\u001b[39mNEED_DATA:\n\u001b[0;32m--> 212\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_network_stream\u001b[38;5;241m.\u001b[39mread(\n\u001b[1;32m 213\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mREAD_NUM_BYTES, timeout\u001b[38;5;241m=\u001b[39mtimeout\n\u001b[1;32m 214\u001b[0m )\n\u001b[1;32m 216\u001b[0m \u001b[38;5;66;03m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;66;03m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[38;5;66;03m# perspective. Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# it as a ConnectError.\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/httpcore/_backends/anyio.py:34\u001b[0m, in \u001b[0;36mAnyIOStream.read\u001b[0;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stream\u001b[38;5;241m.\u001b[39mreceive(max_bytes\u001b[38;5;241m=\u001b[39mmax_bytes)\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m anyio\u001b[38;5;241m.\u001b[39mEndOfStream: \u001b[38;5;66;03m# pragma: nocover\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/anyio/streams/tls.py:196\u001b[0m, in \u001b[0;36mTLSStream.receive\u001b[0;34m(self, max_bytes)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mreceive\u001b[39m(\u001b[38;5;28mself\u001b[39m, max_bytes: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m65536\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mbytes\u001b[39m:\n\u001b[0;32m--> 196\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_sslobject_method(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ssl_object\u001b[38;5;241m.\u001b[39mread, max_bytes)\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/anyio/streams/tls.py:138\u001b[0m, in \u001b[0;36mTLSStream._call_sslobject_method\u001b[0;34m(self, func, *args)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransport_stream\u001b[38;5;241m.\u001b[39msend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_write_bio\u001b[38;5;241m.\u001b[39mread())\n\u001b[0;32m--> 138\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransport_stream\u001b[38;5;241m.\u001b[39mreceive()\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EndOfStream:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/anyio/_backends/_asyncio.py:1203\u001b[0m, in \u001b[0;36mSocketStream.receive\u001b[0;34m(self, max_bytes)\u001b[0m\n\u001b[1;32m 1202\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transport\u001b[38;5;241m.\u001b[39mresume_reading()\n\u001b[0;32m-> 1203\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_protocol\u001b[38;5;241m.\u001b[39mread_event\u001b[38;5;241m.\u001b[39mwait()\n\u001b[1;32m 1204\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transport\u001b[38;5;241m.\u001b[39mpause_reading()\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/locks.py:214\u001b[0m, in \u001b[0;36mEvent.wait\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 214\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m fut\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/futures.py:285\u001b[0m, in \u001b[0;36mFuture.__await__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_asyncio_future_blocking \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;66;03m# This tells Task to wait for completion.\u001b[39;00m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdone():\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:304\u001b[0m, in \u001b[0;36mTask.__wakeup\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 306\u001b[0m \u001b[38;5;66;03m# This may also be a cancellation.\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/futures.py:196\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 195\u001b[0m exc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_cancelled_error()\n\u001b[0;32m--> 196\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m!=\u001b[39m _FINISHED:\n", - "\u001b[0;31mCancelledError\u001b[0m: ", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mCancelledError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:234\u001b[0m, in \u001b[0;36mTask.__step\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 234\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mthrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/metrics/_faithfulness.py:266\u001b[0m, in \u001b[0;36mFaithfulness._ascore\u001b[0;34m(self, row, callbacks, is_async)\u001b[0m\n\u001b[1;32m 265\u001b[0m p_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_nli_prompt(row, statements)\n\u001b[0;32m--> 266\u001b[0m nli_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm\u001b[38;5;241m.\u001b[39mgenerate(\n\u001b[1;32m 267\u001b[0m p_value,\n\u001b[1;32m 268\u001b[0m callbacks\u001b[38;5;241m=\u001b[39mcallbacks,\n\u001b[1;32m 269\u001b[0m is_async\u001b[38;5;241m=\u001b[39mis_async,\n\u001b[1;32m 270\u001b[0m n\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reproducibility,\n\u001b[1;32m 271\u001b[0m )\n\u001b[1;32m 273\u001b[0m nli_result_text \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 274\u001b[0m nli_result\u001b[38;5;241m.\u001b[39mgenerations[\u001b[38;5;241m0\u001b[39m][i]\u001b[38;5;241m.\u001b[39mtext \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reproducibility)\n\u001b[1;32m 275\u001b[0m ]\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/llms/base.py:93\u001b[0m, in \u001b[0;36mBaseRagasLLM.generate\u001b[0;34m(self, prompt, n, temperature, stop, callbacks, is_async)\u001b[0m\n\u001b[1;32m 90\u001b[0m agenerate_text_with_retry \u001b[38;5;241m=\u001b[39m add_async_retry(\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39magenerate_text, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_config\n\u001b[1;32m 92\u001b[0m )\n\u001b[0;32m---> 93\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m agenerate_text_with_retry(\n\u001b[1;32m 94\u001b[0m prompt\u001b[38;5;241m=\u001b[39mprompt,\n\u001b[1;32m 95\u001b[0m n\u001b[38;5;241m=\u001b[39mn,\n\u001b[1;32m 96\u001b[0m temperature\u001b[38;5;241m=\u001b[39mtemperature,\n\u001b[1;32m 97\u001b[0m stop\u001b[38;5;241m=\u001b[39mstop,\n\u001b[1;32m 98\u001b[0m callbacks\u001b[38;5;241m=\u001b[39mcallbacks,\n\u001b[1;32m 99\u001b[0m )\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/tenacity/_asyncio.py:88\u001b[0m, in \u001b[0;36mAsyncRetrying.wraps..async_wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(fn)\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21masync_wrapped\u001b[39m(\u001b[38;5;241m*\u001b[39margs: t\u001b[38;5;241m.\u001b[39mAny, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: t\u001b[38;5;241m.\u001b[39mAny) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m t\u001b[38;5;241m.\u001b[39mAny:\n\u001b[0;32m---> 88\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/tenacity/_asyncio.py:47\u001b[0m, in \u001b[0;36mAsyncRetrying.__call__\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 47\u001b[0m do \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(do, DoAttempt):\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/tenacity/__init__.py:314\u001b[0m, in \u001b[0;36mBaseRetrying.iter\u001b[0;34m(self, retry_state)\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (is_explicit_retry \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mretry(retry_state)):\n\u001b[0;32m--> 314\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfut\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mafter \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/concurrent/futures/_base.py:451\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/concurrent/futures/_base.py:403\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 403\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 405\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/tenacity/_asyncio.py:50\u001b[0m, in \u001b[0;36mAsyncRetrying.__call__\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 50\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m: \u001b[38;5;66;03m# noqa: B902\u001b[39;00m\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/llms/base.py:170\u001b[0m, in \u001b[0;36mLangchainLLMWrapper.agenerate_text\u001b[0;34m(self, prompt, n, temperature, stop, callbacks)\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_multiple_completion_supported(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlangchain_llm):\n\u001b[0;32m--> 170\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlangchain_llm\u001b[38;5;241m.\u001b[39magenerate_prompt(\n\u001b[1;32m 171\u001b[0m prompts\u001b[38;5;241m=\u001b[39m[prompt],\n\u001b[1;32m 172\u001b[0m n\u001b[38;5;241m=\u001b[39mn,\n\u001b[1;32m 173\u001b[0m temperature\u001b[38;5;241m=\u001b[39mtemperature,\n\u001b[1;32m 174\u001b[0m stop\u001b[38;5;241m=\u001b[39mstop,\n\u001b[1;32m 175\u001b[0m callbacks\u001b[38;5;241m=\u001b[39mcallbacks,\n\u001b[1;32m 176\u001b[0m )\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:609\u001b[0m, in \u001b[0;36mBaseChatModel.agenerate_prompt\u001b[0;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 608\u001b[0m prompt_messages \u001b[38;5;241m=\u001b[39m [p\u001b[38;5;241m.\u001b[39mto_messages() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m prompts]\n\u001b[0;32m--> 609\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39magenerate(\n\u001b[1;32m 610\u001b[0m prompt_messages, stop\u001b[38;5;241m=\u001b[39mstop, callbacks\u001b[38;5;241m=\u001b[39mcallbacks, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[1;32m 611\u001b[0m )\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:535\u001b[0m, in \u001b[0;36mBaseChatModel.agenerate\u001b[0;34m(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)\u001b[0m\n\u001b[1;32m 525\u001b[0m run_managers \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m callback_manager\u001b[38;5;241m.\u001b[39mon_chat_model_start(\n\u001b[1;32m 526\u001b[0m dumpd(\u001b[38;5;28mself\u001b[39m),\n\u001b[1;32m 527\u001b[0m messages,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 532\u001b[0m run_id\u001b[38;5;241m=\u001b[39mrun_id,\n\u001b[1;32m 533\u001b[0m )\n\u001b[0;32m--> 535\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\n\u001b[1;32m 536\u001b[0m \u001b[38;5;241m*\u001b[39m[\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_agenerate_with_cache(\n\u001b[1;32m 538\u001b[0m m,\n\u001b[1;32m 539\u001b[0m stop\u001b[38;5;241m=\u001b[39mstop,\n\u001b[1;32m 540\u001b[0m run_manager\u001b[38;5;241m=\u001b[39mrun_managers[i] \u001b[38;5;28;01mif\u001b[39;00m run_managers \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 541\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 542\u001b[0m )\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(messages)\n\u001b[1;32m 544\u001b[0m ],\n\u001b[1;32m 545\u001b[0m return_exceptions\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 546\u001b[0m )\n\u001b[1;32m 547\u001b[0m exceptions \u001b[38;5;241m=\u001b[39m []\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:304\u001b[0m, in \u001b[0;36mTask.__wakeup\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 306\u001b[0m \u001b[38;5;66;03m# This may also be a cancellation.\u001b[39;00m\n", - "\u001b[0;31mCancelledError\u001b[0m: ", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mCancelledError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:456\u001b[0m, in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 456\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfut\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mCancelledError \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/futures.py:196\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 195\u001b[0m exc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_cancelled_error()\n\u001b[0;32m--> 196\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m!=\u001b[39m _FINISHED:\n", - "\u001b[0;31mCancelledError\u001b[0m: ", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m evaluate\n\u001b[0;32m----> 3\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mbigger_ds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontext_precision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mfaithfulness\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer_relevancy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontext_recall\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m result\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/evaluation.py:255\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(dataset, metrics, llm, embeddings, callbacks, in_ci, is_async, run_config, raise_exceptions, column_map)\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m evaluation_group_cm\u001b[38;5;241m.\u001b[39mended:\n\u001b[1;32m 253\u001b[0m evaluation_rm\u001b[38;5;241m.\u001b[39mon_chain_error(e)\n\u001b[0;32m--> 255\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 257\u001b[0m result \u001b[38;5;241m=\u001b[39m Result(\n\u001b[1;32m 258\u001b[0m scores\u001b[38;5;241m=\u001b[39mDataset\u001b[38;5;241m.\u001b[39mfrom_list(scores),\n\u001b[1;32m 259\u001b[0m dataset\u001b[38;5;241m=\u001b[39mdataset,\n\u001b[1;32m 260\u001b[0m binary_columns\u001b[38;5;241m=\u001b[39mbinary_metrics,\n\u001b[1;32m 261\u001b[0m )\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/evaluation.py:235\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(dataset, metrics, llm, embeddings, callbacks, in_ci, is_async, run_config, raise_exceptions, column_map)\u001b[0m\n\u001b[1;32m 232\u001b[0m scores \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 234\u001b[0m \u001b[38;5;66;03m# get the results\u001b[39;00m\n\u001b[0;32m--> 235\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mexecutor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresults\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results \u001b[38;5;241m==\u001b[39m []:\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ExceptionInRunner()\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/executor.py:107\u001b[0m, in \u001b[0;36mExecutor.results\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 103\u001b[0m results\u001b[38;5;241m.\u001b[39mappend(r)\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results\n\u001b[0;32m--> 107\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43masyncio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_aresults\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m sorted_results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msorted\u001b[39m(results, key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [r[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m r \u001b[38;5;129;01min\u001b[39;00m sorted_results]\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/nest_asyncio.py:31\u001b[0m, in \u001b[0;36m_patch_asyncio..run\u001b[0;34m(main, debug)\u001b[0m\n\u001b[1;32m 29\u001b[0m task \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mensure_future(main)\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m task\u001b[38;5;241m.\u001b[39mdone():\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/envs/ragas/lib/python3.10/site-packages/nest_asyncio.py:99\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 99\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/futures.py:201\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:232\u001b[0m, in \u001b[0;36mTask.__step\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 230\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 232\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 234\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/executor.py:102\u001b[0m, in \u001b[0;36mExecutor.results.._aresults\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m results \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m tqdm(\n\u001b[1;32m 96\u001b[0m futures_as_they_finish,\n\u001b[1;32m 97\u001b[0m desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdesc,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 100\u001b[0m leave\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkeep_progress_bar,\n\u001b[1;32m 101\u001b[0m ):\n\u001b[0;32m--> 102\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m future\n\u001b[1;32m 103\u001b[0m results\u001b[38;5;241m.\u001b[39mappend(r)\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:571\u001b[0m, in \u001b[0;36mas_completed.._wait_for_one\u001b[0;34m()\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m f \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 569\u001b[0m \u001b[38;5;66;03m# Dummy value from _on_timeout().\u001b[39;00m\n\u001b[1;32m 570\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mTimeoutError\n\u001b[0;32m--> 571\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/futures.py:201\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 201\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:232\u001b[0m, in \u001b[0;36mTask.__step\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 230\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 232\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 234\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/executor.py:34\u001b[0m, in \u001b[0;36mas_completed..sema_coro\u001b[0;34m(coro)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msema_coro\u001b[39m(coro):\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m semaphore:\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m coro\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/executor.py:59\u001b[0m, in \u001b[0;36mExecutor.wrap_callable_with_index..wrapped_callable_async\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraise_exceptions:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 61\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(\n\u001b[1;32m 62\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRunner in Executor raised an exception\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 63\u001b[0m )\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/executor.py:53\u001b[0m, in \u001b[0;36mExecutor.wrap_callable_with_index..wrapped_callable_async\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m result \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mnan\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 53\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m MaxRetriesExceeded \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# this only for testset generation v2\u001b[39;00m\n\u001b[1;32m 56\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax retries exceeded for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;241m.\u001b[39mevolution\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/metrics/base.py:134\u001b[0m, in \u001b[0;36mMetric.ascore\u001b[0;34m(self, row, callbacks, is_async, thread_timeout)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m group_cm\u001b[38;5;241m.\u001b[39mended:\n\u001b[1;32m 133\u001b[0m rm\u001b[38;5;241m.\u001b[39mon_chain_error(e)\n\u001b[0;32m--> 134\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m group_cm\u001b[38;5;241m.\u001b[39mended:\n", - "File \u001b[0;32m~/jjmachan/explodinggradients/ragas/src/ragas/metrics/base.py:127\u001b[0m, in \u001b[0;36mMetric.ascore\u001b[0;34m(self, row, callbacks, is_async, thread_timeout)\u001b[0m\n\u001b[1;32m 123\u001b[0m rm, group_cm \u001b[38;5;241m=\u001b[39m new_group(\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname, inputs\u001b[38;5;241m=\u001b[39mrow, callbacks\u001b[38;5;241m=\u001b[39mcallbacks, is_async\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 125\u001b[0m )\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 127\u001b[0m score \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mwait_for(\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ascore(row\u001b[38;5;241m=\u001b[39mrow, callbacks\u001b[38;5;241m=\u001b[39mgroup_cm, is_async\u001b[38;5;241m=\u001b[39mis_async),\n\u001b[1;32m 129\u001b[0m timeout\u001b[38;5;241m=\u001b[39mthread_timeout,\n\u001b[1;32m 130\u001b[0m )\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m group_cm\u001b[38;5;241m.\u001b[39mended:\n", - "File \u001b[0;32m~/.pyenv/versions/3.10.12/lib/python3.10/asyncio/tasks.py:458\u001b[0m, in \u001b[0;36mwait_for\u001b[0;34m(fut, timeout)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fut\u001b[38;5;241m.\u001b[39mresult()\n\u001b[1;32m 457\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mCancelledError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 458\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mTimeoutError() \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 459\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 460\u001b[0m timeout_handle\u001b[38;5;241m.\u001b[39mcancel()\n", - "\u001b[0;31mTimeoutError\u001b[0m: " - ] + "data": { + "text/plain": [ + "{'context_precision': 1.0000, 'faithfulness': 0.3485, 'answer_relevancy': 0.9810, 'context_recall': 0.9600}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" }, { "name": "stderr", "output_type": "stream", "text": [ - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", - "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n", "Failed to batch ingest runs: LangSmithRateLimitError('Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError(\\'429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch\\', \\'{\"detail\":\"Monthly unique traces usage limit exceeded\"}\\')')\n" ] @@ -210,8 +101,15 @@ " ],\n", ")\n", "\n", - "result\n" + "result" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/tests/e2e/test_testset_gen_in_jupyter.ipynb b/tests/e2e/test_testset_gen_in_jupyter.ipynb index 23304bc05..9de7133db 100644 --- a/tests/e2e/test_testset_gen_in_jupyter.ipynb +++ b/tests/e2e/test_testset_gen_in_jupyter.ipynb @@ -2,16 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "850.63s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -26,24 +19,25 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from langchain_community.document_loaders import DirectoryLoader\n", + "\n", "loader = DirectoryLoader(\"../../experiments/data\")\n", "documents = loader.load()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4711243b79964afeacadbf7dc0f9bf57", + "model_id": "cd18f5881c294d5498eae5f6c19e29e6", "version_major": 2, "version_minor": 0 }, @@ -64,7 +58,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0f81229e5a0044ca86a6e1e4ec3f1f44", + "model_id": "b1b8fe7d9746488cbdc69fe67e62926d", "version_major": 2, "version_minor": 0 }, @@ -87,19 +81,19 @@ "\n", "embeddings = OpenAIEmbeddings()\n", "\n", - "generator = TestsetGenerator.from_langchain(\n", - " generator_llm,\n", - " critic_llm,\n", - " embeddings\n", - ")\n", + "generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)\n", "\n", "# generate testset\n", - "testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})\n" + "testset = generator.generate_with_langchain_docs(\n", + " documents,\n", + " test_size=10,\n", + " distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},\n", + ")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -108,7 +102,7 @@ "10" ] }, - "execution_count": 4, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/unit/test_executor.py b/tests/unit/test_executor.py index 3d6e48251..2e1bdfc6c 100644 --- a/tests/unit/test_executor.py +++ b/tests/unit/test_executor.py @@ -1,4 +1,5 @@ import asyncio + import pytest @@ -44,6 +45,7 @@ async def echo_order(index: int): @pytest.mark.asyncio async def test_executor_with_running_loop(): import asyncio + from ragas.executor import Executor loop = asyncio.new_event_loop() diff --git a/tests/unit/test_executor_in_jupyter.ipynb b/tests/unit/test_executor_in_jupyter.ipynb index 58ccc56af..f1dba1e42 100644 --- a/tests/unit/test_executor_in_jupyter.ipynb +++ b/tests/unit/test_executor_in_jupyter.ipynb @@ -37,7 +37,7 @@ "exec = Executor(raise_exceptions=True)\n", "for i in range(10):\n", " exec.submit(sleep, i)\n", - " \n", + "\n", "assert exec.results(), \"didn't get anything from results\"" ] }, @@ -61,10 +61,12 @@ "from ragas.executor import as_completed\n", "import asyncio\n", "\n", + "\n", "async def echo_order(index: int):\n", " await asyncio.sleep(index)\n", " return index\n", "\n", + "\n", "async def _run():\n", " results = []\n", " for t in as_completed([echo_order(1), echo_order(2), echo_order(3)], 3):\n", @@ -72,10 +74,11 @@ " results.append(r)\n", " return results\n", "\n", + "\n", "results = await _run()\n", "\n", "expected = [1, 2, 3]\n", - "assert results == expected, f\"got: {results}, expected: {expected}\"\n" + "assert results == expected, f\"got: {results}, expected: {expected}\"" ] }, { @@ -116,7 +119,7 @@ "exec = Executor(raise_exceptions=True)\n", "for i in range(1000):\n", " exec.submit(sleep, 1)\n", - " \n", + "\n", "assert exec.results(), \"didn't get anything from results\"\n", "\n", "for i in range(1000):\n",