Skip to content

Commit

Permalink
fix: import error for TestsetGeneration and small fixes (#1516)
Browse files Browse the repository at this point in the history
  • Loading branch information
jjmachan authored Oct 16, 2024
1 parent 9408d10 commit 8e41fe3
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 86 deletions.
114 changes: 66 additions & 48 deletions src/ragas/dataset_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
import typing as t
from abc import ABC, abstractmethod
from dataclasses import dataclass, field

from datasets import Dataset as HFDataset
Expand All @@ -12,6 +13,8 @@
from ragas.utils import safe_nanmean

if t.TYPE_CHECKING:
from pathlib import Path

from datasets import Dataset as HFDataset
from pandas import DataFrame as PandasDataframe

Expand Down Expand Up @@ -136,9 +139,20 @@ def pretty_repr(self):
Sample = t.TypeVar("Sample", bound=BaseSample)


class RagasDataset(BaseModel, t.Generic[Sample]):
class RagasDataset(ABC, BaseModel, t.Generic[Sample]):
samples: t.List[Sample]

@abstractmethod
def to_list(self) -> t.List[t.Dict]:
    """Serialize every sample in the dataset to a plain dictionary.

    Abstract: each subclass decides how its sample type maps to dict rows.
    """
    pass

@classmethod
@abstractmethod
def from_list(cls, data: t.List[t.Dict]) -> RagasDataset[Sample]:
    """Construct a dataset from a list of row dictionaries.

    Abstract counterpart of ``to_list``; subclasses decide how each dict
    is parsed back into their sample type.
    """
    pass

@field_validator("samples")
def validate_samples(cls, samples: t.List[BaseSample]) -> t.List[BaseSample]:
"""Validates that all samples are of the same type."""
Expand All @@ -155,20 +169,6 @@ def get_sample_type(self) -> t.Type[Sample]:
"""Returns the type of the samples in the dataset."""
return type(self.samples[0])

def _to_list(self) -> t.List[t.Dict]:
    """Converts the samples to a list of dictionaries.

    For multi-turn datasets, any non-string message ``content`` is
    JSON-encoded so every serialized row contains only plain strings.
    """
    rows = [sample.to_dict() for sample in self.samples]

    if self.get_sample_type() == MultiTurnSample:
        for sample in rows:
            for item in sample["user_input"]:
                if not isinstance(item["content"], str):
                    # Non-string content (presumably structured message
                    # payloads — TODO confirm) is JSON-encoded so tabular
                    # exports (CSV / HF datasets) stay string-typed.
                    item["content"] = json.dumps(
                        item["content"], ensure_ascii=False
                    )

    return rows

def to_hf_dataset(self) -> HFDataset:
"""Converts the dataset to a Hugging Face Dataset."""
try:
Expand All @@ -178,7 +178,7 @@ def to_hf_dataset(self) -> HFDataset:
"datasets is not installed. Please install it to use this function."
)

return HFDataset.from_list(self._to_list())
return HFDataset.from_list(self.to_list())

@classmethod
def from_hf_dataset(cls, dataset: HFDataset):
Expand All @@ -194,26 +194,13 @@ def to_pandas(self) -> PandasDataframe:
"pandas is not installed. Please install it to use this function."
)

data = self._to_list()
data = self.to_list()
return pd.DataFrame(data)

def features(self):
"""Returns the features of the samples."""
return self.samples[0].get_features()

@classmethod
def from_list(cls, mapping: t.List[t.Dict]):
    """Creates an EvaluationDataset from a list of dictionaries.

    Rows whose ``user_input`` is a list are parsed as multi-turn samples;
    otherwise every row is parsed as a single-turn sample.
    """
    samples = []
    # NOTE(review): only mapping[0]'s user_input type is inspected inside
    # the all(...) — later rows are checked for key presence only, so a
    # mixed list is classified by its first row. Verify this is intended.
    if all(
        "user_input" in item and isinstance(mapping[0]["user_input"], list)
        for item in mapping
    ):
        samples.extend(MultiTurnSample(**sample) for sample in mapping)
    else:
        samples.extend(SingleTurnSample(**sample) for sample in mapping)
    return cls(samples=samples)

@classmethod
def from_dict(cls, mapping: t.Dict):
"""Creates an EvaluationDataset from a dictionary."""
Expand All @@ -227,40 +214,30 @@ def from_dict(cls, mapping: t.Dict):
samples.extend(SingleTurnSample(**sample) for sample in mapping)
return cls(samples=samples)

@classmethod
def from_csv(cls, path: str):
    """Creates an EvaluationDataset from a CSV file."""
    import csv

    with open(path, "r", newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))
    return cls.from_list(rows)

def to_csv(self, path: str):
def to_csv(self, path: t.Union[str, Path]):
"""Converts the dataset to a CSV file."""
import csv

data = self._to_list()
data = self.to_list()
if not data:
return

fieldnames = self.features()
fieldnames = data[0].keys()

with open(path, "w", newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for row in data:
writer.writerow(row)

def to_jsonl(self, path: str):
def to_jsonl(self, path: t.Union[str, Path]):
"""Converts the dataset to a JSONL file."""
with open(path, "w") as jsonlfile:
for sample in self.samples:
jsonlfile.write(json.dumps(sample.to_dict(), ensure_ascii=False) + "\n")

@classmethod
def from_jsonl(cls, path: str):
def from_jsonl(cls, path: t.Union[str, Path]):
"""Creates an EvaluationDataset from a JSONL file."""
with open(path, "r") as jsonlfile:
data = [json.loads(line) for line in jsonlfile]
Expand Down Expand Up @@ -307,8 +284,6 @@ class EvaluationDataset(RagasDataset[SingleTurnSampleOrMultiTurnSample]):
Creates an EvaluationDataset from a list of dictionaries.
from_dict(mapping)
Creates an EvaluationDataset from a dictionary.
from_csv(path)
Creates an EvaluationDataset from a CSV file.
to_csv(path)
Converts the dataset to a CSV file.
to_jsonl(path)
Expand All @@ -333,6 +308,37 @@ def __getitem__(
else:
raise TypeError("Index must be int or slice")

def to_list(self) -> t.List[t.Dict]:
    """Serialize all samples to dicts, JSON-encoding any non-string
    multi-turn message content so rows hold only plain strings."""
    serialized = [s.to_dict() for s in self.samples]

    # Single-turn rows need no post-processing.
    if self.get_sample_type() != MultiTurnSample:
        return serialized

    for row in serialized:
        for message in row["user_input"]:
            content = message["content"]
            if not isinstance(content, str):
                message["content"] = json.dumps(content, ensure_ascii=False)

    return serialized

@classmethod
def from_list(cls, data: t.List[t.Dict]) -> EvaluationDataset:
    """Creates an EvaluationDataset from a list of dictionaries.

    Rows are parsed as multi-turn samples only when EVERY row has a
    list-valued ``user_input``; otherwise all rows are parsed as
    single-turn samples.
    """
    samples = []
    # Fix: inspect each row's own user_input. The previous code checked
    # isinstance(data[0]["user_input"], list) inside the all(...), so only
    # the first row's type ever decided the classification.
    if all(
        "user_input" in item and isinstance(item["user_input"], list)
        for item in data
    ):
        samples.extend(MultiTurnSample(**sample) for sample in data)
    else:
        samples.extend(SingleTurnSample(**sample) for sample in data)
    return cls(samples=samples)


class EvaluationResultRow(BaseModel):
    """One serialized evaluation row: the dataset row plus its metric scores."""

    dataset_row: t.Dict
    scores: t.Dict[str, t.Any]
    # Plain dict default: pydantic copies mutable defaults per instance.
    # The previous dataclasses.field(default_factory=dict) is a dataclasses
    # construct, not a pydantic one, and is not guaranteed to be honored on
    # a BaseModel.
    trace: t.Dict[str, t.Any] = {}  # none for now


@dataclass
class EvaluationResult:
Expand All @@ -352,7 +358,7 @@ class EvaluationResult:
"""

scores: t.List[t.Dict[str, t.Any]]
dataset: t.Optional[EvaluationDataset] = None
dataset: EvaluationDataset
binary_columns: t.List[str] = field(default_factory=list)
cost_cb: t.Optional[CostCallbackHandler] = None

Expand Down Expand Up @@ -407,6 +413,18 @@ def to_pandas(self, batch_size: int | None = None, batched: bool = False):
dataset_df = self.dataset.to_pandas()
return pd.concat([dataset_df, scores_df], axis=1)

def serialized(self) -> t.List[EvaluationResultRow]:
    """
    Convert the result to a list of EvaluationResultRow.

    Pairs each score dict with the dataset row at the same index.
    """
    rows = []
    for index, score_row in enumerate(self.scores):
        rows.append(
            EvaluationResultRow(
                dataset_row=self.dataset[index].to_dict(),
                scores=score_row,
            )
        )
    return rows

def total_tokens(self) -> t.Union[t.List[TokenUsage], TokenUsage]:
"""
Compute the total tokens used in the evaluation.
Expand Down
55 changes: 39 additions & 16 deletions src/ragas/testset/synthesizers/testset_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

import typing as t

from ragas.dataset_schema import BaseSample, RagasDataset
from ragas.dataset_schema import (
BaseSample,
EvaluationDataset,
MultiTurnSample,
RagasDataset,
SingleTurnSample,
)

if t.TYPE_CHECKING:
from ragas.dataset_schema import (
EvaluationDataset,
MultiTurnSample,
SingleTurnSample,
)
from ragas.dataset_schema import MultiTurnSample, SingleTurnSample


class TestsetSample(BaseSample):
Expand Down Expand Up @@ -48,13 +50,34 @@ def to_evaluation_dataset(self) -> EvaluationDataset:
samples=[sample.eval_sample for sample in self.samples]
)

def _to_list(self) -> t.List[t.Dict]:
    """Flatten each testset sample into a single dict that merges the
    evaluation-sample fields with the remaining testset metadata
    (e.g. ``synthesizer_name``)."""
    eval_list = self.to_evaluation_dataset()._to_list()
    # Dump each sample minus its eval_sample payload; the eval fields come
    # from the evaluation-dataset serialization above instead.
    testset_list_without_eval_sample = [
        sample.model_dump(exclude={"eval_sample"}) for sample in self.samples
    ]
    # Merge pairwise; testset metadata wins on any key collision because it
    # is unpacked last.
    testset_list = [
        {**eval_sample, **sample}
        for eval_sample, sample in zip(eval_list, testset_list_without_eval_sample)
    ]
    return testset_list
def to_list(self) -> t.List[t.Dict]:
    """
    Converts the Testset to a list of dictionaries.
    """
    rows = []
    for sample in self.samples:
        rows.append(sample.model_dump())
    return rows

@classmethod
def from_list(cls, data: t.List[t.Dict]) -> Testset:
    """
    Converts a list of dictionaries to a Testset.

    Each row must carry an ``eval_sample`` dict and a ``synthesizer_name``.
    A list-valued ``user_input`` marks a multi-turn conversation; anything
    else (including a missing ``user_input``) is a single-turn sample.
    """
    samples = []
    for row in data:
        eval_dict = row["eval_sample"]

        # Fix: the previous branch sent rows WITHOUT a user_input key to
        # MultiTurnSample, contradicting its own "list => multi-turn" rule.
        # Classify strictly on the value's type instead.
        if isinstance(eval_dict.get("user_input"), list):
            eval_sample = MultiTurnSample(**eval_dict)
        else:
            eval_sample = SingleTurnSample(**eval_dict)

        samples.append(
            TestsetSample(
                eval_sample=eval_sample, synthesizer_name=row["synthesizer_name"]
            )
        )
    # Use cls (not the hard-coded class name) so subclasses round-trip too.
    return cls(samples=samples)
64 changes: 42 additions & 22 deletions tests/unit/test_dataset_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,46 +3,66 @@
import pytest
from pydantic import ValidationError

from ragas.dataset_schema import EvaluationDataset, MultiTurnSample, SingleTurnSample


def test_evaluation_dataset():
single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")

dataset = EvaluationDataset(samples=[single_turn_sample, single_turn_sample])
from ragas.dataset_schema import (
EvaluationDataset,
HumanMessage,
MultiTurnSample,
SingleTurnSample,
)

samples = [
SingleTurnSample(user_input="What is X", response="Y"),
MultiTurnSample(
user_input=[HumanMessage(content="What is X")],
reference="Y",
),
]


@pytest.mark.parametrize("eval_sample", samples)
def test_evaluation_dataset(eval_sample):
dataset = EvaluationDataset(samples=[eval_sample, eval_sample])

hf_dataset = dataset.to_hf_dataset()

assert dataset.get_sample_type() == SingleTurnSample
assert dataset.get_sample_type() is type(eval_sample)
assert len(hf_dataset) == 2
assert dataset.features() == ["user_input", "response"]
assert len(dataset) == 2
assert dataset[0] == single_turn_sample
assert dataset[0] == eval_sample


def test_evaluation_dataset_save_load(tmpdir):
single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
@pytest.mark.parametrize("eval_sample", samples)
def test_evaluation_dataset_save_load_csv(tmpdir, eval_sample):
dataset = EvaluationDataset(samples=[eval_sample, eval_sample])

dataset = EvaluationDataset(samples=[single_turn_sample, single_turn_sample])
# save and load to csv
csv_path = tmpdir / "csvfile.csv"
dataset.to_csv(csv_path)

hf_dataset = dataset.to_hf_dataset()

# save and load to csv
dataset.to_csv(tmpdir / "csvfile.csv")
loaded_dataset = EvaluationDataset.from_csv(tmpdir / "csvfile.csv")
assert loaded_dataset == dataset
@pytest.mark.parametrize("eval_sample", samples)
def test_evaluation_dataset_save_load_jsonl(tmpdir, eval_sample):
dataset = EvaluationDataset(samples=[eval_sample, eval_sample])

# save and load to jsonl
dataset.to_jsonl(tmpdir / "jsonlfile.jsonl")
loaded_dataset = EvaluationDataset.from_jsonl(tmpdir / "jsonlfile.jsonl")
jsonl_path = tmpdir / "jsonlfile.jsonl"
dataset.to_jsonl(jsonl_path)
loaded_dataset = EvaluationDataset.from_jsonl(jsonl_path)
assert loaded_dataset == dataset

# load from hf dataset

@pytest.mark.parametrize("eval_sample", samples)
def test_evaluation_dataset_load_from_hf(eval_sample):
    """Round-tripping through a Hugging Face dataset preserves equality."""
    original = EvaluationDataset(samples=[eval_sample, eval_sample])

    round_tripped = EvaluationDataset.from_hf_dataset(original.to_hf_dataset())
    assert round_tripped == original


def test_single_type_evaluation_dataset():
@pytest.mark.parametrize("eval_sample", samples)
def test_single_type_evaluation_dataset(eval_sample):
single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
multi_turn_sample = MultiTurnSample(
user_input=[{"content": "What is X"}],
Expand Down
Loading

0 comments on commit 8e41fe3

Please sign in to comment.