From 4ea6bffcda3a90d0ef2c46372e69115c3d3877e4 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 5 Dec 2024 15:17:28 -0800 Subject: [PATCH] push --- torchtitan/datasets/hf_datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchtitan/datasets/hf_datasets.py b/torchtitan/datasets/hf_datasets.py index 201f0b48..745cf40f 100644 --- a/torchtitan/datasets/hf_datasets.py +++ b/torchtitan/datasets/hf_datasets.py @@ -9,9 +9,6 @@ from typing import Any, Callable, Dict, List, Optional import torch - -from datasets import Dataset, load_dataset -from datasets.distributed import split_dataset_by_node from torch.distributed.checkpoint.stateful import Stateful from torch.utils.data import IterableDataset from torchdata.stateful_dataloader import StatefulDataLoader @@ -19,6 +16,9 @@ from torchtitan.datasets.tokenizer import Tokenizer from torchtitan.logging import logger +from datasets import Dataset, load_dataset +from datasets.distributed import split_dataset_by_node + def _load_c4_dataset(dataset_path: str): """Load C4 dataset with default configuration."""