From 3b5f59a794f4de2e0bb8689d0d55d6aa02710c48 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Thu, 2 May 2024 19:05:09 -0400 Subject: [PATCH] Fix GSM template There's no need for a period between the question and the line break, since the question already carries its own punctuation (normally a question mark). The period also doesn't match the lm-evaluation-harness template and can lead to lower accuracy. --- src/sparseml/transformers/finetune/data/gsm8k.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparseml/transformers/finetune/data/gsm8k.py b/src/sparseml/transformers/finetune/data/gsm8k.py index f93b7871b4f..c98e7f0ae03 100644 --- a/src/sparseml/transformers/finetune/data/gsm8k.py +++ b/src/sparseml/transformers/finetune/data/gsm8k.py @@ -28,7 +28,7 @@ class GSM8KDataset(TextGenerationDataset): :param tokenizer: tokenizer to use on dataset """ - GSM_TEMPLATE = "Question: {question}.\nAnswer:" + GSM_TEMPLATE = "Question: {question}\nAnswer:" def __init__(self, data_args, split, tokenizer): data_args = deepcopy(data_args)