-
Notifications
You must be signed in to change notification settings - Fork 0
/
random_sentence_txt.py
58 lines (45 loc) · 1.72 KB
/
random_sentence_txt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import random
import re
def is_valid_sentence(sentence):
    """Return True when *sentence* passes the basic shape checks.

    A sentence is rejected when it starts with ``,``, ``-`` or ``:``, or
    when it contains a period followed by whitespace anywhere in the text
    (presumably a sign that the text holds more than one sentence).

    Args:
        sentence: The text to check; callers in this file pass an
            already-stripped line.

    Returns:
        ``False`` if any rejection rule matches, ``True`` otherwise. An
        empty string matches no rule and is therefore accepted.
    """
    # str.startswith accepts a tuple, so one call covers every bad prefix.
    if sentence.startswith((',', '-', ':')):
        return False
    # A trailing period with nothing after it does not match this pattern.
    return re.search(r'\.\s+', sentence) is None
def get_random_sentence_from_file(file_path, *, min_words=5, max_words=7):
    """Return one random qualifying line from a UTF-8 text file, or None.

    Every line is stripped of surrounding whitespace. A line qualifies when
    its whitespace-separated word count ``n`` satisfies
    ``min_words <= n < max_words`` and ``is_valid_sentence`` accepts it.

    Args:
        file_path: Path of the text file to read (opened as UTF-8).
        min_words: Inclusive lower bound on the word count. Defaults to 5.
        max_words: Exclusive upper bound on the word count. Defaults to 7.

    Returns:
        A qualifying line picked with ``random.choice``, or ``None`` when
        no line in the file qualifies.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate the file lazily; only the lines that qualify are kept.
        # The length test runs first so is_valid_sentence is only called
        # on lines that already have an acceptable word count.
        valid_sentences = [
            stripped
            for stripped in (line.strip() for line in file)
            if min_words <= len(stripped.split()) < max_words
            and is_valid_sentence(stripped)
        ]
    if not valid_sentences:
        return None
    return random.choice(valid_sentences)
def get_random_sentence_from_directory(directory_path):
    """Return a random qualifying sentence from the .txt files in a tree.

    The directory is walked recursively with ``os.walk`` and every file
    whose name ends in ``.txt`` is collected. The files are then tried in
    random order until one of them yields a sentence, so a single file
    without qualifying lines no longer makes the whole call return None
    while other files still contain usable sentences.

    Args:
        directory_path: Root directory to search, including subfolders.

    Returns:
        A sentence returned by ``get_random_sentence_from_file`` for one of
        the files, or ``None`` when there are no ``.txt`` files or none of
        them yields a sentence.
    """
    txt_files = []
    for root_dir, _dirs, files in os.walk(directory_path):
        txt_files.extend(
            os.path.join(root_dir, name)
            for name in files
            if name.endswith('.txt')
        )

    # Shuffle once, then fall through to the next file whenever the current
    # one has nothing to offer.
    random.shuffle(txt_files)
    for txt_file in txt_files:
        sentence = get_random_sentence_from_file(txt_file)
        if sentence is not None:
            return sentence
    return None
if __name__ == "__main__":
    # Script entry point: sample one sentence from the data directory and
    # report either the sentence or the fact that none was found.
    parent_directory_path = './generated_data'
    random_sentence = get_random_sentence_from_directory(parent_directory_path)
    if not random_sentence:
        print("No valid sentences found in the selected files.")
    else:
        print("Random sentence:", random_sentence)