-
Notifications
You must be signed in to change notification settings - Fork 37
/
stringproblems.py
215 lines (178 loc) · 7 KB
/
stringproblems.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# The string module has handy data and methods for text processing.
from string import ascii_letters as letters
from string import ascii_uppercase as au
from string import ascii_lowercase as al
from random import Random
# Convert words in the text to title case. (Not the same as uppercase.)
def title_words(text):
    """Return a copy of text in which every word starts with a title-cased letter.

    A word starts at any non-whitespace character that is either the first
    character of the text or directly preceded by a whitespace character.
    Every other character is copied through unchanged, so letters inside a
    word keep whatever case they already had.
    """
    pieces = []
    after_space = True  # the start of the text counts as a word boundary
    for ch in text:
        is_space = ch.isspace()
        pieces.append(ch.title() if after_space and not is_space else ch)
        after_space = is_space
    # Join once at the end instead of growing a string with += per character.
    return "".join(pieces)
# Eliminate the consecutive duplicate characters from a string.
def eliminate_duplicates(text):
    """Return text with every run of equal adjacent characters collapsed to one.

    Only consecutive repeats are removed; a character that reappears later,
    after some different character, is kept.
    """
    kept = []
    prev = None  # nothing seen yet, so the first character is always kept
    for ch in text:
        if ch != prev:
            kept.append(ch)
            prev = ch
    # Join once at the end instead of growing a string with += per character.
    return "".join(kept)
# Given a text string, create and return another string that contains
# each character only once, in the order that they first occur in the text.
def unique_chars(text):
    """Return the distinct characters of text, ordered by first occurrence."""
    # dict keys are unique and remember insertion order, so building a dict
    # keyed by the characters removes repeats while keeping the first
    # position of each one.
    return "".join(dict.fromkeys(text))
# The classic way to test whether two strings are anagrams: they are
# if and only if sorting the characters of both gives the same result.
def are_anagrams(word1, word2):
    """Return True if word1 and word2 consist of exactly the same characters.

    The comparison is case-sensitive and counts repeated characters.
    """
    # Cheap screening first: words of different lengths can never be
    # anagrams, so the more expensive sorting is skipped for them.
    if len(word1) != len(word2):
        return False
    return sorted(word1) == sorted(word2)
# Obfuscate the given text using the ROT-13 encoding. Easy to do by
# building a character translation table and applying it in one pass.
# https://en.wikipedia.org/wiki/ROT13
def rot13(text):
    """Return text with every ASCII letter rotated 13 places in the alphabet.

    Uppercase letters map to uppercase and lowercase to lowercase. All
    other characters are left as they are. Applying the function twice
    gives back the original text.
    """
    # Each alphabet is paired with itself shifted by 13 positions.
    table = str.maketrans(au + al, au[13:] + au[:13] + al[13:] + al[:13])
    return text.translate(table)
# Translate the entire sentence given a function word_func that converts
# one word. Since whitespace and punctuation characters must be kept
# as they were in the original sentence, we can't just use "split" to
# separate the sentence into words, since this would lose track of
# what the whitespace and punctuation were in the original text.
# Instead, we have to break the sentence into words the hard way.
def translate_words(sentence, word_func):
    """Return sentence with every word replaced by word_func(word).

    A word is a maximal run of ASCII letters. Every other character is
    copied to the result unchanged and in its original position.

    Args:
        sentence: The text to translate.
        word_func: Function called with one word (a non-empty string of
            ASCII letters) that returns the replacement string.

    Returns:
        The translated sentence as a new string.
    """
    pieces = []  # finished parts of the result, joined once at the end
    word = []    # letters of the word currently being collected
    for ch in sentence:
        if ch in letters:
            word.append(ch)
            continue
        # A non-letter ends the word in progress, if there is one.
        if word:
            pieces.append(word_func("".join(word)))
            word = []
        pieces.append(ch)
    # The sentence may end in the middle of a word.
    if word:
        pieces.append(word_func("".join(word)))
    return "".join(pieces)
# Convert the given sentence to pig latin. Note how the function to
# convert one word is defined inside this function, to be passed to
# the previous translate_words function as its second argument.
def pig_latin(sentence):
    """Return sentence with every word converted to pig latin.

    A word that starts with a vowel gets "way" appended. Otherwise the
    leading consonants are moved to the end of the word, lowercased, and
    followed by "ay". If the original word started with an uppercase
    letter, so does the converted word. A word that contains no vowel
    at all is kept whole and gets "ay" appended.
    """
    def trans(word):
        cap = word[0].isupper()
        # Skip all the consonants at the start of the word.
        # NOTE(review): uppercase "Y" counts as a vowel here but lowercase
        # "y" does not. The set is kept as it was; confirm which is intended.
        pos = 0
        while pos < len(word) and word[pos] not in "aeiouAEIOUY":
            pos += 1
        if pos == 0:  # The word starts with a vowel.
            return word + "way"
        if pos == len(word):
            # No vowel found, so there is no word[pos] to move to the
            # front. Indexing it used to raise IndexError for capitalised
            # words such as "My" or "Mr".
            body = word.lower()
            if cap:
                body = body[0].upper() + body[1:]
            return body + "ay"
        head = word[pos:]
        if cap:
            # Carry the capitalisation over to the new first letter.
            head = head[0].upper() + head[1:]
        return head + word[:pos].lower() + "ay"
    return translate_words(sentence, trans)
# Convert the given sentence to ubbi dubbi. Same word-by-word approach
# as the pig latin conversion.
def ubbi_dubbi(sentence):
    """Return sentence with "ub" inserted in front of every vowel letter.

    The letters a, e, i, o, u and y count as vowels. For an uppercase
    vowel the inserted syllable is capitalised ("Ub") and the vowel
    itself is lowercased.
    """
    def ubbify(letter):
        # Everything that is not a vowel passes through untouched.
        if letter not in 'aeiouyAEIOUY':
            return letter
        if letter.isupper():
            return "Ub" + letter.lower()
        return "ub" + letter

    def encode_word(word):
        return "".join(map(ubbify, word))

    return translate_words(sentence, encode_word)
# The trickiest conversion gives us a choice of how to convert each
# letter. We maintain a dictionary that maps each consonant to the
# list of its possible replacements.
def tutnese(sentence):
    """Return sentence converted to Tutnese, one word at a time.

    Each consonant is replaced by one of its syllables from the table
    below, chosen by a random number generator. A pair of equal adjacent
    letters (ignoring case) becomes "squat" + letter for vowels and
    "squa" + letter for consonants. Other letters are copied as they
    are. A replacement is capitalised when the letter it replaces is
    uppercase.
    """
    syllables = {"b": ["bub"],
                 "c": ["cash", "coch"],
                 "d": ["dud"],
                 "f": ["fuf", "fud"],
                 "g": ["gug"],
                 "h": ["hash", "hutch"],
                 "j": ["jay", "jug"],
                 "k": ["kuck"],
                 "l": ["lul"],
                 "m": ["mum"],
                 "n": ["nun"],
                 "p": ["pup", "pub"],
                 "q": ["quack", "queue"],
                 "r": ["rug", "rur"],
                 "s": ["sus"],
                 "t": ["tut"],
                 "v": ["vuv"],
                 "w": ["wack", "wash"],
                 "x": ["ex", "xux"],
                 "y": ["yub", "yuck"],
                 "z": ["zub", "zug"]}

    def match_case(source, piece):
        # Capitalise the piece when the letter it stands for is uppercase.
        return piece[0].upper() + piece[1:] if source.isupper() else piece

    def encode_word(word, rng=None):
        # Without an explicit generator every word uses a fresh one with
        # the same fixed seed, which keeps the output reproducible.
        rng = rng or Random(12345)
        pieces = []
        i, size = 0, len(word)
        while i < size:
            original = word[i]
            low = original.lower()
            if i + 1 < size and low == word[i + 1].lower():
                # Doubled letter: emit the marker once and step over both.
                marker = "squat" if low in "aeiouy" else "squa"
                pieces.append(match_case(original, marker) + low)
                i += 2
                continue
            if low in syllables:
                pieces.append(match_case(original, rng.choice(syllables[low])))
            else:
                pieces.append(original)
            i += 1
        return "".join(pieces)

    return translate_words(sentence, encode_word)
def __demo():
    """Print a short demonstration of every function in this module."""
    # Basic character-level operations on one sample sentence.
    sample = "hello there, how are you doing?"
    print(f"Original string is: {sample}")
    print(f"Unique chars of {sample} are: {unique_chars(sample)}.")
    print(f"Removing duplicates gives: {eliminate_duplicates(sample)}")
    print(f"Converted to titlecase gives: {title_words(sample)}\n")

    # ROT-13 applied twice gives back the original text.
    message = "Hello world! How are you?"
    print(f"Original string is : {message!r}")
    message = rot13(message)
    print(f"After ROT-13, it is: {message!r}")
    message = rot13(message)
    print(f"Another ROT-13, it is: {message!r}")

    print("\nNext, some conversions to secret languages.")
    examples = (
        'What does this become? We are eager to see!',
        'Another one, just for fun.',
        'Do you know the famous Variety headline "Stix nix hix pix"?',
    )
    for sentence in examples:
        print(f"\nOriginal: {sentence}")
        print(f"Pig latin: {pig_latin(sentence)}")
        print(f"Ubbi dubbi: {ubbi_dubbi(sentence)}")
        print(f"Tutnese: {tutnese(sentence)}")

    print("\nFinally, let's check out the anagram tester.")
    print(f"Are 'tater' and 'rater' anagrams? {are_anagrams('tater', 'rater')}")
    print(f"Are 'search' and 'chaser' anagrams? {are_anagrams('search', 'chaser')}")
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __demo()