-
Notifications
You must be signed in to change notification settings - Fork 1
/
kindleanki.py
235 lines (158 loc) · 5.69 KB
/
kindleanki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/usr/bin/env python
# Kindle to Anki
# Puts Kindle vocabulary words into Anki with their definitions.
# TODO:
# - Allow user to set number of definitions obtained
# - Ensure user enters a valid path
# - Allow user to import cards to multiple decks (no duplicates is global)
# - Clean up main()
import csv, sys, os.path, getopt, re, json, urllib2
from bs4 import BeautifulSoup
from anki.importing import TextImporter
from anki import Collection
def set_path(path_type):
    """Prompt until the user enters a path with the expected extension.

    path_type -- 'clippings' (a .txt file is expected) or 'collection'
                 (a .anki2 file is expected). Any other value raises
                 KeyError.

    Returns the path exactly as the user typed it. Only the extension is
    checked; whether the file actually exists is not verified here.
    """
    path_extensions = {'clippings': 'txt', 'collection': 'anki2'}
    wanted = '.' + path_extensions[path_type]
    prompt = "Enter the path to your {0} file: ".format(path_type)
    while True:
        path = raw_input(prompt)
        # splitext() only reports a genuine extension, so a bare name such
        # as "txt" (no dot at all) is not mistaken for a .txt file.
        if os.path.splitext(path)[1] == wanted:
            return path
def get_path(path_type):
    """Return the remembered path for path_type, asking the user if needed.

    path_type -- 'clippings' or 'collection'.

    The path is cached in data/<path_type>_path.txt. When that file is
    missing, unreadable or empty, the user is prompted through set_path()
    and the answer is written back to the cache file.

    Returns the path as a unicode string with '~' expanded.
    """
    data = 'data/{0}_path.txt'.format(path_type)
    try:
        with open(data, 'r') as stored:
            # Drop the line terminator so a hand-edited cache file does not
            # yield a path that ends in a newline.
            first_line = stored.readline().rstrip('\r\n')
            path = unicode(os.path.expanduser(first_line))
    except (IOError, UnicodeError):
        # No usable cached path; fall through to prompting.
        path = u''
    if not path:
        # TODO: also ensure the user enters a path that actually exists.
        path = unicode(os.path.expanduser(set_path(path_type)))
        data_dir = os.path.dirname(data)
        if not os.path.isdir(data_dir):
            os.makedirs(data_dir)
        with open(data, 'w') as stored:
            stored.write(path)
    return path
def set_deck_name():
    """Ask the user which deck the new cards should be imported into.

    Returns the deck name exactly as typed.
    """
    return raw_input("Name of deck to which your cards will "
                     "be imported: ")
def save_dictionary(dictionary):
    """Write the word/definition pairs to data/dictionary.txt.

    dictionary -- mapping of word to its definition text.

    Each entry becomes one "word<TAB>definition" line. The file is written
    as UTF-8 so that definitions containing non-ASCII characters do not
    raise UnicodeEncodeError, and the data directory is created when it
    does not exist yet.
    """
    import io  # local import: only needed here, for an explicit encoding

    target = 'data/dictionary.txt'
    target_dir = os.path.dirname(target)
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    with io.open(target, 'w', encoding='utf-8') as out:
        for word, definition in dictionary.items():
            out.write(u"{0}\t{1}\n".format(word, definition))
    print("Saved words and definitions to dictionary file.")
def get_dictionary(clippings_path=''):
    """Look up every single-word clipping found in the clippings file.

    clippings_path -- path to the Kindle clippings text file.

    A line counts as a vocabulary word when it consists of one run of word
    characters/hyphens, optionally followed by a period. Words are
    lower-cased, and each distinct word is looked up only once even if it
    was clipped several times, which avoids repeated web requests.

    Returns a dict mapping each word to its definition, containing only
    the words for which get_definition() returned a truthy result.
    """
    # Compiled once; equivalent to re.match(pattern, line, re.MULTILINE).
    word_line = re.compile(r'(^|\r\n)([\w-]+)[.]?($|\r\n)', re.MULTILINE)
    word_definition_pairs = dict()
    looked_up = set()
    # The with-block closes the file even if a lookup raises.
    with open(clippings_path, 'r') as clippings:
        print("Found Kindle clippings file.")
        for line in clippings:
            matched = word_line.match(line)
            if not matched:
                continue
            word = matched.group(2).lower()
            if word in looked_up:
                continue
            looked_up.add(word)
            print("\n\n")
            print(word)
            definition = get_definition(word)
            if definition:
                word_definition_pairs[word] = definition
    print("Got word definitions from web.")
    return word_definition_pairs
def get_definition(word):
    """Fetch the dictionary.reference.com entry for word as an HTML snippet.

    word -- the word to look up; it is inserted into the URL as-is.

    Returns a unicode string of HTML (a bold part-of-speech heading
    followed by one <li> item per definition), or False when the request
    fails or the page contains no recognisable entry.
    """
    url = 'http://dictionary.reference.com/browse/{0}?s=t'.format(word)
    try:
        response = urllib2.urlopen(url)
    except urllib2.URLError as error:
        # HTTPError (e.g. a 404 for an unknown word) is a URLError subclass.
        # One failed lookup should not abort the whole import.
        print("Could not look up {0}: {1}".format(word, error))
        return False
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)
    pieces = []
    for entry in soup.find_all('div', attrs={'class': 'pbk'}):
        part_of_speech = entry.find_all('span', attrs={'class': 'pg'})
        if part_of_speech:
            pieces.append(u'</br><b>{0}'.format(part_of_speech[0].string))
            pieces.append(u'</b></br>')
        # NOTE(review): definitions are collected whether or not a
        # part-of-speech heading was found; confirm this matches the
        # intended nesting of the original loop.
        for definition in entry.find_all('div',
                                         attrs={'class': 'dndata'},
                                         text=True):
            pieces.append(u'<li>{0}</li>'.format(definition.string))
    dict_entry = u''.join(pieces)
    if dict_entry:
        print(dict_entry)
        return dict_entry
    return False
def add_dictionary_to_anki(collection_path, deck_name = 'Import'):
    """Import data/dictionary.txt into an Anki collection.

    collection_path -- path to the Anki collection file.
    deck_name -- name of the deck the imported cards are assigned to.

    Makes the 'Basic' note type current, points it at the deck id obtained
    for deck_name, then runs a TextImporter (with HTML allowed) over the
    dictionary file and closes the collection.

    Returns 0.
    """
    # See:
    # http://ankisrs.net/docs/addons.html#the-collection
    dictionary_file = unicode(os.path.abspath('data/dictionary.txt'))
    collection = Collection(collection_path)

    # Select the basic note type and aim it at the target deck.
    basic_model = collection.models.byName('Basic')
    collection.models.setCurrent(basic_model)
    basic_model['did'] = collection.decks.id(deck_name)
    collection.models.save(basic_model)

    # Run the text import; definitions contain HTML markup.
    importer = TextImporter(collection, dictionary_file)
    importer.allowHTML = True
    importer.initMapping()
    importer.run()

    collection.close()
    print("Imported dictionary into collection.")
    return 0
def usage():
    """Print the command-line help text to standard output."""
    help_lines = [
        "",
        "Usage: python kindleanki.py [OPTION...]",
        "-d, --deck=DECK_NAME\tspecify name of deck to which cards will be imported",
        "-k, --kindle=PATH\tspecify location of Kindle clippings file (.txt)",
        "-a, --anki=PATH\t\tspecify location of Anki collection file (.anki2)",
        "",
    ]
    # Joining with newlines reproduces the leading and trailing blank lines.
    print("\n".join(help_lines))
def _remember_path(path_type, candidate):
    """Save a command-line path to the file that get_path() reads back."""
    if not candidate:
        return
    data = 'data/{0}_path.txt'.format(path_type)
    path = unicode(os.path.expanduser(candidate))
    data_dir = os.path.dirname(data)
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)
    with open(data, 'w') as stored:
        stored.write(path)


def main(argv):
    """Run the Kindle-to-Anki import from command-line arguments.

    argv -- the argument list without the program name. Recognised options
            are -d/--deck, -k/--kindle and -a/--anki, each taking a value.

    Paths given on the command line are stored under data/ so that later
    runs reuse them; anything not supplied is obtained through get_path()
    and set_deck_name(). An unrecognised option prints the usage text and
    exits with status 2.
    """
    try:
        opts, args = getopt.getopt(argv,
                                   "d:k:a:",
                                   ["deck=", "kindle=", "anki="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    candidate_deck_name = ''
    candidate_clippings_path = ''
    candidate_collection_path = ''
    for opt, arg in opts:
        if opt in ("-d", "--deck"):
            candidate_deck_name = arg
        elif opt in ("-k", "--kindle"):
            candidate_clippings_path = arg
        elif opt in ("-a", "--anki"):
            candidate_collection_path = arg

    # Store explicit paths first so get_path() picks them up below.
    _remember_path('clippings', candidate_clippings_path)
    _remember_path('collection', candidate_collection_path)

    clippings_path = get_path('clippings')
    collection_path = get_path('collection')
    # Only prompt for a deck when none was given with -d/--deck.
    deck_name = candidate_deck_name or set_deck_name()

    dictionary = get_dictionary(clippings_path)
    save_dictionary(dictionary)
    add_dictionary_to_anki(collection_path, deck_name)


if __name__ == "__main__":
    main(sys.argv[1:])