-
Notifications
You must be signed in to change notification settings - Fork 9
/
encoding.py
169 lines (146 loc) · 4.48 KB
/
encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import os.path
import random
import struct
import unicodedata
"""
This is generic code that should work untouched across all applications.
This code implements generic encoding functions.
Code should work on both Python 2.7 as well as 3.4.
Requires PyQt5.
(Old version supported PyQt4.)
"""
def unpack(fmt, s):
    """
    Decode the packed data s according to the struct format fmt
    and return only the first decoded value.
    """
    values = struct.unpack(fmt, s)
    return values[0]
def pack(fmt, s):
    """
    Encode the single value s according to the struct format fmt
    and return the packed result.
    """
    packed = struct.pack(fmt, s)
    return packed
def normalize_nfc(txt):
    """
    Bridge between Py2 and Py3: return txt as NFC-normalized unicode text.

    Accepts either the text type or the byte-string type of the running
    interpreter:
    Py2: unicode (text), str (alias of bytes)
    Py3: str (text), bytes
    Byte strings are decoded as UTF-8 before normalizing.
    Any other input type yields None.
    """
    # Py2-vs-Py3: pick the (text, byte-string) type pair of this interpreter.
    # The name `unicode` is only evaluated when running on Py2.
    if sys.version_info[0] < 3:
        text_type, raw_type = unicode, str  # noqa: F821
    else:
        text_type, raw_type = str, bytes

    if isinstance(txt, raw_type):
        txt = txt.decode('utf-8')
    elif not isinstance(txt, text_type):
        return None
    return unicodedata.normalize('NFC', txt)
def tobytes(txt):
    """
    Bridge between Py2 and Py3: return txt as a byte string.

    Accepts either the text type or the byte-string type of the running
    interpreter:
    Py2: unicode (text), str (alias of bytes)
    Py3: str (text), bytes
    Text is encoded as UTF-8; byte strings are returned unchanged.
    Any other input type yields None.
    """
    # Py2-vs-Py3: pick the (text, byte-string) type pair of this interpreter.
    # The name `unicode` is only evaluated when running on Py2.
    if sys.version_info[0] < 3:
        text_type, raw_type = unicode, str  # noqa: F821
    else:
        text_type, raw_type = str, bytes

    if isinstance(txt, raw_type):
        return txt
    if isinstance(txt, text_type):
        return txt.encode('utf-8')
    return None
class Padding(object):
    """
    PKCS#7 padding for a block cipher.
    The block size in bytes is supplied when the object is created.
    """

    def __init__(self, blocksize):
        self.blocksize = blocksize

    def pad(self, s):
        """
        Append n copies of the byte value n to s, where n is the number
        of bytes needed to reach the next multiple of the block size.
        Input that is already a multiple gets one full block appended.

        Python 2: s is a string (char list), a string is returned.
        Python 3: s is bytes, bytes are returned.
        """
        fill = self.blocksize - len(s) % self.blocksize
        if sys.version_info[0] > 2:  # Py2-vs-Py3:
            filler = bytes([fill])
        else:
            filler = chr(fill)
        return s + fill * filler

    def unpad(self, s):
        """
        Strip the padding added by pad(): the value of the last element
        of s is the number of trailing elements that get removed.
        """
        last = s[-1]
        if sys.version_info[0] <= 2:  # Py2-vs-Py3: Py2 yields a 1-char string
            last = ord(last)
        return s[0:-last]
class PaddingHomegrown(object):
    """
    Pad filenames that are already base64 encoded.

    Base64 text always has a length that is a multiple of 4, padded with =
    Examples: YQ==, YWI=, YWJj, YWJjZA==, YWJjZGU=, YWJjZGVm, ...
    Homegrown padding removes the = pad, then pads to a multiple of 16.
    If the length is already a multiple of 16, 16 chars are added,
    so padding is added in all cases.
    The last letter always tells how many chars were added (A=1, ..., P=16),
    so it is in the alphabet A..P.
    The letters added before the last letter are taken from the a..zA..Z
    alphabet and are derived deterministically from the input.
    """

    def __init__(self):
        self.homegrownblocksize = 16
        self.base64blocksize = 4

    def pad(self, s):
        """
        Input must be a string in valid base64 format.
        Returns a string whose length is a multiple of 16.

        The mapping is deterministic: the same input always gives the same
        output. An earlier version used random filler letters, which was
        inconvenient, as repeated encryptions of the same file produced
        differently named files that the user had to clean up.
        No random number generator is used or reseeded here.
        """
        # remove the base64 buffer char =
        t = s.replace(u'=', u'')
        initlen = len(t)
        BS = self.homegrownblocksize
        bufLen = BS - initlen % BS
        # Start value of the filler sequence: length * last char * first char.
        # For an empty string the length is 0, hence the start value is 0.
        if t:
            r = initlen * ord(t[-1]) * ord(t[0])
        else:
            r = 0
        filler = []
        for _ in range(bufLen - 1):
            # deterministic mapping of plaintext filename to obfuscated name
            r = ((r + 17) * 15485863) % 52
            if r < 26:
                filler.append(chr(r + ord('a')))
            else:
                filler.append(chr(r + ord('A') - 26))
        # final letter encodes the number of chars added (A=1, ..., P=16)
        filler.append(chr(bufLen + ord('A') - 1))
        return t + ''.join(filler)

    def unpad(self, s):
        """
        Input must be a string produced by pad().
        Returns the string in valid base64 format, = pad restored.
        """
        # the last letter tells how many chars to strip (A=1, ..., P=16)
        padlen = ord(s[-1]) - ord('A') + 1
        t = s[0:-padlen]
        BS = self.base64blocksize
        return t + "=" * ((BS - len(t) % BS) % BS)
def escape(str):
    r"""
    Escape each backslash in a string by doubling it: \ becomes \\.

    Returns an empty unicode string when the input is None.
    The parameter name shadows the builtin `str`; it is kept unchanged
    so that existing callers passing it by keyword continue to work.
    """
    if str is None:
        return u''
    return str.replace('\\', '\\\\')