forked from MagicStack/py-pgproto
-
Notifications
You must be signed in to change notification settings - Fork 0
/
uuid.pyx
353 lines (289 loc) · 9.71 KB
/
uuid.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
import functools
import uuid
cimport cython
cimport cpython
from libc.stdint cimport uint8_t, int8_t
from libc.string cimport memcpy, memcmp
cdef extern from "Python.h":
int PyUnicode_1BYTE_KIND
const char* PyUnicode_AsUTF8AndSize(
object unicode, Py_ssize_t *size) except NULL
object PyUnicode_FromKindAndData(
int kind, const void *buffer, Py_ssize_t size)
cdef extern from "./tohex.h":
cdef void uuid_to_str(const char *source, char *dest)
cdef void uuid_to_hex(const char *source, char *dest)
# A more efficient UUID type implementation
# (6-7x faster than the starndard uuid.UUID):
#
# -= Benchmark results (less is better): =-
#
# std_UUID(bytes): 1.2368
# c_UUID(bytes): * 0.1645 (7.52x)
# object(): 0.1483
#
# std_UUID(str): 1.8038
# c_UUID(str): * 0.2313 (7.80x)
#
# str(std_UUID()): 1.4625
# str(c_UUID()): * 0.2681 (5.46x)
# str(object()): 0.5975
#
# std_UUID().bytes: 0.3508
# c_UUID().bytes: * 0.1068 (3.28x)
#
# std_UUID().int: 0.0871
# c_UUID().int: * 0.0856
#
# std_UUID().hex: 0.4871
# c_UUID().hex: * 0.1405
#
# hash(std_UUID()): 0.3635
# hash(c_UUID()): * 0.1564 (2.32x)
#
# dct[std_UUID()]: 0.3319
# dct[c_UUID()]: * 0.1570 (2.11x)
#
# std_UUID() ==: 0.3478
# c_UUID() ==: * 0.0915 (3.80x)
cdef char _hextable[256]
_hextable[:] = [
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1, 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,-1,10,11,12,13,14,15,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
]
cdef std_UUID = uuid.UUID
cdef pg_uuid_bytes_from_str(str u, char *out):
cdef:
const char *orig_buf
Py_ssize_t size
unsigned char ch
uint8_t acc, part, acc_set
int i, j
orig_buf = PyUnicode_AsUTF8AndSize(u, &size)
if size > 36 or size < 32:
raise ValueError(
f'invalid UUID {u!r}: '
f'length must be between 32..36 characters, got {size}')
acc_set = 0
j = 0
for i in range(size):
ch = <unsigned char>orig_buf[i]
if ch == <unsigned char>b'-':
continue
part = <uint8_t><int8_t>_hextable[ch]
if part == <uint8_t>-1:
if ch >= 0x20 and ch <= 0x7e:
raise ValueError(
f'invalid UUID {u!r}: unexpected character {chr(ch)!r}')
else:
raise ValueError('invalid UUID {u!r}: unexpected character')
if acc_set:
acc |= part
out[j] = <char>acc
acc_set = 0
j += 1
else:
acc = <uint8_t>(part << 4)
acc_set = 1
if j > 16 or (j == 16 and acc_set):
raise ValueError(
f'invalid UUID {u!r}: decodes to more than 16 bytes')
if j != 16:
raise ValueError(
f'invalid UUID {u!r}: decodes to less than 16 bytes')
cdef class __UUIDReplaceMe:
pass
cdef pg_uuid_from_buf(const char *buf):
cdef:
UUID u = UUID.__new__(UUID)
memcpy(u._data, buf, 16)
return u
@cython.final
@cython.no_gc_clear
cdef class UUID(__UUIDReplaceMe):
cdef:
char _data[16]
object _int
object _hash
object __weakref__
def __cinit__(self):
self._int = None
self._hash = None
def __init__(self, inp):
cdef:
char *buf
Py_ssize_t size
if cpython.PyBytes_Check(inp):
cpython.PyBytes_AsStringAndSize(inp, &buf, &size)
if size != 16:
raise ValueError(f'16 bytes were expected, got {size}')
memcpy(self._data, buf, 16)
elif cpython.PyUnicode_Check(inp):
pg_uuid_bytes_from_str(inp, self._data)
else:
raise TypeError(f'a bytes or str object expected, got {inp!r}')
@property
def bytes(self):
return cpython.PyBytes_FromStringAndSize(self._data, 16)
@property
def int(self):
if self._int is None:
# The cache is important because `self.int` can be
# used multiple times by __hash__ etc.
self._int = int.from_bytes(self.bytes, 'big')
return self._int
@property
def is_safe(self):
return uuid.SafeUUID.unknown
def __str__(self):
cdef char out[36]
uuid_to_str(self._data, out)
return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, <void*>out, 36)
@property
def hex(self):
cdef char out[32]
uuid_to_hex(self._data, out)
return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, <void*>out, 32)
def __repr__(self):
return f"UUID('{self}')"
def __reduce__(self):
return (type(self), (self.bytes,))
def __eq__(self, other):
if type(other) is UUID:
return memcmp(self._data, (<UUID>other)._data, 16) == 0
if isinstance(other, std_UUID):
return self.int == other.int
return NotImplemented
def __ne__(self, other):
if type(other) is UUID:
return memcmp(self._data, (<UUID>other)._data, 16) != 0
if isinstance(other, std_UUID):
return self.int != other.int
return NotImplemented
def __lt__(self, other):
if type(other) is UUID:
return memcmp(self._data, (<UUID>other)._data, 16) < 0
if isinstance(other, std_UUID):
return self.int < other.int
return NotImplemented
def __gt__(self, other):
if type(other) is UUID:
return memcmp(self._data, (<UUID>other)._data, 16) > 0
if isinstance(other, std_UUID):
return self.int > other.int
return NotImplemented
def __le__(self, other):
if type(other) is UUID:
return memcmp(self._data, (<UUID>other)._data, 16) <= 0
if isinstance(other, std_UUID):
return self.int <= other.int
return NotImplemented
def __ge__(self, other):
if type(other) is UUID:
return memcmp(self._data, (<UUID>other)._data, 16) >= 0
if isinstance(other, std_UUID):
return self.int >= other.int
return NotImplemented
def __hash__(self):
# In EdgeDB every schema object has a uuid and there are
# huge hash-maps of them. We want UUID.__hash__ to be
# as fast as possible.
if self._hash is not None:
return self._hash
self._hash = hash(self.int)
return self._hash
def __int__(self):
return self.int
@property
def bytes_le(self):
bytes = self.bytes
return (bytes[4-1::-1] + bytes[6-1:4-1:-1] + bytes[8-1:6-1:-1] +
bytes[8:])
@property
def fields(self):
return (self.time_low, self.time_mid, self.time_hi_version,
self.clock_seq_hi_variant, self.clock_seq_low, self.node)
@property
def time_low(self):
return self.int >> 96
@property
def time_mid(self):
return (self.int >> 80) & 0xffff
@property
def time_hi_version(self):
return (self.int >> 64) & 0xffff
@property
def clock_seq_hi_variant(self):
return (self.int >> 56) & 0xff
@property
def clock_seq_low(self):
return (self.int >> 48) & 0xff
@property
def time(self):
return (((self.time_hi_version & 0x0fff) << 48) |
(self.time_mid << 32) | self.time_low)
@property
def clock_seq(self):
return (((self.clock_seq_hi_variant & 0x3f) << 8) |
self.clock_seq_low)
@property
def node(self):
return self.int & 0xffffffffffff
@property
def urn(self):
return 'urn:uuid:' + str(self)
@property
def variant(self):
if not self.int & (0x8000 << 48):
return uuid.RESERVED_NCS
elif not self.int & (0x4000 << 48):
return uuid.RFC_4122
elif not self.int & (0x2000 << 48):
return uuid.RESERVED_MICROSOFT
else:
return uuid.RESERVED_FUTURE
@property
def version(self):
# The version bits are only meaningful for RFC 4122 UUIDs.
if self.variant == uuid.RFC_4122:
return int((self.int >> 76) & 0xf)
# <hack>
# In order for `isinstance(pgproto.UUID, uuid.UUID)` to work,
# patch __bases__ and __mro__ by injecting `uuid.UUID`.
#
# We apply brute-force here because the following pattern stopped
# working with Python 3.8:
#
# cdef class OurUUID:
# ...
#
# class UUID(OurUUID, uuid.UUID):
# ...
#
# With Python 3.8 it now produces
#
# "TypeError: multiple bases have instance lay-out conflict"
#
# error. Maybe it's possible to fix this some other way, but
# the best solution possible would be to just contribute our
# faster UUID to the standard library and not have this problem
# at all. For now this hack is pretty safe and should be
# compatible with future Pythons for long enough.
#
assert UUID.__bases__[0] is __UUIDReplaceMe
assert UUID.__mro__[1] is __UUIDReplaceMe
cpython.Py_INCREF(std_UUID)
cpython.PyTuple_SET_ITEM(UUID.__bases__, 0, std_UUID)
cpython.Py_INCREF(std_UUID)
cpython.PyTuple_SET_ITEM(UUID.__mro__, 1, std_UUID)
# </hack>
cdef pg_UUID = UUID