Skip to content

Commit

Permalink
Rename 'unicode' and 'basestring' to 'str' internally.
Browse files Browse the repository at this point in the history
Remove legacy code that dealt with the mixed Py2 string types.
Keep recognising the old names ('unicode', 'basestring') in (legacy) code.

Closes cython#1370
Closes cython#5854
  • Loading branch information
scoder committed Aug 30, 2024
1 parent f83932e commit 825da36
Show file tree
Hide file tree
Showing 36 changed files with 313 additions and 692 deletions.
4 changes: 2 additions & 2 deletions Cython/Build/Tests/TestCythonizeArgsParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def test_directives_types(self):
options, args = self.parse_args(['-X', cmd])
self.assertFalse(args)
self.assertTrue(self.are_default(options, ['directives']), msg = "Error for option: "+cmd)
if value == 'str':
value = 'unicode'
if value == 'unicode':
value = 'str'
self.assertEqual(options.directives[key], value, msg = "Error for option: "+cmd)

def test_directives_wrong(self):
Expand Down
26 changes: 9 additions & 17 deletions Cython/CodeWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,11 +510,13 @@ def comma_separated_list(self, items):
def visit_Node(self, node):
raise AssertionError("Node not handled by serializer: %r" % node)

# TODO: Remove redundancy below. Most constants serialise fine as just "repr(node.value)".

def visit_IntNode(self, node):
self.put(node.value)
self.put(repr(node.value))

def visit_FloatNode(self, node):
self.put(node.value)
self.put(repr(node.value))

def visit_NoneNode(self, node):
self.put("None")
Expand All @@ -526,29 +528,19 @@ def visit_EllipsisNode(self, node):
self.put("...")

def visit_BoolNode(self, node):
self.put(str(node.value))
self.put(repr(node.value))

def visit_ConstNode(self, node):
self.put(str(node.value))
self.put(repr(node.value))

def visit_ImagNode(self, node):
self.put(node.value)
self.put("j")

def emit_string(self, node, prefix=""):
repr_val = repr(node.value)
if repr_val[0] in 'ub':
repr_val = repr_val[1:]
self.put("%s%s" % (prefix, repr_val))
self.put(f"{node.value!r}j")

def visit_BytesNode(self, node):
self.emit_string(node, "b")

def visit_StringNode(self, node):
self.emit_string(node)
self.put(repr(node.value))

def visit_UnicodeNode(self, node):
self.emit_string(node, "u")
self.put(repr(node.value))

def emit_sequence(self, node, parens=("", "")):
open_paren, close_paren = parens
Expand Down
4 changes: 2 additions & 2 deletions Cython/Compiler/AnalysedTreeTransforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def visit_ModuleNode(self, node):

def add_test(self, testpos, path, doctest):
pos = self.testspos
keystr = '%s (line %d)' % (path, testpos[1])
key = UnicodeNode(pos, value=EncodedString(keystr))
keystr = EncodedString(f'{path} (line {testpos[1]:d})')
key = UnicodeNode(pos, value=keystr)
value = UnicodeNode(pos, value=doctest)
self.tests.append(DictItemNode(pos, key=key, value=value))

Expand Down
7 changes: 1 addition & 6 deletions Cython/Compiler/AutoDocTransforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,8 @@ def visit_LambdaNode(self, node):
"Failed to convert lambda to string representation in {}".format(
self.description), level=1)

def visit_UnicodeNode(self, node):
# Discard Unicode prefix in annotations. Any tool looking at them
# would probably expect Py3 string semantics.
self.emit_string(node, "")

def visit_AnnotationNode(self, node):
self.put(node.string.unicode_value)
self.put(node.string.value)


class EmbedSignature(CythonTransform):
Expand Down
42 changes: 14 additions & 28 deletions Cython/Compiler/Builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def declare_in_type(self, self_type):
#('bin', "", "", ""),
BuiltinFunction('callable', "O", "b", "__Pyx_PyCallable_Check",
utility_code = UtilityCode.load("CallableCheck", "ObjectHandling.c")),
BuiltinFunction('chr', "i", "O", "PyUnicode_FromOrdinal", builtin_return_type='unicode'),
BuiltinFunction('chr', "i", "O", "PyUnicode_FromOrdinal", builtin_return_type='str'),
#('cmp', "", "", "", ""), # int PyObject_Cmp(PyObject *o1, PyObject *o2, int *result)
#('compile', "", "", ""), # PyObject* Py_CompileString( char *str, char *filename, int start)
BuiltinFunction('delattr', "OO", "r", "PyObject_DelAttr"),
Expand Down Expand Up @@ -225,14 +225,13 @@ def declare_in_type(self, self_type):
#('raw_input', "", "", ""),
#('reduce', "", "", ""),
BuiltinFunction('reload', "O", "O", "PyImport_ReloadModule"),
BuiltinFunction('repr', "O", "O", "PyObject_Repr", builtin_return_type='unicode'),
BuiltinFunction('repr', "O", "O", "PyObject_Repr", builtin_return_type='str'),
#('round', "", "", ""),
BuiltinFunction('setattr', "OOO", "r", "PyObject_SetAttr"),
#('sum', "", "", ""),
#('sorted', "", "", ""),
#('type', "O", "O", "PyObject_Type"),
BuiltinFunction('unichr', "i", "O", "PyUnicode_FromOrdinal", builtin_return_type='unicode'),
#('unicode', "", "", ""),
BuiltinFunction('unichr', "i", "O", "PyUnicode_FromOrdinal", builtin_return_type='str'),
#('vars', "", "", ""),
#('zip', "", "", ""),
# Can't do these easily until we have builtin type entries.
Expand Down Expand Up @@ -286,12 +285,6 @@ def declare_in_type(self, self_type):
BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type),
]),

("basestring", "&PyBaseString_Type", [
BuiltinMethod("join", "TO", "T", "__Pyx_PyBaseString_Join",
utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply",
utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")),
]),
("bytearray", "&PyByteArray_Type", [
BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply",
utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")),
Expand All @@ -301,12 +294,7 @@ def declare_in_type(self, self_type):
BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply",
utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")),
]),
("str", "&PyString_Type", [BuiltinMethod("join", "TO", "T", "__Pyx_PyString_Join",
utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply",
utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")),
]),
("unicode", "&PyUnicode_Type", [BuiltinMethod("__contains__", "TO", "b", "PyUnicode_Contains"),
("str", "&PyUnicode_Type", [BuiltinMethod("__contains__", "TO", "b", "PyUnicode_Contains"),
BuiltinMethod("join", "TO", "T", "PyUnicode_Join"),
BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply",
utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")),
Expand Down Expand Up @@ -425,14 +413,14 @@ def declare_in_type(self, self_type):
'float': dict(
as_integer_ratio='tuple[int,int]',
is_integer='bint',
hex='unicode',
hex='str',
fromhex='T', # classmethod
),
'list': dict(
index='Py_ssize_t',
count='Py_ssize_t',
),
'unicode': dict(
'str': dict(
capitalize='T',
casefold='T',
center='T',
Expand Down Expand Up @@ -482,12 +470,12 @@ def declare_in_type(self, self_type):
zfill='T',
),
'bytes': dict(
hex='unicode',
hex='str',
fromhex='T', # classmethod
count='Py_ssize_t',
removeprefix='T',
removesuffix='T',
decode='unicode',
decode='str',
endswith='bint',
find='Py_ssize_t',
index='Py_ssize_t',
Expand Down Expand Up @@ -530,7 +518,7 @@ def declare_in_type(self, self_type):
),
'memoryview': dict(
tobytes='bytes',
hex='unicode',
hex='str',
tolist='list',
toreadonly='T',
cast='T',
Expand All @@ -555,7 +543,6 @@ def declare_in_type(self, self_type):

inferred_method_return_types['bytearray'].update(inferred_method_return_types['bytes'])
inferred_method_return_types['frozenset'].update(inferred_method_return_types['set'])
inferred_method_return_types['str'] = inferred_method_return_types['unicode']


def find_return_type_of_builtin_method(builtin_type, method_name):
Expand Down Expand Up @@ -619,6 +606,8 @@ def init_builtin_types():
objstruct_cname = 'PyByteArrayObject'
elif name == 'int':
objstruct_cname = 'PyLongObject'
elif name == 'str':
objstruct_cname = 'PyUnicodeObject'
elif name == 'bool':
objstruct_cname = None
elif name == 'BaseException':
Expand All @@ -640,6 +629,7 @@ def init_builtin_types():
for method in methods:
method.declare_in_type(the_type)


def init_builtin_structs():
for name, cname, attribute_types in builtin_structs_table:
scope = StructOrUnionScope(name)
Expand All @@ -662,7 +652,7 @@ def init_builtins():
entry.utility_code = UtilityCode.load_cached("AssertionsEnabled", "Exceptions.c")

global type_type, list_type, tuple_type, dict_type, set_type, frozenset_type, slice_type
global bytes_type, str_type, unicode_type, basestring_type, bytearray_type
global bytes_type, unicode_type, bytearray_type
global float_type, int_type, bool_type, complex_type
global memoryview_type, py_buffer_type
global sequence_types
Expand All @@ -675,9 +665,7 @@ def init_builtins():
slice_type = builtin_scope.lookup('slice').type

bytes_type = builtin_scope.lookup('bytes').type
str_type = builtin_scope.lookup('str').type
unicode_type = builtin_scope.lookup('unicode').type
basestring_type = builtin_scope.lookup('basestring').type
unicode_type = builtin_scope.lookup('str').type
bytearray_type = builtin_scope.lookup('bytearray').type
memoryview_type = builtin_scope.lookup('memoryview').type

Expand All @@ -690,9 +678,7 @@ def init_builtins():
list_type,
tuple_type,
bytes_type,
str_type,
unicode_type,
basestring_type,
bytearray_type,
memoryview_type,
)
Expand Down
2 changes: 1 addition & 1 deletion Cython/Compiler/Code.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ cdef class StringConst:
cdef public dict py_strings
cdef public list py_versions

cpdef get_py_string_const(self, encoding, identifier=*, bint is_str=*, py3str_cstring=*)
cpdef get_py_string_const(self, encoding, identifier=*)

## cdef class PyStringConst:
## cdef public object cname
Expand Down
Loading

0 comments on commit 825da36

Please sign in to comment.