Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

"Fix" interoperability of rfc822_escape with stblib's email package #213

Merged
merged 3 commits into from
Jan 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions distutils/tests/test_dist.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Tests for distutils.dist."""
import os
import io
import email
import email.policy
import email.generator
import sys
import warnings
import textwrap
Expand Down Expand Up @@ -510,3 +513,41 @@ def test_read_metadata(self):
assert metadata.platforms is None
assert metadata.obsoletes is None
assert metadata.requires == ['foo']

def test_round_trip_through_email_generator(self):
"""
In pypa/setuptools#4033, it was shown that once PKG-INFO is
re-generated using ``email.generator.Generator``, some control
characters might cause problems.
"""
# Given a PKG-INFO file ...
attrs = {
"name": "package",
"version": "1.0",
"long_description": "hello\x0b\nworld\n",
}
dist = Distribution(attrs)
metadata = dist.metadata

with io.StringIO() as buffer:
metadata.write_pkg_file(buffer)
msg = buffer.getvalue()

# ... when it is read and re-written using stdlib's email library,
orig = email.message_from_string(msg)
policy = email.policy.EmailPolicy(
utf8=True,
mangle_from_=False,
max_line_length=0,
)
with io.StringIO() as buffer:
email.generator.Generator(buffer, policy=policy).flatten(orig)

buffer.seek(0)
regen = email.message_from_file(buffer)

# ... then it should be the same as the original
# (except for the specific line break characters)
orig_desc = set(orig["Description"].splitlines())
regen_desc = set(regen["Description"].splitlines())
assert regen_desc == orig_desc
59 changes: 53 additions & 6 deletions distutils/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
"""Tests for distutils.util."""
import email
import email.policy
import email.generator
import io
import os
import sys
import sysconfig as stdlib_sysconfig
Expand Down Expand Up @@ -184,12 +188,55 @@ def test_strtobool(self):
for n in no:
assert not strtobool(n)

def test_rfc822_escape(self):
header = 'I am a\npoor\nlonesome\nheader\n'
res = rfc822_escape(header)
wanted = ('I am a%(8s)spoor%(8s)slonesome%(8s)s' 'header%(8s)s') % {
'8s': '\n' + 8 * ' '
}
indent = 8 * ' '

@pytest.mark.parametrize(
"given,wanted",
[
# 0x0b, 0x0c, ..., etc are also considered a line break by Python
("hello\x0b\nworld\n", f"hello\x0b{indent}\n{indent}world\n{indent}"),
("hello\x1eworld", f"hello\x1e{indent}world"),
("", ""),
(
"I am a\npoor\nlonesome\nheader\n",
f"I am a\n{indent}poor\n{indent}lonesome\n{indent}header\n{indent}",
),
],
)
def test_rfc822_escape(self, given, wanted):
"""
We want to ensure a multi-line header parses correctly.
For interoperability, the escaped value should also "round-trip" over
`email.generator.Generator.flatten` and `email.message_from_*`
(see pypa/setuptools#4033).
The main issue is that internally `email.policy.EmailPolicy` uses
`splitlines` which will split on some control chars. If all the new lines
are not prefixed with spaces, the parser will interrupt reading
the current header and produce an incomplete value, while
incorrectly interpreting the rest of the headers as part of the payload.
"""
res = rfc822_escape(given)

policy = email.policy.EmailPolicy(
utf8=True,
mangle_from_=False,
max_line_length=0,
)
with io.StringIO() as buffer:
raw = f"header: {res}\nother-header: 42\n\npayload\n"
orig = email.message_from_string(raw)
email.generator.Generator(buffer, policy=policy).flatten(orig)
buffer.seek(0)
regen = email.message_from_file(buffer)

for msg in (orig, regen):
assert msg.get_payload() == "payload\n"
assert msg["other-header"] == "42"
# Generator may replace control chars with `\n`
assert set(msg["header"].splitlines()) == set(res.splitlines())

assert res == wanted

def test_dont_write_bytecode(self):
Expand Down
12 changes: 9 additions & 3 deletions distutils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,12 @@ def rfc822_escape(header):
"""Return a version of the string escaped for inclusion in an
RFC-822 header, by ensuring there are 8 spaces space after each newline.
"""
lines = header.split('\n')
sep = '\n' + 8 * ' '
return sep.join(lines)
indent = 8 * " "
lines = header.splitlines(keepends=True)

# Emulate the behaviour of `str.split`
# (the terminal line break in `splitlines` does not result in an extra line):
ends_in_newline = lines and lines[-1].splitlines()[0] != lines[-1]
suffix = indent if ends_in_newline else ""

return indent.join(lines) + suffix
Loading