From 1a2311c8ce3c2661179c0d4528ba6cab2da1cf8b Mon Sep 17 00:00:00 2001 From: Nicolas Heist Date: Mon, 4 Jan 2021 12:15:46 +0100 Subject: [PATCH] FIX: correctly handle plus signs in dbpedia URIs --- impl/util/rdf.py | 6 +++--- impl/util/serialize.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/impl/util/rdf.py b/impl/util/rdf.py index a5537fd..5eb14fd 100644 --- a/impl/util/rdf.py +++ b/impl/util/rdf.py @@ -57,15 +57,15 @@ def parse_triples_from_file(filepath: str) -> Iterator[Triple]: object_triple = object_pattern.match(line) if object_triple: [sub, pred, obj] = object_triple.groups() - sub = urllib.parse.unquote_plus(sub.decode('utf-8')) + sub = urllib.parse.unquote(sub.decode('utf-8')) pred = pred.decode('utf-8') - obj = urllib.parse.unquote_plus(obj.decode('utf-8')) + obj = urllib.parse.unquote(obj.decode('utf-8')) yield Triple(sub=sub, pred=pred, obj=obj) else: literal_triple = literal_pattern.match(line) if literal_triple: [sub, pred, obj] = literal_triple.groups() - sub = urllib.parse.unquote_plus(sub.decode('utf-8')) + sub = urllib.parse.unquote(sub.decode('utf-8')) yield Triple(sub=sub, pred=pred.decode('utf-8'), obj=obj.decode('utf-8')) diff --git a/impl/util/serialize.py b/impl/util/serialize.py index c58aec7..86b0832 100644 --- a/impl/util/serialize.py +++ b/impl/util/serialize.py @@ -9,7 +9,7 @@ int: 'http://www.w3.org/2001/XMLSchema#integer', datetime.datetime: 'http://www.w3.org/2001/XMLSchema#date' } -RESOURCE_ENCODING_EXCEPTIONS = ['#', ':', ',', ';', '(', ')', '\'', '&', '!', '*', '+', '=', '$'] +RESOURCE_ENCODING_EXCEPTIONS = ['#', ':', ',', ';', '(', ')', '\'', '&', '!', '*', '=', '$'] LITERAL_ENCODED_CHARS = ['\\', '\'', '"'] @@ -52,9 +52,9 @@ def _resource_to_string(resource: str) -> str: def _encode_resource(resource: str) -> str: - res_name = urllib.parse.quote_plus(resource) + res_name = urllib.parse.quote(resource) for char in RESOURCE_ENCODING_EXCEPTIONS: - res_name = res_name.replace(urllib.parse.quote_plus(char), char) + res_name = res_name.replace(urllib.parse.quote(char), char) return res_name