Improved readability

manusimidt · Oct 10, 2023 · f6a4f54 · f6a4f54
1 parent 233f7b7
commit f6a4f54
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 27 deletions.
diff --git a/xbrl/helper/uri_helper.py b/xbrl/helper/uri_helper.py
@@ -18,15 +18,14 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str:
     @param relative_uri:
     @return:
     """
-    if relative_uri.startswith('http://') or relative_uri.startswith('https://'):
-        return relative_uri
+    if is_url(relative_uri): return relative_uri
 
     # remove redundant characters in the relative uri
     if relative_uri.startswith('/'): relative_uri = relative_uri[1:]
     if relative_uri.startswith('./'): relative_uri = relative_uri[2:]
 
     dir_uri = str(dir_uri)
-    if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'):
+    if not is_url(dir_uri):
         # check if the dir_uri was really a path to a directory or a file
         if '.' in dir_uri.split(os.sep)[-1]:
             return os.path.normpath(os.path.dirname(dir_uri) + os.sep + relative_uri)
@@ -40,7 +39,7 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str:
         dir_uri += '/'
 
     absolute_uri = dir_uri + relative_uri
-    if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'):
+    if not is_url(dir_uri):
         # make sure the path is correct
         absolute_uri = os.path.normpath(absolute_uri)
 
@@ -74,3 +73,7 @@ def compare_uri(uri1: str, uri2: str) -> bool:
     uri1_segments: [str] = re.findall(r"[\w']+", uri1)
     uri2_segments: [str] = re.findall(r"[\w']+", uri2)
     return uri1_segments == uri2_segments
+
+
+def is_url(candidate: str) -> bool:
+    return candidate.lower().startswith('http://') or candidate.lower().startswith('https://')
diff --git a/xbrl/instance.py b/xbrl/instance.py
@@ -15,7 +15,7 @@
 from pathlib import Path
 from xbrl import TaxonomyNotFound, InstanceParseException
 from xbrl.cache import HttpCache
-from xbrl.helper.uri_helper import resolve_uri
+from xbrl.helper.uri_helper import resolve_uri, is_url
 from xbrl.helper.xml_parser import parse_file
 from xbrl.taxonomy import Concept, TaxonomySchema, parse_taxonomy, parse_common_taxonomy, parse_taxonomy_url
 from xbrl.transformations import normalize, TransformationException, TransformationNotImplemented
@@ -347,7 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
     schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
     # check if the schema uri is relative or absolute
     # submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas
-    if schema_uri.startswith('http://') or schema_uri.startswith("https://"):
+    if is_url(schema_uri):
         # fetch the taxonomy extension schema from remote
         taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
     elif instance_url:
@@ -423,7 +423,8 @@ def parse_ixbrl_url(instance_url: str, cache: HttpCache, encoding: str or None =
     return parse_ixbrl(instance_path, cache, instance_url, encoding)
 
 
-def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None, schema_root=None) -> XbrlInstance:
+def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None,
+                schema_root=None) -> XbrlInstance:
     """
     Parses an inline XBRL (iXBRL) instance file.
 
@@ -452,7 +453,7 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
     schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
     # check if the schema uri is relative or absolute
     # submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas
-    if schema_uri.startswith('http'):
+    if is_url(schema_uri):
         # fetch the taxonomy extension schema from remote
         taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
     elif schema_root:
@@ -727,10 +728,8 @@ def parse_instance(self, uri: str, instance_url: str or None = None, encoding: s
         :return:
         """
         if uri.split('.')[-1] == 'xml' or uri.split('.')[-1] == 'xbrl':
-            return parse_xbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \
-                else parse_xbrl(uri, self.cache, instance_url)
-        return parse_ixbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \
-            else parse_ixbrl(uri, self.cache, instance_url, encoding)
+            return parse_xbrl_url(uri, self.cache) if is_url(uri) else parse_xbrl(uri, self.cache, instance_url)
+        return parse_ixbrl_url(uri, self.cache) if is_url(uri) else parse_ixbrl(uri, self.cache, instance_url, encoding)
 
     def __str__(self) -> str:
         return 'XbrlParser with cache dir at {}'.format(self.cache.cache_dir)
diff --git a/xbrl/linkbase.py b/xbrl/linkbase.py
@@ -7,7 +7,7 @@
 
 from xbrl import XbrlParseException, LinkbaseNotFoundException
 from xbrl.cache import HttpCache
-from xbrl.helper.uri_helper import resolve_uri
+from xbrl.helper.uri_helper import resolve_uri, is_url
 
 LINK_NS: str = "{http://www.xbrl.org/2003/linkbase}"
 XLINK_NS: str = "{http://www.w3.org/1999/xlink}"
@@ -383,7 +383,8 @@ class Linkbase:
     Represents a complete Linkbase (non-generic).
     """
 
-    def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType, linkbase_uri: None or str = None) -> None:
+    def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType,
+                 linkbase_uri: None or str = None) -> None:
         """
         :param extended_links: All standard extended links that are defined in the linkbase
         :type extended_links: [ExtendedDefinitionLink] or [ExtendedCalculationLink] or [ExtendedPresentationLink] or [ExtendedLabelArc]
@@ -418,8 +419,8 @@ def parse_linkbase_url(linkbase_url: str, linkbase_type: LinkbaseType, cache: Ht
     :param cache: :class:`xbrl.cache.HttpCache` instance
     :return: parsed :class:`xbrl.linkbase.Linkbase` object
     """
-    if not linkbase_url.startswith('http'): raise XbrlParseException(
-        'This function only parses remotely saved linkbases. Please use parse_linkbase to parse local linkbases')
+    if not is_url(linkbase_url): raise XbrlParseException('This function only parses remotely saved linkbases. '
+                                                          'Please use parse_linkbase to parse local linkbases')
 
     linkbase_path: str = cache.cache_file(linkbase_url)
     return parse_linkbase(linkbase_path, linkbase_type, linkbase_url)
@@ -439,8 +440,8 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
         the locator with concept from the taxonomy
     :return: parsed :class:`xbrl.linkbase.Linkbase` object
     """
-    if linkbase_path.startswith('http'): raise XbrlParseException(
-        'This function only parses locally saved linkbases. Please use parse_linkbase_url to parse remote linkbases')
+    if is_url(linkbase_path): raise XbrlParseException('This function only parses locally saved linkbases. '
+                                                       'Please use parse_linkbase_url to parse remote linkbases')
     if not os.path.exists(linkbase_path):
         raise LinkbaseNotFoundException(f"Could not find linkbase at {linkbase_path}")
 
@@ -486,7 +487,7 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
             loc_label: str = loc.attrib[XLINK_NS + 'label']
             # check if the locator href is absolute
             locator_href = loc.attrib[XLINK_NS + 'href']
-            if not locator_href.startswith('http'):
+            if not is_url(locator_href):
                 # resolve the path
                 # todo, try to get the URL here, instead of the path!!!
                 locator_href = resolve_uri(linkbase_url if linkbase_url else linkbase_path, locator_href)

diff --git a/xbrl/taxonomy.py b/xbrl/taxonomy.py
@@ -10,7 +10,7 @@
 
 from xbrl import XbrlParseException, TaxonomyNotFound
 from xbrl.cache import HttpCache
-from xbrl.helper.uri_helper import resolve_uri, compare_uri
+from xbrl.helper.uri_helper import resolve_uri, compare_uri, is_url
 from xbrl.linkbase import Linkbase, ExtendedLink, LinkbaseType, parse_linkbase, parse_linkbase_url, Label
 
 logger = logging.getLogger(__name__)
@@ -593,9 +593,8 @@ def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema:
     :param cache: :class:`xbrl.cache.HttpCache` instance
     :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
     """
-    if not schema_url.startswith('http://') and not schema_url.startswith('https://'): raise XbrlParseException(
-        'This function only parses remotely saved taxonomies. Please use parse_taxonomy to parse local taxonomy schemas')
-
+    if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. '
+                                                        'Please use parse_taxonomy to parse local taxonomy schemas')
     schema_path: str = cache.cache_file(schema_url)
     return parse_taxonomy(schema_path, cache, schema_url)
 
@@ -611,8 +610,8 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
     :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
     """
     schema_path = str(schema_path)
-    if schema_path.startswith('http://') or schema_path.startswith('https://'): raise XbrlParseException(
-        'This function only parses locally saved taxonomies. Please use parse_taxonomy_url to parse remote taxonomy schemas')
+    if is_url(schema_path): raise XbrlParseException('This function only parses locally saved taxonomies. '
+                                                     'Please use parse_taxonomy_url to parse remote taxonomy schemas')
     if not os.path.exists(schema_path):
         raise TaxonomyNotFound(f"Could not find taxonomy schema at {schema_path}")
 
@@ -632,7 +631,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
             continue
 
         # sometimes the import schema location is relative, i.e. schemaLocation="xbrl-linkbase-2003-12-31.xsd"
-        if import_uri.startswith('http://') or import_uri.startswith('https://'):
+        if is_url(import_uri):
             # fetch the schema file from remote
             taxonomy.imports.append(parse_taxonomy_url(import_uri, cache))
         elif schema_url:
@@ -683,7 +682,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
             linkbase_uri)
 
         # check if the linkbase url is relative
-        if linkbase_uri.startswith('http://') or linkbase_uri.startswith('https://'):
+        if is_url(linkbase_uri):
             # fetch the linkbase from remote
             linkbase: Linkbase = parse_linkbase_url(linkbase_uri, linkbase_type, cache)
         elif schema_url: