Skip to content

Commit

Permalink
Improved readability
Browse files Browse the repository at this point in the history
  • Loading branch information
manusimidt committed Oct 10, 2023
1 parent 233f7b7 commit f6a4f54
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 27 deletions.
11 changes: 7 additions & 4 deletions xbrl/helper/uri_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str:
@param relative_uri:
@return:
"""
if relative_uri.startswith('http://') or relative_uri.startswith('https://'):
return relative_uri
if is_url(relative_uri): return relative_uri

# remove redundant characters in the relative uri
if relative_uri.startswith('/'): relative_uri = relative_uri[1:]
if relative_uri.startswith('./'): relative_uri = relative_uri[2:]

dir_uri = str(dir_uri)
if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'):
if not is_url(dir_uri):
# check if the dir_uri was really a path to a directory or a file
if '.' in dir_uri.split(os.sep)[-1]:
return os.path.normpath(os.path.dirname(dir_uri) + os.sep + relative_uri)
Expand All @@ -40,7 +39,7 @@ def resolve_uri(dir_uri: str, relative_uri: str) -> str:
dir_uri += '/'

absolute_uri = dir_uri + relative_uri
if not dir_uri.startswith('http://') and not dir_uri.startswith('https://'):
if not is_url(dir_uri):
# make sure the path is correct
absolute_uri = os.path.normpath(absolute_uri)

Expand Down Expand Up @@ -74,3 +73,7 @@ def compare_uri(uri1: str, uri2: str) -> bool:
uri1_segments: [str] = re.findall(r"[\w']+", uri1)
uri2_segments: [str] = re.findall(r"[\w']+", uri2)
return uri1_segments == uri2_segments


def is_url(candidate: str) -> bool:
    """
    Checks whether the given string is an absolute http or https url.
    The scheme is compared case-insensitively, so 'HTTP://...' is also recognised as url.
    @param candidate: uri, url or file path that should be checked
    @return: True if the candidate starts with 'http://' or 'https://' (ignoring case), False otherwise
    """
    # str.startswith accepts a tuple of prefixes, so the candidate only has to be lower-cased once
    return candidate.lower().startswith(('http://', 'https://'))
15 changes: 7 additions & 8 deletions xbrl/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pathlib import Path
from xbrl import TaxonomyNotFound, InstanceParseException
from xbrl.cache import HttpCache
from xbrl.helper.uri_helper import resolve_uri
from xbrl.helper.uri_helper import resolve_uri, is_url
from xbrl.helper.xml_parser import parse_file
from xbrl.taxonomy import Concept, TaxonomySchema, parse_taxonomy, parse_common_taxonomy, parse_taxonomy_url
from xbrl.transformations import normalize, TransformationException, TransformationNotImplemented
Expand Down Expand Up @@ -347,7 +347,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas
if schema_uri.startswith('http://') or schema_uri.startswith("https://"):
if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
elif instance_url:
Expand Down Expand Up @@ -423,7 +423,8 @@ def parse_ixbrl_url(instance_url: str, cache: HttpCache, encoding: str or None =
return parse_ixbrl(instance_path, cache, instance_url, encoding)


def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None, schema_root=None) -> XbrlInstance:
def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None,
schema_root=None) -> XbrlInstance:
"""
Parses an inline XBRL (iXBRL) instance file.
Expand Down Expand Up @@ -452,7 +453,7 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas
if schema_uri.startswith('http'):
if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
elif schema_root:
Expand Down Expand Up @@ -727,10 +728,8 @@ def parse_instance(self, uri: str, instance_url: str or None = None, encoding: s
:return:
"""
if uri.split('.')[-1] == 'xml' or uri.split('.')[-1] == 'xbrl':
return parse_xbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \
else parse_xbrl(uri, self.cache, instance_url)
return parse_ixbrl_url(uri, self.cache) if uri.startswith('http://') or uri.startswith('https://') \
else parse_ixbrl(uri, self.cache, instance_url, encoding)
return parse_xbrl_url(uri, self.cache) if is_url(uri) else parse_xbrl(uri, self.cache, instance_url)
return parse_ixbrl_url(uri, self.cache) if is_url(uri) else parse_ixbrl(uri, self.cache, instance_url, encoding)

def __str__(self) -> str:
return 'XbrlParser with cache dir at {}'.format(self.cache.cache_dir)
15 changes: 8 additions & 7 deletions xbrl/linkbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from xbrl import XbrlParseException, LinkbaseNotFoundException
from xbrl.cache import HttpCache
from xbrl.helper.uri_helper import resolve_uri
from xbrl.helper.uri_helper import resolve_uri, is_url

LINK_NS: str = "{http://www.xbrl.org/2003/linkbase}"
XLINK_NS: str = "{http://www.w3.org/1999/xlink}"
Expand Down Expand Up @@ -383,7 +383,8 @@ class Linkbase:
Represents a complete Linkbase (non-generic).
"""

def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType, linkbase_uri: None or str = None) -> None:
def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType,
linkbase_uri: None or str = None) -> None:
"""
:param extended_links: All standard extended links that are defined in the linkbase
:type extended_links: [ExtendedDefinitionLink] or [ExtendedCalculationLink] or [ExtendedPresentationLink] or [ExtendedLabelArc]
Expand Down Expand Up @@ -418,8 +419,8 @@ def parse_linkbase_url(linkbase_url: str, linkbase_type: LinkbaseType, cache: Ht
:param cache: :class:`xbrl.cache.HttpCache` instance
:return: parsed :class:`xbrl.linkbase.Linkbase` object
"""
if not linkbase_url.startswith('http'): raise XbrlParseException(
'This function only parses remotely saved linkbases. Please use parse_linkbase to parse local linkbases')
if not is_url(linkbase_url): raise XbrlParseException('This function only parses remotely saved linkbases. '
'Please use parse_linkbase to parse local linkbases')

linkbase_path: str = cache.cache_file(linkbase_url)
return parse_linkbase(linkbase_path, linkbase_type, linkbase_url)
Expand All @@ -439,8 +440,8 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
the locator with concept from the taxonomy
:return: parsed :class:`xbrl.linkbase.Linkbase` object
"""
if linkbase_path.startswith('http'): raise XbrlParseException(
'This function only parses locally saved linkbases. Please use parse_linkbase_url to parse remote linkbases')
if is_url(linkbase_path): raise XbrlParseException('This function only parses locally saved linkbases. '
'Please use parse_linkbase_url to parse remote linkbases')
if not os.path.exists(linkbase_path):
raise LinkbaseNotFoundException(f"Could not find linkbase at {linkbase_path}")

Expand Down Expand Up @@ -486,7 +487,7 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
loc_label: str = loc.attrib[XLINK_NS + 'label']
# check if the locator href is absolute
locator_href = loc.attrib[XLINK_NS + 'href']
if not locator_href.startswith('http'):
if not is_url(locator_href):
# resolve the path
# todo, try to get the URL here, instead of the path!!!
locator_href = resolve_uri(linkbase_url if linkbase_url else linkbase_path, locator_href)
Expand Down
15 changes: 7 additions & 8 deletions xbrl/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from xbrl import XbrlParseException, TaxonomyNotFound
from xbrl.cache import HttpCache
from xbrl.helper.uri_helper import resolve_uri, compare_uri
from xbrl.helper.uri_helper import resolve_uri, compare_uri, is_url
from xbrl.linkbase import Linkbase, ExtendedLink, LinkbaseType, parse_linkbase, parse_linkbase_url, Label

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -593,9 +593,8 @@ def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema:
:param cache: :class:`xbrl.cache.HttpCache` instance
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
"""
if not schema_url.startswith('http://') and not schema_url.startswith('https://'): raise XbrlParseException(
'This function only parses remotely saved taxonomies. Please use parse_taxonomy to parse local taxonomy schemas')

if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. '
'Please use parse_taxonomy to parse local taxonomy schemas')
schema_path: str = cache.cache_file(schema_url)
return parse_taxonomy(schema_path, cache, schema_url)

Expand All @@ -611,8 +610,8 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
"""
schema_path = str(schema_path)
if schema_path.startswith('http://') or schema_path.startswith('https://'): raise XbrlParseException(
'This function only parses locally saved taxonomies. Please use parse_taxonomy_url to parse remote taxonomy schemas')
if is_url(schema_path): raise XbrlParseException('This function only parses locally saved taxonomies. '
'Please use parse_taxonomy_url to parse remote taxonomy schemas')
if not os.path.exists(schema_path):
raise TaxonomyNotFound(f"Could not find taxonomy schema at {schema_path}")

Expand All @@ -632,7 +631,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
continue

# sometimes the import schema location is relative, e.g. schemaLocation="xbrl-linkbase-2003-12-31.xsd"
if import_uri.startswith('http://') or import_uri.startswith('https://'):
if is_url(import_uri):
# fetch the schema file from remote
taxonomy.imports.append(parse_taxonomy_url(import_uri, cache))
elif schema_url:
Expand Down Expand Up @@ -683,7 +682,7 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
linkbase_uri)

# check if the linkbase url is relative
if linkbase_uri.startswith('http://') or linkbase_uri.startswith('https://'):
if is_url(linkbase_uri):
# fetch the linkbase from remote
linkbase: Linkbase = parse_linkbase_url(linkbase_uri, linkbase_type, cache)
elif schema_url:
Expand Down

0 comments on commit f6a4f54

Please sign in to comment.