Skip to content

Commit

Permalink
Handle missing ppub and collection (close #154)
Browse files: browse the repository at this point in the history
  • Loading branch information
nils-herrmann authored Sep 2, 2024
1 parent a7e88c8 commit 245dd09
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pubmed_parser/pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def parse_pubmed_xml(path, include_path=False, nxml=False):
pub_date = format_date(pub_date_dict)

try:
- pub_year = int(pub_date_dict["year"])
+ pub_year = int(pub_date_dict.get("year"))
except TypeError:
pub_year = None

Expand Down
8 changes: 7 additions & 1 deletion tests/test_pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ def fetch_pubmed_xml(db_dir):
pubmed_dir = {"3460867": "00/00/PMC3460867",
"28298962": "8e/71/PMC5334499",
"9539395": "51/b3/PMC9539395",
- "1280406": "5f/92/PMC1280406"
+ "1280406": "5f/92/PMC1280406",
+ "30443433": "6f/c7/PMC6218202"
}
pubmed_xml_3460867 = fetch_pubmed_xml(pubmed_dir['3460867'])
pubmed_xml_1280406 = fetch_pubmed_xml(pubmed_dir['1280406'])
pubmed_xml_30443433 = fetch_pubmed_xml(pubmed_dir['30443433'])


pubmed_xml_9539395 = fetch_pubmed_xml(pubmed_dir['9539395'])
Expand Down Expand Up @@ -52,6 +54,10 @@ def test_parse_pubmed_xml():
assert parsed_1280406.get('publication_date') == "01-9-2005"
assert parsed_1280406.get('epublication_date') == "31-5-2005"

parsed_30443433 = pp.parse_pubmed_xml(pubmed_xml_30443433)
assert parsed_30443433.get('publication_year') is None
assert parsed_30443433.get('publication_date') == "01-01"


def test_parse_pubmed_paragraph():
"""
Expand Down

0 comments on commit 245dd09

Please sign in to comment.