diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index adaa630275..c9887cfc7c 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -6,6 +6,7 @@ - Papers can now generate citation reference strings in any CSL-supported style via `to_citation()`. Calling `to_citation()` without any arguments will produce ACL-formatted reference entries. - Papers can now generate brief markdown reference strings via `to_markdown_citation()`. +- PersonIndex now has function `find_coauthors_counter()` to find not just the identities of co-authors, but also a count of how many items each of them has co-authored with a given person. - SIGIndex now reverse-indexes co-located volumes, so it is now possible to get SIGs associated with volumes, e.g. via `Volume.get_sigs()`. - Papers now have attribute `thumbnail`. - Volumes now have attributes `has_abstracts`, `venue_acronym`, and `web_url`. diff --git a/python/acl_anthology/people/index.py b/python/acl_anthology/people/index.py index 41b25cb993..f58d120ce4 100644 --- a/python/acl_anthology/people/index.py +++ b/python/acl_anthology/people/index.py @@ -15,7 +15,7 @@ from __future__ import annotations from attrs import define, field, asdict -from collections import defaultdict +from collections import Counter, defaultdict import itertools as it from os import PathLike from pathlib import Path @@ -102,22 +102,34 @@ def find_coauthors(self, person: str | Person) -> list[Person]: Returns: A list of all persons who are co-authors; can be empty. """ + coauthors = self.find_coauthors_counter(person) + return [self.data[pid] for pid in coauthors] + + def find_coauthors_counter(self, person: str | Person) -> Counter[str]: + """Find the count of co-authored or co-edited items per person. + + Parameters: + person: A person ID _or_ Person instance. + + Returns: + A Counter mapping **IDs** of other persons Y to the number of items this person has co-authored or co-edited with Y. 
+ """ if not self.is_data_loaded: self.load() if isinstance(person, str): person = self.data[person] - coauthors = set() + coauthors: Counter[str] = Counter() for item_id in person.item_ids: item = cast("Volume | Paper", self.parent.get(item_id)) - coauthors |= set( + coauthors.update( self.get_or_create_person(ns, create=False).id for ns in item.editors ) if hasattr(item, "authors"): - coauthors |= set( + coauthors.update( self.get_or_create_person(ns, create=False).id for ns in item.authors ) - coauthors.remove(person.id) - return [self.data[pid] for pid in coauthors] + del coauthors[person.id] + return coauthors def load(self) -> None: """Loads or builds the index.""" diff --git a/python/tests/people/personindex_test.py b/python/tests/people/personindex_test.py index a653189dd3..e62f6bf7b9 100644 --- a/python/tests/people/personindex_test.py +++ b/python/tests/people/personindex_test.py @@ -163,6 +163,24 @@ def test_get_person_coauthors(index_with_full_anthology): assert len(coauthors) == 1 assert coauthors[0].canonical_name == Name("Joyce", "McDowell") + person = index.get_by_name(Name("Preslav", "Nakov"))[0] + coauthors = index.find_coauthors(person) + assert len(coauthors) == 2 + + +def test_get_person_coauthors_counter(index_with_full_anthology): + index = index_with_full_anthology + person = index.get_by_name(Name("Kathleen", "Dahlgren"))[0] + coauthors = index.find_coauthors_counter(person) + assert len(coauthors) == 1 + assert coauthors["joyce-mcdowell"] == 1 + + person = index.get_by_name(Name("Preslav", "Nakov"))[0] + coauthors = index.find_coauthors_counter(person) + assert len(coauthors) == 2 + assert coauthors["joyce-mcdowell"] == 0 + assert coauthors["aline-villavicencio"] == 2 + def test_get_by_namespec(index_with_full_anthology): index = index_with_full_anthology