From f87d98bcfdd3d192929e5ad9b573dbbc0d55e57e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 21 Nov 2024 12:43:41 +0100 Subject: [PATCH] Add SerpMetadata fields to Metadata (#123) --- docs/reference/components.rst | 6 ++++- tests/test_components.py | 27 ++++++++++++++++++++ zyte_common_items/__init__.py | 1 + zyte_common_items/components/__init__.py | 1 + zyte_common_items/components/metadata.py | 21 ++++++++++++--- zyte_common_items/items/business_place.py | 4 +-- zyte_common_items/items/job_posting.py | 4 +-- zyte_common_items/items/social_media_post.py | 9 +++++-- 8 files changed, 63 insertions(+), 10 deletions(-) diff --git a/docs/reference/components.rst b/docs/reference/components.rst index f3612d6b..9d7e1e83 100644 --- a/docs/reference/components.rst +++ b/docs/reference/components.rst @@ -95,7 +95,7 @@ Item metadata components .. autoclass:: zyte_common_items.Metadata(**kwargs) :show-inheritance: - :members: dateDownloaded, probability, searchText, validationMessages + :members: dateDownloaded, displayedQuery, probability, searchedQuery, searchText, totalOrganicResults, validationMessages, get_date_downloaded_parsed .. autoclass:: zyte_common_items.ProbabilityMetadata(**kwargs) :show-inheritance: @@ -109,6 +109,10 @@ Item metadata components :show-inheritance: :members: probability, dateDownloaded, validationMessages, get_date_downloaded_parsed +.. autoclass:: zyte_common_items.SearchMetadata(**kwargs) + :show-inheritance: + :members: dateDownloaded, probability, searchText, validationMessages, get_date_downloaded_parsed + .. autoclass:: zyte_common_items.BaseMetadata(**kwargs) :show-inheritance: :members: cast diff --git a/tests/test_components.py b/tests/test_components.py index 180e74b5..28eed7e0 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -1,11 +1,13 @@ import datetime +import attrs from web_poet import RequestUrl from zyte_common_items import ( Address, AggregateRating, Amenity, + BaseMetadata, BaseSalary, Breadcrumb, BusinessPlaceMetadata, @@ -72,6 +74,31 @@ def test_metadata_get_date_downloaded(): ) +def get_all_subclasses(cls): + subclasses = set() + for subclass in cls.__subclasses__(): + subclasses.add(subclass) + subclasses.update(get_all_subclasses(subclass)) + return subclasses + + +def test_metadata_fields(): + """Metadata must contain a superset of the fields of all metadata + classes.""" + superset = set(attrs.fields_dict(Metadata)) + for cls in get_all_subclasses(BaseMetadata): + subset = set(attrs.fields_dict(cls)) + assert subset.issubset( + superset + ), f"Metadata is missing some fields from {cls.__name__}: {subset - superset}" + + +def test_metadata_subclasses(): + """Metadata should not be subclassed, since its fields will grow as new + specific metadata classes are added.""" + assert not get_all_subclasses(Metadata) + + def test_named_link_optional_fields(): NamedLink(name="foo") NamedLink(url="https://example.com") diff --git a/zyte_common_items/__init__.py b/zyte_common_items/__init__.py index c37a67e4..96854069 100644 --- a/zyte_common_items/__init__.py +++ b/zyte_common_items/__init__.py @@ -29,6 +29,7 @@ Reactions, RealEstateArea, Request, + SearchMetadata, SocialMediaPostAuthor, StarRating, Topic, diff --git a/zyte_common_items/components/__init__.py b/zyte_common_items/components/__init__.py index 017362aa..a6b3f169 100644 --- a/zyte_common_items/components/__init__.py +++ b/zyte_common_items/components/__init__.py @@ -18,6 +18,7 @@ Metadata, MetadataT, ProbabilityMetadata, + SearchMetadata, ) from .ratings import AggregateRating, StarRating from .real_estate import RealEstateArea diff --git a/zyte_common_items/components/metadata.py b/zyte_common_items/components/metadata.py index 27f41111..21899504 100644 --- a/zyte_common_items/components/metadata.py +++ b/zyte_common_items/components/metadata.py @@ -74,7 +74,16 @@ class DetailsMetadata(ListMetadata): @attrs.define(kw_only=True) -class Metadata(DetailsMetadata): +class SearchMetadata(DetailsMetadata): + """Minimal metadata for classes of items that can declare search + metadata.""" + + #: The search text used to find the item. + searchText: Optional[str] = None + + +@attrs.define(kw_only=True) +class Metadata(SearchMetadata): """Generic metadata class. It defines all attributes of metadata classes for specific item types, so @@ -82,5 +91,11 @@ class Metadata(DetailsMetadata): later converted to the corresponding, more specific metadata class. """ - #: The search text used to find the item. - searchText: Optional[str] = None + #: Search query as seen in the webpage. + displayedQuery: Optional[str] = None + + #: Search query as specified in the input URL. + searchedQuery: Optional[str] = None + + #: Total number of organic results reported by the search engine. + totalOrganicResults: Optional[int] = None diff --git a/zyte_common_items/items/business_place.py b/zyte_common_items/items/business_place.py index 4c8054e2..a2ed34f6 100644 --- a/zyte_common_items/items/business_place.py +++ b/zyte_common_items/items/business_place.py @@ -9,17 +9,17 @@ AggregateRating, Amenity, Image, - Metadata, NamedLink, OpeningHoursItem, ParentPlace, + SearchMetadata, StarRating, ) from zyte_common_items.converters import to_metadata_optional, url_to_str_optional @attrs.define(kw_only=True) -class BusinessPlaceMetadata(Metadata): +class BusinessPlaceMetadata(SearchMetadata): """Metadata class for :data:`zyte_common_items.BusinessPlace.metadata`.""" diff --git a/zyte_common_items/items/job_posting.py b/zyte_common_items/items/job_posting.py index a28aaca3..b2176889 100644 --- a/zyte_common_items/items/job_posting.py +++ b/zyte_common_items/items/job_posting.py @@ -7,13 +7,13 @@ BaseSalary, HiringOrganization, JobLocation, - Metadata, + SearchMetadata, ) from zyte_common_items.converters import to_metadata_optional, url_to_str @attrs.define(kw_only=True) -class JobPostingMetadata(Metadata): +class JobPostingMetadata(SearchMetadata): """Metadata class for :data:`zyte_common_items.JobPosting.metadata`.""" diff --git a/zyte_common_items/items/social_media_post.py b/zyte_common_items/items/social_media_post.py index d546cef3..c1b0751e 100644 --- a/zyte_common_items/items/social_media_post.py +++ b/zyte_common_items/items/social_media_post.py @@ -3,12 +3,17 @@ import attrs from zyte_common_items.base import Item -from zyte_common_items.components import Metadata, Reactions, SocialMediaPostAuthor, Url +from zyte_common_items.components import ( + Reactions, + SearchMetadata, + SocialMediaPostAuthor, + Url, +) from zyte_common_items.converters import to_metadata_optional, url_to_str @attrs.define(kw_only=True) -class SocialMediaPostMetadata(Metadata): +class SocialMediaPostMetadata(SearchMetadata): """Metadata class for :data:`zyte_common_items.SocialMediaPost.metadata`."""