Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SerpMetadata fields to Metadata #123

Merged
merged 2 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/reference/components.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ Item metadata components

.. autoclass:: zyte_common_items.Metadata(**kwargs)
:show-inheritance:
:members: dateDownloaded, probability, searchText, validationMessages
:members: dateDownloaded, displayedQuery, probability, searchedQuery, searchText, totalOrganicResults, validationMessages, get_date_downloaded_parsed

.. autoclass:: zyte_common_items.ProbabilityMetadata(**kwargs)
:show-inheritance:
Expand All @@ -109,6 +109,10 @@ Item metadata components
:show-inheritance:
:members: probability, dateDownloaded, validationMessages, get_date_downloaded_parsed

.. autoclass:: zyte_common_items.SearchMetadata(**kwargs)
:show-inheritance:
:members: dateDownloaded, probability, searchText, validationMessages, get_date_downloaded_parsed

.. autoclass:: zyte_common_items.BaseMetadata(**kwargs)
:show-inheritance:
:members: cast
Expand Down
27 changes: 27 additions & 0 deletions tests/test_components.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import datetime

import attrs
from web_poet import RequestUrl

from zyte_common_items import (
Address,
AggregateRating,
Amenity,
BaseMetadata,
BaseSalary,
Breadcrumb,
BusinessPlaceMetadata,
Expand Down Expand Up @@ -72,6 +74,31 @@ def test_metadata_get_date_downloaded():
)


def get_all_subclasses(cls):
    """Return a set with every direct and indirect subclass of *cls*.

    *cls* itself is not part of the result.
    """
    found = set()
    # Worklist of classes whose direct subclasses still need to be visited.
    pending = [cls]
    while pending:
        current = pending.pop()
        for child in current.__subclasses__():
            if child in found:
                # Already reached through another base (e.g. diamond
                # inheritance); its descendants are queued or done.
                continue
            found.add(child)
            pending.append(child)
    return found


def test_metadata_fields():
    """Check that Metadata declares every field found on any subclass of
    BaseMetadata, i.e. that its field names are a superset of theirs."""
    superset = set(attrs.fields_dict(Metadata))
    for cls in get_all_subclasses(BaseMetadata):
        subset = set(attrs.fields_dict(cls))
        # ``<=`` on sets is the subset test; the message lists what is missing.
        is_covered = subset <= superset
        assert (
            is_covered
        ), f"Metadata is missing some fields from {cls.__name__}: {subset - superset}"


def test_metadata_subclasses():
    """Check that no class inherits from Metadata.

    Metadata gains fields whenever a new specific metadata class is added,
    so it should not be used as a base class.
    """
    descendants = get_all_subclasses(Metadata)
    assert not descendants


def test_named_link_optional_fields():
NamedLink(name="foo")
NamedLink(url="https://example.com")
Expand Down
1 change: 1 addition & 0 deletions zyte_common_items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
Reactions,
RealEstateArea,
Request,
SearchMetadata,
SocialMediaPostAuthor,
StarRating,
Topic,
Expand Down
1 change: 1 addition & 0 deletions zyte_common_items/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Metadata,
MetadataT,
ProbabilityMetadata,
SearchMetadata,
)
from .ratings import AggregateRating, StarRating
from .real_estate import RealEstateArea
Expand Down
21 changes: 18 additions & 3 deletions zyte_common_items/components/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,28 @@ class DetailsMetadata(ListMetadata):


@attrs.define(kw_only=True)
class Metadata(DetailsMetadata):
class SearchMetadata(DetailsMetadata):
"""Minimal metadata for classes of items that can declare search
metadata."""

#: The search text used to find the item.
searchText: Optional[str] = None


@attrs.define(kw_only=True)
class Metadata(SearchMetadata):
"""Generic metadata class.

It defines all attributes of metadata classes for specific item types, so
that it can be used during extraction instead of a more specific class, and
later converted to the corresponding, more specific metadata class.
"""

#: The search text used to find the item.
searchText: Optional[str] = None
#: Search query as seen in the webpage.
displayedQuery: Optional[str] = None

#: Search query as specified in the input URL.
searchedQuery: Optional[str] = None

#: Total number of organic results reported by the search engine.
totalOrganicResults: Optional[int] = None
4 changes: 2 additions & 2 deletions zyte_common_items/items/business_place.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
AggregateRating,
Amenity,
Image,
Metadata,
NamedLink,
OpeningHoursItem,
ParentPlace,
SearchMetadata,
StarRating,
)
from zyte_common_items.converters import to_metadata_optional, url_to_str_optional


@attrs.define(kw_only=True)
class BusinessPlaceMetadata(Metadata):
class BusinessPlaceMetadata(SearchMetadata):
"""Metadata class for :data:`zyte_common_items.BusinessPlace.metadata`."""


Expand Down
4 changes: 2 additions & 2 deletions zyte_common_items/items/job_posting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
BaseSalary,
HiringOrganization,
JobLocation,
Metadata,
SearchMetadata,
)
from zyte_common_items.converters import to_metadata_optional, url_to_str


@attrs.define(kw_only=True)
class JobPostingMetadata(Metadata):
class JobPostingMetadata(SearchMetadata):
"""Metadata class for :data:`zyte_common_items.JobPosting.metadata`."""


Expand Down
9 changes: 7 additions & 2 deletions zyte_common_items/items/social_media_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
import attrs

from zyte_common_items.base import Item
from zyte_common_items.components import Metadata, Reactions, SocialMediaPostAuthor, Url
from zyte_common_items.components import (
Reactions,
SearchMetadata,
SocialMediaPostAuthor,
Url,
)
from zyte_common_items.converters import to_metadata_optional, url_to_str


@attrs.define(kw_only=True)
class SocialMediaPostMetadata(Metadata):
class SocialMediaPostMetadata(SearchMetadata):
"""Metadata class for :data:`zyte_common_items.SocialMediaPost.metadata`."""


Expand Down
Loading