Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forum thread support #111

Merged
merged 5 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions tests/test_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest
from web_poet import HttpResponse, RequestUrl, ResponseUrl, Returns, field
from web_poet.fields import get_fields_dict
from web_poet.pages import get_item_cls

import zyte_common_items
from zyte_common_items import (
Expand Down Expand Up @@ -215,6 +216,7 @@ def test_matching_items():
"searchText",
"validationMessages",
},
"ForumThread": {"dateDownloaded", "validationMessages"},
"Product": {"dateDownloaded", "probability", "validationMessages"},
"ProductList": {"dateDownloaded", "validationMessages"},
"ProductNavigation": {"dateDownloaded", "validationMessages"},
Expand Down Expand Up @@ -491,3 +493,21 @@ def test_auto_fields():
auto_page_cls = zyte_common_items.__dict__[auto_page_name]
for field_name in get_fields_dict(auto_page_cls):
assert is_auto_field(auto_page_cls, field_name)


def test_auto_page_item_fields():
    """Check that each Auto- page class and its item class agree on fields.

    Every object in the ``zyte_common_items`` namespace whose name starts
    with ``Auto`` and ends with ``Page`` is collected. For each of them, the
    set of field names reported by ``get_fields_dict`` must equal the set of
    attribute names that ``attrs.fields_dict`` reports for its item class.
    """
    auto_pages: set[type] = set()
    for obj_name, obj in vars(zyte_common_items).items():
        if not obj_name.startswith("Auto"):
            continue
        if not obj_name.endswith("Page"):
            continue
        auto_pages.add(obj)

    for auto_page in auto_pages:
        page_field_names = set(get_fields_dict(auto_page))
        item_cls = get_item_cls(auto_page)  # type: ignore[call-overload]
        item_field_names = set(attrs.fields_dict(item_cls))
        assert (
            page_field_names == item_field_names
        ), f"{auto_page} does not map all {item_cls} fields"
6 changes: 6 additions & 0 deletions zyte_common_items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
Request,
SocialMediaPostAuthor,
StarRating,
Topic,
Url,
Video,
)
Expand All @@ -50,6 +51,8 @@
CustomAttributes,
CustomAttributesMetadata,
CustomAttributesValues,
ForumThread,
ForumThreadMetadata,
JobPosting,
JobPostingMetadata,
JobPostingNavigation,
Expand Down Expand Up @@ -80,6 +83,7 @@
AutoArticleNavigationPage,
AutoArticlePage,
AutoBusinessPlacePage,
AutoForumThreadPage,
AutoJobPostingNavigationPage,
AutoJobPostingPage,
AutoProductListPage,
Expand All @@ -92,6 +96,7 @@
BaseArticleNavigationPage,
BaseArticlePage,
BaseBusinessPlacePage,
BaseForumThreadPage,
BaseJobPostingNavigationPage,
BaseJobPostingPage,
BasePage,
Expand All @@ -102,6 +107,7 @@
BaseSerpPage,
BaseSocialMediaPostPage,
BusinessPlacePage,
ForumThreadPage,
HasMetadata,
JobPostingNavigationPage,
JobPostingPage,
Expand Down
1 change: 1 addition & 0 deletions zyte_common_items/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .brand import Brand
from .breadcrumbs import Breadcrumb
from .business_place import Amenity, OpeningHoursItem, ParentPlace
from .forum_thread import Topic
from .gtin import Gtin
from .job_posting import BaseSalary, HiringOrganization, JobLocation
from .links import Link, NamedLink, Url
Expand Down
13 changes: 13 additions & 0 deletions zyte_common_items/components/forum_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

import attrs

from zyte_common_items.base import Item


@attrs.define(kw_only=True)
class Topic(Item):
    """A topic discussed on a page.

    Keyword-only attrs item with a single required attribute, ``name``.
    """

    #: The topic's name.
    name: str
3 changes: 3 additions & 0 deletions zyte_common_items/components/social_media_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
class Reactions(Item):
"""Details of reactions to a post."""

#: Number of times the post received a reply.
replies: Optional[int] = None

#: Number of times the post has been shared.
reposts: Optional[int] = None

Expand Down
1 change: 1 addition & 0 deletions zyte_common_items/items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
CustomAttributesMetadata,
CustomAttributesValues,
)
from .forum_thread import ForumThread, ForumThreadMetadata
from .job_posting import JobPosting, JobPostingMetadata
from .job_posting_navigation import JobPostingNavigation, JobPostingNavigationMetadata
from .product import Product, ProductMetadata, ProductVariant
Expand Down
36 changes: 36 additions & 0 deletions zyte_common_items/items/forum_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import List, Optional

import attrs

from zyte_common_items.base import Item
from zyte_common_items.components import ListMetadata, Topic
from zyte_common_items.converters import to_metadata_optional, url_to_str

from .social_media_post import SocialMediaPost


@attrs.define(kw_only=True)
class ForumThreadMetadata(ListMetadata):
    """Metadata class used by :data:`zyte_common_items.ForumThread.metadata`.

    Declares no attributes of its own on top of ``ListMetadata``.
    """


@attrs.define(kw_only=True)
class ForumThread(Item):
    """Item holding the data of a forum thread page."""

    #: URL of the final response, once any redirects have been followed.
    #: The value is passed through the ``url_to_str`` converter.
    url: str = attrs.field(converter=url_to_str)

    #: The topic that the page discusses.
    topic: Optional[Topic] = None

    #: Identifier of the thread.
    threadId: Optional[str] = None

    #: Posts available on the page; the first (top) post is included.
    posts: Optional[List[SocialMediaPost]] = None

    #: Metadata about the data extraction process.
    metadata: Optional[ForumThreadMetadata] = attrs.field(
        default=None,
        converter=to_metadata_optional(ForumThreadMetadata),  # type: ignore[misc]
        kw_only=True,
    )
1 change: 1 addition & 0 deletions zyte_common_items/pages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BaseBusinessPlacePage,
BusinessPlacePage,
)
from .forum_thread import AutoForumThreadPage, BaseForumThreadPage, ForumThreadPage
from .job_posting import AutoJobPostingPage, BaseJobPostingPage, JobPostingPage
from .job_posting_navigation import (
AutoJobPostingNavigationPage,
Expand Down
46 changes: 46 additions & 0 deletions zyte_common_items/pages/forum_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import List, Optional

import attrs
from web_poet import Returns

from zyte_common_items.components import Topic
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ForumThread, ForumThreadMetadata, SocialMediaPost

from .base import BasePage, Page
from .mixins import HasMetadata


class BaseForumThreadPage(
    BasePage,
    Returns[ForumThread],
    HasMetadata[ForumThreadMetadata],
):
    """:class:`BasePage` subclass whose item class is :class:`ForumThread`
    and whose metadata class is :class:`ForumThreadMetadata`."""


class ForumThreadPage(
    Page,
    Returns[ForumThread],
    HasMetadata[ForumThreadMetadata],
):
    """:class:`Page` subclass whose item class is :class:`ForumThread`
    and whose metadata class is :class:`ForumThreadMetadata`."""


@attrs.define
class AutoForumThreadPage(BaseForumThreadPage):
    """:class:`BaseForumThreadPage` subclass whose fields all read from the
    ``forum_thread`` attribute.

    Each field method below returns the attribute of the same name from
    ``self.forum_thread`` without modifying it.
    """

    #: Item that every field of this page reads its value from.
    forum_thread: ForumThread

    # Field order: url, threadId, topic, posts, metadata.

    @auto_field
    def url(self) -> Optional[str]:
        thread = self.forum_thread
        return thread.url

    @auto_field
    def threadId(self) -> Optional[str]:
        thread = self.forum_thread
        return thread.threadId

    @auto_field
    def topic(self) -> Optional[Topic]:
        thread = self.forum_thread
        return thread.topic

    @auto_field
    def posts(self) -> Optional[List[SocialMediaPost]]:
        thread = self.forum_thread
        return thread.posts

    @auto_field
    def metadata(self) -> Optional[ForumThreadMetadata]:
        thread = self.forum_thread
        return thread.metadata