-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #98 from Gallaecio/serp
Add Serp
- Loading branch information
Showing
12 changed files
with
211 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
from typing import List, Optional | ||
|
||
import attrs | ||
|
||
from zyte_common_items.base import Item | ||
from zyte_common_items.components import ListMetadata | ||
from zyte_common_items.converters import ( | ||
to_metadata_optional, | ||
url_to_str, | ||
url_to_str_optional, | ||
) | ||
|
||
|
||
@attrs.define(kw_only=True)
class SerpOrganicResult:
    """A single organic (non-paid) entry of a search engine results page."""

    #: Excerpt shown for the result.
    description: Optional[str] = None

    #: Title of the result.
    name: Optional[str] = None

    #: URL of the result.
    url: Optional[str] = attrs.field(
        converter=url_to_str_optional,
        default=None,
        kw_only=True,
    )

    #: Position of the result among the other organic results of the same
    #: search engine results page.
    #:
    #: The rank is relative to a single page, not to the entire search: the
    #: first result of any page must be 1, even if that page is not the first
    #: page of the search.
    rank: Optional[int] = None
|
||
|
||
@attrs.define(kw_only=True)
class SerpMetadata(ListMetadata):
    """Metadata stored in :data:`zyte_common_items.Serp.metadata`."""

    #: The search query, as the webpage displays it.
    displayedQuery: Optional[str] = None

    #: The search query, as the input URL specifies it.
    searchedQuery: Optional[str] = None

    #: Total count of organic results, as reported by the search engine.
    totalOrganicResults: Optional[int] = None
|
||
|
||
@attrs.define(kw_only=True)
class Serp(Item):
    """Item holding the data of a `search engine results page
    <https://en.wikipedia.org/wiki/Search_engine_results_page>`_."""

    #: Search results of the page, paid results excluded.
    organicResults: Optional[List[SerpOrganicResult]] = None

    #: URL of the search. Required: this field declares no default.
    url: str = attrs.field(converter=url_to_str)

    #: Number of the page.
    pageNumber: Optional[int] = None

    #: Metadata about the data extraction process.
    metadata: Optional[SerpMetadata] = attrs.field(
        default=None, converter=to_metadata_optional(SerpMetadata), kw_only=True  # type: ignore[misc]
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from typing import List, Optional | ||
|
||
import attrs | ||
from web_poet import Returns | ||
|
||
from zyte_common_items.fields import auto_field | ||
from zyte_common_items.items import Serp, SerpMetadata, SerpOrganicResult | ||
|
||
from .base import BasePage, Page | ||
from .mixins import HasMetadata | ||
|
||
|
||
class BaseSerpPage(BasePage, Returns[Serp], HasMetadata[SerpMetadata]):
    """:class:`BasePage` subclass declared to return :class:`Serp` items,
    parametrized with :class:`SerpMetadata` for its metadata."""
|
||
|
||
class SerpPage(Page, Returns[Serp], HasMetadata[SerpMetadata]):
    """:class:`Page` subclass declared to return :class:`Serp` items,
    parametrized with :class:`SerpMetadata` for its metadata."""
|
||
|
||
@attrs.define
class AutoSerpPage(BaseSerpPage):
    """:class:`BaseSerpPage` subclass whose fields are read from the
    :class:`Serp` item it holds in ``serp``."""

    #: Item that every field below takes its value from.
    serp: Serp

    # Each field forwards the attribute of the same name from ``self.serp``
    # without modifying it.

    @auto_field
    def organicResults(self) -> Optional[List[SerpOrganicResult]]:
        return self.serp.organicResults

    @auto_field
    def url(self) -> str:
        return self.serp.url

    @auto_field
    def pageNumber(self) -> Optional[int]:
        return self.serp.pageNumber

    @auto_field
    def metadata(self) -> Optional[SerpMetadata]:
        return self.serp.metadata