Skip to content

Commit

Permalink
Proof of concept of auto pages for easy override
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Oct 4, 2023
1 parent c162726 commit 812bd8e
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 8 deletions.
29 changes: 27 additions & 2 deletions docs/usage/pages.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,40 @@ whose ``to_item`` method returns an instance of

.. code-block:: python
import attrs
from zyte_common_items import ProductPage
import attrs
from zyte_common_items import ProductPage
class CustomProductPage(ProductPage):
@field
def name(self):
return self.css("h1::text").get()
Page object classes with the ``Auto`` prefix make it easy to define page
object classes that receive an :ref:`item <items>` as a dependency from
another page object class. By default they generate an identical item, but
you can also override specific fields of the item, or even return a new item
with extra fields. For example:

.. code-block:: python
import attrs
# ``Returns`` and ``field`` are provided by web-poet.
from web_poet import Returns, field
from zyte_common_items import AutoProductPage, Product


# Item type that adds an extra ``foo`` field on top of ``Product``.
@attrs.define
class ExtendedProduct(Product):
    foo: str


# Page object that returns ``ExtendedProduct`` instead of ``Product``.
class ExtendedProductPage(AutoProductPage, Returns[ExtendedProduct]):
    # Override an existing field, building on the input product.
    @field
    async def name(self):
        return f"{self.product.brand.name} {self.product.name}"

    # Define the field that only exists in the extended item.
    @field
    async def foo(self):
        return "bar"
Field processors
================

Expand Down
2 changes: 1 addition & 1 deletion tests/test_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ def test_metadata():
obj_name[:-4]
for obj_name in zyte_common_items.__dict__
if (
not obj_name.startswith("Base")
not (obj_name.startswith("Base") or obj_name.startswith("Auto"))
and obj_name.endswith("Page")
and obj_name != "Page"
)
Expand Down
17 changes: 13 additions & 4 deletions tests/test_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,17 @@ def validate_input(self):


def test_page_pairs():
"""For every page a base page, for every base page a page."""
"""For every page a base page and an auto page, and the other way around."""
pages = {
obj_name
for obj_name in zyte_common_items.__dict__
if (
not obj_name.startswith("Base")
not (obj_name.startswith("Base") or obj_name.startswith("Auto"))
and obj_name.endswith("Page")
and obj_name != "Page"
)
}

actual_base_pages = {
obj_name
for obj_name in zyte_common_items.__dict__
Expand All @@ -177,14 +178,22 @@ def test_page_pairs():
expected_base_pages = {f"Base{page}" for page in pages}
assert actual_base_pages == expected_base_pages

actual_auto_pages = {
obj_name
for obj_name in zyte_common_items.__dict__
if (obj_name.startswith("Auto") and obj_name.endswith("Page"))
}
expected_auto_pages = {f"Auto{page}" for page in pages}
assert actual_auto_pages == expected_auto_pages


def test_matching_items():
"""For every page, an item."""
pages = {
obj_name
for obj_name in zyte_common_items.__dict__
if (
not obj_name.startswith("Base")
not (obj_name.startswith("Base") or obj_name.startswith("Auto"))
and obj_name.endswith("Page")
and obj_name != "Page"
)
Expand Down Expand Up @@ -257,7 +266,7 @@ def test_metadata():
obj_name
for obj_name in zyte_common_items.__dict__
if (
not obj_name.startswith("Base")
not (obj_name.startswith("Base") or obj_name.startswith("Auto"))
and obj_name.endswith("Page")
and obj_name != "Page"
)
Expand Down
47 changes: 47 additions & 0 deletions tests/test_pages_auto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from copy import copy

import attrs
import pytest
from web_poet import Returns, field

from zyte_common_items import AutoProductPage, Product

from .test_items import _PRODUCT_ALL_KWARGS


@pytest.mark.asyncio
async def test_product_unmodified():
    """A plain AutoProductPage yields an item equal to its input product."""
    source = Product(**_PRODUCT_ALL_KWARGS)
    item = await AutoProductPage(product=source).to_item()
    assert item == source


@pytest.mark.asyncio
async def test_product_modified():
    """Overriding ``name`` in a subclass changes that field of the item."""

    class BrandedNamePage(AutoProductPage):
        @field
        async def name(self):
            return f"{self.product.brand.name} {self.product.name}"

    source = Product(**_PRODUCT_ALL_KWARGS)

    # Derive the expectation from a copy so that ``source`` is left untouched.
    expected = copy(source)
    expected.name = f"{source.brand.name} {source.name}"

    item = await BrandedNamePage(product=source).to_item()
    assert item == expected


@pytest.mark.asyncio
async def test_product_extended():
    """A subclass can return a Product subclass that carries an extra field."""

    @attrs.define
    class ExtendedProduct(Product):
        foo: str

    class ExtendedPage(AutoProductPage, Returns[ExtendedProduct]):
        @field
        async def foo(self):
            return "bar"

    source = Product(**_PRODUCT_ALL_KWARGS)
    expected = ExtendedProduct(**_PRODUCT_ALL_KWARGS, foo="bar")

    item = await ExtendedPage(product=source).to_item()
    assert item == expected
1 change: 1 addition & 0 deletions zyte_common_items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
ArticleListPage,
ArticleNavigationPage,
ArticlePage,
AutoProductPage,
BaseArticleListPage,
BaseArticleNavigationPage,
BaseArticlePage,
Expand Down
132 changes: 131 additions & 1 deletion zyte_common_items/pages.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import html
from datetime import datetime
from typing import Any, Generic, Optional, Type, TypeVar, Union
from typing import Any, Generic, List, Optional, Type, TypeVar, Union

import attrs
import html_text
Expand All @@ -13,10 +13,16 @@
from web_poet.utils import ensure_awaitable, get_generic_param

from .components import (
AdditionalProperty,
AggregateRating,
ArticleListMetadata,
ArticleMetadata,
ArticleNavigationMetadata,
Brand,
Breadcrumb,
BusinessPlaceMetadata,
Gtin,
Image,
JobPostingMetadata,
ProductListMetadata,
ProductMetadata,
Expand All @@ -33,6 +39,7 @@
Product,
ProductList,
ProductNavigation,
ProductVariant,
RealEstate,
)
from .processors import (
Expand Down Expand Up @@ -370,3 +377,126 @@ class RealEstatePage(Page, Returns[RealEstate], HasMetadata[RealEstateMetadata])
class Processors(Page.Processors):
breadcrumbs = [breadcrumbs_processor]
description = [description_processor]


# Page object that receives a ``Product`` through its ``product`` attribute
# and declares one async field per ``Product`` attribute read below; each
# field returns the matching attribute of ``self.product``.
# Subclasses may redefine any of these fields (e.g. ``name``) or add new ones.
@attrs.define
class AutoProductPage(
_BasePage,
DescriptionMixin,
PriceMixin,
Returns[Product],
HasMetadata[ProductMetadata],
):
# Input item that every field below reads from.
product: Product

# Processors registered for the fields of the same name, extending the ones
# inherited from ``_BasePage.Processors``.
# NOTE(review): presumably applied to each field's return value by web-poet;
# confirm they pass through values that are already processed.
class Processors(_BasePage.Processors):
brand = [brand_processor]
breadcrumbs = [breadcrumbs_processor]
description = [description_processor]
descriptionHtml = [description_html_processor]
price = [price_processor]
regularPrice = [simple_price_processor]

# Pass-through fields: each one returns the same-named attribute of
# ``self.product``.
@field
async def additionalProperties(self) -> Optional[List[AdditionalProperty]]:
return self.product.additionalProperties

@field
async def aggregateRating(self) -> Optional[AggregateRating]:
return self.product.aggregateRating

@field
async def availability(self) -> Optional[str]:
return self.product.availability

@field
async def brand(self) -> Optional[Brand]:
return self.product.brand

@field
async def breadcrumbs(self) -> Optional[List[Breadcrumb]]:
return self.product.breadcrumbs

@field
async def canonicalUrl(self) -> Optional[str]:
return self.product.canonicalUrl

@field
async def color(self) -> Optional[str]:
return self.product.color

@field
async def currency(self) -> Optional[str]:
return self.product.currency

@field
async def currencyRaw(self) -> Optional[str]:
return self.product.currencyRaw

@field
async def description(self) -> Optional[str]:
return self.product.description

@field
async def descriptionHtml(self) -> Optional[str]:
return self.product.descriptionHtml

@field
async def features(self) -> Optional[List[str]]:
return self.product.features

@field
async def gtin(self) -> Optional[List[Gtin]]:
return self.product.gtin

@field
async def images(self) -> Optional[List[Image]]:
return self.product.images

@field
async def mainImage(self) -> Optional[Image]:
return self.product.mainImage

@field
async def metadata(self) -> Optional[ProductMetadata]:
return self.product.metadata

@field
async def mpn(self) -> Optional[str]:
return self.product.mpn

@field
async def name(self) -> Optional[str]:
return self.product.name

@field
async def price(self) -> Optional[str]:
return self.product.price

@field
async def productId(self) -> Optional[str]:
return self.product.productId

@field
async def regularPrice(self) -> Optional[str]:
return self.product.regularPrice

@field
async def size(self) -> Optional[str]:
return self.product.size

@field
async def sku(self) -> Optional[str]:
return self.product.sku

@field
async def style(self) -> Optional[str]:
return self.product.style

# ``url`` is the only field annotated as non-optional.
@field
async def url(self) -> str:
return self.product.url

@field
async def variants(self) -> Optional[List[ProductVariant]]:
return self.product.variants

0 comments on commit 812bd8e

Please sign in to comment.