Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix pagination #1

Merged
merged 11 commits into from
Sep 8, 2024
35 changes: 35 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,41 @@ for book in books:
print(highlight.text)
```

#### Export All Highlights

```python
import datetime

from readwise import Readwise

client = Readwise(token='<your_api_token>')

# Get all of a user's books/highlights from all time
all_data = client.export_highlights()

# Later, to fetch only the highlights updated since your last fetch of all_data, pass the time of that fetch.
last_fetch_was_at = datetime.datetime.now() - datetime.timedelta(days=1) # use your own stored date
new_data = client.export_highlights(last_fetch_was_at.isoformat())
```

#### Daily Review Highlights

Get the daily review details and highlights

```python
daily_review = client.get_daily_review()

completed = daily_review.review_completed # True or False
print(completed) # True or False

highlights = daily_review.highlights
```

Or get a generator of only the highlights:

```python
for highlight in client.get_daily_review_highlights():
print(highlight.text)
```

### Readwise Reader API

```python
Expand Down
170 changes: 158 additions & 12 deletions readwise/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
from requests.models import ChunkedEncodingError

from readwise.models import (
DailyReviewHighlight,
ReadwiseBook,
ReadwiseDailyReview,
ReadwiseExportHighlight,
ReadwiseExportResults,
ReadwiseHighlight,
ReadwiseReaderDocument,
ReadwiseTag,
Expand Down Expand Up @@ -203,19 +207,161 @@ def _get_pagination(
Yields:
dict: Response data
"""
page = 1
while True:
response = getattr(self, get_method)(
endpoint, params={"page": page, "page_size": page_size, **params}
)
data = response.json()
yield data
if type(data) == list or not data.get("next"):
break
page += 1
if endpoint == "/export/":
pageCursor = None
while True:
if pageCursor:
params.update({"pageCursor": pageCursor})
logging.debug(f'Getting page with cursor "{pageCursor}"')
try:
response = getattr(self, get_method)(endpoint, params=params)
except ChunkedEncodingError:
logging.error(f'Error getting page with cursor "{pageCursor}"')
sleep(5)
continue
data = response.json()
yield data
if (
isinstance(data, list)
or not data.get("nextPageCursor")
or data.get("nextPageCursor") == pageCursor
):
break
pageCursor = data.get("nextPageCursor")
else:
page = 1
while True:
response = getattr(self, get_method)(
endpoint, params={"page": page, "page_size": page_size, **params}
)
data = response.json()
yield data
if isinstance(data, list) or not data.get("next"):
break
page += 1

def get_daily_review(self) -> ReadwiseDailyReview:
    """Fetch the Readwise Daily Review.

    Issues a GET request against the ``/review/`` endpoint and passes the
    decoded JSON object to ``ReadwiseDailyReview`` as keyword arguments.

    Returns:
        A ReadwiseDailyReview object built from the response payload
    """
    response = self.get("/review/")
    payload = response.json()
    return ReadwiseDailyReview(**payload)

def get_daily_review_highlights(
    self,
) -> Generator[DailyReviewHighlight, None, None]:
    """Iterate over the highlights of the Readwise Daily Review.

    The daily review is fetched through ``get_daily_review`` when iteration
    starts; each entry of its ``highlights`` attribute is then expanded into
    a ``DailyReviewHighlight`` as keyword arguments.

    Yields:
        DailyReviewHighlight objects, one per highlight in the daily review
    """
    review = self.get_daily_review()
    yield from (DailyReviewHighlight(**entry) for entry in review.highlights)

def export_highlights(
    self, updated_after: str = None, ids: list[str] = None
) -> Generator[ReadwiseExportResults, None, None]:
    """
    Export all highlights from Readwise.

    Args:
        updated_after: sent unchanged as the ``updatedAfter`` query
            parameter (the docs example passes an ISO 8601 string)
        ids: A list of book ids; sent as one comma-separated ``ids``
            query parameter. Non-string ids (e.g. integers) are
            converted with ``str``.
    Yields:
        ReadwiseExportResults objects, one per book in the export
    """
    params = {}
    if updated_after:
        params["updatedAfter"] = updated_after
    if ids:
        # str() so that integer book ids do not make ``join`` raise TypeError.
        params["ids"] = ",".join(str(_id) for _id in ids)

    for data in self.get_pagination_limit_20("/export/", params):
        for book in data["results"]:
            book_tags = [ReadwiseTag(**book_tag) for book_tag in book["book_tags"]]

            # "tags" is converted to ReadwiseTag objects separately, so it is
            # left out of the keyword expansion to avoid a duplicate argument.
            highlights = [
                ReadwiseExportHighlight(
                    tags=[ReadwiseTag(**tag) for tag in highlight["tags"]],
                    **{
                        key: value
                        for key, value in highlight.items()
                        if key != "tags"
                    },
                )
                for highlight in book["highlights"]
            ]

            # Same reasoning for the two nested collections of the book.
            yield ReadwiseExportResults(
                **{
                    key: value
                    for key, value in book.items()
                    if key not in ("book_tags", "highlights")
                },
                book_tags=book_tags,
                highlights=highlights,
            )

def get_highlights(
    self,
    book_ids: list[str] = None,
    updated_after: datetime = None,
    updated_before: datetime = None,
    highlighted_at_after: datetime = None,
    highlighted_at_before: datetime = None,
) -> Generator[ReadwiseHighlight, None, None]:
    """
    Get all Readwise highlights.

    Args:
        book_ids: Readwise book IDs to filter by
        updated_after: Only highlights last updated after this date and time
        updated_before: Only highlights last updated before this date and time
        highlighted_at_after: Only highlights created after this date and time
        highlighted_at_before: Only highlights created before this date and time

    Returns:
        A generator of ReadwiseHighlight objects
    """

    def _parse_timestamp(value):
        # Timestamps may be missing/empty in the payload; keep those as None.
        return datetime.fromisoformat(value) if value else None

    params = {}
    if book_ids:
        # NOTE(review): the ids are sent as one ", "-joined ``book_id`` value;
        # confirm the API accepts more than one id in this parameter.
        params["book_id"] = ", ".join(book_ids)
    # "after" is a greater-than filter and "before" a less-than filter.
    if updated_after:
        params["updated__gt"] = updated_after.isoformat()
    if updated_before:
        params["updated__lt"] = updated_before.isoformat()
    if highlighted_at_after:
        params["highlighted_at__gt"] = highlighted_at_after.isoformat()
    if highlighted_at_before:
        params["highlighted_at__lt"] = highlighted_at_before.isoformat()

    for data in self.get_pagination_limit_20("/highlights/", params):
        for highlight in data["results"]:
            yield ReadwiseHighlight(
                id=highlight["id"],
                text=highlight["text"],
                note=highlight["note"],
                location=highlight["location"],
                location_type=highlight["location_type"],
                highlighted_at=_parse_timestamp(highlight["highlighted_at"]),
                url=highlight["url"],
                color=highlight["color"],
                updated=_parse_timestamp(highlight["updated"]),
                book_id=highlight["book_id"],
                tags=[
                    ReadwiseTag(id=tag["id"], name=tag["name"])
                    for tag in highlight["tags"]
                ],
            )

def get_books(
self, category: Literal["articles", "books", "tweets", "podcasts"]
self,
category: Literal["articles", "books", "tweets", "podcasts", "supplementals"],
) -> Generator[ReadwiseBook, None, None]:
"""
Get all Readwise books.
Expand Down Expand Up @@ -484,7 +630,7 @@ def _get_pagination(
data = response.json()
yield data
if (
type(data) == list
isinstance(data, list)
or not data.get("nextPageCursor")
or data.get("nextPageCursor") == pageCursor
):
Expand Down
Loading