-
Notifications
You must be signed in to change notification settings - Fork 0
/
elfeed-paginate.el
199 lines (175 loc) · 8.67 KB
/
elfeed-paginate.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
;;; elfeed-paginate.el --- Paginate Elfeed feeds -*- lexical-binding: t -*-
;; Copyright (C) 2024 Jim Porter
;; Author: Jim Porter
;; Version: 0.0.1-pre
;; Keywords: feed, rss
;; Package-Requires: ((emacs "29.1") (elfeed))
;; This file is NOT part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the Free
;; Software Foundation, either version 3 of the License, or (at your option)
;; any later version.
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
;; more details.
;; You should have received a copy of the GNU General Public License along with
;; this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; elfeed-paginate adds the ability to retrieve multiple pages of results for
;; a web feed.  Currently, it supports two pagination schemes: RFC 5005
;; <link> relations, and the "paged" query parameter of WordPress feeds.
;;; Code:
(require 'elfeed)
;; Customization group for this package's user options.  It is filed under
;; the `comm' group.
(defgroup elfeed-paginate nil
"Add support for fetching multiple pages of feeds with Elfeed."
:group 'comm)
;; The first page counts toward this limit: `elfeed-paginate--update-feed'
;; starts its page depth at 1 and only follows a "next" link while that
;; depth is still below this value.
(defcustom elfeed-paginate-max-pages 5
"The maximum number of feed pages to fetch when updating."
:type 'natnum)
(defcustom elfeed-paginate-next-page-url-hook
  '(elfeed-paginate-next-page-url-link-rel
    elfeed-paginate-next-page-url-wordpress)
  "A list of functions to get the \"next\" page for a feed URL.
Each function is called with three arguments: the current page's
URL, its XML as an S-expression, and the Elfeed feed object.
Elfeed will use the first non-nil result.  If that result is a
string, it is the URL of the next page.  If it is a non-nil
symbol, this means that there is no next page, and the remaining
functions are not consulted."
  :type 'hook)
(defun elfeed-paginate-next-page-url-link-rel (_url xml _feed)
  "Return the RFC 5005 \"next page\" link found in XML, or nil.
Query XML for a <link> element under <feed> or <rss> whose rel
attribute is \"prev-archive\" and, failing that, one whose rel is
\"next\".  The value returned is that element's href attribute."
  (let ((href-for-rel
         (lambda (rel)
           (xml-query (list '(feed rss) 'link (vector 'rel rel) :href)
                      xml))))
    ;; "prev-archive" takes precedence over "next".
    (or (funcall href-for-rel "prev-archive")
        (funcall href-for-rel "next"))))
(defun elfeed-paginate-next-page-url-wordpress (url xml feed)
  "Return the URL of the page after URL for a WordPress feed, or nil.
A feed counts as WordPress when the `:generator' meta key on FEED
is the symbol `wordpress', or when the <generator> element in XML
is a URL whose host is \"wordpress.org\".  For any other feed,
return nil.

The next page is URL with its \"paged\" query parameter
incremented by one.  A missing \"paged\" parameter means page 1.
So does a value that is not a positive integer, which guarantees
that the result never points back at the first page."
  (when (or (eq (elfeed-meta feed :generator) 'wordpress)
            (when-let* ((generator (xml-query* (rss channel generator *) xml))
                        (genurl (url-generic-parse-url generator)))
              (string= (url-host genurl) "wordpress.org")))
    (let* ((urlobj (url-generic-parse-url url))
           (path-and-query (url-path-and-query urlobj))
           (query (when (cdr path-and-query)
                    (url-parse-query-string (cdr path-and-query))))
           ;; Query values are lists of strings; take the first one.
           (paged (car (alist-get "paged" query '("1") nil #'equal)))
           (parsed (string-to-number (or paged "1")))
           ;; `string-to-number' yields 0 for garbage and may yield a
           ;; negative number or a float; treat all of those as page 1.
           (page (if (natnump parsed) (max 1 parsed) 1)))
      (setf (alist-get "paged" query nil nil #'equal)
            (list (number-to-string (1+ page))))
      (setf (url-filename urlobj)
            (concat (car path-and-query) "?"
                    (url-build-query-string query)))
      (url-recreate-url urlobj))))
(defun elfeed-paginate-next-page-url (url xml feed)
  "Return the URL of the page that follows URL, or nil if there is none.
XML is the current page's XML as an S-expr.  FEED is the Elfeed
feed object.

Run the functions in `elfeed-paginate-next-page-url-hook' with
URL, XML and FEED until one returns non-nil.  Only a string
result is returned; any other value yields nil."
  (let ((result (run-hook-with-args-until-success
                 'elfeed-paginate-next-page-url-hook url xml feed)))
    (and (stringp result) result)))
(defmacro elfeed-paginate-with-fetch (url last-modified etag &rest body)
  "Asynchronously run BODY in a buffer with the contents from URL.
BODY becomes the body of a callback taking one argument.  This
macro is anaphoric: inside BODY, STATUS is that argument (the
status from `url-retrieve'/cURL) and USE-CURL is the value
`elfeed-use-curl' had when the fetch was started.

When fetching with cURL, a non-nil ETAG is sent as the
\"If-None-Match\" header and a non-nil LAST-MODIFIED as the
\"If-Modified-Since\" header, alongside a \"User-Agent\" header
set to `elfeed-user-agent'.  Neither value is used when fetching
with `url-queue-retrieve'."
  (declare (indent 3))
  ;; The callback is created before LAST-MODIFIED and ETAG are bound, so
  ;; BODY only sees the caller's variables plus STATUS and USE-CURL.
  `(let* ((use-curl elfeed-use-curl) ; snapshot for the callback's closure
          (cb (lambda (status) ,@body))
          (last-modified ,last-modified)
          (etag ,etag))
     (if elfeed-use-curl
         (let ((headers (list (cons "User-Agent" elfeed-user-agent))))
           (when etag
             (push (cons "If-None-Match" etag) headers))
           (when last-modified
             (push (cons "If-Modified-Since" last-modified) headers))
           (elfeed-curl-enqueue ,url cb :headers headers))
       (url-queue-retrieve ,url cb () t t))))
(defun elfeed-paginate--update-feed (feed url &optional since etag depth)
  "Update FEED by fetching URL, then following its \"next page\" links.
URL is the URL to fetch (possibly a subsequent page for the
feed).  If non-nil, SINCE should be the `:last-modified' value
for the feed, in string form.  Similarly, if non-nil, ETAG should
be the `:etag' value for the feed.

DEPTH defaults to 1 and increases by one for each nested call to
this function.  The function keeps calling itself for the next
page until the last entry of a page is not newer than SINCE, a
page has no entries, no next page is found, or DEPTH reaches
`elfeed-paginate-max-pages'.  That limit is read when this
function is called, not when the response arrives, so a dynamic
binding made by the caller is honored even though the fetch is
asynchronous.

Once no further page will be fetched, run `elfeed-update-hooks'
with FEED's ID.  This also happens when the server answers with
HTTP status 304.  The hooks are not run if the request or the
parse fails; the error is reported instead."
  (let ((depth (or depth 1))
        ;; Capture the limit now: the callback below runs after the
        ;; caller's dynamic bindings have been unwound.
        (max-pages elfeed-paginate-max-pages)
        (feed-id (elfeed-feed-id feed)))
    (elfeed-paginate-with-fetch url since etag
      (if (elfeed-is-status-error status use-curl)
          (let ((print-escape-newlines t))
            (elfeed-handle-http-error
             url (if use-curl elfeed-curl-error-message status)))
        (condition-case error
            (let ((next-url nil))
              (unless use-curl
                (elfeed-move-to-first-empty-line)
                (set-buffer-multibyte t))
              (unless (eql elfeed-curl-status-code 304)
                (when (= depth 1)
                  ;; Update Last-Modified and Etag; only the first page
                  ;; describes the feed itself.
                  (setf (elfeed-meta feed :last-modified)
                        (cdr (assoc "last-modified" elfeed-curl-headers))
                        (elfeed-meta feed :etag)
                        (cdr (assoc "etag" elfeed-curl-headers)))
                  ;; Record a canonical URL only if we were redirected.
                  (setf (elfeed-meta feed :canonical-url)
                        (unless (equal url elfeed-curl-location)
                          elfeed-curl-location)))
                (let* ((original-title (elfeed-feed-title feed))
                       (xml (elfeed-xml-parse-region (point) (point-max)))
                       (entries
                        (cl-case (elfeed-feed-type xml)
                          (:atom (elfeed-entries-from-atom feed-id xml))
                          (:rss (elfeed-entries-from-rss feed-id xml))
                          (:rss1.0 (elfeed-entries-from-rss1.0 feed-id xml))
                          (otherwise
                           (error (elfeed-handle-parse-error
                                   url "Unknown feed type."))))))
                  ;; Presumably parsing a page can overwrite the feed's
                  ;; title; keep the one in place before a later page.
                  (when (> depth 1)
                    (setf (elfeed-feed-title feed) original-title))
                  (elfeed-db-add entries)
                  (setq next-url
                        (and entries
                             (< depth max-pages)
                             ;; Assumes the last entry on a page is the
                             ;; oldest one -- TODO confirm for all feeds.
                             (or (null since)
                                 (< (elfeed-float-time since)
                                    (elfeed-entry-date (car (last entries)))))
                             (elfeed-paginate-next-page-url url xml feed)))))
              (if next-url
                  ;; Update the next page of the feed; never send the etag,
                  ;; since that's only for the newest page.  Rebind the
                  ;; limit so the nested call captures the same value.
                  (let ((elfeed-paginate-max-pages max-pages))
                    (elfeed-paginate--update-feed
                     feed next-url since nil (1+ depth)))
                ;; Report the feed, not whichever page was fetched last.
                (run-hook-with-args 'elfeed-update-hooks feed-id)))
          (error (elfeed-handle-parse-error url error))))
      (unless use-curl
        (kill-buffer)))))
(defun elfeed-paginate-update-feed (url)
  "Update the feed at URL, following pagination.
Interactively, prompt for URL with completion over the feed list.

Run `elfeed-update-init-hooks' unless
`elfeed--inhibit-update-init-hooks' is non-nil, then start the
update with the feed's stored `:last-modified' and `:etag' meta
values."
  (interactive (list (completing-read "Feed: " (elfeed-feed-list))))
  (or elfeed--inhibit-update-init-hooks
      (run-hooks 'elfeed-update-init-hooks))
  (let* ((feed (elfeed-db-get-feed url))
         (since (elfeed-meta feed :last-modified))
         (etag (elfeed-meta feed :etag)))
    (elfeed-paginate--update-feed feed url since etag)))
(defun elfeed-paginate-backfill (url &optional pages)
  "Backfill the feed at URL.
PAGES (interactively, the prefix arg) is the maximum number of
pages to backfill; if nil, use `elfeed-paginate-max-pages'.
Interactively, prompt for URL with completion over the feed list.

Unlike `elfeed-paginate-update-feed', start the update without
the feed's `:last-modified' and `:etag' values."
  (interactive
   (list (completing-read "Feed: " (elfeed-feed-list))
         (and current-prefix-arg
              (prefix-numeric-value current-prefix-arg))))
  ;; The page limit is bound dynamically around the call that starts the
  ;; update.
  (let ((elfeed-paginate-max-pages (or pages elfeed-paginate-max-pages))
        (feed (elfeed-db-get-feed url)))
    (or elfeed--inhibit-update-init-hooks
        (run-hooks 'elfeed-update-init-hooks))
    (elfeed-paginate--update-feed feed url)))
;;;###autoload
(defun elfeed-paginate ()
  "Enable Elfeed pagination.
Log that pagination is enabled, then install
`elfeed-paginate-update-feed' as `:override' advice on
`elfeed-update-feed'."
  (elfeed-log 'info "elfeed-paginate enabled")
  (advice-add #'elfeed-update-feed
              :override #'elfeed-paginate-update-feed))
(defun elfeed-paginate-unload-function ()
  "Remove the `elfeed-paginate-update-feed' advice from `elfeed-update-feed'.
This undoes what `elfeed-paginate' installs.  The name follows
the FEATURE-unload-function convention that `unload-feature'
looks for."
  (advice-remove #'elfeed-update-feed
                 #'elfeed-paginate-update-feed))
(provide 'elfeed-paginate)
;;; elfeed-paginate.el ends here