entry content
+diff --git a/api.py b/api.py index 85d7576e..5150fcb6 100644 --- a/api.py +++ b/api.py @@ -240,10 +240,6 @@ def write_response(self, response, actor=None, url=None, title=None, reader = self.request.get('reader', 'true').lower() if reader not in ('true', 'false'): self.abort(400, 'reader param must be either true or false') - if not actor and hfeed: - actor = microformats2.json_to_object({ - 'properties': hfeed.get('properties', {}), - }) self.response.out.write(atom.activities_to_atom( activities, actor, host_url=url or self.request.host_url + '/', @@ -260,7 +256,7 @@ def write_response(self, response, actor=None, url=None, title=None, title = 'Feed for %s' % url self.response.out.write(rss.from_activities( activities, actor, title=title, - feed_url=self.request.url, hfeed=hfeed, + feed_url=self.request.url, hfeed=hfeed, actor=actor, home_page_url=util.base_url(url))) elif format in ('as1-xml', 'xml'): self.response.out.write(XML_TEMPLATE % util.to_xml(response)) diff --git a/app.py b/app.py index bb16aa2c..cb13bf77 100644 --- a/app.py +++ b/app.py @@ -193,9 +193,10 @@ def get(self): except (TypeError, ValueError): raise exc.HTTPBadRequest('Could not decode %s as JSON' % final_url) - mf2 = None + soup = mf2 = None if input == 'html': - mf2 = util.parse_mf2(resp, id=fragment) + soup = util.parse_html(resp) + mf2 = util.parse_mf2(soup, id=fragment) if id and not mf2: raise exc.HTTPBadRequest('Got fragment %s but no element found with that id.' % fragment) elif input in ('mf2-json', 'json-mf2'): @@ -206,18 +207,11 @@ def get(self): mf2.__class__.__name__) mf2.setdefault('rels', {}) # mf2util expects rels - actor = None - title = None - hfeed = None - if mf2: - def fetch_mf2_func(url): - if util.domain_or_parent_in(urllib.parse.urlparse(url).netloc, SILO_DOMAINS): - return {'items': [{'type': ['h-card'], 'properties': {'url': [url]}}]} - return util.fetch_mf2(url, gateway=True) - + actor = title = hfeed = None + if soup: try: - actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func) - title = microformats2.get_title(mf2) + actor = microformats2.find_feed_author(soup, url=final_url, mf2=mf2) + title = actor.get('displayName') hfeed = mf2util.find_first_entry(mf2, ['h-feed']) except (KeyError, ValueError) as e: raise exc.HTTPBadRequest('Could not parse %s as %s: %s' % (final_url, input, e)) diff --git a/granary/atom.py b/granary/atom.py index e899f610..f7fa7ec9 100644 --- a/granary/atom.py +++ b/granary/atom.py @@ -324,13 +324,14 @@ def html_to_atom(html, url=None, fetch_author=False, reader=True): if fetch_author: assert url, 'fetch_author=True requires url!' - parsed = util.parse_mf2(html, url=url) - actor = microformats2.find_author(parsed, fetch_mf2_func=util.fetch_mf2) + soup = util.parse_html(html) + actor = microformats2.find_feed_author(soup, url=url) + mf2 = util.parse_mf2(soup, url=url) return activities_to_atom( microformats2.html_to_activities(html, url, actor), actor, - title=microformats2.get_title(parsed), + title=microformats2.html_title(soup), xml_base=util.base_url(url), host_url=url, reader=reader) diff --git a/granary/microformats2.py b/granary/microformats2.py index c47de642..08e9d621 100644 --- a/granary/microformats2.py +++ b/granary/microformats2.py @@ -1055,16 +1055,59 @@ def find_author(parsed, **kwargs): parsed: dict, parsed mf2 object (ie return value from mf2py.parse()) kwargs: passed through to mf2util.find_author() """ - author = mf2util.find_author(parsed, 'http://123', **kwargs) - if author: - photo = author.get('photo') - if isinstance(photo, dict): - photo = photo.get('url') or photo.get('value') - return { - 'displayName': author.get('name'), - 'url': author.get('url'), - 'image': {'url': photo}, - } + return author_to_actor(mf2util.find_author(parsed, 'http://123', **kwargs)) + + +def find_feed_author(soup, url=None, mf2=None): + """Returns the author of a feed page as a ActivityStreams actor dict. + + Args: + soup: :class:`bs4.BeautifulSoup`, parsed HTML page + url: str, optional, URL of feed page + mf2: dict, optional, parsed mf2 object (ie return value from mf2py.parse()) + + Returns: dict, AS actor + """ + actor = {} + + if not mf2: + mf2 = util.parse_mf2(soup) + + feed = mf2util.find_first_entry(mf2, ['h-feed']) + if feed: + author = util.get_first(feed.get('properties', {}), 'author') or feed + actor = author_to_actor(mf2util.parse_author(author), ellipsize=True) + + if not actor.get('displayName'): + actor['displayName'] = html_title(soup) + + if not actor.get('url'): + actor['url'] = url + + return actor + + +def author_to_actor(author, ellipsize=False): + """Converts an mf2 author to an ActivityStreams actor. + + Args: + author: dict, parsed mf2 author object, or None + ellipsize: boolean, whether to ellipsize the name if it's too long + + Returns: dict, AS actor + """ + if not author: + return {} + + prop = first_props(author.get('properties', {})) + photo = prop.get('photo') + if isinstance(photo, dict): + photo = photo.get('url') or photo.get('value') + return { + 'displayName': prop.get('name'), + 'url': prop.get('url'), + 'image': {'url': photo}, + } def get_title(mf2): @@ -1082,6 +1125,19 @@ def get_title(mf2): return '' +def html_title(soup): + """Returns the HTML
entry content
+