-
Notifications
You must be signed in to change notification settings - Fork 34
/
protocol.py
1807 lines (1495 loc) · 69 KB
/
protocol.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Base protocol class and common code."""
import copy
from datetime import datetime, timedelta
import logging
import os
import re
from threading import Lock
from urllib.parse import urljoin, urlparse
from cachetools import cached, LRUCache
from flask import request
from google.cloud import ndb
from google.cloud.ndb import OR
from google.cloud.ndb.model import _entity_to_protobuf
from granary import as1, as2
from granary.source import html_to_text
from oauth_dropins.webutil.appengine_info import DEBUG
from oauth_dropins.webutil.flask_util import cloud_tasks_only
from oauth_dropins.webutil import models
from oauth_dropins.webutil import util
from oauth_dropins.webutil.util import json_dumps, json_loads
from requests import RequestException
import werkzeug.exceptions
from werkzeug.exceptions import BadGateway, HTTPException
import common
from common import (
DOMAIN_BLOCKLIST,
DOMAIN_RE,
DOMAINS,
PRIMARY_DOMAIN,
PROTOCOL_DOMAINS,
report_error,
subdomain_wrap,
)
import dms
import ids
from ids import (
BOT_ACTOR_AP_IDS,
normalize_user_id,
translate_object_id,
translate_user_id,
)
from models import (
DM,
Follower,
Object,
PROTOCOLS,
PROTOCOLS_BY_KIND,
Target,
User,
)
# NOTE(review): presumably the age after which a stored Object is considered
# stale and gets refreshed; the code that reads this constant is not in this
# part of the file — confirm.
OBJECT_REFRESH_AGE = timedelta(days=30)
# require a follow for users on these domains before we deliver anything from
# them other than their profile
#
# Taken from the whitespace-separated LIMITED_DOMAINS environment variable. If
# that variable is unset or empty, falls back to whatever
# util.load_file_lines('limited_domains') returns.
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
or util.load_file_lines('limited_domains'))
# module-level logger, named after this module
logger = logging.getLogger(__name__)
def error(*args, status=299, **kwargs):
    """Wraps :func:`common.error`, defaulting the HTTP status code to 299.

    299 is used as the default to prevent retrying the task.

    Args:
      args: positional arguments, passed through to :func:`common.error`
      status (int): HTTP status code to report
      kwargs: keyword arguments, passed through to :func:`common.error`

    Returns:
      whatever :func:`common.error` returns
    """
    kwargs['status'] = status
    return common.error(*args, **kwargs)
class ErrorButDoNotRetryTask(HTTPException):
# HTTP exception for the non-standard status code 299, which this module's
# error() helper uses as its default status "to prevent retrying task".
code = 299
description = 'ErrorButDoNotRetryTask'
# Register ErrorButDoNotRetryTask as the exception class for status code 299
# in werkzeug's code => exception mappings. setdefault() leaves any mapping
# that already exists for 299 untouched. Note that _aborter is a private
# werkzeug attribute.
# https://github.com/pallets/flask/issues/1837#issuecomment-304996942
werkzeug.exceptions.default_exceptions.setdefault(299, ErrorButDoNotRetryTask)
werkzeug.exceptions._aborter.mapping.setdefault(299, ErrorButDoNotRetryTask)
def activity_id_memcache_key(id):
    """Returns the memcache key used for a received activity id.

    Args:
      id (str): activity id

    Returns:
      the result of :func:`common.memcache_key` for ``receive-[id]``
    """
    raw_key = f'receive-{id}'
    return common.memcache_key(raw_key)
class Protocol:
"""Base protocol class. Not to be instantiated; classmethods only.
Attributes:
LABEL (str): human-readable lower case name. Computed from the class
name, lower cased, rather than assigned explicitly.
OTHER_LABELS (list of str): label aliases
ABBREV (str): lower case abbreviation, used in URL paths
PHRASE (str): human-readable name or phrase. Used in phrases like
``Follow this person on {PHRASE}``
LOGO_HTML (str): logo emoji or ``<img>`` tag
CONTENT_TYPE (str): MIME type of this protocol's native data format,
appropriate for the ``Content-Type`` HTTP header.
HAS_COPIES (bool): whether this protocol is push and needs us to
proactively create "copy" users and objects, as opposed to pulling
converted objects on demand
REQUIRES_AVATAR (bool): whether accounts on this protocol are required
to have a profile picture. If they don't, their ``User.status`` will be
``blocked``.
REQUIRES_NAME (bool): whether accounts on this protocol are required to
have a profile name that's different than their handle or id. If they
don't, their ``User.status`` will be ``blocked``.
REQUIRES_OLD_ACCOUNT (bool): whether accounts on this protocol are
required to be at least :const:`common.OLD_ACCOUNT_AGE` old. If their
profile includes creation date and it's not old enough, their
``User.status`` will be ``blocked``.
DEFAULT_ENABLED_PROTOCOLS (sequence of str): labels of other protocols
that are automatically enabled for this protocol to bridge into
SUPPORTED_AS1_TYPES (sequence of str): AS1 objectTypes and verbs that this
protocol supports receiving and sending.
SUPPORTS_DMS (bool): whether this protocol can receive DMs (chat messages)
"""
# Class-level defaults for the attributes documented above; presumably
# overridden by the concrete protocol subclasses.
ABBREV = None
PHRASE = None
OTHER_LABELS = ()
LOGO_HTML = ''
CONTENT_TYPE = None
HAS_COPIES = False
REQUIRES_AVATAR = False
REQUIRES_NAME = False
REQUIRES_OLD_ACCOUNT = False
DEFAULT_ENABLED_PROTOCOLS = ()
SUPPORTED_AS1_TYPES = ()
SUPPORTS_DMS = False
def __init__(self):
    """Always fails: this class is a namespace of classmethods only.

    Raises:
      AssertionError: always
    """
    # Raise explicitly instead of using ``assert False``: assert statements
    # are removed when Python runs with -O, which would silently make this
    # class instantiable.
    raise AssertionError(
        'Protocol is not meant to be instantiated; use its classmethods')
# NOTE(review): stacking @classmethod on top of @property was deprecated in
# Python 3.11 and removed in Python 3.13 — confirm which Python versions this
# module needs to support.
@classmethod
@property
def LABEL(cls):
"""Returns this protocol's label: the class's name, lower cased."""
return cls.__name__.lower()
@staticmethod
def for_request(fed=None):
    """Looks up the protocol that the current request is addressed to.

    The protocol is inferred from the current Flask request's hostname, via
    :meth:`for_bridgy_subdomain`.

    Args:
      fed (str or protocol.Protocol): protocol to return if the current
        request is on ``fed.brid.gy``

    Returns:
      Protocol: protocol, or None if the request hostname is not a subdomain
      of ``brid.gy`` or isn't a known protocol
    """
    hostname = request.host
    return Protocol.for_bridgy_subdomain(hostname, fed=fed)
@staticmethod
def for_bridgy_subdomain(domain_or_url, fed=None):
    """Maps a brid.gy subdomain, or a URL on one, to its protocol.

    Args:
      domain_or_url (str): bare domain, or web URL whose domain is used
      fed (str or protocol.Protocol): protocol, or protocol label, to return
        if the domain is the primary domain (``fed.brid.gy``) or one of the
        local domains

    Returns:
      class: :class:`Protocol` subclass, or None if the provided domain is not
      a subdomain of ``brid.gy`` or isn't a known protocol
    """
    if util.is_web(domain_or_url):
        domain = util.domain_from_link(domain_or_url, minimize=False)
    else:
        domain = domain_or_url

    # primary and local domains don't identify a protocol themselves, so
    # defer to the caller-provided one
    if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
        if isinstance(fed, str):
            return PROTOCOLS[fed]
        return fed

    # per-protocol subdomain: the part before the superdomain is the label
    if domain and domain.endswith(common.SUPERDOMAIN):
        return PROTOCOLS.get(domain.removesuffix(common.SUPERDOMAIN))

    return None
@classmethod
def owns_id(cls, id):
    """Says whether this protocol owns a given id.

    Subclasses are expected to implement this; this base implementation
    always answers False.

    An id is a string that uniquely identifies a user and is mainly meant to
    be read and used by machines. Handles, by contrast, are chosen by humans,
    meaningful to humans, and often but not always unique.

    For some protocols, ownership follows more or less directly from the id's
    format, eg ``at://`` URIs belong to AT Protocol. Other ids, eg http(s)
    URLs, could belong to eg Web or ActivityPub.

    Implementations should make a quick guess without expensive side effects:
    no external HTTP fetches of the id itself, and no other discovery.

    Implementations return False if the id's domain is in
    :const:`common.DOMAIN_BLOCKLIST`.

    Args:
      id (str)

    Returns:
      bool or None: whether this protocol owns the id, or None if it's unclear
    """
    return False
@classmethod
def owns_handle(cls, handle, allow_internal=False):
    """Says whether this protocol owns a given handle.

    Subclasses are expected to implement this; this base implementation
    always answers False.

    A handle is a string identity that is chosen by a human, meaningful to
    humans, and often but not always unique. Ids, by contrast, uniquely
    identify users and are mainly meant to be read and used by machines.

    For some protocols, ownership follows more or less directly from the
    format, eg ActivityPub (technically WebFinger) handles look like
    ``@user@instance.com``. Others, like domains, could belong to eg Web,
    ActivityPub, AT Protocol, or others.

    Implementations should make a quick guess without expensive side effects:
    no external HTTP fetches of the id itself, and no other discovery.

    Args:
      handle (str)
      allow_internal (bool): whether to return False for internal domains
        like ``fed.brid.gy``, ``bsky.brid.gy``, etc

    Returns:
      bool or None: whether this protocol owns the handle, or None if it's
      unclear
    """
    return False
@classmethod
def handle_to_id(cls, handle):
    """Resolves a handle to the corresponding id.

    Abstract: subclasses must implement this.

    Implementations may make network requests, eg DNS queries or HTTP
    requests, and avoid blocked or opted out users.

    Args:
      handle (str)

    Returns:
      str: corresponding id, or None if the handle can't be found

    Raises:
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def key_for(cls, id, allow_opt_out=False):
    """Builds the :class:`google.cloud.ndb.Key` for an id's :class:`models.User`.

    When called on :class:`Protocol` itself, the owning protocol is inferred
    with :meth:`for_id` and the call is forwarded to it. When called on a
    concrete subclass, that subclass is used as is.

    Args:
      id (str):
      allow_opt_out (bool): whether to allow users who are currently opted out

    Returns:
      google.cloud.ndb.Key: matching key, or None if the given id is not a
      valid :class:`User` id for this protocol.
    """
    if cls == Protocol:
        owner = Protocol.for_id(id)
        if not owner:
            return None
        return owner.key_for(id, allow_opt_out=allow_opt_out)

    # load the stored user, if any, so that we follow use_instead
    user = cls.get_by_id(id, allow_opt_out=True)
    if not user:
        return cls(id=id).key

    # users with a status set are only returned if the caller asked for them
    hidden = user.status and not allow_opt_out
    return None if hidden else user.key
# Results are memoized by cachetools in an LRU cache of up to 20000 entries,
# guarded by a lock.
@cached(LRUCache(20000), lock=Lock())
@staticmethod
def for_id(id, remote=True):
"""Returns the protocol for a given id.
Tries progressively more expensive steps, stopping at the first one that
gives an answer:
1. if the id is a web URL on one of our per-protocol subdomains, that
subdomain's protocol
2. :meth:`owns_id` on every known protocol
3. an existing stored :class:`Object` with ``source_protocol`` set
4. if ``remote`` is True, loading the id over the network with each
remaining candidate protocol
Args:
id (str)
remote (bool): whether to perform expensive side effects like fetching
the id itself over the network, or other discovery.
Returns:
Protocol subclass: matching protocol, or None if no single known
protocol definitively owns this id
"""
logger.debug(f'Determining protocol for id {id}')
if not id:
return None
if util.is_web(id):
# step 1: check for our per-protocol subdomains
try:
is_homepage = urlparse(id).path.strip('/') == ''
except ValueError as e:
logger.info(f'urlparse ValueError: {e}')
return None
by_subdomain = Protocol.for_bridgy_subdomain(id)
# homepages and bot actor ids are excluded here and fall through to the
# later steps
if by_subdomain and not is_homepage and id not in BOT_ACTOR_AP_IDS:
logger.debug(f' {by_subdomain.LABEL} owns id {id}')
return by_subdomain
# step 2: check if any Protocols say conclusively that they own it
# sort to be deterministic
protocols = sorted(set(p for p in PROTOCOLS.values() if p),
key=lambda p: p.LABEL)
candidates = []
for protocol in protocols:
owns = protocol.owns_id(id)
if owns:
logger.debug(f' {protocol.LABEL} owns id {id}')
return protocol
elif owns is not False:
# anything falsy other than False, eg None, means "unclear": keep this
# protocol as a candidate
candidates.append(protocol)
if len(candidates) == 1:
logger.debug(f' {candidates[0].LABEL} owns id {id}')
return candidates[0]
# step 3: look for existing Objects in the datastore
obj = Protocol.load(id, remote=False)
if obj and obj.source_protocol:
logger.debug(f' {obj.key.id()} owned by source_protocol {obj.source_protocol}')
return PROTOCOLS[obj.source_protocol]
# step 4: fetch over the network, if necessary
if not remote:
return None
for protocol in candidates:
logger.debug(f'Trying {protocol.LABEL}')
try:
if protocol.load(id, local=False, remote=True):
logger.debug(f' {protocol.LABEL} owns id {id}')
return protocol
except BadGateway:
# we tried and failed fetching the id over the network.
# this depends on ActivityPub.fetch raising this!
return None
except HTTPException as e:
# internal error we generated ourselves; try next protocol
pass
except Exception as e:
# only swallow exceptions that map to an HTTP status code; anything else
# is re-raised
code, _ = util.interpret_http_exception(e)
if code:
# we tried and failed fetching the id over the network
return None
raise
logger.info(f'No matching protocol found for {id} !')
return None
@cached(LRUCache(20000), lock=Lock())
@staticmethod
def for_handle(handle):
    """Determines which protocol a handle belongs to.

    This may have expensive side effects, eg resolving the handle itself over
    the network, or other discovery.

    Args:
      handle (str)

    Returns:
      (Protocol subclass, str) tuple: matching protocol and optional id (if
      resolved), or ``(None, None)`` if no known protocol owns this handle
    """
    # TODO: normalize, eg convert domains to lower case
    logger.debug(f'Determining protocol for handle {handle}')
    if not handle:
        return (None, None)

    # step 1: ask each protocol whether it conclusively owns the handle.
    # iterate in label order so that the result is deterministic.
    candidates = []
    known = {p for p in PROTOCOLS.values() if p}
    for proto in sorted(known, key=lambda p: p.LABEL):
        verdict = proto.owns_handle(handle)
        if verdict:
            logger.debug(f' {proto.LABEL} owns handle {handle}')
            return (proto, None)
        if verdict is not False:
            # unclear; keep as a candidate for the later steps
            candidates.append(proto)

    if len(candidates) == 1:
        logger.debug(f' {candidates[0].LABEL} owns handle {handle}')
        return (candidates[0], None)

    # step 2: look for a stored user with this handle
    for proto in candidates:
        user = proto.query(proto.handle == handle).get()
        if not user:
            continue
        if user.status:
            return (None, None)
        logger.debug(f' user {user.key} handle {handle}')
        return (proto, user.key.id())

    # step 3: try resolving the handle to an id with each candidate
    for proto in candidates:
        id = proto.handle_to_id(handle)
        if id:
            logger.debug(f' {proto.LABEL} resolved handle {handle} to id {id}')
            return (proto, id)

    logger.info(f'No matching protocol found for handle {handle} !')
    return (None, None)
@classmethod
def bridged_web_url_for(cls, user, fallback=False):
    """Gives the web URL of a user's bridged profile in this protocol.

    For example, for Web user ``alice.com``,
    :meth:`ATProto.bridged_web_url_for` returns
    ``https://bsky.app/profile/alice.com.web.brid.gy``

    This default implementation has no canonical bridged profile URL, so it
    only ever returns the fallback.

    Args:
      user (models.User)
      fallback (bool): if True, and bridged users have no canonical user
        profile URL in this protocol, return the native protocol's profile URL

    Returns:
      str, or None if there isn't a canonical URL
    """
    return user.web_url() if fallback else None
@classmethod
def actor_key(cls, obj, allow_opt_out=False):
    """Finds the :class:`User` key for an object's author or actor.

    Args:
      obj (models.Object)
      allow_opt_out (bool): whether to return a user key if they're opted out

    Returns:
      google.cloud.ndb.key.Key or None: None if the object has no owner, or
      if :meth:`key_for` returns None for it
    """
    owner_id = as1.get_owner(obj.as1)
    if not owner_id:
        return None

    return cls.key_for(owner_id, allow_opt_out=allow_opt_out)
@classmethod
def bot_user_id(cls):
    """Gives the Web user id of this protocol's bot user.

    This is the protocol's abbreviation followed by the superdomain, eg
    ``'bsky.brid.gy'`` for ATProto.

    Returns:
      str:
    """
    abbrev = cls.ABBREV
    superdomain = common.SUPERDOMAIN
    return f'{abbrev}{superdomain}'
@classmethod
def create_for(cls, user):
    """Creates, or re-activates, a user's copy user in this protocol.

    Abstract: subclasses must implement this.

    Implementations should add the copy user to :attr:`copies`. If the copy
    user already exists and is active, they should do nothing.

    Args:
      user (models.User): original source user. Shouldn't already have a
        copy user for this protocol in :attr:`copies`.

    Raises:
      ValueError: if we can't create a copy of the given user in this protocol
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def send(to_cls, obj, url, from_user=None, orig_obj_id=None):
    """Delivers an outgoing activity to a destination.

    Abstract: subclasses must implement this.

    NOTE: if this protocol's ``HAS_COPIES`` is True, and the implementation
    creates a copy and sends it, it *must* add that copy to the *object*'s
    (not the activity's) :attr:`copies`!

    Args:
      obj (models.Object): with activity to send
      url (str): destination URL to send to
      from_user (models.User): user (actor) this activity is from
      orig_obj_id (str): :class:`models.Object` key id of the "original object"
        that this object refers to, eg replies to or reposts or likes

    Returns:
      bool: True if the activity is sent successfully, False if it is
      ignored or otherwise unsent due to protocol logic, eg no webmention
      endpoint, protocol doesn't support the activity type. (Failures are
      raised as exceptions.)

    Raises:
      werkzeug.HTTPException: if the request fails
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def fetch(cls, obj, **kwargs):
    """Fetches a protocol-specific object into an :class:`Object`.

    Abstract: subclasses must implement this.

    Errors are raised as exceptions. A False return value means that the
    fetch didn't fail but didn't succeed either, eg the id isn't valid for
    this protocol, or the fetch didn't return valid data for this protocol.

    Args:
      obj (models.Object): with the id to fetch. Data is filled into one of
        the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
      kwargs: subclass-specific

    Returns:
      bool: True if the object was fetched and populated successfully,
      False otherwise

    Raises:
      requests.RequestException or werkzeug.HTTPException: if the fetch fails
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def convert(cls, obj, from_user=None, **kwargs):
    """Converts an :class:`Object` to this protocol's data format.

    For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
    and ``application/activity+json`` for :class:`ActivityPub`.

    Just passes through to :meth:`_convert`, plus minor protocol-independent
    processing: bridged actors are marked as bots (``objectType``
    ``application``) and get source links added via :meth:`add_source_links`.
    That labeling is applied to a temporary copy in ``obj.our_as1``, which is
    always restored to its original value before returning, even if the
    conversion raises.

    Args:
      obj (models.Object):
      from_user (models.User): user (actor) this activity/object is from
      kwargs: protocol-specific, passed through to :meth:`_convert`

    Returns:
      converted object in the protocol's native format, often a dict. ``{}``
      if ``obj`` or its AS1 is empty.
    """
    if not obj or not obj.as1:
        return {}

    obj_id = obj.key.id() if obj.key else obj.as1.get('id')
    is_activity = obj.as1.get('verb') in ('post', 'update')
    base_obj = as1.get_object(obj.as1) if is_activity else obj.as1
    orig_our_as1 = obj.our_as1

    try:
        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their
        # bios
        #
        # NOTE(review): the for_bridgy_subdomain clause compares a Protocol
        # class (or None) against DOMAINS. If DOMAINS only contains domain
        # name strings, as its use in is_blocklisted suggests, that clause is
        # always true — confirm the intent. Left as is to preserve behavior.
        if (from_user and base_obj
            and base_obj.get('objectType') in as1.ACTOR_TYPES
            and PROTOCOLS.get(obj.source_protocol) != cls
            and Protocol.for_bridgy_subdomain(obj_id) not in DOMAINS
            # Web users are special cased, they don't get the label if they've
            # explicitly enabled Bridgy Fed with redirects or webmentions
            and not (from_user.LABEL == 'web'
                     and (from_user.last_webmention_in
                          or from_user.has_redirects))):
            # modify a copy, stored temporarily in our_as1, so that the
            # original data isn't touched
            obj.our_as1 = copy.deepcopy(obj.as1)
            actor = as1.get_object(obj.as1) if is_activity else obj.as1
            actor['objectType'] = 'application'
            cls.add_source_links(actor=actor, obj=obj, from_user=from_user)

        return cls._convert(obj, from_user=from_user, **kwargs)
    finally:
        # undo the temporary our_as1 override. this is in a finally clause so
        # that obj isn't left modified if add_source_links or _convert raises.
        obj.our_as1 = orig_our_as1
@classmethod
def _convert(cls, obj, from_user=None, **kwargs):
    """Does the protocol-specific work of converting an :class:`Object`.

    Abstract: subclasses must implement this. Implementations should
    generally call :meth:`Protocol.translate_ids` (as their own class) before
    converting to their format.

    Args:
      obj (models.Object):
      from_user (models.User): user (actor) this activity/object is from
      kwargs: protocol-specific

    Returns:
      converted object in the protocol's native format, often a dict. May
      return the ``{}`` empty dict if the object can't be converted.

    Raises:
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def add_source_links(cls, actor, obj, from_user):
    """Appends "bridged from ... by Bridgy Fed" HTML to ``actor['summary']``.

    Modifies ``actor`` in place. Does nothing to the summary if its text
    already contains ``Bridgy Fed]``.

    Default implementation; subclasses may override.

    Args:
      actor (dict): AS1 actor
      obj (models.Object):
      from_user (models.User): user (actor) this activity/object is from
    """
    assert from_user

    bio = actor.setdefault('summary', '')
    if 'Bridgy Fed]' in html_to_text(bio, ignore_links=True):
        # already labeled
        return

    actor_id = actor.get('id')

    proto_phrase = ''
    if obj.source_protocol:
        proto_phrase = PROTOCOLS[obj.source_protocol].PHRASE
    if proto_phrase:
        proto_phrase = f' on {proto_phrase}'

    is_from_user = (from_user.key
                    and actor_id in (from_user.key.id(), from_user.profile_id()))
    if not is_from_user:
        # some other actor; link to its own URL or id, if it has one
        url = as1.get_url(actor) or actor_id
        source = util.pretty_link(url) if url else '?'
        source_links = f'[bridged from {source}{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'
    else:
        # this actor is from_user; link to their user page and profile
        source_links = f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a> from <a href="{from_user.web_url()}">{from_user.handle}</a>{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]'

    separator = '<br><br>' if bio else ''
    actor['summary'] = bio + separator + source_links
@classmethod
def set_username(to_cls, user, username):
    """Gives a user's bridged account in this protocol a custom username.

    Abstract: subclasses must implement this.

    Args:
      user (models.User)
      username (str)

    Raises:
      ValueError: if the username is invalid
      RuntimeError: if the username could not be set
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def target_for(cls, obj, shared=False):
    """Determines the delivery target (endpoint) for an :class:`Object`.

    Abstract: subclasses must implement this.

    Examples:

    * If obj has ``source_protocol`` ``web``, returns its URL, as a
      webmention target.
    * If obj is an ``activitypub`` actor, returns its inbox.
    * If obj is an ``activitypub`` object, returns its author's or actor's
      inbox.

    Args:
      obj (models.Object):
      shared (bool): optional. If True, returns a common/shared
        endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
        multiple recipients for efficiency

    Returns:
      str: target endpoint, or None if not available.

    Raises:
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def is_blocklisted(cls, url, allow_internal=False):
    """Returns True if we block the given URL and shouldn't deliver to it.

    A URL is blocked if its domain, or one of its parent domains, is in
    :const:`common.DOMAIN_BLOCKLIST`, or, unless ``allow_internal`` is True,
    in :const:`common.DOMAINS`.

    Default implementation here, subclasses may override.

    Args:
      url (str):
      allow_internal (bool): whether to return False for internal domains
        like ``fed.brid.gy``, ``bsky.brid.gy``, etc

    Returns:
      bool:
    """
    # Build a fresh local tuple instead of using += directly on the imported
    # constant. If DOMAIN_BLOCKLIST is a list, += would extend that shared
    # module-level object in place, so every call would grow it and internal
    # domains would stay blocked even when allow_internal is True.
    blocklist = tuple(DOMAIN_BLOCKLIST)
    if not allow_internal:
        blocklist += tuple(DOMAINS)

    return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
@classmethod
def translate_ids(to_cls, obj):
"""Translates all ids in an AS1 object to a specific protocol.
Infers source protocol for each id value separately.
For example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
``at://did:plc:abc/coll/123`` will be converted to
``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.
Wraps these AS1 fields:
* ``id``
* ``actor``
* ``author``
* ``bcc``
* ``bto``
* ``cc``
* ``object``
* ``object.actor``
* ``object.author``
* ``object.id``
* ``object.inReplyTo``
* ``attachments[].id``
* ``tags[objectType=mention].url``
* ``to``
This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
same logic is duplicated there!
TODO: unify with :meth:`Object.resolve_ids`,
:meth:`models.Object.normalize_ids`.
Must be called on a concrete subclass, not on :class:`Protocol` itself;
the class it's called on is the protocol that ids are translated to.
Args:
obj (dict): AS1 object or activity (not :class:`models.Object`!)
Returns:
dict: wrapped AS1 version of ``obj``. This is a modified deep copy; the
input is not modified. If ``obj`` is empty or None, it is returned as is.
"""
assert to_cls != Protocol
if not obj:
return obj
# work on a deep copy so that the caller's dict is left untouched
outer_obj = copy.deepcopy(obj)
inner_objs = outer_obj['object'] = as1.get_objects(outer_obj)
def translate(elem, field, fn, uri=False):
# Translates the ids in elem[field], in place.
#
# elem is an AS1 dict, field is the name of the field in it to translate,
# and fn is the translation function to use, ie translate_user_id or
# translate_object_id. If uri is True, translated ids are also passed
# through to_cls(id=...).id_uri().
elem[field] = as1.get_objects(elem, field)
for obj in elem[field]:
if id := obj.get('id'):
# leave audience targets in the recipient fields alone
if field in ('to', 'cc', 'bcc', 'bto') and as1.is_audience(id):
continue
from_cls = Protocol.for_id(id)
# TODO: what if from_cls is None? relax translate_object_id,
# make it a noop if we don't know enough about from/to?
if from_cls and from_cls != to_cls:
obj['id'] = fn(id=id, from_=from_cls, to=to_cls)
if obj['id'] and uri:
obj['id'] = to_cls(id=obj['id']).id_uri()
# collapse dicts that only have an id back down to bare string ids, and
# single-element lists down to their one element
elem[field] = [o['id'] if o.keys() == {'id'} else o
for o in elem[field]]
if len(elem[field]) == 1:
elem[field] = elem[field][0]
type = as1.object_type(outer_obj)
# actors' ids are user ids; everything else's are object ids
translate(outer_obj, 'id',
translate_user_id if type in as1.ACTOR_TYPES
else translate_object_id)
for o in inner_objs:
# an inner object is treated as an actor if it has an actor type, if it's
# the outer object's owner, or if the outer activity is a follow or
# unfollow
is_actor = (as1.object_type(o) in as1.ACTOR_TYPES
or as1.get_owner(outer_obj) == o.get('id')
or type in ('follow', 'stop-following'))
translate(o, 'id', translate_user_id if is_actor else translate_object_id)
for o in [outer_obj] + inner_objs:
translate(o, 'inReplyTo', translate_object_id)
for field in 'actor', 'author', 'to', 'cc', 'bto', 'bcc':
translate(o, field, translate_user_id)
for tag in as1.get_objects(o, 'tags'):
if tag.get('objectType') == 'mention':
translate(tag, 'url', translate_user_id, uri=True)
for att in as1.get_objects(o, 'attachments'):
translate(att, 'id', translate_object_id)
# attachments with a URL but no id get an id derived from that URL, if
# we can tell which protocol the URL belongs to
url = att.get('url')
if url and not att.get('id'):
if from_cls := Protocol.for_id(url):
att['id'] = translate_object_id(from_=from_cls, to=to_cls,
id=url)
outer_obj = util.trim_nulls(outer_obj)
# same collapsing as in translate() above, for the outer object field
if objs := outer_obj.get('object', []):
outer_obj['object'] = [o['id'] if o.keys() == {'id'} else o for o in objs]
if len(outer_obj['object']) == 1:
outer_obj['object'] = outer_obj['object'][0]
return outer_obj
@classmethod
def receive(from_cls, obj, authed_as=None, internal=False, received_at=None):
"""Handles an incoming activity.
If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
unset, returns HTTP 299.
Args:
obj (models.Object)
authed_as (str): authenticated actor id who sent this activity
internal (bool): whether to allow activity ids on internal domains,
from opted out/blocked users, etc.
received_at (datetime): when we first saw (received) this activity.
Right now only used for monitoring.
Returns:
(str, int) tuple: (response body, HTTP status code) Flask response
Raises:
werkzeug.HTTPException: if the request is invalid
"""
# check some invariants
assert from_cls != Protocol
assert isinstance(obj, Object), obj
if not obj.as1:
error('No object data provided')
id = None
if obj.key and obj.key.id():
id = obj.key.id()
if not id:
id = obj.as1.get('id')
obj.key = ndb.Key(Object, id)
if not id:
error('No id provided')
elif from_cls.owns_id(id) is False:
error(f'Protocol {from_cls.LABEL} does not own id {id}')
elif from_cls.is_blocklisted(id, allow_internal=internal):
error(f'Activity {id} is blocklisted')
# check that this activity is public. only do this for some activities,
# not eg likes or follows, since Mastodon doesn't currently mark those
# as explicitly public.
elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
and not as1.is_public(obj.as1, unlisted=False)
and not as1.is_dm(obj.as1)):
logger.info('Dropping non-public activity')
return ('OK', 200)
# lease this object, atomically
memcache_key = activity_id_memcache_key(id)
leased = common.memcache.add(memcache_key, 'leased', noreply=False,
expire=5 * 60) # 5 min
# short circuit if we've already seen this activity id.
# (don't do this for bare objects since we need to check further down
# whether they've been updated since we saw them last.)
if (obj.as1.get('objectType') == 'activity'
and 'force' not in request.values
and (not leased
or (obj.new is False and obj.changed is False)
# TODO: how does this make sense? won't these two lines
# always be true?!
or (obj.new is None and obj.changed is None
and from_cls.load(id, remote=False)))):
error(f'Already seen this activity {id}', status=204)
pruned = {k: v for k, v in obj.as1.items()
if k not in ('contentMap', 'replies', 'signature')}
delay = ''
if received_at and request.headers.get('X-AppEngine-TaskRetryCount') == '0':
delay_s = int((util.now().replace(tzinfo=None)
- received_at.replace(tzinfo=None)
).total_seconds())
delay = f'({delay_s} s behind)'
logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')
# does this protocol support this activity/object type?
from_cls.check_supported(obj)
# load actor user, check authorization
# https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
actor = as1.get_owner(obj.as1)
if not actor:
error('Activity missing actor or author')
elif from_cls.owns_id(actor) is False:
error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)
assert authed_as
assert isinstance(authed_as, str)
authed_as = normalize_user_id(id=authed_as, proto=from_cls)
actor = normalize_user_id(id=actor, proto=from_cls)
if actor != authed_as:
report_error("Auth: receive: authed_as doesn't match owner",
user=f'{id} authed_as {authed_as} owner {actor}')
error(f"actor {actor} isn't authed user {authed_as}")
# update copy ids to originals
obj.normalize_ids()
obj.resolve_ids()
if (obj.type == 'follow'
and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
# follows of bot user; refresh user profile first
logger.info(f'Follow of bot user, reloading {actor}')
from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
from_user.reload_profile()
else:
# load actor user
from_user = from_cls.get_or_create(id=actor, allow_opt_out=internal)
if not internal and (not from_user
or from_user.manual_opt_out
# we want to override opt-out but not manual or blocked
or (from_user.status and from_user.status != 'opt-out')):
error(f'Actor {actor} is opted out or blocked', status=204)
# write Object to datastore
orig = obj
obj = Object.get_or_create(id, authed_as=actor, **orig.to_dict())
if orig.new is not None:
obj.new = orig.new
if orig.changed is not None:
obj.changed = orig.changed
# if this is an object, ie not an activity, wrap it in a create or update
obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
obj.add('users', from_user.key)
inner_obj_as1 = as1.get_object(obj.as1)
if obj.type in as1.CRUD_VERBS:
if inner_owner := as1.get_owner(inner_obj_as1):
if inner_owner_key := from_cls.key_for(inner_owner):
obj.add('users', inner_owner_key)
obj.source_protocol = from_cls.LABEL
obj.put()
# store inner object
inner_obj_id = inner_obj_as1.get('id')
if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
source_protocol=from_cls.LABEL, authed_as=actor)
actor = as1.get_object(obj.as1, 'actor')
actor_id = actor.get('id')
# handle activity!
if obj.type == 'stop-following':
# TODO: unify with handle_follow?
# TODO: handle multiple followees
if not actor_id or not inner_obj_id:
error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
# deactivate Follower
from_ = from_cls.key_for(actor_id)
to_cls = Protocol.for_id(inner_obj_id)
to = to_cls.key_for(inner_obj_id)
follower = Follower.query(Follower.to == to,
Follower.from_ == from_,
Follower.status == 'active').get()
if follower:
logger.info(f'Marking {follower} inactive')
follower.status = 'inactive'
follower.put()
else:
logger.warning(f'No Follower found for {from_} => {to}')
# fall through to deliver to followee
# TODO: do we convert stop-following to webmention 410 of original
# follow?
elif obj.type in ('update', 'like', 'share'): # require object
if not inner_obj_id:
error("Couldn't find id of object to update")
# fall through to deliver to followers
elif obj.type in ('delete', 'undo'):
if not inner_obj_id:
error("Couldn't find id of object to delete")
logger.info(f'Marking Object {inner_obj_id} deleted')
Object.get_or_create(inner_obj_id, deleted=True, authed_as=authed_as)
# if this is an actor, handle deleting it later so that
# in case it's from_user, user.enabled_protocols is still populated
#
# fall through to deliver to followers and delete copy if necessary.
# should happen via protocol-specific copy target and send of
# delete activity.
# https://github.com/snarfed/bridgy-fed/issues/63
elif obj.type == 'block':
if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
# blocking protocol bot user disables that protocol
from_user.delete(proto)
from_user.disable_protocol(proto)
return 'OK', 200
elif obj.type == 'post':
# handle DMs to bot users
if as1.is_dm(obj.as1):
return dms.receive(from_user=from_user, obj=obj)
# fetch actor if necessary
if (actor and actor.keys() == set(['id'])
and obj.type not in ('delete', 'undo')):
logger.debug('Fetching actor so we have name, profile photo, etc')
actor_obj = from_cls.load(actor['id'], raise_=False)
if actor_obj and actor_obj.as1:
obj.our_as1 = {**obj.as1, 'actor': actor_obj.as1}
# fetch object if necessary so we can render it in feeds
if (obj.type == 'share'
and inner_obj_as1.keys() == set(['id'])