diff --git a/.github/workflows/pullrequest_tests.yaml b/.github/workflows/pullrequest_tests.yaml index e0d2cdf9..7bd51122 100644 --- a/.github/workflows/pullrequest_tests.yaml +++ b/.github/workflows/pullrequest_tests.yaml @@ -20,12 +20,12 @@ jobs: - name: Install and test arxiv-auth working-directory: ./arxiv-auth run: | - poetry install + poetry install --with=dev poetry run pytest --cov=arxiv_auth arxiv_auth - name: Install and test accounts working-directory: ./accounts run: | - poetry install + poetry install --with=dev poetry run pytest --cov=accounts accounts - name: linter run: | diff --git a/accounts/accounts/config.py b/accounts/accounts/config.py index 58b2fe8c..25c5da56 100644 --- a/accounts/accounts/config.py +++ b/accounts/accounts/config.py @@ -54,9 +54,6 @@ Useful for testing, dev, beta.""" -JWT_SECRET = os.environ.get('JWT_SECRET', secrets.token_urlsafe(16)) -"""JWT secret used with NG JWTs in `arxiv-auth`.""" - AUTH_SESSION_COOKIE_NAME = 'ARXIVNG_SESSION_ID' AUTH_SESSION_COOKIE_DOMAIN = os.environ.get('AUTH_SESSION_COOKIE_DOMAIN', f'.{BASE_SERVER}') AUTH_SESSION_COOKIE_SECURE = bool(int(os.environ.get('AUTH_SESSION_COOKIE_SECURE', '1'))) diff --git a/accounts/accounts/controllers/authentication.py b/accounts/accounts/controllers/authentication.py index 53bc7d7c..e829a98e 100644 --- a/accounts/accounts/controllers/authentication.py +++ b/accounts/accounts/controllers/authentication.py @@ -34,6 +34,7 @@ from arxiv_auth.legacy.util import transaction from accounts import config +from accounts.next_page import good_next_page logger = logging.getLogger(__name__) @@ -136,7 +137,7 @@ def login(method: str, form_data: MultiDict, ip: str, 'classic_cookie': (c_cookie, c_session.expires) } }) - next_page = next_page if good_next_page(next_page) else config.DEFAULT_LOGIN_REDIRECT_URL + next_page = good_next_page(next_page) return data, status.HTTP_303_SEE_OTHER, {'Location': next_page} @@ -188,7 +189,7 @@ def logout(session_cookie: Optional[str], 'classic_cookie': ('', 0) } } - return data, status.HTTP_303_SEE_OTHER, {'Location': next_page} + return data, status.HTTP_303_SEE_OTHER, {'Location': good_next_page(next_page)} class LoginForm(Form): @@ -220,10 +221,3 @@ def _do_login(auths: Authorizations, ip: str, tracking_cookie: str, def _do_logout(classic_session_cookie: str) -> None: with transaction(): legacy_sessions.invalidate(classic_session_cookie) - - -def good_next_page(next_page: str) -> bool: - """True if next_page is a valid query parameter for use with the login page.""" - return next_page == config.DEFAULT_LOGIN_REDIRECT_URL \ - or (len(next_page) < 2048 # avoids crazy inputs - and re.match(config.login_redirect_pattern, next_page)) diff --git a/accounts/accounts/controllers/tests/test_authentication.py b/accounts/accounts/controllers/tests/test_authentication.py index f7203258..c7719887 100644 --- a/accounts/accounts/controllers/tests/test_authentication.py +++ b/accounts/accounts/controllers/tests/test_authentication.py @@ -114,7 +114,7 @@ def test_logout(self, mock_legacy_ses, mock_SessionStore): mock_legacy_ses.invalidate_session.return_value = None mock_SessionStore.current_session.return_value \ .delete.return_value = None - next_page = '/' + next_page = 'https://arxiv.org/some_next_page' session_id = 'foosession' classic_id = 'bazsession' with self.app.app_context(): @@ -131,7 +131,7 @@ def test_logout_anonymous(self, mock_legacy_ses, mock_SessionStore): mock_legacy_ses.invalidate_session.return_value = None mock_SessionStore.current_session.return_value \ .delete.return_value = None - next_page = '/' + next_page = 'https://arxiv.org/some_next_page?param=11%x' with self.app.app_context(): data, status_code, header = logout(None, None, next_page) self.assertEqual(status_code, status.HTTP_303_SEE_OTHER, diff --git a/accounts/accounts/next_page.py b/accounts/accounts/next_page.py new file mode 100644 index 00000000..deccaffb --- /dev/null +++ b/accounts/accounts/next_page.py @@ -0,0 +1,15 @@ +"""Next page handling.""" +import re + +from accounts import config + +def good_next_page(next_page: str) -> str: + """Checks if a next_page is good and returns it. + + If not good, it will return the default. + """ + good = (next_page and len(next_page) < 300 and + (next_page == config.DEFAULT_LOGIN_REDIRECT_URL + or re.match(config.login_redirect_pattern, next_page)) + ) + return next_page if good else config.DEFAULT_LOGIN_REDIRECT_URL diff --git a/accounts/accounts/routes/ui.py b/accounts/accounts/routes/ui.py index b7963b0b..13abbb40 100644 --- a/accounts/accounts/routes/ui.py +++ b/accounts/accounts/routes/ui.py @@ -12,6 +12,7 @@ from arxiv import status from arxiv_auth import domain +from accounts.next_page import good_next_page from accounts.controllers import captcha_image, registration, authentication EASTERN = timezone('US/Eastern') @@ -30,7 +31,8 @@ def anonymous_only(func: Callable) -> Callable: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> Any: if request.auth: - return make_response(redirect(_checked_next_page(), code=status.HTTP_303_SEE_OTHER)) + next_page = good_next_page(request.args.get('next_page', None)) + return make_response(redirect(next_page, code=status.HTTP_303_SEE_OTHER)) else: return func(*args, **kwargs) return wrapper diff --git a/accounts/accounts/tests/test_end_to_end.py b/accounts/accounts/tests/test_end_to_end.py index e4c8f7ec..7d7fd10c 100644 --- a/accounts/accounts/tests/test_end_to_end.py +++ b/accounts/accounts/tests/test_end_to_end.py @@ -262,6 +262,31 @@ def test_already_logged_in_redirect(self): assert response.status_code == status.HTTP_303_SEE_OTHER assert response.headers['Location'] == next_page + next_page = 'https://somesubdomain.arxiv.org/some_sort_of_next_page?blt=yes%20please' + response = client.get('/login?next_page=' + quote_plus(next_page)) + assert response.status_code == status.HTTP_303_SEE_OTHER + assert response.headers['Location'] == next_page, "/login should fowrward to arxiv subdomains" + + next_page = '/some_relative_page' + response = client.get('/login?next_page=' + quote_plus(next_page)) + assert response.status_code == status.HTTP_303_SEE_OTHER + assert response.headers['Location'].endswith(next_page), "/login should fowrward to relative pages" + + next_page = 'https://scammy.example.com/whereisit?cheese=idontknow' + response = client.get('/login?next_page=' + quote_plus(next_page)) + assert response.headers['Location'] != next_page, "/login should not forward to scammmy URLs" + assert response.headers['Location'] == self.app.config['DEFAULT_LOGIN_REDIRECT_URL'] + + next_page = '/whereisit?' + ','.join(30 * 'cheese=idontknow') + response = client.get('/login?next_page=' + quote_plus(next_page)) + assert response.headers['Location'] != next_page, "/login should not forward to super long URL" + assert response.headers['Location'] == self.app.config['DEFAULT_LOGIN_REDIRECT_URL'] + + next_page = 'https://arxiv.org/whereisit?' + ','.join(30 * 'cheese=idontknow') + response = client.get('/login?next_page=' + quote_plus(next_page)) + assert response.headers['Location'] != next_page, "/login should not forward to super long URL" + assert response.headers['Location'] == self.app.config['DEFAULT_LOGIN_REDIRECT_URL'] + def test_post_login_baddata(self): """POST request to /login with invalid data returns 400.""" form_data = {'username': 'foouser', 'password': 'notthepassword'} diff --git a/arxiv-auth/arxiv_auth/auth/__init__.py b/arxiv-auth/arxiv_auth/auth/__init__.py index ca84fbb6..79582d09 100644 --- a/arxiv-auth/arxiv_auth/auth/__init__.py +++ b/arxiv-auth/arxiv_auth/auth/__init__.py @@ -172,7 +172,8 @@ def _get_legacy_session(self, def auth_debug(self) -> None: """Sets several auth loggers to DEBUG. - This is useful to get an idea of what is going on with auth.""" + This is useful to get an idea of what is going on with auth. + """ logger.setLevel(logging.DEBUG) legacy.sessions.logger.setLevel(logging.DEBUG) legacy.authenticate.logger.setLevel(logging.DEBUG) diff --git a/arxiv-auth/arxiv_auth/domain.py b/arxiv-auth/arxiv_auth/domain.py index 9e50655d..7aa0425b 100644 --- a/arxiv-auth/arxiv_auth/domain.py +++ b/arxiv-auth/arxiv_auth/domain.py @@ -33,6 +33,7 @@ def _check_category(data: Any) -> Category: class UserProfile(BaseModel): """User profile data.""" + model_config = ConfigDict(arbitrary_types_allowed=True) affiliation: str @@ -61,6 +62,7 @@ class UserProfile(BaseModel): @field_validator('default_category', mode='before') @classmethod def check_category(cls, data: Any) -> Category: + """Checks if `data` is a category.""" return _check_category(data) homepage_url: str = '' @@ -161,6 +163,7 @@ def from_str(cls, scopestr:str) -> "Scope": class Authorizations(BaseModel): """Authorization information, e.g. associated with a :class:`.Session`.""" + model_config = ConfigDict(arbitrary_types_allowed=True) classic: int = 0 @@ -172,6 +175,7 @@ class Authorizations(BaseModel): @field_validator('endorsements', mode='before') @classmethod def check_endorsements(cls, data: Any) -> List[Category]: + """Checks if `data` contains endorsements.""" if isinstance(data, str) or not issubclass(type(data), Iterable): raise ValidationError("endorsements must be a list") return [ _check_category(obj) for obj in data ] @@ -307,6 +311,7 @@ class Client(BaseModel): class Session(BaseModel): """Represents an authenticated session in the arXiv system.""" + model_config = ConfigDict(arbitrary_types_allowed=True) session_id: str diff --git a/arxiv-auth/arxiv_auth/legacy/util.py b/arxiv-auth/arxiv_auth/legacy/util.py index 5ddccdeb..3ecbcd07 100644 --- a/arxiv-auth/arxiv_auth/legacy/util.py +++ b/arxiv-auth/arxiv_auth/legacy/util.py @@ -105,9 +105,7 @@ def is_configured() -> bool: return not bool(missing_configs(config)) def missing_configs(config) -> List[str]: - """Returns a list of all missing keys for configs that are needed in - `Flask.config` for legacy auth to work. - """ + """Returns missing keys for configs needed in `Flask.config` for legacy auth to work.""" missing = [key for key in ['CLASSIC_SESSION_HASH', 'SESSION_DURATION', 'CLASSIC_COOKIE_NAME'] if key not in config] return missing