Merge pull request #149 from arXiv/develop

Pre-release merge for browse v0.3.0
arXiv · Apr 16, 2020 · ea6f693 · ea6f693
2 parents 889be06 + aad4a2e
commit ea6f693
Show file tree

Hide file tree

Showing 56 changed files with 2,134 additions and 706 deletions.
diff --git a/.gitignore b/.gitignore
@@ -123,3 +123,5 @@ private_vars.sh
 .pycheckers
 .pyre
 legacy_comparison.org
+
+tests/data/browse.db
diff --git a/.travis.yml b/.travis.yml
@@ -10,6 +10,7 @@ python:
 script:
   - pip install pipenv
   - pipenv sync --dev
+  - FLASK_APP=app.py pipenv run python populate_test_database.py --drop_and_create
   - pipenv run nose2 --with-coverage --coverage-config .coveragerc
   - tests/lint.sh
   - tests/docstyle.sh

diff --git a/Pipfile b/Pipfile
@@ -22,7 +22,7 @@ arxiv-auth = "==0.4.2rc1"
 mypy = "*"
 jinja2 = "==2.10.1"
 flask-s3 = "*"
-arxiv-base = "==0.16.6"
+arxiv-base = "==0.16.7"
 retry = "==0.9.2"
 
 [dev-packages]

diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/README.md b/README.md
@@ -18,8 +18,9 @@ If all goes well, http://127.0.0.1:5000/abs/0906.5132 should render the basic
 abs page.
 
 By default, the application will use the directory trees in
-`tests/data/abs_files` and `tests/data/cache` and when looking for the document metadata and cache files, respectively. These paths can be overridden via
-environment variables (see `browse/config.py`).
+`tests/data/abs_files` and `tests/data/cache` when looking for the
+document metadata and cache files, respectively. These paths can be
+overridden via environment variables (see `browse/config.py`).
 
 ### Rebuilding the test database
 

diff --git a/browse/config.py b/browse/config.py
@@ -8,7 +8,7 @@
 import dateutil.parser
 from datetime import datetime, timedelta
 
-APP_VERSION = '0.2.9'
+APP_VERSION = '0.3.0'
 """The application version """
 
 ON = 'yes'
@@ -236,20 +236,15 @@
 SQLALCHEMY_ECHO = False
 SQLALCHEMY_RECORD_QUERIES = False
 
-SQLALCHEMY_POOL_SIZE = int(os.environ.get('BROWSE_SQLALCHEMY_POOL_SIZE', '10'))
-"""SQLALCHEMY_POOL_SIZE is set from BROWSE_SQLALCHEMY_POOL_SIZE.
-
-Ignored under sqlite."""
-
-SQLALCHEMY_MAX_OVERFLOW = int(os.environ.get('BROWSE_SQLALCHEMY_MAX_OVERFLOW', '0'))
-"""SQLALCHEMY_MAX_OVERFLOW is set from BROWSE_SQLALCHEMY_MAX_OVERFLOW.
-
-Ignored under sqlite."""
-
 # SQLALCHEMY_POOL_SIZE and SQLALCHEMY_MAX_OVERFLOW will not work with sqlite
-if 'sqlite' in SQLALCHEMY_DATABASE_URI:
-    SQLALCHEMY_POOL_SIZE = None
-    SQLALCHEMY_MAX_OVERFLOW = None
+if 'sqlite' not in SQLALCHEMY_DATABASE_URI:
+    SQLALCHEMY_POOL_SIZE = int(os.environ.get('BROWSE_SQLALCHEMY_POOL_SIZE', '10'))
+    """SQLALCHEMY_POOL_SIZE is set from BROWSE_SQLALCHEMY_POOL_SIZE.
+    Ignored under sqlite."""
+
+    SQLALCHEMY_MAX_OVERFLOW = int(os.environ.get('BROWSE_SQLALCHEMY_MAX_OVERFLOW', '0'))
+    """SQLALCHEMY_MAX_OVERFLOW is set from BROWSE_SQLALCHEMY_MAX_OVERFLOW.
+    Ignored under sqlite."""
 
 BROWSE_DAILY_STATS_PATH = os.environ.get(
     'BROWSE_DAILY_STATS_PATH', 'tests/data/daily_stats')
@@ -283,23 +278,41 @@
     'BROWSE_USER_BANNER_ENABLED', '0')))
 """Enable/disable user banner."""
 try:
-    BROWSE_USER_BANNER_START_DATE = dateutil.parser.parse(
-        os.environ.get('BROWSE_USER_BANNER_START_DATE')
-    ).replace(hour=0, minute=0, second=0)
+    if os.environ.get('BROWSE_USER_BANNER_START_DATE',None):
+        BROWSE_USER_BANNER_START_DATE = dateutil.parser.parse(
+            os.environ.get('BROWSE_USER_BANNER_START_DATE', 'nodate')
+        ).replace(hour=0, minute=0, second=0)
+    else:
+        raise ValueError
 except Exception:
     if BROWSE_USER_BANNER_ENABLED:
         warnings.warn("Bad value for BROWSE_USER_BANNER_START_DATE")
     BROWSE_USER_BANNER_START_DATE = datetime.now() - timedelta(days=1)
 
 try:
-    BROWSE_USER_BANNER_END_DATE = dateutil.parser.parse(
-        os.environ.get('BROWSE_USER_BANNER_END_DATE')
-    ).replace(hour=23, minute=59, second=59)
+    if os.environ.get('BROWSE_USER_BANNER_END_DATE', None):
+        BROWSE_USER_BANNER_END_DATE = dateutil.parser.parse(
+            os.environ.get('BROWSE_USER_BANNER_END_DATE', 'noate')
+        ).replace(hour=23, minute=59, second=59)
+    else:
+        raise ValueError
 except Exception:
     if BROWSE_USER_BANNER_ENABLED:
         warnings.warn("Bad value for BROWSE_USER_BANNER_END_DATE")
     BROWSE_USER_BANNER_END_DATE = datetime.now() + timedelta(days=1)
 
+BROWSE_STATUS_BANNER_ENABLED = bool(int(os.environ.get(
+    'BROWSE_STATUS_BANNER_ENABLED', '0')))
+"""Enable/disable status service banner."""
+
+BROWSE_STATUS_BANNER_SCRIPT_URL = os.environ.get(
+    'BROWSE_STATUS_BANNER_SCRIPT_URL',
+    'https://code.sorryapp.com/status-bar/4.latest/status-bar.min.js')
+
+BROWSE_STATUS_BANNER_SITE_ID = os.environ.get(
+    'BROWSE_STATUS_BANNER_SITE_ID', 'foo')
+"""Site ID for the status service banner."""
+
 DOCUMENT_LATEST_VERSIONS_PATH = os.environ.get(
     'DOCUMENT_LATEST_VERSIONS_PATH', 'tests/data/abs_files/ftp')
 """Paths to .abs and source files."""
@@ -341,10 +354,7 @@
 CLASSIC_DATABASE_URI = os.environ.get('CLASSIC_DATABASE_URI', os.environ.get(
     'BROWSE_SQLALCHEMY_DATABASE_URI', default=None))
 """If not set, legacy database integrations for auth will not be available."""
-if not CLASSIC_DATABASE_URI:
-    warnings.warn("No value set for CLASSIC_DATABASE_URI")
-elif 'sqlite' in CLASSIC_DATABASE_URI:
-    warnings.warn("Using sqlite in CLASSIC_DATABASE_URI")
+
 
 CLASSIC_SESSION_HASH = os.environ.get('CLASSIC_SESSION_HASH', 'foosecret')
 SESSION_DURATION = os.environ.get(

diff --git a/browse/controllers/archive_page/__init__.py b/browse/controllers/archive_page/__init__.py
@@ -14,7 +14,7 @@
 
 
 
-def get_archive(archive_id: str) -> Response:
+def get_archive(archive_id: str) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
     """Gets archive page."""
     data: Dict[str, Any] = {}
     response_headers: Dict[str, Any] = {}
@@ -59,10 +59,10 @@ def get_archive(archive_id: str) -> Response:
     data["catchup_to"] = datetime.date.today() - datetime.timedelta(days=7)
 
     data["template"] = "archive/single_archive.html"
-    return data, status.HTTP_200_OK, response_headers  # type: ignore
+    return data, status.HTTP_200_OK, response_headers
 
 
-def archive_index(archive_id: str, status: int) -> Response:
+def archive_index(archive_id: str, status: int) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
     """Landing page for when there is no archive specified."""
     data: Dict[str, Any] = {}
     data["bad_archive"] = archive_id
@@ -84,7 +84,7 @@ def archive_index(archive_id: str, status: int) -> Response:
     data["defunct"] = defunct
 
     data["template"] = "archive/archive_list_all.html"
-    return data, status, {}  # type: ignore
+    return data, status, {} 
 
 
 def subsumed_msg(archive: Dict[str, str], subsumed_by: str) -> Dict[str, str]:

diff --git a/browse/controllers/list_page/__init__.py b/browse/controllers/list_page/__init__.py
@@ -100,8 +100,8 @@ def get_listing(subject_or_category: str,
        Number of articles to show
     """
     # TODO make sure to handle POST too
-    skip = skip or request.args.get('skip', None)
-    show = show or request.args.get('show', None)
+    skip = skip or request.args.get('skip', '0')
+    show = show or request.args.get('show', '')
     if request.args.get('archive', None) is not None:
         subject_or_category = request.args.get('archive')  # type: ignore
     if request.args.get('year', None):
@@ -421,8 +421,7 @@ def sub_sections_for_types(
         })
 
     for sec in secs:
-        typ = {'new': 'New', 'cross': 'Cross', 'rep': 'Replacement'}[  # type: ignore
-            sec['type']]
+        typ = {'new': 'New', 'cross': 'Cross', 'rep': 'Replacement'}[sec['type']] # type: ignore
         date = resp['announced'].strftime('%A, %-d %B %Y')
 
         showing = 'showing '
@@ -441,7 +440,8 @@ def sub_sections_for_types(
 
 
 def _not_modified(response: Union[ListingResponse, NewResponse, NotModifiedResponse]) -> bool:
-    return bool(response and response.get('not_modified', False))
+    return bool ( response and 'not_modified' in response)
+#    return bool(response and response.get('not_modified', False))
 
 
 def _expires_headers(listing_resp:

diff --git a/browse/controllers/prevnext/__init__.py b/browse/controllers/prevnext/__init__.py
@@ -1,9 +1,8 @@
 """Handle requests to support sequential navigation between arXiv IDs."""
 
-from flask import url_for
+from flask import url_for, escape
 from typing import Tuple, Dict, Any
-from werkzeug.datastructures import MultiDict
-from werkzeug.exceptions import InternalServerError, BadRequest
+from werkzeug.exceptions import BadRequest
 
 from browse.domain.identifier import Identifier, IdentifierException
 from browse.services.database import get_sequential_id
@@ -16,70 +15,59 @@
 logger = logging.getLogger(__name__)
 
 
-def get_prevnext(request_params: MultiDict) -> Response:
+def get_prevnext(id: str, function: str, context: str) -> Response:
     """
     Get the next or previous arXiv ID in the browse context.
 
-    The 'id', 'function', and 'context' request parameters are required. The
-    'site' parameter from the classic prevnext is no longer supported.
+    The 'site' parameter from the classic prevnext is no longer supported.
 
     Parameters
     ----------
-    request_params : dict
+    id
+        arxiv id
+    function
+        prev or next
+    context
+        which archive or category to browse
 
     Returns
     -------
     dict
-        Search result response data.
+        Result response data.
     int
         HTTP status code.
     dict
         Headers to add to the response.
 
     Raises
     ------
-    InternalServerError
-        Raised when there was an unexpected problem executing the query.
     BadRequest
         Raised when request parameters are missing, invalid, or when an ID
         redirect cannot be returned even when the request parameters are valid.
 
     """
-    if 'id' not in request_params:
+    if id is None or not id:
         raise BadRequest('Missing article identifier')
-    try:
-        arxiv_id = Identifier(request_params['id'])
-    except IdentifierException:
-        raise BadRequest(f"Invalid article identifier {request_params['id']}")
-
-    if not ('function' in request_params
-            and request_params['function'] in ['prev', 'next']):
+    if function not in ['prev', 'next']:
         raise BadRequest('Missing or invalid function request')
-
-    if 'context' not in request_params:
+    if context is None or not context:
         raise BadRequest('Missing context')
-    context = request_params['context']
-
     if not (context in CATEGORIES_ACTIVE
             or context in ARCHIVES or context == 'all'):
         raise BadRequest('Invalid context')
 
-    is_next = request_params['function'] == 'next'
     try:
-        seq_id = get_sequential_id(paper_id=arxiv_id,
-                                   is_next=is_next,
-                                   context=context)
-    except Exception as ex:
-        logger.warning(f'Error getting sequential ID: {ex}')
-        raise InternalServerError from ex
+        arxiv_id = Identifier(id)
+    except IdentifierException:
+        raise BadRequest(escape(f"Invalid article identifier {id}"))
 
+    seq_id = get_sequential_id(paper_id=arxiv_id,
+                               is_next=function == 'next',
+                               context=context)
     if not seq_id:
         raise BadRequest(
-            f'No {"next" if is_next else "previous"} article found for '
-            f'{arxiv_id.id} in {context}'
-        )
+            escape(f'No {function} article found for '
+                   f'{arxiv_id.id} in {context}'))
 
-    redirect_url = url_for('browse.abstract',
-                           arxiv_id=seq_id,
-                           context=context)
+    redirect_url = url_for('browse.abstract', arxiv_id=seq_id, context=context)
     return {}, status.HTTP_301_MOVED_PERMANENTLY, {'Location': redirect_url}