forked from ourresearch/openalex-guts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
172 lines (145 loc) · 5.64 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_compress import Compress
from flask_debugtoolbar import DebugToolbarExtension
from sqlalchemy import exc
from sqlalchemy import event
from sqlalchemy.pool import NullPool
from sqlalchemy.pool import Pool
import logging
import sys
import os
import requests
import json
import random
import warnings
from urllib.parse import urlparse
import psycopg2
import psycopg2.extras # needed though you wouldn't guess it
from psycopg2.pool import ThreadedConnectionPool
from contextlib import contextmanager
import re
from util import safe_commit
from util import elapsed
from util import HTTPMethodOverrideMiddleware
# Identity of this deployment and the User-Agent string for outgoing requests.
HEROKU_APP_NAME = "openalex-guts"
USER_AGENT = "OpenAlex/0.1 (https://openalex.org; team@ourresearch.org)"

# Root logging goes to stdout at DEBUG level.
# see http://wiki.pylonshq.com/display/pylonscookbook/Alternative+logging+configuration
# The thread id is used as the line prefix; the process id was tried but it
# was always "6" on heroku.
logging.basicConfig(
    format="%(thread)d: %(message)s",
    level=logging.DEBUG,
    stream=sys.stdout,
)
logger = logging.getLogger("oadoi")

# API host comes from the environment; None when the variable is unset.
API_HOST = os.environ.get("API_HOST")
MAX_MAG_ID = 4_200_000_000
# Third-party packages whose loggers are too chatty at DEBUG level.
libraries_to_mum = [
    "requests", "urllib3", "requests.packages.urllib3", "requests_oauthlib",
    "stripe", "oauthlib",
    "boto", "boto3", "botocore",
    "newrelic", "RateLimiter", "paramiko", "chardet", "cryptography",
    "psycopg2", "s3_concat",
]

for a_library in libraries_to_mum:
    # Drop UserWarnings attributed to this library ...
    warnings.filterwarnings("ignore", category=UserWarning, module=a_library)
    # ... and raise its logger threshold to WARNING, while still letting
    # records propagate to the root handler configured for the app.
    the_logger = logging.getLogger(a_library)
    the_logger.propagate = True
    the_logger.setLevel(logging.WARNING)
# disable extra warnings
# Call urllib3's disable_warnings() helper through the urllib3 package that
# requests exposes as requests.packages.urllib3.
requests.packages.urllib3.disable_warnings()
# Ignore DeprecationWarning from every module (no module filter is given).
warnings.filterwarnings("ignore", category=DeprecationWarning)
app = Flask(__name__)

# database stuff
app.config.update(
    SQLALCHEMY_TRACK_MODIFICATIONS=True,  # as instructed, to suppress warning
    # Echo SQL only when the env var is exactly the string "True".
    # (For ad-hoc debugging, hard-code SQLALCHEMY_ECHO to True here.)
    SQLALCHEMY_ECHO=os.getenv("SQLALCHEMY_ECHO") == "True",
)

# DATABASE_TO_USE picks the NAME of the environment variable holding the
# connection URL; the URL itself is read from that variable below.
database_to_use = os.getenv("DATABASE_TO_USE", "")
if database_to_use.startswith(("q", "h")) or database_to_use in ("api", "spare"):
    MY_DATABASE = f"DATABASE_URL_{database_to_use}"
elif database_to_use == "6-HIGH":
    MY_DATABASE = "DATABASE_URL_OPENALEX_REDSHIFT_FAST"
else:
    MY_DATABASE = "DATABASE_URL_OPENALEX_REDSHIFT_BASE"
print(f"Using database {MY_DATABASE}")

# don't use the default URI though, default is unclear, use binds
app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv(MY_DATABASE)
app.config["SQLALCHEMY_BINDS"] = {
    "redshift_db": app.config["SQLALCHEMY_DATABASE_URI"],
}
# Raw psycopg2 connection pool, built from the same URL the SQLAlchemy bind
# uses. The pool is stored on app.config under the "postgreSQL_pool" key.
redshift_url = urlparse(os.getenv(MY_DATABASE))
app.config["postgreSQL_pool"] = ThreadedConnectionPool(
    2,  # minimum number of pooled connections
    5,  # maximum number of pooled connections
    host=redshift_url.hostname,
    port=redshift_url.port,
    database=redshift_url.path[1:],  # URL path without its leading "/"
    user=redshift_url.username,
    password=redshift_url.password,
)
# Engine options that keep long-running queries from hanging;
# see https://stackoverflow.com/questions/43594310/redshift-sqlalchemy-long-query-hangs
app.config.update(
    SQLALCHEMY_ENGINE_OPTIONS={
        "pool_pre_ping": True,
        "pool_recycle": 300,
        # Keepalive settings passed through to the database driver.
        "connect_args": {
            "keepalives": 1,
            "keepalives_idle": 10,
            "keepalives_interval": 2,
            "keepalives_count": 5,
        },
    },
    SQLALCHEMY_POOL_SIZE=10,
)

# An alternative that was tried and left disabled: subclass SQLAlchemy and
# force options['poolclass'] = NullPool inside apply_driver_hacks
# (from http://stackoverflow.com/a/12417346/596939).
db = SQLAlchemy(
    app,
    session_options={
        "autoflush": False,
        "autocommit": False,
    },
)
# do compression. has to be above flask debug toolbar so it can override this.
# Defaults to on; any COMPRESS_DEBUG value other than "True" turns it off.
compress_json = os.getenv("COMPRESS_DEBUG", "True") == "True"

# set up Flask-DebugToolbar, only when FLASK_DEBUG is exactly "True"
if os.getenv("FLASK_DEBUG") == "True":
    logger.info("Setting app.debug=True; Flask-DebugToolbar will display")
    compress_json = False
    app.debug = True
    app.config.update(
        DEBUG=True,
        DEBUG_TB_INTERCEPT_REDIRECTS=False,
        SQLALCHEMY_RECORD_QUERIES=True,
        SECRET_KEY=os.getenv("SECRET_KEY"),
    )
    toolbar = DebugToolbarExtension(app)

# gzip responses
Compress(app)
app.config["COMPRESS_DEBUG"] = compress_json
@contextmanager
def get_db_connection():
    """Yield a connection borrowed from the psycopg2 pool, in autocommit mode.

    The connection is taken from the pool stored at
    ``app.config['postgreSQL_pool']`` and is always handed back to that same
    pool when the ``with`` block exits, whether or not it raised.

    Yields:
        A psycopg2 connection with autocommit enabled.

    Raises:
        Whatever ``getconn()`` raises when no connection can be obtained.
        The error propagates unchanged.
    """
    pool = app.config['postgreSQL_pool']
    # Acquire OUTSIDE the try block: if getconn() fails there is nothing to
    # return to the pool, and a `finally` that referenced the unbound name
    # would replace the real error with an UnboundLocalError.
    connection = pool.getconn()
    try:
        connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        connection.autocommit = True
        # connection.readonly = True  (left disabled, as in the original)
        yield connection
    finally:
        pool.putconn(connection)
@contextmanager
def get_db_cursor(commit=False):
    """Yield a ``RealDictCursor`` on a pooled connection and close it afterwards.

    Args:
        commit: When true, ``connection.commit()`` is called after the caller's
            block finishes without raising. The connection comes from
            ``get_db_connection()``, which already enables autocommit.

    Yields:
        A cursor created with ``psycopg2.extras.RealDictCursor`` as its factory.
    """
    with get_db_connection() as conn:
        dict_cursor = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            yield dict_cursor
            # Reached only when the caller's block did not raise.
            if commit:
                conn.commit()
        finally:
            dict_cursor.close()
def get_apiurl_from_openalex_url(openalex_url):
    """Rewrite OpenAlex entity URLs into their API form.

    Each occurrence of ``https://openalex.org/<id>``, where ``<id>`` is a run
    of three or more letters or digits, is replaced by
    ``https://api.openalex.org/<id>?apiurls``. Text that does not match is
    returned unchanged.

    Args:
        openalex_url: The URL (or text containing URLs) to rewrite.

    Returns:
        The rewritten string, or ``None`` when ``openalex_url`` is empty or
        ``None``.
    """
    if not openalex_url:
        return None
    # Raw strings keep the regex escapes from being read as (invalid) string
    # escapes; the dot is escaped so only a literal "openalex.org" host matches.
    return re.sub(
        r"https://openalex\.org/(?P<id>[A-Za-z\d]{3,})",
        r"https://api.openalex.org/\g<id>?apiurls",
        openalex_url,
    )