Skip to content

Commit

Permalink
Merge pull request #66 from frictionlessdata/dev-v2
Browse files Browse the repository at this point in the history
Create v2 branch with support for Py3 and CKAN >= 2.9 only
  • Loading branch information
aivuk authored Dec 1, 2022
2 parents f151aa5 + bd79867 commit 9c23581
Show file tree
Hide file tree
Showing 41 changed files with 10,632 additions and 7,391 deletions.
22 changes: 6 additions & 16 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.6'
python-version: '3.7'
- name: Install requirements
run: pip install flake8 pycodestyle
- name: Check syntax
Expand All @@ -17,7 +17,7 @@ jobs:
needs: lint
strategy:
matrix:
ckan-version: [2.9, 2.9-py2, 2.8]
ckan-version: [2.9]
fail-fast: false

name: CKAN ${{ matrix.ckan-version }}
Expand Down Expand Up @@ -45,28 +45,18 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Install requirements (py3)
if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' && matrix.ckan-version != '2.9-py2' }}
- name: Install requirements
run: |
pip install -r dev-requirements.txt
- name: Install requirements (py2)
if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' || matrix.ckan-version == '2.9-py2' }}
run: |
pip install -r dev-requirements-py2.txt
- name: Install requirements (common)
run: |
pip install -r requirements.txt
pip install --no-warn-conflicts jinja2==2.10.1
pip install --no-warn-conflicts markupsafe==2.0.1
pip install -e .
# Replace default path to CKAN core config file with the one on the container
sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini
- name: Setup extension (CKAN >= 2.9)
if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }}
- name: Setup extension
run: |
ckan -c test.ini db init
- name: Setup extension (CKAN < 2.9)
if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }}
run: |
paster --plugin=ckan db init -c test.ini
- name: Run tests
run: pytest --ckan-ini=test.ini --cov=ckanext.validation --cov-report=xml --cov-append --disable-warnings ckanext/validation/tests -vv

Expand Down
266 changes: 194 additions & 72 deletions README.md

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions ckanext/validation/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,12 @@ def validation_extract_report_from_errors(errors):
if error == 'validation':
report = errors[error][0]
# Remove full path from table source
source = report['tables'][0]['source']
report['tables'][0]['source'] = source.split('/')[-1]
if 'tasks' in report:
source = report['tasks'][0]['place']
report['tasks'][0]['place'] = source.split('/')[-1]
elif 'tables' in report:
source = report['tables'][0]['source']
report['tables'][0]['source'] = source.split('/')[-1]
msg = _('''
There are validation issues with this file, please see the
<a {params}>report</a> for details. Once you have resolved the issues,
Expand All @@ -68,6 +72,8 @@ def validation_extract_report_from_errors(errors):

return report, errors

def validation_dict(validation_json):
    """Return a validation report as a Python object.

    :param validation_json: JSON text (``str``/``bytes``) of a report, or a
        report that has already been decoded into a ``dict`` or ``list``.
    :returns: the decoded report; an already-decoded value is returned
        unchanged.
    :raises ValueError: if the text is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Already decoded: nothing to parse. Presumably this happens for reports
    # stored as a dict rather than as JSON text -- verify against callers.
    if isinstance(validation_json, (dict, list)):
        return validation_json
    return json.loads(validation_json)

def dump_json_value(value, indent=None):
"""
Expand Down
130 changes: 82 additions & 48 deletions ckanext/validation/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@
import json
import re

import six
import requests
from sqlalchemy.orm.exc import NoResultFound
from goodtables import validate
from frictionless import validate, system, Report, Schema, Dialect, Check

from ckan.model import Session
import ckan.lib.uploader as uploader

import ckantoolkit as t

from ckanext.validation.model import Validation
from ckanext.validation.utils import get_update_mode_from_config


log = logging.getLogger(__name__)


def run_validation_job(resource):

log.debug(u'Validating resource %s', resource['id'])
log.debug('Validating resource %s', resource['id'])

try:
validation = Session.query(Validation).filter(
Expand All @@ -34,19 +34,19 @@ def run_validation_job(resource):
if not validation:
validation = Validation(resource_id=resource['id'])

validation.status = u'running'
validation.status = 'running'
Session.add(validation)
Session.commit()

options = t.config.get(
u'ckanext.validation.default_validation_options')
'ckanext.validation.default_validation_options')
if options:
options = json.loads(options)
else:
options = {}

resource_options = resource.get(u'validation_options')
if resource_options and isinstance(resource_options, six.string_types):
resource_options = resource.get('validation_options')
if resource_options and isinstance(resource_options, str):
resource_options = json.loads(resource_options)
if resource_options:
options.update(resource_options)
Expand All @@ -55,81 +55,115 @@ def run_validation_job(resource):
{'ignore_auth': True}, {'id': resource['package_id']})

source = None
if resource.get(u'url_type') == u'upload':
if resource.get('url_type') == 'upload':
upload = uploader.get_resource_uploader(resource)
if isinstance(upload, uploader.ResourceUpload):
source = upload.get_path(resource[u'id'])
source = upload.get_path(resource['id'])
else:
# Upload is not the default implementation (ie it's a cloud storage
# implementation)
pass_auth_header = t.asbool(
t.config.get(u'ckanext.validation.pass_auth_header', True))
if dataset[u'private'] and pass_auth_header:
t.config.get('ckanext.validation.pass_auth_header', True))
if dataset['private'] and pass_auth_header:
s = requests.Session()
s.headers.update({
u'Authorization': t.config.get(
u'ckanext.validation.pass_auth_header_value',
'Authorization': t.config.get(
'ckanext.validation.pass_auth_header_value',
_get_site_user_api_key())
})

options[u'http_session'] = s
options['http_session'] = s

if not source:
source = resource[u'url']

schema = resource.get(u'schema')
if schema and isinstance(schema, six.string_types):
if schema.startswith('http'):
r = requests.get(schema)
schema = r.json()
else:
source = resource['url']

schema = resource.get('schema')
if schema:
if isinstance(schema, str):
if schema.startswith('http'):
r = requests.get(schema)
schema = r.json()
schema = json.loads(schema)

_format = resource[u'format'].lower()

_format = resource['format'].lower()
report = _validate_table(source, _format=_format, schema=schema, **options)

# Hide uploaded files
for table in report.get('tables', []):
if table['source'].startswith('/'):
table['source'] = resource['url']
for index, warning in enumerate(report.get('warnings', [])):
report['warnings'][index] = re.sub(r'Table ".*"', 'Table', warning)

if report['table-count'] > 0:
validation.status = u'success' if report[u'valid'] else u'failure'
validation.report = report
if type(report) == Report:
report = report.to_dict()

if 'tasks' in report:
for table in report['tasks']:
if table['place'].startswith('/'):
table['place'] = resource['url']
if 'warnings' in report:
validation.status = 'error'
for index, warning in enumerate(report['warnings']):
report['warnings'][index] = re.sub(r'Table ".*"', 'Table', warning)
if 'valid' in report:
validation.status = 'success' if report['valid'] else 'failure'
validation.report = json.dumps(report)
else:
validation.status = u'error'
validation.error = {
'message': '\n'.join(report['warnings']) or u'No tables found'}
validation.report = json.dumps(report)
if 'errors' in report and report['errors']:
validation.status = 'error'
validation.error = {
'message': [str(err) for err in report['errors']]}
else:
validation.error = {'message': ['Errors validating the data']}
validation.finished = datetime.datetime.utcnow()

Session.add(validation)
Session.commit()

# Store result status in resource
t.get_action('resource_patch')(
{'ignore_auth': True,
'user': t.get_action('get_site_user')({'ignore_auth': True})['name'],
'_validation_performed': True},
{'id': resource['id'],
'validation_status': validation.status,
'validation_timestamp': validation.finished.isoformat()})
data_dict = {
'id': resource['id'],
'validation_status': validation.status,
'validation_timestamp': validation.finished.isoformat(),
}

if get_update_mode_from_config() == 'sync':
data_dict['_skip_next_validation'] = True,

patch_context = {
'ignore_auth': True,
'user': t.get_action('get_site_user')({'ignore_auth': True})['name'],
'_validation_performed': True
}
t.get_action('resource_patch')(patch_context, data_dict)


def _validate_table(source, _format='csv', schema=None, **options):
    """Validate a tabular source with Frictionless and return its report.

    :param source: location of the data to validate, handed to ``validate``
        as is.
    :param _format: data format passed to ``validate`` as ``format``
        (defaults to ``'csv'``).
    :param schema: optional schema descriptor; when truthy it is converted
        with ``Schema.from_descriptor``, otherwise no schema is passed.
    :param options: extra keyword arguments forwarded to ``validate``.
        ``http_session`` is popped and installed in the Frictionless system
        context (a new ``requests.Session`` is used when it is missing);
        ``dialect`` and ``checks`` descriptors are converted to ``Dialect``
        and ``Check`` objects before the call.
    :returns: the value returned by ``frictionless.validate``.
    """
    # The 'trusted' flag is needed to allow Frictionless Framework to
    # validate absolute paths
    frictionless_context = {'trusted': True}
    http_session = options.pop('http_session', None) or requests.Session()

    if 'ckan.download_proxy' in t.config:
        proxy = t.config.get('ckan.download_proxy')
        log.debug('Download resource for validation via proxy: %s', proxy)
        http_session.proxies.update({'http': proxy, 'https': proxy})

    frictionless_context['http_session'] = http_session
    resource_schema = Schema.from_descriptor(schema) if schema else None

    # Load the Resource Dialect as described in
    # https://framework.frictionlessdata.io/docs/framework/dialect.html
    if 'dialect' in options:
        options['dialect'] = Dialect.from_descriptor(options['dialect'])

    # Load the list of checks and its parameters declaratively as in
    # https://framework.frictionlessdata.io/docs/checks/table.html
    if 'checks' in options:
        options['checks'] = [
            Check.from_descriptor(check) for check in options['checks']
        ]

    # Log before running so the message is emitted even if validation
    # raises or hangs.
    log.debug('Validating source: %s', source)
    with system.use_context(**frictionless_context):
        report = validate(
            source, format=_format, schema=resource_schema, **options)

    return report

Expand Down
Loading

0 comments on commit 9c23581

Please sign in to comment.