Skip to content

Commit

Permalink
feat: Add non-ascii character report (#365)
Browse files Browse the repository at this point in the history
This change adds output from `xml2rfc --warn-bare-unicode` and
kramdown-rfc's `echars` to `/api/validate` API call.

Fixes #364
  • Loading branch information
kesara authored Aug 7, 2023
1 parent a8ffd23 commit df936f1
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 11 deletions.
9 changes: 9 additions & 0 deletions api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,15 @@ paths:
idnits:
type: string
description: idnits output
non_ascii:
type: string
description: kramdown-rfc echars output
bare_unicode:
type: array
description: list of bare Unicode (non-ASCII) usage reports
items:
type: string
description: warning description
'400':
description: An error has occurred.
content:
Expand Down
15 changes: 11 additions & 4 deletions at/utils/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def process_xml2rfc_log(output, filename):
log = []
errors = []
warnings = []
unicode = []

if output.stderr:
log = cleanup_output(filename,
Expand All @@ -30,12 +31,18 @@ def process_xml2rfc_log(output, filename):
else:
errors.append(message)
elif warning and (message := warning.group('message')):
if line and (line := line.group('line')):
warnings.append(f'({line}) {message}')
if 'Found non-ascii characters' in message:
if line and (line := line.group('line')):
unicode.append(f'({line}) {message}')
else:
warnings.append(message)
else:
warnings.append(message)
if line and (line := line.group('line')):
warnings.append(f'({line}) {message}')
else:
warnings.append(message)

return {'errors': errors, 'warnings': warnings}
return {'errors': errors, 'warnings': warnings, 'bare_unicode': unicode}


def get_errors(output, filename):
Expand Down
16 changes: 15 additions & 1 deletion at/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def validate_draft(file, upload_dir, logger=getLogger()):
logger=logger)
log = validate_xml(filename, logger=logger)

# get list of non ASCII chars
log['non_ascii'] = get_non_ascii_chars(filename=filename, logger=logger)

return log


Expand Down Expand Up @@ -78,7 +81,8 @@ def xml2rfc_validation(filename, logger=getLogger()):
text_file = get_filename(filename, 'txt')

output = proc_run(
args=['xml2rfc', '--out', text_file, filename],
args=['xml2rfc', '--warn-bare-unicode', '--out', text_file,
filename],
capture_output=True)

try:
Expand Down Expand Up @@ -199,3 +203,13 @@ def svgcheck(filename, logger=getLogger()):
return (parsed_svg,
cleanup_output(filename, result),
cleanup_output(filename, errors))


def get_non_ascii_chars(filename, logger=getLogger()):
    '''Run kramdown-rfc's echars on filename and return what it prints.

    The command's captured standard output is decoded as UTF-8 and
    returned as a single string.
    '''
    logger.debug('running echars')

    echars_cmd = ['echars', filename]
    result = proc_run(echars_cmd, capture_output=True)
    report = result.stdout

    return report.decode('utf-8')
2 changes: 1 addition & 1 deletion tests/data/draft-smoke-signals-00.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ effectively.</t>

<middle>
<section anchor="introduction" title="Introduction">
<t>Smoke signal is a form of visual communication used over a long
<t>Smoke (දුම්) signal is a form of visual communication used over a long
distance. It is one of the oldest forms of long distance communication
methods that has been used by many in many different countries throughout
the history.</t>
Expand Down
8 changes: 7 additions & 1 deletion tests/test_api_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,13 @@ def test_validate(self):
self.assertIn('errors', json_data)
self.assertIn('warnings', json_data)
self.assertIn('idnits', json_data)
self.assertIn('bare_unicode', json_data)
self.assertIn('non_ascii', json_data)
self.assertEqual(len(json_data['errors']), 0)
self.assertGreaterEqual(len(json_data['warnings']), 0)
self.assertGreater(len(json_data['idnits']), 0)
self.assertGreaterEqual(len(json_data['warnings']), 0)
self.assertGreaterEqual(len(json_data['bare_unicode']), 0)
self.assertGreaterEqual(len(json_data['non_ascii']), 0)

def test_validate_text(self):
with self.app.test_client() as client:
Expand All @@ -127,6 +131,8 @@ def test_validate_text(self):
self.assertEqual(result.status_code, 200)
self.assertNotIn('errors', json_data)
self.assertNotIn('warnings', json_data)
self.assertNotIn('bare_unicode', json_data)
self.assertIn('non_ascii', json_data)
self.assertIn('idnits', json_data)
self.assertGreater(len(json_data['idnits']), 0)

Expand Down
4 changes: 4 additions & 0 deletions tests/test_utils_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,18 @@ def test_process_xml2rfc_log(self):

self.assertIn('errors', log.keys())
self.assertIn('warnings', log.keys())
self.assertIn('bare_unicode', log.keys())
self.assertGreaterEqual(len(log['errors']), 0)
self.assertGreaterEqual(len(log['warnings']), 0)
self.assertGreaterEqual(len(log['bare_unicode']), 0)
for error in log['errors']:
self.assertNotRegex(r'xml2rfc', error)
self.assertNotRegex(r'Error:', error)
for warning in log['warnings']:
self.assertNotRegex(r'xml2rfc', warning)
self.assertNotRegex(r'Warning:', warning)
for bare_unicde in log['bare_unicode']:
self.assertIn('Found non-ascii characters', bare_unicde)

def test_get_errors_valid(self):
filename = ''.join([TEMPORARY_DATA_DIR, TEST_XML_DRAFT])
Expand Down
23 changes: 19 additions & 4 deletions tests/test_utils_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from werkzeug.datastructures import FileStorage

from at.utils.validation import (
convert_v2v3, idnits, svgcheck, validate_draft, validate_xml,
xml2rfc_validation)
convert_v2v3, get_non_ascii_chars, idnits, svgcheck, validate_draft,
validate_xml, xml2rfc_validation)

TEST_DATA_DIR = './tests/data/'
TEST_XML_DRAFT = 'draft-smoke-signals-00.xml'
Expand Down Expand Up @@ -46,16 +46,19 @@ def test_validate_xml_valid_xml(self):
self.assertIn('errors', log.keys())
self.assertIn('warnings', log.keys())
self.assertIn('idnits', log.keys())
self.assertIn('bare_unicode', log.keys())
self.assertEqual(len(log['errors']), 0)
self.assertGreaterEqual(len(log['warnings']), 0)
self.assertGreater(len(log['idnits']), 0)
self.assertGreaterEqual(len(log['warnings']), 0)
self.assertGreaterEqual(len(log['bare_unicode']), 0)

def test_validate_xml_invalid_xml(self):
log = validate_xml(''.join([TEMPORARY_DATA_DIR, TEST_XML_INVALID]))

self.assertIn('errors', log.keys())
self.assertIn('warnings', log.keys())
self.assertIn('idnits', log.keys())
self.assertIn('bare_unicode', log.keys())
self.assertGreater(len(log['errors']), 0)
self.assertGreater(len(log['idnits']), 0)
self.assertGreater(len(log['idnits']), 0)
Expand Down Expand Up @@ -174,9 +177,13 @@ def test_validate_draft(self):
self.assertIn('errors', log.keys())
self.assertIn('warnings', log.keys())
self.assertIn('idnits', log.keys())
self.assertIn('bare_unicode', log.keys())
self.assertIn('non_ascii', log.keys())
self.assertEqual(len(log['errors']), 0)
self.assertGreaterEqual(len(log['warnings']), 0)
self.assertGreater(len(log['idnits']), 0)
self.assertGreaterEqual(len(log['warnings']), 0)
self.assertGreaterEqual(len(log['bare_unicode']), 0)
self.assertGreaterEqual(len(log['non_ascii']), 0)

def test_validate_draft_text(self):
with open(''.join([TEST_DATA_DIR, TEST_TEXT_DRAFT]), 'rb') as file:
Expand All @@ -186,4 +193,12 @@ def test_validate_draft_text(self):
self.assertNotIn('errors', log.keys())
self.assertNotIn('warnings', log.keys())
self.assertIn('idnits', log.keys())
self.assertIn('non_ascii', log.keys())
self.assertGreater(len(log['idnits']), 0)
self.assertGreaterEqual(len(log['non_ascii']), 0)

def test_get_non_ascii_chars(self):
    '''The echars report for the XML test draft mentions Sinhala.'''
    draft_path = ''.join([TEMPORARY_DATA_DIR, TEST_XML_DRAFT])
    report = get_non_ascii_chars(draft_path)

    self.assertIn('Sinhala', report)

0 comments on commit df936f1

Please sign in to comment.