Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSONSchema compliance for namespaces #213

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions jams/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,3 @@

from .core import *
from .nsconvert import convert
from .schema import list_namespaces


# Populate the namespace mapping
for _ in util.find_with_extension(resource_filename(__name__, schema.NS_SCHEMA_DIR),
'json'):
schema.add_namespace(_)

# Populate local namespaces

try:
for _ in util.find_with_extension(os.environ['JAMS_SCHEMA_DIR'], 'json'):
schema.add_namespace(_)
except KeyError:
pass
244 changes: 165 additions & 79 deletions jams/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
Sandbox
JObject
Observation
import_lab
"""

import json
Expand All @@ -41,6 +42,8 @@
import contextlib
import gzip
import six
import math
import itertools

import numpy as np
import pandas as pd
Expand All @@ -57,7 +60,7 @@
'JObject', 'Sandbox',
'Annotation', 'Curator', 'AnnotationMetadata',
'FileMetadata', 'AnnotationArray', 'JAMS',
'Observation']
'Observation', 'import_lab']


def deprecated(version, version_removed):
Expand Down Expand Up @@ -672,6 +675,27 @@ def _display_properties(self):
('data', 'Data'),
('sandbox', 'Sandbox')]

def _combine_observations(self):
    """Flatten all observations into parallel lists of times and values.

    Dense annotations store an iterable of times (and values) on each
    observation; sparse annotations store one scalar per observation.
    Both are flattened to a single pair of flat lists.

    Returns
    -------
    times : list of float
        A flat list of all times in all observations.
    vals : list
        A flat list of all values in all observations, parallel to `times`.
    """
    try:
        # Dense case: each obs.time is itself iterable.  Use
        # chain.from_iterable over a generator rather than chain(*[...]),
        # which would materialize an intermediate list first.
        times = list(itertools.chain.from_iterable(obs.time
                                                   for obs in self.data))
        # If the times were iterable, it's okay to combine all values
        # the same way.
        vals = list(itertools.chain.from_iterable(obs.value
                                                  for obs in self.data))
    except TypeError:
        # Sparse case: scalar time/value per observation.
        times = [obs.time for obs in self.data]
        vals = [obs.value for obs in self.data]
    return times, vals

def append(self, time=None, duration=None, value=None, confidence=None):
'''Append an observation to the data field

Expand All @@ -692,9 +716,35 @@ def append(self, time=None, duration=None, value=None, confidence=None):
>>> ann = jams.Annotation(namespace='chord')
>>> ann.append(time=3, duration=2, value='E#')
'''
# NOTE [matthew.mccallum 01.02.21]: Currently we check if time and duration
# are lists to determine if we need to handle them differently. This type
# specific handling should not be required once we move on to phase 3 of
# the issue here:
# https://github.com/marl/jams/issues/208
# That is, the different behaviour based on the type of observation should
# be handled in the observation type that is assigned to each annotation,
# rather than this being mixed in with the Annotation itself and it being
# the Annotation's responsibility to know how to handle each observation type.

try:
# Convert any iterables into lists
time = [float(t) for t in time]
duration = [float(d) for d in duration]
if value is None:
value = [None]*len(time)
else:
value = [v for v in value]
if confidence is None:
confidence = [None]*len(time)
else:
confidence = [c for c in confidence]

except TypeError:
time = float(time)
duration = float(duration)

self.data.add(Observation(time=float(time),
duration=float(duration),
self.data.add(Observation(time=time,
duration=duration,
value=value,
confidence=confidence))

Expand Down Expand Up @@ -733,55 +783,6 @@ def append_columns(self, columns):
columns['value'],
columns['confidence'])])

def validate(self, strict=True):
    '''Validate this annotation object against the JAMS schema,
    and its data against the namespace schema.

    Parameters
    ----------
    strict : bool
        If `True`, then schema violations will cause an Exception.
        If `False`, then schema violations will issue a warning.

    Returns
    -------
    valid : bool
        `True` if the object conforms to schema.
        `False` if the object fails to conform to schema,
        but `strict == False`.

    Raises
    ------
    SchemaError
        If `strict == True` and the object fails validation

    See Also
    --------
    JObject.validate
    '''

    # Schema describing the observation records for this namespace
    record_schema = schema.namespace_array(self.namespace)

    try:
        # Check the annotation header (metadata, sandbox, ...) first
        schema.VALIDATOR.validate(self.__json_light__(data=False),
                                  schema.JAMS_SCHEMA)

        # Then check every serialized observation against the namespace
        serialized = [serialize_obj(obs) for obs in self.data]
        schema.VALIDATOR.validate(serialized, record_schema)

    except jsonschema.ValidationError as err:
        if strict:
            raise SchemaError(str(err))
        warnings.warn(str(err))
        return False

    return True

def trim(self, start_time, end_time, strict=False):
'''
Trim the annotation and return as a new `Annotation` object.
Expand Down Expand Up @@ -1077,15 +1078,18 @@ def to_interval_values(self):
List view of value field.
'''

ints, vals = [], []
for obs in self.data:
ints.append([obs.time, obs.time + obs.duration])
vals.append(obs.value)
times, vals = self._combine_observations()
try:
durs = list(itertools.chain(*[obs.duration for obs in self.data]))
except TypeError:
durs = [obs.duration for obs in self.data]

intervals = [(t, t+d) for t, d in zip(times, durs)]

if not ints:
if not len(intervals):
return np.empty(shape=(0, 2), dtype=float), []

return np.array(ints), vals
return np.array(intervals), vals

def to_event_values(self):
'''Extract observation data in a `mir_eval`-friendly format.
Expand All @@ -1098,12 +1102,8 @@ def to_event_values(self):
labels : list
List view of value field.
'''
ints, vals = [], []
for obs in self.data:
ints.append(obs.time)
vals.append(obs.value)

return np.array(ints), vals
times, vals = self._combine_observations()
return np.array(times), vals

def to_dataframe(self):
'''Convert this annotation to a pandas dataframe.
Expand Down Expand Up @@ -1288,19 +1288,7 @@ def __json_light__(self, data=True):
@property
def __json_data__(self):
r"""JSON-serialize the observation sequence."""
if schema.is_dense(self.namespace):
dense_records = dict()
for field in Observation._fields:
dense_records[field] = []

for obs in self.data:
for key, val in six.iteritems(obs._asdict()):
dense_records[key].append(serialize_obj(val))

return dense_records

else:
return [serialize_obj(_) for _ in self.data]
return [serialize_obj(_) for _ in self.data]

@classmethod
def _key(cls, obs):
Expand Down Expand Up @@ -1806,7 +1794,7 @@ def validate(self, strict=True):
'''
valid = True
try:
schema.VALIDATOR.validate(self.__json_light__, schema.JAMS_SCHEMA)
schema.VALIDATOR.validate(self.__json_light__, self.__schema__)

for ann in self.annotations:
if isinstance(ann, Annotation):
Expand Down Expand Up @@ -2087,18 +2075,24 @@ def serialize_obj(obj):

'''

if isinstance(obj, np.bool_):
return bool(obj)

if isinstance(obj, np.integer):
return int(obj)

elif isinstance(obj, np.floating):
return float(obj)

elif isinstance(obj, np.ndarray):
return obj.tolist()
return [serialize_obj(x) for x in obj.tolist()]

elif isinstance(obj, list):
return [serialize_obj(x) for x in obj]

elif isinstance(obj, dict):
return {k: serialize_obj(v) for k, v in six.iteritems(obj)}

elif isinstance(obj, Observation):
return {k: serialize_obj(v) for k, v in six.iteritems(obj._asdict())}

Expand Down Expand Up @@ -2166,3 +2160,95 @@ def _get_divid(obj):
global __DIVID_COUNT__
__DIVID_COUNT__ += 1
return '{}-{}'.format(id(obj), __DIVID_COUNT__)


def import_lab(namespace, filename, infer_duration=True, **parse_options):
    r'''Load a .lab file as an Annotation object.

    .lab files are assumed to have the following format:

        ``TIME_START\tTIME_END\tANNOTATION``

    By default, .lab files are assumed to have columns separated by one
    or more white-space characters, and have no header or index column
    information.

    If the .lab file contains only two columns, then an empty duration
    field is inferred.

    If the .lab file contains more than three columns, each row's
    annotation value is assigned the contents of last non-empty column.


    Parameters
    ----------
    namespace : str
        The namespace for the new annotation

    filename : str
        Path to the .lab file

    infer_duration : bool
        If `True`, interval durations are inferred from `(start, end)` columns,
        or difference between successive times.

        If `False`, interval durations are assumed to be explicitly coded as
        `(start, duration)` columns.  If only one time column is given, then
        durations are set to 0.

        For instantaneous event annotations (e.g., beats or onsets), this
        should be set to `False`.

    parse_options : additional keyword arguments
        Passed to ``pandas.read_csv``

    Returns
    -------
    annotation : Annotation
        The newly constructed annotation object

    See Also
    --------
    pandas.read_csv
    '''

    # Create a new annotation object
    annotation = Annotation(namespace)

    parse_options.setdefault('sep', r'\s+')
    parse_options.setdefault('engine', 'python')
    parse_options.setdefault('header', None)
    parse_options.setdefault('index_col', False)

    # This is a hack to handle potentially ragged .lab data
    parse_options.setdefault('names', range(20))

    data = pd.read_csv(filename, **parse_options)

    # Drop all-nan columns
    data = data.dropna(how='all', axis=1)

    # Do we need to add a duration column?
    # This only applies to event annotations
    if len(data.columns) == 2:
        # Insert a column of zeros after the timing
        data.insert(1, 'duration', 0.0)
        if infer_duration:
            # duration[i] = time[i+1] - time[i]; the final event keeps 0.
            # Use a single .loc assignment: the previous chained form
            # data['duration'][:-1] = ... assigns into a temporary copy
            # under modern pandas and leaves the column unchanged.
            data.loc[data.index[:-1], 'duration'] = \
                data.loc[:, 0].diff()[1:].values

    else:
        # Convert from (start, end) to (start, duration)
        if infer_duration:
            data.loc[:, 1] -= data[0]

    for row in data.itertuples():
        time, duration = row[1:3]

        # "Last non-empty column": ragged rows are padded with NaN by
        # read_csv, so NaN cells (which are not None) must be skipped too.
        # NOTE(review): previously a short row could pick up NaN here;
        # fall back to None when every trailing cell is empty.
        fields = [x for x in row[3:] if not pd.isna(x)]
        value = fields[-1] if fields else None

        annotation.append(time=time,
                          duration=duration,
                          confidence=1.0,
                          value=value)

    return annotation
16 changes: 9 additions & 7 deletions jams/nsconvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,12 @@ def pitch_hz_to_contour(annotation):
data = annotation.pop_data()

for obs in data:
annotation.append(time=obs.time, duration=obs.duration,
annotation.append(time=obs.time,
duration=obs.duration,
confidence=obs.confidence,
value=dict(index=0,
frequency=np.abs(obs.value),
voiced=obs.value > 0))
value=[dict(index=0,
frequency=np.abs(v),
voiced=v > 0) for v in obs.value])
return annotation


Expand Down Expand Up @@ -200,9 +201,10 @@ def pitch_midi_to_hz(annotation):
data = annotation.pop_data()

for obs in data:
annotation.append(time=obs.time, duration=obs.duration,
annotation.append(time=obs.time,
duration=obs.duration,
confidence=obs.confidence,
value=440 * (2.0**((obs.value - 69.0)/12.0)))
value=[440 * (2.0**((v - 69.0)/12.0)) for v in obs.value])

return annotation

Expand All @@ -218,7 +220,7 @@ def pitch_hz_to_midi(annotation):
for obs in data:
annotation.append(time=obs.time, duration=obs.duration,
confidence=obs.confidence,
value=12 * (np.log2(obs.value) - np.log2(440.0)) + 69)
value=[12 * (np.log2(v) - np.log2(440.0)) + 69 for v in obs.value])
return annotation


Expand Down
Loading