# prepare_dataset_Spectrogram.py

import librosa
import os
import json
from python_speech_features import logfbank

# Root folder of the dataset; every sub-folder name is used as a label.
dataset_path = 'dataset'
# Output JSON file that receives the mapping, labels, spectrograms and file paths.
json_path = 'data.json'
# Number of samples kept per clip: shorter clips are dropped, longer ones are
# truncated to this length (presumably 1 second at librosa's default 22050 Hz
# sample rate - confirm if a different rate is ever passed to librosa.load).
SAMPLES_TO_CONSIDER = 22050


def preprocess_dataset(dataset_path, json_path, n_fft=2048, hop_length=512):
    """Extract log Mel-filterbank spectrograms from an audio dataset and save them as JSON.

    Every sub-folder found below ``dataset_path`` is treated as one label. Each
    audio file with at least ``SAMPLES_TO_CONSIDER`` samples is truncated to that
    length, converted to log filterbank energies (26 filters, ``nfft=1103``) and
    stored together with its label index and file path. Shorter files are skipped.

    The resulting JSON object has four parallel-structured keys:

    * ``"mapping"``: label names; a label's position is its numeric index
    * ``"labels"``: numeric label index of every stored clip
    * ``"Spectograms"``: one 2-D list (frames x filters) per stored clip
    * ``"files"``: path of every stored clip

    :param dataset_path (str): Path to dataset root folder
    :param json_path (str): Path to json file used to save the spectrograms
    :param n_fft (int): Currently unused; kept so existing callers keep working.
        The FFT size passed to ``logfbank`` is fixed at 1103.
    :param hop_length (int): Currently unused; kept so existing callers keep working
    :return: None. The result is written to ``json_path``.
    """

    # dictionary where we'll store mapping, labels, spectrograms and filenames
    # NOTE: the key "Spectograms" (sic) is kept as-is because consumers of the
    # JSON file read it by that exact name.
    data = {
        "mapping": [],
        "labels": [],
        "Spectograms": [],
        "files": []
    }

    # normalise once so the root check below is a plain string comparison
    # (also works when a pathlib.Path is passed in)
    root = os.fspath(dataset_path)

    # loop through all sub-dirs
    for dirpath, dirnames, filenames in os.walk(root):

        # os.walk yields entries in arbitrary filesystem order; sorting in place
        # makes the traversal - and therefore the label indices - reproducible
        dirnames.sort()

        # ensure we're at sub-folder level (compare by value, not identity)
        if dirpath == root:
            continue

        # save label (i.e., sub-folder name) in the mapping,
        # e.g. "genre/blues" => "blues"; basename handles any path separator
        label = os.path.basename(dirpath)
        data["mapping"].append(label)
        label_index = len(data["mapping"]) - 1
        print("\nProcessing {}".format(label))

        # process all audio files in sub-dir and store their spectrograms
        for f in sorted(filenames):
            file_path = os.path.join(dirpath, f)

            # load audio file (librosa resamples to its default rate)
            signal, sample_rate = librosa.load(file_path)

            # drop audio files with less than pre-decided number of samples
            if len(signal) < SAMPLES_TO_CONSIDER:
                continue

            # ensure consistency of the length of the signal
            signal = signal[:SAMPLES_TO_CONSIDER]

            # extract log Mel-filterbank energies; logfbank already returns
            # an array shaped (frames, filters), so no transposition is needed
            spectrogram = logfbank(signal, sample_rate, nfilt=26, nfft=1103)

            # store data for analysed track
            data["Spectograms"].append(spectrogram.tolist())
            data["labels"].append(label_index)
            data["files"].append(file_path)
            print("{}: {}".format(file_path, label_index))

    # save data in json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


# Script entry point: process the module-level default dataset folder and
# write the result to the module-level default JSON path.
if __name__ == "__main__":
    preprocess_dataset(dataset_path, json_path)