# Create and enter an isolated conda environment (Python 3.8), then install
# the project's Python dependencies and initialize the environment.
conda create -n emo python=3.8
conda activate emo
pip install -r requirements.txt
# NOTE(review): presumably env.py performs one-time project setup — confirm what it does.
python env.py
Download the pretrained model from this link, then put the downloaded files into /chinese-roberta-wwm-ext.
Collect the data from this link.
Use this code to perform the following preprocessing steps:
- Convert the audio to a single channel, resample it to 22050 Hz, and save it in WAV format.
- Merge the audio and slice it into 10-second segments.
- Use ASR to transcribe the speech into text.
- Store the audio, emotion labels, and text in three folders with corresponding file names.
# Collect each audio path with its text and emotion label, then split the
# resulting dataset into training and validation sets.
python getdata.py
python split.py
# Build the monotonic_align C extension in place.
# Run in a subshell so a failed `cd` cannot cause the build to execute in the
# wrong directory, and the caller's working directory is always restored.
(
  cd monotonic_align || exit 1
  python setup.py build_ext --inplace
)
# Train the model: replace path/to/json with your config file; -m names the
# model/checkpoint directory.  Then run inference with the trained model.
python train.py -c path/to/json -m model
python infer.py