Skip to content

Commit

Permalink
Upkeep/christmas updates: bugfixes and more (#87)
Browse files Browse the repository at this point in the history
* Fix PYIN bugs and failures

* Switch to CMake

* Fix power-of-two FFT/autocorrelation with r2c/c2r transforms

* Remove SWIPE algorithm

* Improve sine waves and add many more unit tests
  • Loading branch information
sevagh authored Dec 27, 2023
1 parent 4d94d7e commit faa04bb
Show file tree
Hide file tree
Showing 55 changed files with 1,356 additions and 976 deletions.
4 changes: 2 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
*
!src
!lib
!cmake
!misc
!include
!test
!degraded_audio_tests
!wav_analyzer
!Makefile
!CMakeLists.txt
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
*.png filter=lfs diff=lfs merge=lfs -text
*.txt filter=lfs diff=lfs merge=lfs -text
misc/samples/*.txt filter=lfs diff=lfs merge=lfs -text
misc/* linguist-documentation
*.wav filter=lfs diff=lfs merge=lfs -text
*.aiff filter=lfs diff=lfs merge=lfs -text
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
lib
.dir-locals.el
/build
84 changes: 84 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
cmake_minimum_required(VERSION 3.10)
project(pitch_detection VERSION 1.0 LANGUAGES CXX)

# Make the project-local find modules in cmake/ (e.g. FindFFTS.cmake) visible
# to find_package().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Build every target as C++17; fail at configure time if the compiler cannot
# provide that standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# use this for dependency graph generation
#set_property(GLOBAL PROPERTY GRAPHVIZ_EXPORT_TARGETS TRUE)

# Append the project's flags instead of overwriting CMAKE_CXX_FLAGS, so flags
# supplied by the user or toolchain (CXXFLAGS, -DCMAKE_CXX_FLAGS=...) survive.
# The flags stay in CMAKE_CXX_FLAGS (rather than per-target compile options)
# because -fopenmp must also reach the link step.
# NOTE(review): for g++ `-ansi` normally selects C++98, which looks at odds with
# CMAKE_CXX_STANDARD 17 above -- confirm it is still wanted.
string(APPEND CMAKE_CXX_FLAGS
  " -Wall -Wextra -ansi -pedantic -fext-numeric-literals -fopenmp")

# These deliberately replace (not extend) the per-configuration flags, so
# whatever CMake would otherwise use by default for Debug/Release is dropped.
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -flto")

# Directory-scoped include paths shared by every target defined below:
# the public headers and the test helpers.
include_directories(include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/test)

# Mandatory dependencies of the library. FFTS is located by the project's own
# find module, cmake/FindFFTS.cmake (reachable through CMAKE_MODULE_PATH).
find_package(mlpack REQUIRED)
find_package(FFTS REQUIRED)

# Build the shared library from every .cpp file under src/.
# The glob is evaluated at configure time only: re-run CMake after adding or
# removing source files.
file(GLOB_RECURSE LIB_SOURCES "src/*.cpp")
add_library(pitch_detection SHARED ${LIB_SOURCES})

# FindFFTS.cmake reports where ffts/ffts.h lives in FFTS_INCLUDE_DIRS; without
# this an FFTS installed outside the compiler's default search path is found at
# configure time but its header is not found at compile time.
# PUBLIC mirrors the PUBLIC link dependency below.
target_include_directories(pitch_detection PUBLIC
  ${FFTS_INCLUDE_DIRS})

target_link_libraries(pitch_detection PUBLIC
  ${MLPACK_LIBRARIES}
  ${FFTS_LIBRARIES})

# Optional dependencies of the wav_analyzer sample program. All lookups are
# QUIET: when any of them is missing the program is simply not built.
find_package(gflags QUIET)
find_package(libnyquist QUIET)
find_package(PkgConfig QUIET)
pkg_search_module(OPUS QUIET opus)
pkg_search_module(WAVPACK QUIET wavpack)

if(gflags_FOUND AND OPUS_FOUND AND WAVPACK_FOUND AND libnyquist_FOUND)
  # link_directories() only affects targets created AFTER the call, so it has
  # to precede add_executable() for wav_analyzer to pick the directories up.
  link_directories(
    ${GFLAGS_LIBRARY_DIRS}
    ${OPUS_LIBRARY_DIRS}
    ${WAVPACK_LIBRARY_DIRS}
    ${LIBNYQUIST_LIBRARY_DIRS})

  # Every .cpp file directly inside wav_analyzer/ is part of the program.
  file(GLOB WAV_ANALYZER_SOURCES "wav_analyzer/*.cpp")
  add_executable(wav_analyzer ${WAV_ANALYZER_SOURCES})
  target_link_libraries(wav_analyzer PRIVATE
    pitch_detection
    ${GFLAGS_LIBRARIES}
    ${OPUS_LIBRARIES}
    ${WAVPACK_LIBRARIES}
    libnyquist)

  target_include_directories(wav_analyzer PRIVATE
    ${GFLAGS_INCLUDE_DIRS}
    ${OPUS_INCLUDE_DIRS}
    ${WAVPACK_INCLUDE_DIRS}
    ${LIBNYQUIST_INCLUDE_DIRS})
endif()

# Unit tests are optional: they are configured only when GoogleTest is found.
include(CTest)
find_package(GTest QUIET)
if(GTEST_FOUND)
  enable_testing()

  # All test/test*.cpp translation units plus the shared helpers in util.cpp.
  file(GLOB TEST_SOURCES "test/test*.cpp" "test/util.cpp")
  add_executable(pitch_tests ${TEST_SOURCES})
  target_link_libraries(pitch_tests PRIVATE
    pitch_detection
    GTest::GTest
    GTest::Main)

  # Register the binary with CTest so that `ctest` actually runs it.
  # NOTE(review): the README runs ./build/pitch_tests from the repository root,
  # so the test is started from the source directory here -- confirm the tests
  # resolve their data files relative to that directory.
  add_test(NAME pitch_tests
    COMMAND pitch_tests
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
endif()

# Benchmarks are optional: the pitch_bench executable is only configured when
# the `benchmark` package can be found.
find_package(benchmark QUIET)
if(benchmark_FOUND)
  # The benchmark driver plus the helpers it shares with the unit tests.
  file(GLOB PITCH_BENCH_SOURCES
    "test/bench.cpp"
    "test/util.cpp")

  add_executable(pitch_bench ${PITCH_BENCH_SOURCES})
  target_link_libraries(pitch_bench
    PRIVATE
      pitch_detection
      benchmark::benchmark)
endif()

# Installation layout (relative to CMAKE_INSTALL_PREFIX):
#   lib/      - the pitch_detection shared library
#   include/  - the public header
install(
  TARGETS pitch_detection
  LIBRARY DESTINATION lib)
install(
  FILES include/pitch_detection.h
  DESTINATION include)
4 changes: 1 addition & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ git \
cmake \
gcc \
g++ \
libblas-dev \
liblapack-dev \
libboost-dev \
libarmadillo-dev \
libmlpack-dev \
Expand All @@ -29,6 +27,6 @@ RUN cd /usr/src \
&& make install

# Build and install the pitch-detection library, as well as the tests and benchmarks
RUN cd /usr/src/pitch-detection && make clean all && make -C test clean all && make install
RUN cd /usr/src/pitch-detection && cmake -S . -B build -DCMAKE_BUILD_TYPE=Release && cmake --build "build"

LABEL Name=pitch-detection Version=0.0.1
31 changes: 0 additions & 31 deletions Makefile

This file was deleted.

138 changes: 83 additions & 55 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,94 +1,123 @@
### Pitch detection algorithms
# pitch-detection

Autocorrelation-based C++ pitch detection algorithms with **O(nlogn) or lower** running time:

* McLeod pitch method - [2005 paper](http://miracle.otago.ac.nz/tartini/papers/A_Smarter_Way_to_Find_Pitch.pdf) - [visualization](./misc/mcleod)
* YIN(-FFT) - [2002 paper](http://audition.ens.fr/adc/pdf/2002_JASA_YIN.pdf) - [visualization](./misc/yin)
* Probabilistic YIN - [2014 paper](https://www.eecs.qmul.ac.uk/~simond/pub/2014/MauchDixon-PYIN-ICASSP2014.pdf)
* Probabilistic MPM - [my own invention](./misc/probabilistic-mcleod)
* SWIPE' - [2007 paper](https://pdfs.semanticscholar.org/0fd2/6e267cfa9b6d519967ea00db4ffeac272777.pdf) - [transliterated to C++ from kylebgorman's C implementation](https://github.com/kylebgorman/swipe)\*, \*\*

\*: SWIPE' appears to be O(n) but with an enormous constant factor. The implementation complexity is much higher than MPM and YIN and it brings in additional dependencies (BLAS + LAPACK).
The size of the FFT used is the same as the size of the input waveform, such that the output is a single pitch for the entire waveform.

\*\*: There's a parallel version of SWIPE, [Aud-SWIPE-P](https://github.com/saul-calderonramirez/Aud-SWIPE-P).
Librosa (among other libraries) uses the STFT to create _frames_ of the input waveform, and applies pitch tracking to each frame with a fixed FFT size (typically 2048 or some other power of two). If you want to track the temporal evolution of pitches in sub-sections of the waveform, you have to handle the waveform splitting yourself (look at [wav_analyzer](./wav_analyzer/wav_analyzer.cpp) for more details).

Suggested usage of this library can be seen in the utility [wav_analyzer](./wav_analyzer), which divides a wav file into chunks of 0.01s and checks the pitch of each chunk. Sample output of wav_analyzer:
## :postal_horn: Latest news :newspaper:

Dec 27, 2023 :santa: release:
* Removed SWIPE' algorithm
* It is not based on autocorrelation, I skipped it in all of the tests, and my implementation was basically copy-pasted from [kylebgorman/swipe](https://github.com/kylebgorman/swipe): just use their code instead!
* Fix autocorrelation (in YIN and MPM) for power-of-two sizes in FFTS (see [ffts issue #65](https://github.com/anthonix/ffts/issues/65)) by using r2c/c2r transforms (addresses [bug #72](https://github.com/sevagh/pitch-detection/issues/72) reported by jeychenne)
* Fix PYIN bugs to pass all test cases (addresses jansommer's comments in [pull-request #84](https://github.com/sevagh/pitch-detection/pull/84#issuecomment-1843623594))
* Added many more unit tests, all passing (228/228)

## Other programming languages

* Go: [Go implementation of YIN](./misc/yin) in this repo (for tutorial purposes)
* Rust: [Rust implementation of MPM](./misc/mcleod) in this repo (for tutorial purposes)
* Python: [transcribe](https://github.com/sevagh/transcribe) is a Python version of MPM for a proof-of-concept of primitive pitch transcription
* Javascript (WebAssembly): [pitchlite](https://github.com/sevagh/pitchlite) has WASM modules of MPM/YIN running at realtime speeds in the browser, and also introduces sub-chunk detection to return the overall pitch of the chunk and the temporal sub-sequence of pitches within the chunk

## Usage

Suggested usage of this library can be seen in the utility [wav_analyzer](./wav_analyzer) which divides a wav file into chunks of 0.01s and checks the pitch of each chunk. Sample output of wav_analyzer:

```
At t: 0.5
mpm: 162.529
yin: 162.543
swipe: 162.183
pmpm: 162.529
pyin: 162.543
std::vector<float> chunk; // chunk of audio
float pitch_mpm = pitch::mpm(chunk, sample_rate);
float pitch_yin = pitch::yin(chunk, sample_rate);
```

### Degraded audio tests
## Tests

All testing files are [here](./degraded_audio_tests) - the progressive degradations are described by the respective numbered JSON file, generated using [audio-degradation-toolbox](https://github.com/sevagh/audio-degradation-toolbox). The original clip is a Viola playing E3 from the [University of Iowa MIS](http://theremin.music.uiowa.edu/MIS.html).
### Unit tests

The results come from parsing the output of wav_analyzer to count how many 0.1s slices of the input clip were in the ballpark of the expected value of 164.81 - I considered anything 160-169 to be acceptable:
There are unit tests that use sinewaves (both generated with `std::sin` and with [librosa.tone](https://librosa.org/doc/main/generated/librosa.tone.html)), and instrument tests using txt files containing waveform samples from the [University of Iowa MIS](http://theremin.music.uiowa.edu/MIS.html) recordings:
```
$ ./build/pitch_tests
Running main() from ./googletest/src/gtest_main.cc
[==========] Running 228 tests from 22 test suites.
[----------] Global test environment set-up.
[----------] 2 tests from MpmSinewaveTestManualAllocFloat
[ RUN ] MpmSinewaveTestManualAllocFloat.OneAllocMultipleFreqFromFile
[ OK ] MpmSinewaveTestManualAllocFloat.OneAllocMultipleFreqFromFile (38 ms)
...
[----------] 5 tests from YinInstrumentTestFloat
...
[ RUN ] YinInstrumentTestFloat.Acoustic_E2_44100
[ OK ] YinInstrumentTestFloat.Acoustic_E2_44100 (1 ms)
[ RUN ] YinInstrumentTestFloat.Classical_FSharp4_48000
[ OK ] YinInstrumentTestFloat.Classical_FSharp4_48000 (58 ms)
[----------] 5 tests from YinInstrumentTestFloat (174 ms total)
...
[----------] 5 tests from MpmInstrumentTestFloat
[ RUN ] MpmInstrumentTestFloat.Violin_A4_44100
[ OK ] MpmInstrumentTestFloat.Violin_A4_44100 (61 ms)
[ RUN ] MpmInstrumentTestFloat.Piano_B4_44100
[ OK ] MpmInstrumentTestFloat.Piano_B4_44100 (24 ms)
...
[==========] 228 tests from 22 test suites ran. (2095 ms total)
[ PASSED ] 228 tests.
```

| Degradation level | MPM # correct | YIN # correct | SWIPE' # correct |
| ------------- | ------------- | ------------- | ------------- |
| 0 | 26 | 22 | 5 |
| 1 | 23 | 21 | 13 |
| 2 | 19 | 21 | 9 |
| 3 | 18 | 19 | 7 |
| 4 | 19 | 19 | 6 |
| 5 | 18 | 19 | 5 |
### Degraded audio tests

### Build and install
All testing files are [here](./misc/degraded_audio_tests) - the progressive degradations are described by the respective numbered JSON file, generated using [audio-degradation-toolbox](https://github.com/sevagh/audio-degradation-toolbox). The original clip is a Viola playing E3 from the [University of Iowa MIS](http://theremin.music.uiowa.edu/MIS.html). The results come from parsing the output of wav_analyzer to count how many 0.1s slices of the input clip were in the ballpark of the expected value of 164.81 - I considered anything 160-169 to be acceptable:

Using this project should be as easy as `make && sudo make install` on Linux with a modern GCC - I don't officially support other platforms.
| Degradation level | MPM # correct | YIN # correct |
| ------------- | ------------- | ------------- |
| 0 | 26 | 22 |
| 1 | 23 | 21 |
| 2 | 19 | 21 |
| 3 | 18 | 19 |
| 4 | 19 | 19 |
| 5 | 18 | 19 |

This project depends on [ffts](https://github.com/anthonix/ffts), BLAS/LAPACK, and mlpack. To run the tests, you need [googletest](https://github.com/google/googletest), and run `make -C test/ && ./test/test`. To run the bench, you need [google benchmark](https://github.com/google/benchmark), and run `make -C test/ bench && ./test/bench`.
## Build and install

Build and install pitch_detection, run the tests, and build the sample application, wav_analyzer:
You need Linux, cmake, and gcc (I don't officially support other platforms). The library depends on [ffts](https://github.com/anthonix/ffts) and [mlpack](https://www.mlpack.org/). The tests depend on [libnyquist](https://github.com/ddiakopoulos/libnyquist), [googletest](https://github.com/google/googletest), and [google benchmark](https://github.com/google/benchmark). Dependency graph:
![dep-graph](./misc/deps.png)

Build and install with cmake:
```bash
# build libpitch_detection.so
make clean all
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
cmake --build "build"

# build tests and benches
make -C test clean all
# install to your system
cd build && make install

# run tests and benches
./test/test
./test/bench

# install the library and headers to `/usr/local/lib` and `/usr/local/include`
sudo make install
./build/pitch_tests
./build/pitch_bench

# build and run C++ sample
make -C wav_analyzer clean all
./wav_analyzer/wav_analyzer
# run wav_analyzer
./build/wav_analyzer
```

#### Docker

To simplify the setup, there's a [Dockerfile](./Dockerfile) that sets up a Ubuntu container with all the dependencies for compiling the library and running the included tests and benchmarks. You can build the image or pull it from DockerHub ([esimkowitz/pitchdetection](https://hub.docker.com/repository/docker/esimkowitz/pitchdetection)):
### Docker

To simplify the setup, there's a [Dockerfile](./Dockerfile) that sets up an Ubuntu container with all the dependencies for compiling the library and running the included tests and benchmarks:
```bash
# build
$ docker build --rm --pull -f "Dockerfile" -t pitchdetection:latest "."
$ docker run --rm --init -it pitchdetection:latest

# pull
$ docker pull esimkowitz/pitchdetection:latest
$ docker run --rm --init -it esimkowitz/pitchdetection:latest
```

Once you're in the container, run the tests and benches:

```bash
./test/test
./test/bench
```
**n.b.** You can pull the [esimkowitz/pitchdetection](https://hub.docker.com/repository/docker/esimkowitz/pitchdetection) image from DockerHub, but I can't promise that it's up-to-date.

### Usage
## Detailed usage

Read the [header](./include/pitch_detection.h) and [sample wav_analyzer](./wav_analyzer).
Read the [header](./include/pitch_detection.h) and the example [wav_analyzer program](./wav_analyzer).

The namespaces are `pitch` and `pitch_alloc`. The functions and classes are templated for `<double>` and `<float>` support.

Expand All @@ -103,7 +132,6 @@ double pitch_yin = pitch::yin<double>(audio_buffer, 48000);
double pitch_mpm = pitch::mpm<double>(audio_buffer, 48000);
double pitch_pyin = pitch::pyin<double>(audio_buffer, 48000);
double pitch_pmpm = pitch::pmpm<double>(audio_buffer, 48000);
double pitch_swipe = pitch::swipe<double>(audio_buffer, 48000);

pitch_alloc::Mpm<double> ma(8192);
pitch_alloc::Yin<double> ya(8192);
Expand Down
18 changes: 18 additions & 0 deletions cmake/FindFFTS.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# FindFFTS.cmake
#
# Try to find the FFTS library and its header.
#
# Result variables:
#   FFTS_FOUND        - System has FFTS (both the library and ffts/ffts.h)
#   FFTS_INCLUDE_DIRS - The FFTS include directories
#   FFTS_LIBRARIES    - The libraries needed to use FFTS
#
# Imported target (defined only when FFTS is found):
#   FFTS::FFTS        - Carries the library location and the include directory
#
# Cache variables (may be preset to point at a specific installation):
#   FFTS_INCLUDE_DIR  - Directory that contains ffts/ffts.h
#   FFTS_LIBRARY      - Path to the ffts library file

find_path(FFTS_INCLUDE_DIR NAMES ffts/ffts.h)
find_library(FFTS_LIBRARY NAMES ffts)

# Sets FFTS_FOUND and handles the QUIET/REQUIRED arguments of find_package().
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTS DEFAULT_MSG FFTS_LIBRARY FFTS_INCLUDE_DIR)

# Keep the raw search results out of the default cache view.
mark_as_advanced(FFTS_INCLUDE_DIR FFTS_LIBRARY)

if(FFTS_FOUND)
  set(FFTS_INCLUDE_DIRS ${FFTS_INCLUDE_DIR})
  set(FFTS_LIBRARIES ${FFTS_LIBRARY})

  # Also offer an imported target; guarded so that including this module more
  # than once does not try to redefine it.
  if(NOT TARGET FFTS::FFTS)
    add_library(FFTS::FFTS UNKNOWN IMPORTED)
    set_target_properties(FFTS::FFTS PROPERTIES
      IMPORTED_LOCATION "${FFTS_LIBRARY}"
      INTERFACE_INCLUDE_DIRECTORIES "${FFTS_INCLUDE_DIR}")
  endif()
endif()
Loading

0 comments on commit faa04bb

Please sign in to comment.