Skip to content

Commit

Permalink
Added code
Browse files Browse the repository at this point in the history
  • Loading branch information
mryndzionek committed Jul 29, 2024
1 parent 0e7eeec commit 1b18b19
Show file tree
Hide file tree
Showing 8 changed files with 3,021 additions and 12 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CI workflow: configures and builds kws_cli with CMake, then publishes the
# resulting binary as a workflow artifact.
name: build

# Triggered by pushes; the branch filter below lists only `build`.
on:
push:
branches:
- build
jobs:
build:

# BUILD_TYPE is read as $BUILD_TYPE by the cmake invocation in the Build step.
env:
BUILD_TYPE: Release

runs-on: ubuntu-latest

steps:

# Refreshes the apt package index; no packages are installed by this step.
- name: Prepare
run: |
sudo apt-get update
# The action reference is a full commit SHA rather than a tag.
- name: Checkout
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

# Out-of-source build in ./build using the default CMake generator and make.
- name: Build
run: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
make
# Uploads the built executable under the artifact name `binaries`.
- uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b
with:
name: binaries
path: |
build/kws_cli
12 changes: 1 addition & 11 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1 @@
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
build/
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Build script for kws_cli, a standalone keyword-spotting command-line tool.
cmake_minimum_required(VERSION 3.16.3)

# The sources are plain C. Declaring the language explicitly stops CMake from
# also probing for (and requiring) a C++ compiler that is never used.
project(kws_cli LANGUAGES C)

# Warnings are fatal by default, matching the historical behaviour of this
# project. Packagers building with a newer or different compiler can pass
# -DKWS_CLI_WERROR=OFF to keep new diagnostics from breaking the build.
option(KWS_CLI_WERROR "Treat compiler warnings as errors" ON)

add_executable(kws_cli
  src/kws_cli.c
  src/fbank.c
)

# Headers are only needed to compile this target, so keep them PRIVATE
# instead of leaking them to the whole directory with include_directories().
target_include_directories(kws_cli
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
)

target_compile_options(kws_cli
  PRIVATE
    -Wno-deprecated-declarations
    -Wall
    "$<$<BOOL:${KWS_CLI_WERROR}>:-Werror>"
)

# Let CMake choose the correct position-independent-code flag for the
# toolchain rather than hard-coding -fPIC.
set_target_properties(kws_cli PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Strip symbols from Release binaries. A generator expression is used so this
# also works with multi-config generators, and it does not modify the
# user-visible CMAKE_C_FLAGS_RELEASE variable.
target_link_options(kws_cli
  PRIVATE
    "$<$<CONFIG:Release>:-s>"
)

# libm provides the math routines used by the feature extraction / model code.
target_link_libraries(kws_cli
  PRIVATE
    m
)


52 changes: 51 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,51 @@
# kws_cli
# kws_cli

[![build](https://github.com/mryndzionek/kws_cli/actions/workflows/build.yml/badge.svg)](https://github.com/mryndzionek/kws_cli/actions/workflows/build.yml)

## About

Speech recognition in ~300kB of code.

Small footprint, standalone, zero dependency, offline
keyword spotting (KWS) CLI tool. Might be useful in
some automation tasks. Accepts audio on stdin and recognizes the
following words: `up`, `down`, `left`, `right`, `stop`.

Here is an example invocation:

```
rec -q -t raw -c1 -e signed -b 16 -r16k - | ./kws_cli
```

Make sure you have a decent microphone and the system audio
is at a decent level.

Individual WAV files can be piped (e.g. for testing) using:

```
sox -S ../untitled.wav -t raw -c1 -e signed -b 16 -r16k - | ./kws_cli
```

## More details

Speech recognition is based on [this](https://github.com/microsoft/EdgeML/blob/master/docs/publications/Sha-RNN.pdf)
model and examples from the same repository.
This is a simple model with three layers: 2x LSTM + 1x fully connected.
The model is trained in PyTorch and exported to ONNX.
Then [onnx2c](https://github.com/kraiskil/onnx2c)
is used to convert the model to a bunch of C code.
LSTM layers have become mainstream in recent years and are well
supported in different frameworks. The model is small, so it might
be possible to run it on Cortex-M4/M7, or ESP32 (future work).

## Building

The usual CMake routine:

```
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make
```

14 changes: 14 additions & 0 deletions include/fbank.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
 * Public interface of the feature-extraction / keyword-detection module.
 *
 * NOTE(review): the implementation lives in src/fbank.c, which is not visible
 * from this header; the descriptions below are based on the prototypes and on
 * how src/kws_cli.c calls them. Confirm against the implementation.
 */
#ifndef __FBANK_H__
#define __FBANK_H__

/* size_t appears in the prototypes below. Include its definition here so the
 * header compiles on its own, independent of what the includer pulled in. */
#include <stddef.h>

/* Number of samples in the input window passed to fbank(). The README feeds
 * the tool 16 kHz audio, so this is presumably one second - TODO confirm. */
#define SAMPLE_LEN (16000UL)
/* Number of rows in the feature matrix filled by fbank(). */
#define NUM_FRAMES (99UL)
/* Number of values per row of the feature matrix. */
#define NUM_FILT (32UL)

/* Fills `output` with features computed from the `input` sample window. */
void fbank(float input[SAMPLE_LEN], float output[NUM_FRAMES][NUM_FILT]);

/* Operates in place on one row of NUM_FILT values.
 * Presumably a normalisation step, judging by the name - TODO confirm. */
void fbank_norm(float inputoutput[NUM_FILT]);

/* Classifies a feature matrix and reports the result through `label` and
 * `logit`. The caller in kws_cli.c treats label 0 as "nothing detected" and
 * passes any other label to fbank_label_idx_to_str(). */
void fbank_speech_detect(float input[NUM_FRAMES][NUM_FILT], size_t *label, float *logit);

/* Debug helper; presumably prints the value range of the feature matrix. */
void fbank_print_min_max(float input[NUM_FRAMES][NUM_FILT]);

/* Returns the textual name for a label index from fbank_speech_detect(). */
char const *const fbank_label_idx_to_str(size_t label);

#endif // __FBANK_H__
1,372 changes: 1,372 additions & 0 deletions include/speech_nn.h

Large diffs are not rendered by default.

1,454 changes: 1,454 additions & 0 deletions src/fbank.c

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions src/kws_cli.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#include <signal.h>
#include <string.h>

#include "fbank.h"

/* Number of fresh samples consumed per iteration of the main loop: half of
 * the analysis window, so each window shares its first half with the
 * previous one. */
#define CHUNK_SIZE (SAMPLE_LEN / 2)

/* Shutdown request flag, set from the signal handler and polled by main().
 * volatile sig_atomic_t is the object type the C standard guarantees can be
 * written safely from an asynchronous signal handler. */
static volatile sig_atomic_t byebye = false;

/* Signal handler: only records that termination was requested. */
static void INThandler(int sig)
{
    (void)sig; /* the signal number is not needed */
    byebye = true;
}

/*
 * Entry point.
 *
 * Reads 16-bit samples from stdin in chunks of CHUNK_SIZE, keeps a sliding
 * window of the most recent SAMPLE_LEN samples, runs feature extraction and
 * detection on every full chunk, and reports each newly detected label on
 * stderr. Terminates on end of input, on a read that yields no data, or
 * after SIGINT.
 *
 * Command-line arguments are not used. Returns 0.
 */
int main(int argc, char *argv[])
{
    int16_t in[CHUNK_SIZE] = {0};
    float input[SAMPLE_LEN] = {0.0f};
    float fbins[NUM_FRAMES][NUM_FILT];
    size_t n = 0; /* number of samples currently buffered in `in` */
    size_t label;
    float logit;
    /* True after a detection has been reported, until label 0 is seen again;
     * prevents the same utterance from being reported for every window. */
    bool debounce_active = false;

    (void)argc;
    (void)argv;

    /* SA_RESTART is not set, so SIGINT can interrupt a blocked read instead
     * of having it restarted transparently. */
    struct sigaction sa = {.sa_handler = INThandler, .sa_flags = 0};
    sigemptyset(&sa.sa_mask);
    sigaction(SIGINT, &sa, 0);

    fprintf(stderr, "\nStarting...\n");

    while (!byebye)
    {
        /* Top up the chunk buffer; fread may deliver fewer items than asked. */
        size_t m = fread(&in[n], sizeof(int16_t), CHUNK_SIZE - n, stdin);
        n += m;

        if (m == 0)
        {
            /* EOF, read error, or interrupted read: stop processing. */
            break;
        }

        if (n == CHUNK_SIZE)
        {
            if (CHUNK_SIZE < SAMPLE_LEN)
            {
                /* Slide the window: drop the oldest CHUNK_SIZE samples. */
                memmove(input, &input[CHUNK_SIZE], (SAMPLE_LEN - CHUNK_SIZE) * sizeof(float));
            }

            /* Append the new chunk at the end of the window, converting the
             * integer samples to float without rescaling. */
            for (size_t i = 0; i < CHUNK_SIZE; i++)
            {
                input[SAMPLE_LEN - CHUNK_SIZE + i] = in[i];
            }

            fbank(input, fbins);
            fbank_speech_detect(fbins, &label, &logit);
            if (debounce_active)
            {
                if (label == 0)
                {
                    debounce_active = false;
                }
            }
            else
            {
                if (label > 0)
                {
                    /* %zu is the conversion specifier for size_t. */
                    fprintf(stderr, "label: '%s', label_idx: %zu, logit: %f\n", fbank_label_idx_to_str(label), label, logit);
                    fflush(stderr);
                    debounce_active = true;
                }
            }
            n = 0;
        }
    }

    return 0;
}

0 comments on commit 1b18b19

Please sign in to comment.