From f3f351fed310e56592f5621eb3a716d93575b89e Mon Sep 17 00:00:00 2001
From: Robert M Ochshorn <rmo@NUMM.ORG>
Date: Fri, 27 May 2016 00:58:50 +0200
Subject: [PATCH] remove sharded_transcribe and simplify POST requirements

---
 examples/sharded_transcribe.py | 60 ----------------------------------
 serve.py                       | 10 ++----
 2 files changed, 3 insertions(+), 67 deletions(-)
 delete mode 100644 examples/sharded_transcribe.py

diff --git a/examples/sharded_transcribe.py b/examples/sharded_transcribe.py
deleted file mode 100644
index 453defe1..00000000
--- a/examples/sharded_transcribe.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import gentle.standard_kaldi as standard_kaldi
-import numm3
-from Queue import Queue
-from multiprocessing.pool import ThreadPool as Pool
-import json
-import math
-import sys
-
-AUDIOPATH = sys.argv[1]
-JSON_OUT  = sys.argv[2]
-
-N_THREADS = 4
-T_PER_CHUNK = 10
-OVERLAP_T = 2
-
-kaldi_queue = Queue()
-for i in range(N_THREADS):
-    kaldi_queue.put(standard_kaldi.Kaldi())
-
-# Preload entire audio
-audiobuf  = numm3.sound2np(AUDIOPATH, R=8000, nchannels=1)
-n_chunks = int(math.ceil(len(audiobuf) / (8000.0 * (T_PER_CHUNK-OVERLAP_T))))
-
-print 'sharding into %d chunks' % (n_chunks)
-
-chunks = []                   # (idx, [words])
-
-def transcribe_chunk(idx):
-    st = idx * (T_PER_CHUNK-OVERLAP_T) * 8000
-    end= st + T_PER_CHUNK * 8000
-
-    buf = audiobuf[st:end]
-    print buf.shape
-
-    k = kaldi_queue.get()
-
-    # # Break into 2s chunks
-    # n_buf_chunks = int(buf.shape[0] / 16000.0
-
-    k.push_chunk(buf.tostring())
-    
-    ret = k.get_final()
-    print ' '.join([X['word'] for X in ret])
-    k.reset()
-
-    chunks.append({"start": idx*(T_PER_CHUNK-OVERLAP_T), "words": ret})
-
-    print '%d chunks (of %d)' % (len(chunks), n_chunks)
-
-    kaldi_queue.put(k)
-
-    
-pool = Pool(N_THREADS)
-pool.map(transcribe_chunk, range(n_chunks))
-pool.close()
-pool.join()
-
-chunks.sort(key=lambda x: x['start'])
-
-json.dump(chunks, open(JSON_OUT, 'w'), indent=2)
diff --git a/serve.py b/serve.py
index e574b3db..119442a1 100644
--- a/serve.py
+++ b/serve.py
@@ -6,18 +6,14 @@
 
 import json
 import logging
-import math
 import multiprocessing
-from multiprocessing.pool import ThreadPool as Pool
 import os
 from Queue import Queue
 import shutil
-import subprocess
-import sys
 import uuid
 import wave
 
-from gentle.paths import get_binary, get_resource, get_datadir
+from gentle.paths import get_resource, get_datadir
 from gentle.transcription import to_csv, MultiThreadedTranscriber
 from gentle.cyst import Insist
 from gentle.ffmpeg import to_wav
@@ -206,7 +202,7 @@ def getChild(self, uid, req):
     def render_POST(self, req):
         uid = self.transcriber.next_id()
 
-        tran = req.args['transcript'][0]
+        tran = req.args.get('transcript', [''])[0]
         audio = req.args['audio'][0]
 
         async = True
@@ -230,7 +226,7 @@ def render_POST(self, req):
         if not async:
             def write_result(result):
                 '''Write JSON to client on completion'''
-                req.headers["Content-Type"] = "application/json"
+                req.setHeader("Content-Type", "application/json")
                 req.write(json.dumps(result, indent=2))
                 req.finish()
             result_promise.addCallback(write_result)