Merge pull request #5 from danthelion/infra_updates

Python 3.7 base & singular input parameter
danthelion · Aug 26, 2019 · 700714b · 700714b
2 parents ce42023 + 803fc03
commit 700714b
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 47 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,25 +1,23 @@
-FROM python:3.6
+FROM python:3.7
 
 MAINTAINER Daniel Palma <danivgy@gmail.com>
 
 RUN apt-get update && apt-get install -y \
-    build-essential python-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr \
+    build-essential python-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils tesseract-ocr \
     flac lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig libpulse-dev
 
-RUN echo "deb http://www.deb-multimedia.org jessie main non-free"  >> /etc/apt/sources.list \
-    && echo "deb-src http://www.deb-multimedia.org jessie main non-free" >> /etc/apt/sources.list \
-    && apt-get update \
-    && apt-get install -y --force-yes deb-multimedia-keyring \
-    && apt-get install -y --force-yes --no-install-recommends ffmpeg=10:2.6.9-dmo1
+RUN echo "deb http://www.deb-multimedia.org buster main non-free"  >> /etc/apt/sources.list \
+    && apt-get update -oAcquire::AllowInsecureRepositories=true \
+    && apt-get install -y --allow-downgrades --allow-remove-essential --allow-change-held-packages \
+    --allow-unauthenticated deb-multimedia-keyring \
+    && apt-get install -y --allow-downgrades --allow-remove-essential --allow-change-held-packages \
+    --allow-unauthenticated ffmpeg=10:4.1.4-dmo1+deb10u1
 
 RUN mkdir -p /code/doc2audiobook
 
 COPY requirements.txt /code/doc2audiobook
 RUN pip install -r /code/doc2audiobook/requirements.txt
 
-# Explicityl upgrade chardet because some conflicts in google libraries relating to the requests package.
-RUN pip install --upgrade chardet
-
 COPY . /code/doc2audiobook
 
 ENV GOOGLE_APPLICATION_CREDENTIALS /.secrets/client_secret.json

diff --git a/doc2audiobook.py b/doc2audiobook.py
@@ -19,11 +19,12 @@ def parse_arguments():
     parser = argparse.ArgumentParser(description='Synthesise text from various documents into high fidelity speech.')
     parser.add_argument('-list-voices', help='List available voices.', action='store_true')
     parser.add_argument('--voice', type=str, help='Voice to use for synthesis. Use -list-voices to see options.')
+    parser.add_argument('--input', type=str, help='Specify singular file input, defaults to whole input directory.')
 
     return parser.parse_args()
 
 
-def process_input_files(input_directory_path: Path,
+def process_input_files(input_path: Path,
                         output_directory_path: Path,
                         client: texttospeech.TextToSpeechClient,
                         voice: texttospeech.types.VoiceSelectionParams,
@@ -32,31 +33,44 @@ def process_input_files(input_directory_path: Path,
     """
     Process every file inside `input_directory_path` and save results in `output_directory_path`.
 
-    :param input_directory_path: Full path to the input directory.
+    :param input_path: Full path to the input directory or a singular file (inside the mapped input directory).
     :param output_directory_path: Full path to the output directory.
     :param client: TextToSpeechClient instance.
     :param voice: VoiceSelectionParams instance.
     :param audio_config: AudioConfig instance.
     :return: None
     """
-    input_files = collect_input_files(input_directory_path=input_directory_path)
-
-    for input_file in input_files:
-        logger.info(f'Processing input file `{input_file}`')
-        output_file = output_directory_path / (input_file.stem + '.mp3')
-        logger.info(f'Target output file is: `{output_file}`')
-
-        text_to_translate = textract.process(str(input_file))
-
-        text_to_mp3(
-            client=client,
-            voice=voice,
-            audio_config=audio_config,
-            text=text_to_translate,
-            output_file_path=output_file
-        )
+    if input_path.is_dir():
+        input_files = collect_input_files(input_directory_path=input_path)
+
+        processed_files = 0
+
+        for input_file in input_files:
+            process_input_file(client=client, voice=voice, audio_config=audio_config, input_file=input_file,
+                               output_directory_path=output_directory_path)
+            processed_files += 1
+        logger.info(f'Files processed from input directory {input_path}: {processed_files}.')
+    else:
+        process_input_file(client=client, voice=voice, audio_config=audio_config, input_file=input_path,
+                           output_directory_path=output_directory_path)
+
+
+def process_input_file(client, voice, audio_config, input_file: Path, output_directory_path):
+    logger.info(f'Processing input file `{input_file}`')
+    output_file = output_directory_path / (input_file.stem + '.mp3')
+    logger.info(f'Target output file is: `{output_file}`')
+
+    text_to_translate = textract.process(str(input_file))
+
+    text_to_mp3(
+        client=client,
+        voice=voice,
+        audio_config=audio_config,
+        text=text_to_translate,
+        output_file_path=output_file
+    )
 
-        logger.info(f'Processing done for input file `{input_file}`')
+    logger.info(f'Processing done for input file `{input_file}`')
 
 
 def main():
@@ -79,8 +93,13 @@ def main():
     voice = texttospeech.types.VoiceSelectionParams(language_code=use_language, name=use_voice)
     audio_config = texttospeech.types.AudioConfig(audio_encoding=texttospeech.enums.AudioEncoding.MP3)
 
+    if args.input:
+        input_path = Path('/data/input') / Path(args.input)
+    else:
+        input_path = Path('/data/input')
+
     process_input_files(
-        input_directory_path=Path('/data/input'), output_directory_path=Path('/data/output'),
+        input_path=input_path, output_directory_path=Path('/data/output'),
         client=client, voice=voice, audio_config=audio_config
     )
 

diff --git a/lib/tts_utils.py b/lib/tts_utils.py
@@ -1,7 +1,7 @@
 import json
 import traceback
 from pathlib import Path
-from typing import List
+from typing import List, Generator
 
 from google.cloud import texttospeech
 
@@ -10,7 +10,7 @@
 logger = get_module_logger(__name__)
 
 
-def collect_input_files(input_directory_path: Path) -> List[Path]:
+def collect_input_files(input_directory_path: Path) -> Generator[Path, None, None]:
     """
     Grab every file inside the input directory.
 
@@ -41,7 +41,7 @@ def text_to_mp3(client: texttospeech.TextToSpeechClient,
 
     output_file_log = output_file_path.parent / (output_file_path.stem + '_log.json')
 
-    with open(output_file_path, 'wb') as output_file:
+    with output_file_path.open(mode='wb') as output_file:
         for (i, text_chunk) in enumerate(lines):
             # skip empty lines
             if len(text_chunk) > 0:

diff --git a/readme.MD b/readme.MD
@@ -38,7 +38,7 @@ Available source formats (from `textract`)
 ## Prerequisites
 
 GCP
-1. Select or create a Cloud Platform project.
+1. Select or create a Google Cloud Platform project.
 2. Enable billing for your project.
 3. Enable the Cloud Text-to-Speech API.
 4. Setup Authentication using a Service Account.
@@ -52,9 +52,9 @@ Host Machine
 ## Build
 
 ```
-$ git clone git@github.com:danthelion/doc2audiobook.git
-$ cd doc2audiobook
-$ docker build -t doc2audiobook .
+git clone git@github.com:danthelion/doc2audiobook.git
+cd doc2audiobook
+docker build -t doc2audiobook .
 ```
 
 ## Run
@@ -63,16 +63,23 @@ Make sure to put your documents in the folder that is mapped to `/data` before r
 
 List available voices
 ```
-$ docker run \
-$ -v /doc2audiobook/data:/data:rw \
-$ -v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
-$ doc2audiobook -list-voices
+docker run \
+-v /doc2audiobook/data:/data:rw \
+-v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
+doc2audiobook -list-voices
 ```
 
-Convert a document to an audiobook using the _en-GB-Standard-C_ voice.
+Convert all documents in the mapped input folder to audiobooks using the _en-GB-Standard-C_ voice.
+```
+docker run \
+-v /doc2audiobook/data:/data:rw \
+-v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
+doc2audiobook --voice en-GB-Standard-C
+```
+Convert a single document in the mapped input folder to an audiobook using the _en-GB-Standard-C_ voice.
+```
+docker run \
+-v /doc2audiobook/data:/data:rw \
+-v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
+doc2audiobook --voice en-GB-Standard-C --input test_input.txt
 ```
-$ docker run \
-$ -v /doc2audiobook/data:/data:rw \
-$ -v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
-$ doc2audiobook --voice en-GB-Standard-C
-```