Skip to content

Commit

Permalink
Merge pull request #5 from danthelion/infra_updates
Browse files Browse the repository at this point in the history
Python 3.7 base & singular input parameter
  • Loading branch information
danthelion authored Aug 26, 2019
2 parents ce42023 + 803fc03 commit 700714b
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 47 deletions.
18 changes: 8 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
FROM python:3.6
FROM python:3.7

MAINTAINER Daniel Palma <danivgy@gmail.com>

RUN apt-get update && apt-get install -y \
build-essential python-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr \
build-essential python-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils tesseract-ocr \
flac lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig libpulse-dev

RUN echo "deb http://www.deb-multimedia.org jessie main non-free" >> /etc/apt/sources.list \
&& echo "deb-src http://www.deb-multimedia.org jessie main non-free" >> /etc/apt/sources.list \
&& apt-get update \
&& apt-get install -y --force-yes deb-multimedia-keyring \
&& apt-get install -y --force-yes --no-install-recommends ffmpeg=10:2.6.9-dmo1
RUN echo "deb http://www.deb-multimedia.org buster main non-free" >> /etc/apt/sources.list \
&& apt-get update -oAcquire::AllowInsecureRepositories=true \
&& apt-get install -y --allow-downgrades --allow-remove-essential --allow-change-held-packages \
--allow-unauthenticated deb-multimedia-keyring \
&& apt-get install -y --allow-downgrades --allow-remove-essential --allow-change-held-packages \
--allow-unauthenticated ffmpeg=10:4.1.4-dmo1+deb10u1

RUN mkdir -p /code/doc2audiobook

COPY requirements.txt /code/doc2audiobook
RUN pip install -r /code/doc2audiobook/requirements.txt

# Explicitly upgrade chardet because of conflicts in the Google libraries relating to the requests package.
RUN pip install --upgrade chardet

COPY . /code/doc2audiobook

ENV GOOGLE_APPLICATION_CREDENTIALS /.secrets/client_secret.json
Expand Down
59 changes: 39 additions & 20 deletions doc2audiobook.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ def parse_arguments():
parser = argparse.ArgumentParser(description='Synthesise text from various documents into high fidelity speech.')
parser.add_argument('-list-voices', help='List available voices.', action='store_true')
parser.add_argument('--voice', type=str, help='Voice to use for synthesis. Use -list-voices to see options.')
parser.add_argument('--input', type=str, help='Specify singular file input, defaults to whole input directory.')

return parser.parse_args()


def process_input_files(input_directory_path: Path,
def process_input_files(input_path: Path,
output_directory_path: Path,
client: texttospeech.TextToSpeechClient,
voice: texttospeech.types.VoiceSelectionParams,
Expand All @@ -32,31 +33,44 @@ def process_input_files(input_directory_path: Path,
"""
Process the input at `input_path` (every file inside a directory, or a single file) and save results in `output_directory_path`.
:param input_directory_path: Full path to the input directory.
:param input_path: Full path to the input directory or a singular file (inside the mapped input directory).
:param output_directory_path: Full path to the output directory.
:param client: TextToSpeechClient instance.
:param voice: VoiceSelectionParams instance.
:param audio_config: AudioConfig instance.
:return: None
"""
input_files = collect_input_files(input_directory_path=input_directory_path)

for input_file in input_files:
logger.info(f'Processing input file `{input_file}`')
output_file = output_directory_path / (input_file.stem + '.mp3')
logger.info(f'Target output file is: `{output_file}`')

text_to_translate = textract.process(str(input_file))

text_to_mp3(
client=client,
voice=voice,
audio_config=audio_config,
text=text_to_translate,
output_file_path=output_file
)
if input_path.is_dir():
input_files = collect_input_files(input_directory_path=input_path)

processed_files = 0

for input_file in input_files:
process_input_file(client=client, voice=voice, audio_config=audio_config, input_file=input_file,
output_directory_path=output_directory_path)
processed_files += 1
logger.info(f'Files processed from input directory {input_path}: {processed_files}.')
else:
process_input_file(client=client, voice=voice, audio_config=audio_config, input_file=input_path,
output_directory_path=output_directory_path)


def process_input_file(client, voice, audio_config, input_file: Path,
                       output_directory_path: Path) -> None:
    """
    Convert a single input document into an `.mp3` file inside `output_directory_path`.

    The text is extracted from `input_file` with `textract` and handed to `text_to_mp3`
    together with the target output path.

    :param client: TextToSpeechClient instance, passed through to `text_to_mp3`.
    :param voice: VoiceSelectionParams instance, passed through to `text_to_mp3`.
    :param audio_config: AudioConfig instance, passed through to `text_to_mp3`.
    :param input_file: Full path to the document to process.
    :param output_directory_path: Full path to the output directory.
    :return: None
    """
    logger.info(f'Processing input file `{input_file}`')
    # The output keeps the input's base name and swaps the extension for `.mp3`.
    output_file = output_directory_path / (input_file.stem + '.mp3')
    logger.info(f'Target output file is: `{output_file}`')

    # textract is given the path as a plain string rather than a Path object.
    text_to_translate = textract.process(str(input_file))

    text_to_mp3(
        client=client,
        voice=voice,
        audio_config=audio_config,
        text=text_to_translate,
        output_file_path=output_file
    )

    # Log completion exactly once per file.
    logger.info(f'Processing done for input file `{input_file}`')


def main():
Expand All @@ -79,8 +93,13 @@ def main():
voice = texttospeech.types.VoiceSelectionParams(language_code=use_language, name=use_voice)
audio_config = texttospeech.types.AudioConfig(audio_encoding=texttospeech.enums.AudioEncoding.MP3)

if args.input:
input_path = Path('/data/input') / Path(args.input)
else:
input_path = Path('/data/input')

process_input_files(
input_directory_path=Path('/data/input'), output_directory_path=Path('/data/output'),
input_path=input_path, output_directory_path=Path('/data/output'),
client=client, voice=voice, audio_config=audio_config
)

Expand Down
6 changes: 3 additions & 3 deletions lib/tts_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import traceback
from pathlib import Path
from typing import List
from typing import List, Generator

from google.cloud import texttospeech

Expand All @@ -10,7 +10,7 @@
logger = get_module_logger(__name__)


def collect_input_files(input_directory_path: Path) -> List[Path]:
def collect_input_files(input_directory_path: Path) -> Generator[Path, None, None]:
"""
Grab every file inside the input directory.
Expand Down Expand Up @@ -41,7 +41,7 @@ def text_to_mp3(client: texttospeech.TextToSpeechClient,

output_file_log = output_file_path.parent / (output_file_path.stem + '_log.json')

with open(output_file_path, 'wb') as output_file:
with output_file_path.open(mode='wb') as output_file:
for (i, text_chunk) in enumerate(lines):
# skip empty lines
if len(text_chunk) > 0:
Expand Down
35 changes: 21 additions & 14 deletions readme.MD
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Available source formats (from `textract`)
## Prerequisites

GCP
1. Select or create a Cloud Platform project.
1. Select or create a Google Cloud Platform project.
2. Enable billing for your project.
3. Enable the Cloud Text-to-Speech API.
4. Setup Authentication using a Service Account.
Expand All @@ -52,9 +52,9 @@ Host Machine
## Build

```
$ git clone git@github.com:danthelion/doc2audiobook.git
$ cd doc2audiobook
$ docker build -t doc2audiobook .
git clone git@github.com:danthelion/doc2audiobook.git
cd doc2audiobook
docker build -t doc2audiobook .
```

## Run
Expand All @@ -63,16 +63,23 @@ Make sure to put your documents in the folder that is mapped to `/data` before r

List available voices
```
$ docker run \
$ -v /doc2audiobook/data:/data:rw \
$ -v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
$ doc2audiobook -list-voices
docker run \
-v /doc2audiobook/data:/data:rw \
-v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
doc2audiobook -list-voices
```

Convert a document to an audiobook using the _en-GB-Standard-C_ voice.
Convert all documents in the mapped input folder to audiobooks using the _en-GB-Standard-C_ voice.
```
docker run \
-v /doc2audiobook/data:/data:rw \
-v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
doc2audiobook --voice en-GB-Standard-C
```
Convert a single document in the mapped input folder to an audiobook using the _en-GB-Standard-C_ voice.
```
docker run \
-v /doc2audiobook/data:/data:rw \
-v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
doc2audiobook --voice en-GB-Standard-C --input test_input.txt
```
$ docker run \
$ -v /doc2audiobook/data:/data:rw \
$ -v /doc2audiobook/.secrets/client_secret.json:/.secrets/client_secret.json:ro \
$ doc2audiobook --voice en-GB-Standard-C
```

0 comments on commit 700714b

Please sign in to comment.