-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.ini
73 lines (50 loc) · 3.71 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# General settings for the subtitle translation and voice synthesis workflow
[SETTINGS]
# Set to True to skip translating the subtitles. If so, you can ignore the language variables
skip_translation = False
# Set to True to skip synthesizing the audio. For example, if you already synthesized it and are only testing
skip_synthesize = False
# Set to True to stop the program after translating the subtitles.
# For example, if you want to manually review the resulting subtitles before synthesizing the audio.
# Note: to resume the process afterwards, you must set this back to False and set skip_translation to True
stop_after_translation = False
# The BCP-47 language code of the original text (for example, en-US)
original_language = en-US
# Applies to DeepL translations only - whether the translation should use more or less formal language
# Possible Values: default | more | less
formality_preference = default
# The format/codec of the final audio file produced by the program
# Possible Values: mp3 | aac | wav
output_format = aac
# Must be a codec from 'Supported Audio Encodings' section here: https://cloud.google.com/speech-to-text/docs/encoding#audio-encodings
# This determines the codec of the audio returned by the API, not the one produced by the program (see output_format)!
# You probably shouldn't change this, as it might not work otherwise
# NOTE(review): the link above points to the Speech-to-Text docs although this setting concerns synthesized (TTS) audio - confirm it is the intended reference
synth_audio_encoding = MP3
# The native sample rate, in Hz, of the voice audio provided by the TTS service
# This is usually 24 kHz (24000), but some services like Azure offer higher quality audio at 48 kHz (48000)
# Enter digits only - no commas, units, or other characters
# NOTE(review): currently set to 48000 - confirm this matches the TTS service and voice actually in use
synth_sample_rate = 48000
# Setting this to True will drastically improve the quality of the final result, BUT see the note below
# Note: when True, instead of just stretching the audio clips to fit, the program has the API generate new audio clips with adjusted speaking rates
# This can't be done on the first pass because the length of the audio clips is unknown until they have been generated
# NOTE(review): presumably this means each clip is synthesized twice, increasing API usage - confirm
two_pass_voice_synth = True
# On the second pass, each audio clip will be extremely close to the desired length, but slightly off
# Set this to True to stretch the second-pass clips anyway so they match exactly, down to the millisecond
# However, this will degrade the voice and make it sound similar to a 1-pass result
force_stretch_with_twopass = False
# Azure only: sets the exact pause, in milliseconds, that the TTS voice makes after a period between sentences
# Set it to default (without quotes) to keep the service default, which is quite slow. The author finds 80 ms works pretty well
# Note: changing this from default adds about 60 characters per line to the total Azure character usage count
# Possible values: default | any integer (milliseconds)
azure_sentence_pause = default
# Adds a silence buffer between spoken clips, but keeps the speech "centered" at the right spot so it stays in sync
# > To be clear: the total length of the audio file remains the same; each spoken clip is shrunk within it
# Useful if the start and end timings in your subtitles file butt right up against each other
# Note: the buffer is applied both before and after each clip, so the total extra silence between two clips is 2x this value
# Warning: setting this too high could result in the TTS speaking extremely fast to fit into the remaining clip duration
# > Around 25 - 50 milliseconds is a good starting point
add_line_buffer_milliseconds = 25
# If the combined length (in characters) of two adjacent subtitle lines is below this amount, and one starts at the same time the other ends, the lines are combined
# This should improve the speech synthesis by reducing unnatural splits in spoken sentences
# Setting this to zero or a low number will effectively disable it
# NOTE(review): the current value of 5 is low enough that combining is probably effectively disabled - confirm this is intended
combine_subtitles_max_chars = 5
# Debug mode: mostly prevents the program from deleting files in the working directory, and also generates files for each audio step
debug_mode = False