Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthesizerの構造改革をする #685

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/voicevox_core/src/__internal.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod doctest_fixtures;
pub mod interp;

// VOICEVOX CORE内のラッパー向けの実装
// FIXME: 要議論: https://github.com/VOICEVOX/voicevox_core/issues/595
Expand Down
2 changes: 1 addition & 1 deletion crates/voicevox_core/src/__internal/doctest_fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{AccelerationMode, InitializeOptions, OpenJtalk, Synthesizer, VoiceMo

pub async fn synthesizer_with_sample_voice_model(
open_jtalk_dic_dir: impl AsRef<Path>,
) -> anyhow::Result<Synthesizer> {
) -> anyhow::Result<Synthesizer<Arc<OpenJtalk>>> {
let syntesizer = Synthesizer::new(
Arc::new(OpenJtalk::new(open_jtalk_dic_dir).unwrap()),
&InitializeOptions {
Expand Down
46 changes: 46 additions & 0 deletions crates/voicevox_core/src/__internal/interp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use easy_ext::ext;
use ndarray::{Array1, ArrayView1, ArrayView2};

use crate::{StyleId, Synthesizer};

// Extension trait `PerformInference` for `Synthesizer<()>`, generated by
// `easy_ext::ext`. It lives under `__internal` and exposes three inference
// entry points (`predict_duration`, `predict_intonation`, `decode`).
//
// Every method here forwards its arguments, unchanged and in the same order,
// to a method of the same name called on `self`.
//
// NOTE(review): this presumably relies on `Synthesizer` having inherent
// methods with these names, which Rust's method resolution picks ahead of
// trait methods. If an inherent method is ever removed or renamed, the
// corresponding call below would resolve to this trait method and recurse
// forever — confirm the inherent methods exist and keep the names in sync.
#[ext(PerformInference)]
impl Synthesizer<()> {
/// Forwards `phoneme_list` and `style_id` to `self.predict_duration` and
/// returns its `crate::Result<Vec<f32>>` as-is.
pub fn predict_duration(
&self,
phoneme_list: Array1<i64>,
style_id: StyleId,
) -> crate::Result<Vec<f32>> {
self.predict_duration(phoneme_list, style_id)
}

/// Forwards the six `Array1<i64>` inputs and `style_id` to
/// `self.predict_intonation` and returns its `crate::Result<Vec<f32>>` as-is.
///
/// The argument order must match the callee exactly: all six arrays share
/// the same type, so the compiler cannot catch a transposition.
// The long parameter list is kept as-is; the clippy lint is silenced here
// instead of bundling the arrays into a struct.
#[allow(clippy::too_many_arguments)]
pub fn predict_intonation(
&self,
vowel_phoneme_list: Array1<i64>,
consonant_phoneme_list: Array1<i64>,
start_accent_list: Array1<i64>,
end_accent_list: Array1<i64>,
start_accent_phrase_list: Array1<i64>,
end_accent_phrase_list: Array1<i64>,
style_id: StyleId,
) -> crate::Result<Vec<f32>> {
self.predict_intonation(
vowel_phoneme_list,
consonant_phoneme_list,
start_accent_list,
end_accent_list,
start_accent_phrase_list,
end_accent_phrase_list,
style_id,
)
}

/// Forwards the borrowed `f0` (1-D view) and `phoneme` (2-D view) arrays
/// together with `style_id` to `self.decode` and returns its
/// `crate::Result<Vec<f32>>` as-is.
pub fn decode(
&self,
f0: ArrayView1<'_, f32>,
phoneme: ArrayView2<'_, f32>,
style_id: StyleId,
) -> crate::Result<Vec<f32>> {
self.decode(f0, phoneme, style_id)
}
}
10 changes: 4 additions & 6 deletions crates/voicevox_core/src/engine/acoustic_feature_extractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use std::collections::HashMap;

#[rustfmt::skip]
const PHONEME_LIST: &[&str] = &[
const PHONEME_LIST: [&str; 45] = [
"pau",
"A",
"E",
Expand Down Expand Up @@ -70,9 +70,7 @@ pub struct OjtPhoneme {
}

impl OjtPhoneme {
pub fn num_phoneme() -> usize {
PHONEME_MAP.len()
}
pub(crate) const NUM_PHONEME: usize = PHONEME_LIST.len();

pub fn space_phoneme() -> String {
"pau".into()
Expand Down Expand Up @@ -134,8 +132,8 @@ mod tests {
}

#[rstest]
fn test_num_phoneme_works() {
assert_eq!(OjtPhoneme::num_phoneme(), 45);
fn test_phoneme_map_has_enough_elements() {
assert_eq!(OjtPhoneme::NUM_PHONEME, PHONEME_MAP.len());
}

#[rstest]
Expand Down
3 changes: 1 addition & 2 deletions crates/voicevox_core/src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ mod kana_parser;
mod model;
mod mora_list;
mod open_jtalk;
mod synthesis_engine;

use super::*;

pub use self::acoustic_feature_extractor::*;
pub use self::full_context_label::*;
pub use self::kana_parser::*;
pub use self::model::*;
pub(crate) use self::mora_list::mora2text;
pub use self::open_jtalk::OpenJtalk;
pub use self::synthesis_engine::*;
75 changes: 25 additions & 50 deletions crates/voicevox_core/src/engine/open_jtalk.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use std::io::Write;
use std::{
path::{Path, PathBuf},
sync::Mutex,
};
use std::{path::Path, sync::Mutex};

use anyhow::anyhow;
use tempfile::NamedTempFile;
Expand All @@ -22,7 +19,7 @@ pub(crate) struct OpenjtalkFunctionError {
/// テキスト解析器としてのOpen JTalk。
pub struct OpenJtalk {
resources: Mutex<Resources>,
dict_dir: Option<PathBuf>,
dict_dir: String,
}

struct Resources {
Expand All @@ -35,37 +32,34 @@ struct Resources {
unsafe impl Send for Resources {}

impl OpenJtalk {
// FIXME: この関数は廃止し、`Synthesizer`は`Option<OpenJtalk>`という形でこの構造体を持つ
pub fn new_without_dic() -> Self {
Self {
resources: Mutex::new(Resources {
mecab: ManagedResource::initialize(),
njd: ManagedResource::initialize(),
jpcommon: ManagedResource::initialize(),
}),
dict_dir: None,
}
}
pub fn new(open_jtalk_dict_dir: impl AsRef<Path>) -> crate::result::Result<Self> {
let mut s = Self::new_without_dic();
s.load(open_jtalk_dict_dir).map_err(|()| {
// FIXME: 「システム辞書を読もうとしたけど読めなかった」というエラーをちゃんと用意する
ErrorRepr::NotLoadedOpenjtalkDict
})?;
Ok(s)
let mut resources = Resources {
mecab: ManagedResource::initialize(),
njd: ManagedResource::initialize(),
jpcommon: ManagedResource::initialize(),
};
let dict_dir = open_jtalk_dict_dir
.as_ref()
.to_str()
.unwrap_or_else(|| todo!("Rust APIでは`Utf8Path`で受けるようにする"))
.to_owned();

let result = resources.mecab.load(&dict_dir);
if !result {
return Err(ErrorRepr::LoadOpenjtalkSystemDic(dict_dir).into());
}

Ok(Self {
resources: resources.into(),
dict_dir,
})
}

// 先に`load`を呼ぶ必要がある。
/// ユーザー辞書を設定する。
///
/// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。
pub fn use_user_dict(&self, user_dict: &UserDict) -> crate::result::Result<()> {
let dict_dir = self
.dict_dir
.as_ref()
.and_then(|dict_dir| dict_dir.to_str())
.ok_or(ErrorRepr::NotLoadedOpenjtalkDict)?;

// ユーザー辞書用のcsvを作成
let mut temp_csv = NamedTempFile::new().map_err(|e| ErrorRepr::UseUserDict(e.into()))?;
temp_csv
Expand All @@ -80,7 +74,7 @@ impl OpenJtalk {
mecab_dict_index(&[
"mecab-dict-index",
"-d",
dict_dir,
&self.dict_dir,
"-u",
temp_dict_path.to_str().unwrap(),
"-f",
Expand All @@ -93,7 +87,8 @@ impl OpenJtalk {

let Resources { mecab, .. } = &mut *self.resources.lock().unwrap();

let result = mecab.load_with_userdic(Path::new(dict_dir), Some(Path::new(&temp_dict_path)));
let result =
mecab.load_with_userdic(self.dict_dir.as_ref(), Some(Path::new(&temp_dict_path)));

if !result {
return Err(ErrorRepr::UseUserDict(anyhow!("辞書のコンパイルに失敗しました")).into());
Expand Down Expand Up @@ -150,26 +145,6 @@ impl OpenJtalk {
})
}
}

fn load(&mut self, open_jtalk_dict_dir: impl AsRef<Path>) -> std::result::Result<(), ()> {
let result = self
.resources
.lock()
.unwrap()
.mecab
.load(open_jtalk_dict_dir.as_ref());
if result {
self.dict_dir = Some(open_jtalk_dict_dir.as_ref().into());
Ok(())
} else {
self.dict_dir = None;
Err(())
}
}

pub fn dict_loaded(&self) -> bool {
self.dict_dir.is_some()
}
}

#[cfg(test)]
Expand Down
Loading
Loading