diff --git a/3rdparty/voicevox/.gitignore b/3rdparty/voicevox/.gitignore
new file mode 100644
index 000000000..8cb8e60a3
--- /dev/null
+++ b/3rdparty/voicevox/.gitignore
@@ -0,0 +1,6 @@
+build
+dict
+lib
+node_scripts/voicevox_engine
+requirements.txt
+!.gitignore
diff --git a/3rdparty/voicevox/CMakeLists.txt b/3rdparty/voicevox/CMakeLists.txt
new file mode 100644
index 000000000..631126026
--- /dev/null
+++ b/3rdparty/voicevox/CMakeLists.txt
@@ -0,0 +1,73 @@
+cmake_minimum_required(VERSION 2.8.3)
+project(voicevox)
+
+find_package(catkin REQUIRED
+  COMPONENTS
+  catkin_virtualenv
+)
+
+catkin_python_setup()
+
+set(INSTALL_DIR ${PROJECT_SOURCE_DIR})
+
+catkin_package()
+
+catkin_generate_virtualenv(
+  INPUT_REQUIREMENTS requirements.in
+  PYTHON_INTERPRETER python3
+  USE_SYSTEM_PACKAGES FALSE
+)
+
+add_custom_command(
+  OUTPUT voicevox_model_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.model
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+
+add_custom_command(
+  OUTPUT voicevox_core_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.core
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+add_custom_command(
+  OUTPUT voicevox_engine_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.engine
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+add_custom_command(
+  OUTPUT open_jtalk_dic_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.open_jtalk_dic
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+add_custom_target(all_installed ALL DEPENDS
+  voicevox_model_installed
+  voicevox_core_installed
+  voicevox_engine_installed
+  open_jtalk_dic_installed)
+
+file(GLOB NODE_SCRIPTS_FILES node_scripts/*.py)
+catkin_install_python(
+  PROGRAMS ${NODE_SCRIPTS_FILES}
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}/node_scripts/
+)
+install(DIRECTORY node_scripts/voicevox_engine
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/catkin_virtualenv_scripts/
+  USE_SOURCE_PERMISSIONS)
+install(DIRECTORY launch dict
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+  USE_SOURCE_PERMISSIONS)
+install(PROGRAMS bin/text2wave
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin)
+
+install(DIRECTORY
+  ${INSTALL_DIR}/lib
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+  USE_SOURCE_PERMISSIONS)
diff --git a/3rdparty/voicevox/Makefile b/3rdparty/voicevox/Makefile
new file mode 100644
index 000000000..a2c90f3bb
--- /dev/null
+++ b/3rdparty/voicevox/Makefile
@@ -0,0 +1,11 @@
+all:
+	make -f Makefile.core
+	make -f Makefile.model
+	make -f Makefile.engine
+	make -f Makefile.open_jtalk_dic
+clean:
+	make -f Makefile.core clean
+	make -f Makefile.model clean
+	make -f Makefile.engine clean
+	make -f Makefile.open_jtalk_dic clean
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.core b/3rdparty/voicevox/Makefile.core
new file mode 100644
index 000000000..bac21eb0f
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.core
@@ -0,0 +1,28 @@
+# -*- makefile -*-
+
+all: installed.voicevox_core
+
+VERSION = 0.11.4
+FILENAME = core.zip
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/VOICEVOX/voicevox_core/releases/download/$(VERSION)/core.zip"
+SOURCE_DIR = build/core
+UNPACK_CMD = unzip
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.voicevox_core: $(SOURCE_DIR)/unpacked
+	mkdir -p $(INSTALL_DIR)/lib
+	cp build/core/lib*.so $(INSTALL_DIR)/lib/
+	cp build/core/*.bin $(INSTALL_DIR)/lib/
+	cp build/core/metas.json $(INSTALL_DIR)/lib/metas.json
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/lib
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.engine b/3rdparty/voicevox/Makefile.engine
new file mode 100644
index 000000000..b3d6899fa
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.engine
@@ -0,0 +1,24 @@
+# -*- makefile -*-
+
+all: installed.voicevox_engine
+
+VERSION = 0.11.4
+FILENAME = $(VERSION).tar.gz
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/VOICEVOX/voicevox_engine/archive/refs/tags/$(FILENAME)"
+SOURCE_DIR = build/voicevox_engine-$(VERSION)
+UNPACK_CMD = tar xvzf
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/voicevox_engine.tar.gz.md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.voicevox_engine: $(SOURCE_DIR)/unpacked
+	cp -r build/voicevox_engine-$(VERSION) $(INSTALL_DIR)/node_scripts/voicevox_engine
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/node_scripts/voicevox_engine
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.model b/3rdparty/voicevox/Makefile.model
new file mode 100644
index 000000000..004028105
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.model
@@ -0,0 +1,26 @@
+# -*- makefile -*-
+
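+# Note: despite the "model" name, this Makefile fetches the ONNX Runtime
+# shared libraries that voicevox_core needs at runtime.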
+all: installed.voicevox_model
+
+VERSION = 1.10.0
+FILENAME = onnxruntime-linux-x64-$(VERSION).tgz
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/microsoft/onnxruntime/releases/download/v$(VERSION)/$(FILENAME)"
+SOURCE_DIR = build/onnxruntime-linux-x64-$(VERSION)
+UNPACK_CMD = tar xvzf
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.voicevox_model: $(SOURCE_DIR)/unpacked
+	mkdir -p $(INSTALL_DIR)/lib
+	cp build/onnxruntime-linux-x64-$(VERSION)/lib/* $(INSTALL_DIR)/lib
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/lib
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.open_jtalk_dic b/3rdparty/voicevox/Makefile.open_jtalk_dic
new file mode 100644
index 000000000..646921159
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.open_jtalk_dic
@@ -0,0 +1,25 @@
+# -*- makefile -*-
+
+all: installed.open_jtalk_dic
+
+VERSION = 1.11.1
+FILENAME = open_jtalk_dic_utf_8-1.11.tar.gz
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/r9y9/open_jtalk/releases/download/v$(VERSION)/$(FILENAME)"
+SOURCE_DIR = build/open_jtalk_dic_utf_8-1.11
+UNPACK_CMD = tar xvzf
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/open_jtalk_dic.tar.gz.md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.open_jtalk_dic: $(SOURCE_DIR)/unpacked
+	mkdir -p $(INSTALL_DIR)/dict
+	cp -r build/open_jtalk_dic_utf_8-1.11 $(INSTALL_DIR)/dict
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/dict/open_jtalk_dic_utf_8-1.11
+	rm -rf build
diff --git a/3rdparty/voicevox/README.md b/3rdparty/voicevox/README.md
new file mode 100644
index 000000000..d5602db71
--- /dev/null
+++ b/3rdparty/voicevox/README.md
@@ -0,0 +1,103 @@
+# voicevox
+
+ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis)
+
+## Terms of Use
+
+[VOICEVOX](https://voicevox.hiroshiba.jp/) is basically free to use, but please check its terms of use below.
+
+[Terms of Use](https://voicevox.hiroshiba.jp/term)
+
+Each voice synthesis character also has its own terms. Please use this package in accordance with them.
+
+| Character name | term link |
+| ---- | ---- |
+| 四国めたん | https://zunko.jp/con_ongen_kiyaku.html |
+| ずんだもん | https://zunko.jp/con_ongen_kiyaku.html |
+| 春日部つむぎ | https://tsukushinyoki10.wixsite.com/ktsumugiofficial/利用規約 |
+| 波音リツ | http://canon-voice.com/kiyaku.html |
+| 雨晴はう | https://amehau.com/?page_id=225 |
+| 玄野武宏 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
+| 白上虎太郎 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
+| 青山龍星 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
+| 冥鳴ひまり | https://kotoran8zunzun.wixsite.com/my-site/利用規約 |
+| 九州そら | https://zunko.jp/con_ongen_kiyaku.html |
+
+## Installation
+
+Build this package.
+
+```bash
+cd /path/to/catkin_workspace
+catkin build voicevox
+```
+
+## Usage
+
+### Launch sound_play with VOICEVOX Text-to-Speech
+
+```bash
+roslaunch voicevox voicevox_texttospeech.launch
+```
+
+
+### Say something
+
+#### For python users
+
+```python
+import rospy
+from sound_play.libsoundplay import SoundClient
+
+rospy.init_node('say_node')
+
+client = SoundClient(sound_action='robotsound_jp', sound_topic='robotsound_jp')
+
+client.say('こんにちは', voice='四国めたん-あまあま')
+```
+
+You can change the voice by passing a different voice name.
+You can also specify the speaker id instead; an example follows the table below.
+See the following table for details.
+
+| speaker_id | voice_name |
+| ---- | ---- |
+| 0 | 四国めたん-あまあま |
+| 1 | ずんだもん-あまあま |
+| 2 | 四国めたん-ノーマル |
+| 3 | ずんだもん-ノーマル |
+| 4 | 四国めたん-セクシー |
+| 5 | ずんだもん-セクシー |
+| 6 | 四国めたん-ツンツン |
+| 7 | ずんだもん-ツンツン |
+| 8 | 春日部つむぎ-ノーマル |
+| 9 | 波音リツ-ノーマル |
+| 10 | 雨晴はう-ノーマル |
+| 11 | 玄野武宏-ノーマル |
+| 12 | 白上虎太郎-ノーマル |
+| 13 | 青山龍星-ノーマル |
+| 14 | 冥鳴ひまり-ノーマル |
+| 15 | 九州そら-あまあま |
+| 16 | 九州そら-ノーマル |
+| 17 | 九州そら-セクシー |
+| 18 | 九州そら-ツンツン |
+| 19 | 九州そら-ささやき |
+
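+You can also pass the speaker id directly, e.g. `client.say('こんにちは', voice='3')` speaks as ずんだもん-ノーマル.
+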
+#### For roseus users
+
+```
+$ roseus
+(load "package://pr2eus/speak.l")
+
+(ros::roseus "say_node")
+
+(speak "JSKへようこそ。" :lang "波音リツ" :wait t :topic-name "robotsound_jp")
+```
+
+### Tips
+
+Normally, the speech synthesis server starts up at `http://localhost:50021`.
+You can change the host and port by setting the `VOICEVOX_TEXTTOSPEECH_URL` and `VOICEVOX_TEXTTOSPEECH_PORT` environment variables.
+
+You can also set the default character by setting `VOICEVOX_DEFAULT_SPEAKER_ID`.
+Please refer to [Say something](#say-something) for the speaker ids.
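+
+For example, to make ずんだもん-ノーマル the default voice (a minimal sketch; `3` is the speaker id from the table above):
+
+```bash
+export VOICEVOX_DEFAULT_SPEAKER_ID=3
+roslaunch voicevox voicevox_texttospeech.launch
+```
+
+If the synthesis server runs elsewhere, also export `VOICEVOX_TEXTTOSPEECH_URL` (host name) and `VOICEVOX_TEXTTOSPEECH_PORT` before launching.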
diff --git a/3rdparty/voicevox/bin/text2wave b/3rdparty/voicevox/bin/text2wave
new file mode 100755
index 000000000..ca9630f39
--- /dev/null
+++ b/3rdparty/voicevox/bin/text2wave
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import argparse
+import os
+import shutil
+import sys
+
+import requests
+
+from voicevox.filecheck_utils import checksum_md5
+from voicevox.filecheck_utils import get_cache_dir
+
+
+speaker_id_to_name = {
+    '0': '四国めたん-あまあま',
+    '1': 'ずんだもん-あまあま',
+    '2': '四国めたん-ノーマル',
+    '3': 'ずんだもん-ノーマル',
+    '4': '四国めたん-セクシー',
+    '5': 'ずんだもん-セクシー',
+    '6': '四国めたん-ツンツン',
+    '7': 'ずんだもん-ツンツン',
+    '8': '春日部つむぎ-ノーマル',
+    '9': '波音リツ-ノーマル',
+    '10': '雨晴はう-ノーマル',
+    '11': '玄野武宏-ノーマル',
+    '12': '白上虎太郎-ノーマル',
+    '13': '青山龍星-ノーマル',
+    '14': '冥鳴ひまり-ノーマル',
+    '15': '九州そら-あまあま',
+    '16': '九州そら-ノーマル',
+    '17': '九州そら-セクシー',
+    '18': '九州そら-ツンツン',
+    '19': '九州そら-ささやき',
+}
+
+name_to_speaker_id = {
+    b: a for a, b in speaker_id_to_name.items()
+}
+
+
+DEFAULT_SPEAKER_ID = os.environ.get(
+    'VOICEVOX_DEFAULT_SPEAKER_ID', '2')
+if not DEFAULT_SPEAKER_ID.isdigit():
+    DEFAULT_SPEAKER_ID = name_to_speaker_id[DEFAULT_SPEAKER_ID]
+VOICEVOX_TEXTTOSPEECH_URL = os.environ.get(
+    'VOICEVOX_TEXTTOSPEECH_URL', 'localhost')
+VOICEVOX_TEXTTOSPEECH_PORT = os.environ.get(
+    'VOICEVOX_TEXTTOSPEECH_PORT', 50021)
+cache_enabled = os.environ.get(
+    'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_ENABLED', True)
+cache_enabled = cache_enabled is True \
+    or cache_enabled == 'true'  # for launch env tag.
+
+
+def determine_voice_name(voice_name):
+    if len(voice_name) == 0:
+        speaker_id = DEFAULT_SPEAKER_ID
+    else:
+        if voice_name.isdigit():
+            if voice_name in speaker_id_to_name:
+                speaker_id = voice_name
+            else:
+                print(
+                    '[Text2Wave] Invalid speaker_id ({}). Use default voice.'
+                    .format(speaker_id_to_name[DEFAULT_SPEAKER_ID]))
+                speaker_id = DEFAULT_SPEAKER_ID
+        else:
+            candidates = list(filter(
+                lambda name: name.startswith(voice_name),
+                name_to_speaker_id))
+            if candidates:
+                speaker_id = name_to_speaker_id[candidates[0]]
+            else:
+                print('[Text2Wave] Invalid voice_name ({}). Use default voice.'
+                      .format(speaker_id_to_name[DEFAULT_SPEAKER_ID]))
+                speaker_id = DEFAULT_SPEAKER_ID
+    print('[Text2Wave] Speak using voice_name ({})..'.format(
+        speaker_id_to_name[speaker_id]))
+    return speaker_id
+
+
+def convert_to_str(x):
+    if isinstance(x, str):
+        pass
+    elif isinstance(x, bytes):
+        x = x.decode('utf-8')
+    else:
+        raise ValueError(
+            'Invalid input x type: {}'
+            .format(type(x)))
+    return x
+
+
+def request_synthesis(
+        sentence, output_path, speaker_id='1'):
+    headers = {'accept': 'application/json'}
+
+    sentence = convert_to_str(sentence)
+    speaker_id = convert_to_str(speaker_id)
+    params = {
+        'speaker': speaker_id,
+        'text': sentence,
+    }
+    base_url = 'http://{}:{}'.format(
+        VOICEVOX_TEXTTOSPEECH_URL,
+        VOICEVOX_TEXTTOSPEECH_PORT)
+    url = '{}/audio_query'.format(base_url)
+    response = requests.post(url, headers=headers,
+                             params=params)
+    data = response.json()
+    url = '{}/synthesis'.format(base_url)
+    response = requests.post(url, headers=headers,
+                             params=params,
+                             json=data)
+    with open(output_path, 'wb') as f:
+        f.write(response.content)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='')
+    parser.add_argument('-eval', '--evaluate')
+    parser.add_argument('-o', '--output')
+    parser.add_argument('text')
+    args = parser.parse_args()
+
+    with open(args.text, 'rb') as f:
+        speech_text = f.readline()
+
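+    # sound_play invokes this script like festival's text2wave and passes the
+    # voice name as "-eval (voice_name)", so strip the surrounding parentheses.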
+    speaker_id = determine_voice_name(
+        args.evaluate.lstrip('(').rstrip(')'))
+
+    if cache_enabled:
+        cache_dir = get_cache_dir()
+        md5 = checksum_md5(args.text)
+        cache_filename = os.path.join(
+            cache_dir,
+            '--'.join([md5, speaker_id])
+            + '.wav')
+        if os.path.exists(cache_filename):
+            print('[Text2Wave] Using cached sound file ({}) for {}'
+                  .format(cache_filename, speech_text.decode('utf-8')))
+            shutil.copy(cache_filename, args.output)
+            sys.exit(0)
+
+    request_synthesis(speech_text,
+                      args.output,
+                      speaker_id)
+    if cache_enabled:
+        text_cache_filename = os.path.splitext(cache_filename)[0] + '.txt'
+        shutil.copy(args.text, text_cache_filename)
+        shutil.copy(args.output, cache_filename)
diff --git a/3rdparty/voicevox/launch/voicevox_texttospeech.launch b/3rdparty/voicevox/launch/voicevox_texttospeech.launch
new file mode 100644
index 000000000..d42d1961f
--- /dev/null
+++ b/3rdparty/voicevox/launch/voicevox_texttospeech.launch
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/3rdparty/voicevox/md5sum/core.zip.md5sum b/3rdparty/voicevox/md5sum/core.zip.md5sum
new file mode 100644
index 000000000..f5b5ac439
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/core.zip.md5sum
@@ -0,0 +1 @@
+96149a074d8ee093039321a88e00076d core.zip
diff --git a/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum b/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum
new file mode 100644
index 000000000..817b68d89
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum
@@ -0,0 +1 @@
+9ca61e2009a16cf8a1e9ab9ad0655009 onnxruntime-linux-x64-1.10.0.tgz
diff --git a/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum b/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum
new file mode 100644
index 000000000..8ce4bb07b
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum
@@ -0,0 +1 @@
+ba02dac4143492c3790f949be224dfdf open_jtalk_dic_utf_8-1.11.tar.gz
diff --git a/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum b/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum
new file mode 100644
index 000000000..5947e3633
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum
@@ -0,0 +1 @@
+997bf9e915f7d6288c923ab1ff5f4ff6 0.11.4.tar.gz
diff --git a/3rdparty/voicevox/node_scripts/server.py b/3rdparty/voicevox/node_scripts/server.py
new file mode 100644
index 000000000..add596aff
--- /dev/null
+++ b/3rdparty/voicevox/node_scripts/server.py
@@ -0,0 +1,573 @@
+#!/usr/bin/env python3
+
+# This code was created based on the following link's code.
+# https://github.com/VOICEVOX/voicevox_engine/blob/0.11.4/run.py
+
+import base64
+from distutils.version import LooseVersion
+from functools import lru_cache
+import imp
+import json
+import multiprocessing
+import os
+import os.path as osp
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from tempfile import TemporaryFile
+from typing import Dict
+from typing import List
+from typing import Optional
+import zipfile
+
+from fastapi import FastAPI
+from fastapi import HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.params import Query
+from fastapi import Response
+import rospkg
+import rospy
+import soundfile
+from starlette.responses import FileResponse
+import uvicorn
+
+
+PKG_NAME = 'voicevox'
+abs_path = osp.dirname(osp.abspath(__file__))
+voicevox_engine = imp.load_package(
+    'voicevox_engine', osp.join(abs_path, 'voicevox_engine/voicevox_engine'))
+rospack = rospkg.RosPack()
+voicevox_dir = rospack.get_path(PKG_NAME)
+voicevox_lib_dir = osp.join(voicevox_dir, 'lib')
+# set pyopenjtalk's dic.tar.gz file
+os.environ['OPEN_JTALK_DICT_DIR'] = osp.join(
+    voicevox_dir, 'dict', 'open_jtalk_dic_utf_8-1.11')
+
+
+from voicevox_engine import __version__
+from voicevox_engine.kana_parser import create_kana
+from voicevox_engine.kana_parser import parse_kana
+from voicevox_engine.model import AccentPhrase
+from voicevox_engine.model import AudioQuery
+from voicevox_engine.model import ParseKanaBadRequest
+from voicevox_engine.model import ParseKanaError
+from voicevox_engine.model import Speaker
+from voicevox_engine.model import SpeakerInfo
+from voicevox_engine.model import SupportedDevicesInfo
+from voicevox_engine.morphing import \
+    synthesis_morphing_parameter as _synthesis_morphing_parameter
+from voicevox_engine.morphing import synthesis_morphing
+from voicevox_engine.preset import Preset
+from voicevox_engine.preset import PresetLoader
+from voicevox_engine.synthesis_engine import make_synthesis_engines
+from voicevox_engine.synthesis_engine import SynthesisEngineBase
+from voicevox_engine.user_dict import user_dict_startup_processing
+from voicevox_engine.utility import connect_base64_waves
+from voicevox_engine.utility import ConnectBase64WavesException
+from voicevox_engine.utility import engine_root
+
+
+def b64encode_str(s):
+    return base64.b64encode(s).decode("utf-8")
+
+
+def generate_app(
+    synthesis_engines: Dict[str, SynthesisEngineBase], latest_core_version: str
+) -> FastAPI:
+    root_dir = engine_root()
+
+    default_sampling_rate = synthesis_engines[latest_core_version].default_sampling_rate
+
+    app = FastAPI(
+        title="VOICEVOX ENGINE",
+        description="The VOICEVOX speech synthesis engine.",
+        version=__version__,
+    )
+
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    preset_loader = PresetLoader(
+        preset_path=root_dir / "presets.yaml",
+    )
+
+    # Enable caching.
+    # lru_cache is applied here on the HTTP-server side because applying it inside the module makes the cache hard to control.
+    # TODO: provide module-side and HTTP-side APIs to manage the cache.
+    synthesis_morphing_parameter = lru_cache(maxsize=4)(_synthesis_morphing_parameter)
+
+    # @app.on_event("startup")
+    # async def start_catch_disconnection():
+    #     if args.enable_cancellable_synthesis:
+    #         loop = asyncio.get_event_loop()
+    #         _ = loop.create_task(cancellable_engine.catch_disconnection())
+
+    @app.on_event("startup")
+    def apply_user_dict():
+        user_dict_startup_processing()
+
+    def get_engine(core_version: Optional[str]) -> SynthesisEngineBase:
+        if core_version is None:
+            return synthesis_engines[latest_core_version]
+        if core_version in synthesis_engines:
+            return synthesis_engines[core_version]
+        raise HTTPException(status_code=422, detail="Unknown version")
+
+    @app.post(
+        "/audio_query",
+        response_model=AudioQuery,
+        tags=["Query Creation"],
+        summary="Create a query for speech synthesis",
+    )
+    def audio_query(text: str, speaker: int, core_version: Optional[str] = None):
+        """
+        Returns a query with default values filled in. The query can be passed to speech synthesis as is. See `Schemas` for the meaning of each value.
+        """
+        engine = get_engine(core_version)
+        accent_phrases = engine.create_accent_phrases(text, speaker_id=speaker)
+        return AudioQuery(
+            accent_phrases=accent_phrases,
+            speedScale=1,
+            pitchScale=0,
+            intonationScale=1,
+            volumeScale=1,
+            prePhonemeLength=0.1,
+            postPhonemeLength=0.1,
+            outputSamplingRate=default_sampling_rate,
+            outputStereo=False,
+            kana=create_kana(accent_phrases),
+        )
+
+    @app.post(
+        "/audio_query_from_preset",
+        response_model=AudioQuery,
+        tags=["Query Creation"],
+        summary="Create a query for speech synthesis from a preset",
+    )
+    def audio_query_from_preset(
+        text: str, preset_id: int, core_version: Optional[str] = None
+    ):
+        """
+        Returns a query with default values filled in. The query can be passed to speech synthesis as is. See `Schemas` for the meaning of each value.
+        """
+        engine = get_engine(core_version)
+        presets, err_detail = preset_loader.load_presets()
+        if err_detail:
+            raise HTTPException(status_code=422, detail=err_detail)
+        for preset in presets:
+            if preset.id == preset_id:
+                selected_preset = preset
+                break
+        else:
+            raise HTTPException(status_code=422, detail="No preset found for the given preset ID")
+
+        accent_phrases = engine.create_accent_phrases(
+            text, speaker_id=selected_preset.style_id
+        )
+        return AudioQuery(
+            accent_phrases=accent_phrases,
+            speedScale=selected_preset.speedScale,
+            pitchScale=selected_preset.pitchScale,
+            intonationScale=selected_preset.intonationScale,
+            volumeScale=selected_preset.volumeScale,
+            prePhonemeLength=selected_preset.prePhonemeLength,
+            postPhonemeLength=selected_preset.postPhonemeLength,
+            outputSamplingRate=default_sampling_rate,
+            outputStereo=False,
+            kana=create_kana(accent_phrases),
+        )
+
+    @app.post(
+        "/accent_phrases",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get accent phrases from text",
+        responses={
+            400: {
+                "description": "Failed to parse the kana reading",
+                "model": ParseKanaBadRequest,
+            }
+        },
+    )
+    def accent_phrases(
+        text: str,
+        speaker: int,
+        is_kana: bool = False,
+        core_version: Optional[str] = None,
+    ):
+        """
+        Gets accent phrases from text.
+        When is_kana is `true`, the text is treated as kana readings in the AquesTalk-like notation below. The default is `false`.
+        * All kana are written in katakana.
+        * Accent phrases are separated by `/` or `、`. A silent pause is inserted only when they are separated by `、`.
+        * Putting `_` before a kana devoices it.
+        * The accent position is specified with `'`. Every accent phrase must have exactly one accent position.
+        * Appending a full-width `?` to an accent phrase produces question intonation.
+        """
+        engine = get_engine(core_version)
+        if is_kana:
+            try:
+                accent_phrases = parse_kana(text)
+            except ParseKanaError as err:
+                raise HTTPException(
+                    status_code=400,
+                    detail=ParseKanaBadRequest(err).dict(),
+                )
+            accent_phrases = engine.replace_mora_data(
+                accent_phrases=accent_phrases, speaker_id=speaker
+            )
+
+            return accent_phrases
+        else:
+            return engine.create_accent_phrases(text, speaker_id=speaker)
+
+    @app.post(
+        "/mora_data",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get pitch and phoneme lengths from accent phrases",
+    )
+    def mora_data(
+        accent_phrases: List[AccentPhrase],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return engine.replace_mora_data(accent_phrases, speaker_id=speaker)
+
+    @app.post(
+        "/mora_length",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get phoneme lengths from accent phrases",
+    )
+    def mora_length(
+        accent_phrases: List[AccentPhrase],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return engine.replace_phoneme_length(
+            accent_phrases=accent_phrases, speaker_id=speaker
+        )
+
+    @app.post(
+        "/mora_pitch",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get pitch from accent phrases",
+    )
+    def mora_pitch(
+        accent_phrases: List[AccentPhrase],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return engine.replace_mora_pitch(
+            accent_phrases=accent_phrases, speaker_id=speaker
+        )
+
+    @app.post(
+        "/synthesis",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["Speech Synthesis"],
+        summary="Synthesize speech",
+    )
+    def synthesis(
+        query: AudioQuery,
+        speaker: int,
+        enable_interrogative_upspeak: bool = Query(  # noqa: B008
+            default=True,
+            description="Automatically adjust the ending intonation when interrogative text is given",
+        ),
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        wave = engine.synthesis(
+            query=query,
+            speaker_id=speaker,
+            enable_interrogative_upspeak=enable_interrogative_upspeak,
+        )
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
+            )
+
+        return FileResponse(f.name, media_type="audio/wav")
+
+    @app.post(
+        "/multi_synthesis",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "application/zip": {
+                        "schema": {"type": "string", "format": "binary"}
+                    }
+                },
+            }
+        },
+        tags=["Speech Synthesis"],
+        summary="Synthesize speech for multiple queries at once",
+    )
+    def multi_synthesis(
+        queries: List[AudioQuery],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        sampling_rate = queries[0].outputSamplingRate
+
+        with NamedTemporaryFile(delete=False) as f:
+
+            with zipfile.ZipFile(f, mode="a") as zip_file:
+
+                for i in range(len(queries)):
+
+                    if queries[i].outputSamplingRate != sampling_rate:
+                        raise HTTPException(
+                            status_code=422, detail="Queries have different sampling rates"
+                        )
+
+                    with TemporaryFile() as wav_file:
+
+                        wave = engine.synthesis(query=queries[i], speaker_id=speaker)
+                        soundfile.write(
+                            file=wav_file,
+                            data=wave,
+                            samplerate=sampling_rate,
+                            format="WAV",
+                        )
+                        wav_file.seek(0)
+                        zip_file.writestr(f"{str(i + 1).zfill(3)}.wav", wav_file.read())
+
+        return FileResponse(f.name, media_type="application/zip")
+
+    @app.post(
+        "/synthesis_morphing",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["Speech Synthesis"],
+        summary="Synthesize speech morphed between two speakers",
+    )
+    def _synthesis_morphing(
+        query: AudioQuery,
+        base_speaker: int,
+        target_speaker: int,
+        morph_rate: float = Query(..., ge=0.0, le=1.0),  # noqa: B008
+        core_version: Optional[str] = None,
+    ):
+        """
+        Synthesizes speech with the two given speakers and morphs between them at the given rate.
+        The rate is specified by `morph_rate`: 0.0 is closest to the base speaker, 1.0 to the target speaker.
+        """
+        engine = get_engine(core_version)
+
+        # the generated parameters are cached
+        morph_param = synthesis_morphing_parameter(
+            engine=engine,
+            query=query,
+            base_speaker=base_speaker,
+            target_speaker=target_speaker,
+        )
+
+        morph_wave = synthesis_morphing(
+            morph_param=morph_param,
+            morph_rate=morph_rate,
+            output_stereo=query.outputStereo,
+        )
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f,
+                data=morph_wave,
+                samplerate=morph_param.fs,
+                format="WAV",
+            )
+
+        return FileResponse(f.name, media_type="audio/wav")
+
+    @app.post(
+        "/connect_waves",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["Others"],
+        summary="Concatenate multiple base64-encoded wav files into one",
+    )
+    def connect_waves(waves: List[str]):
+        """
+        Concatenates the base64-encoded wav data and returns the result as a single wav file.
+        """
+        try:
+            waves_nparray, sampling_rate = connect_base64_waves(waves)
+        except ConnectBase64WavesException as err:
+            return HTTPException(status_code=422, detail=str(err))
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f,
+                data=waves_nparray,
+                samplerate=sampling_rate,
+                format="WAV",
+            )
+
+        return FileResponse(f.name, media_type="audio/wav")
+
+    @app.get("/presets", response_model=List[Preset], tags=["Others"])
+    def get_presets():
+        """
+        Returns the preset settings held by the engine.
+
+        Returns
+        -------
+        presets: List[Preset]
+            list of presets
+        """
+        presets, err_detail = preset_loader.load_presets()
+        if err_detail:
+            raise HTTPException(status_code=422, detail=err_detail)
+        return presets
+
+    @app.get("/version", tags=["Others"])
+    def version() -> str:
+        return __version__
+
+    @app.get("/core_versions", response_model=List[str], tags=["Others"])
+    def core_versions() -> List[str]:
+        return Response(
+            content=json.dumps(list(synthesis_engines.keys())),
+            media_type="application/json",
+        )
+
+    @app.get("/speakers", response_model=List[Speaker], tags=["Others"])
+    def speakers(
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return Response(
+            content=engine.speakers,
+            media_type="application/json",
+        )
+
+    @app.get("/speaker_info", response_model=SpeakerInfo, tags=["Others"])
+    def speaker_info(speaker_uuid: str, core_version: Optional[str] = None):
+        """
+        Returns information about the given speaker_uuid as JSON.
+        Images and voice samples are returned base64-encoded.
+
+        Returns
+        -------
+        ret_data: SpeakerInfo
+        """
+        speakers = json.loads(get_engine(core_version).speakers)
+        for i in range(len(speakers)):
+            if speakers[i]["speaker_uuid"] == speaker_uuid:
+                speaker = speakers[i]
+                break
+        else:
+            raise HTTPException(status_code=404, detail="No matching speaker found")
+
+        try:
+            policy = (root_dir / f"speaker_info/{speaker_uuid}/policy.md").read_text(
+                "utf-8"
+            )
+            portrait = b64encode_str(
+                (root_dir / f"speaker_info/{speaker_uuid}/portrait.png").read_bytes()
+            )
+            style_infos = []
+            for style in speaker["styles"]:
+                id = style["id"]
+                icon = b64encode_str(
+                    (
+                        root_dir / f"speaker_info/{speaker_uuid}/icons/{id}.png"
+                    ).read_bytes()
+                )
+                voice_samples = [
+                    b64encode_str(
+                        (
+                            root_dir
+                            / "speaker_info/{}/voice_samples/{}_{}.wav".format(
+                                speaker_uuid, id, str(j + 1).zfill(3)
+                            )
+                        ).read_bytes()
+                    )
+                    for j in range(3)
+                ]
+                style_infos.append(
+                    {"id": id, "icon": icon, "voice_samples": voice_samples}
+                )
+        except FileNotFoundError:
+            import traceback
+
+            traceback.print_exc()
+            raise HTTPException(status_code=500, detail="The additional information was not found")
+
+        ret_data = {"policy": policy, "portrait": portrait, "style_infos": style_infos}
+        return ret_data
+
+    @app.get("/supported_devices", response_model=SupportedDevicesInfo, tags=["Others"])
+    def supported_devices(
+        core_version: Optional[str] = None,
+    ):
+        supported_devices = get_engine(core_version).supported_devices
+        if supported_devices is None:
+            raise HTTPException(status_code=422, detail="This feature is not supported.")
+        return Response(
+            content=supported_devices,
+            media_type="application/json",
+        )
+
+    return app
+
+
+if __name__ == "__main__":
+    multiprocessing.freeze_support()
+    rospy.init_node('voicevox_server')
+
+    voicelib_dir = [Path(voicevox_lib_dir)]
+    use_gpu = False
+    host = rospy.get_param('~host', "127.0.0.1")
+    port = rospy.get_param('~port', 50021)
+    cpu_num_threads = rospy.get_param('~cpu_num_threads', None)
+    if cpu_num_threads is None:
+        cpu_num_threads = multiprocessing.cpu_count()
+
+    synthesis_engines = make_synthesis_engines(
+        use_gpu=use_gpu,
+        voicelib_dirs=voicelib_dir,
+        cpu_num_threads=cpu_num_threads,
+    )
+    if len(synthesis_engines) == 0:
+        rospy.logerr("No speech synthesis engine is available.")
+    latest_core_version = str(max([LooseVersion(ver)
+                                   for ver in synthesis_engines]))
+
+    uvicorn.run(
+        generate_app(synthesis_engines, latest_core_version),
+        host=host,
+        port=port,
+    )
diff --git a/3rdparty/voicevox/package.xml b/3rdparty/voicevox/package.xml
new file mode 100644
index 000000000..5240c3468
--- /dev/null
+++ b/3rdparty/voicevox/package.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<package format="3">
+
+  <name>voicevox</name>
+  <version>0.0.1</version>
+  <description>VOICEVOX: AI speech synthesis</description>
+  <maintainer>Iori Yanokura</maintainer>
+
+  <license>MIT</license>
+
+  <url>http://ros.org/wiki/voicevox</url>
+
+  <author>Iori Yanokura</author>
+
+  <buildtool_depend>catkin</buildtool_depend>
+  <build_depend>catkin_virtualenv</build_depend>
+
+  <build_depend>mk</build_depend>
+  <build_depend>roslib</build_depend>
+  <build_depend>rospack</build_depend>
+  <build_depend>unzip</build_depend>
+  <build_depend>wget</build_depend>
+
+  <exec_depend>python3</exec_depend>
+  <exec_depend>python3-requests</exec_depend>
+  <exec_depend>sound_play</exec_depend>
+  <exec_depend>unzip</exec_depend>
+  <exec_depend>wget</exec_depend>
+
+  <export>
+    <pip_requirements>requirements.txt</pip_requirements>
+  </export>
+
+</package>
diff --git a/3rdparty/voicevox/python/voicevox/__init__.py b/3rdparty/voicevox/python/voicevox/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/3rdparty/voicevox/python/voicevox/filecheck_utils.py b/3rdparty/voicevox/python/voicevox/filecheck_utils.py
new file mode 100644
index 000000000..6c881b5f5
--- /dev/null
+++ b/3rdparty/voicevox/python/voicevox/filecheck_utils.py
@@ -0,0 +1,43 @@
+import hashlib
+import os
+
+
+def get_cache_dir():
+    """Return cache dir.
+
+    Returns
+    -------
+    cache_dir : str
+        cache directory.
+    """
+    ros_home = os.getenv('ROS_HOME', os.path.expanduser('~/.ros'))
+    pkg_ros_home = os.path.join(ros_home, 'voicevox_texttospeech')
+    default_cache_dir = os.path.join(pkg_ros_home, 'cache')
+    cache_dir = os.environ.get(
+        'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_DIR',
+        default_cache_dir)
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    return cache_dir
+
+
+def checksum_md5(filename, blocksize=8192):
+    """Calculate md5sum.
+
+    Parameters
+    ----------
+    filename : str or pathlib.Path
+        input filename.
+    blocksize : int
+        MD5 has 128-byte digest blocks (default: 8192 is 128x64).
+
+    Returns
+    -------
+    md5 : str
+        calculated md5sum.
+    """
+    filename = str(filename)
+    hash_factory = hashlib.md5()
+    with open(filename, 'rb') as f:
+        for chunk in iter(lambda: f.read(blocksize), b''):
+            hash_factory.update(chunk)
+    return hash_factory.hexdigest()
diff --git a/3rdparty/voicevox/requirements.in b/3rdparty/voicevox/requirements.in
new file mode 100644
index 000000000..c9cfd223a
--- /dev/null
+++ b/3rdparty/voicevox/requirements.in
@@ -0,0 +1,11 @@
+PyYAML
+aiofiles
+appdirs
+fastapi
+git+https://github.com/VOICEVOX/pyopenjtalk@a85521a0a0f298f08d9e9b24987b3c77eb4aaff5#egg=pyopenjtalk
+numpy
+python-multipart
+pyworld
+scipy
+soundfile
+uvicorn
diff --git a/3rdparty/voicevox/setup.py b/3rdparty/voicevox/setup.py
new file mode 100644
index 000000000..939174bc8
--- /dev/null
+++ b/3rdparty/voicevox/setup.py
@@ -0,0 +1,12 @@
+from distutils.core import setup
+
+from catkin_pkg.python_setup import generate_distutils_setup
+from setuptools import find_packages
+
+
+d = generate_distutils_setup(
+    packages=find_packages('python'),
+    package_dir={'': 'python'},
+)
+
+setup(**d)
diff --git a/dialogflow_task_executive/node_scripts/task_executive.py b/dialogflow_task_executive/node_scripts/task_executive.py
index 686327a55..e826d50d7 100644
--- a/dialogflow_task_executive/node_scripts/task_executive.py
+++ b/dialogflow_task_executive/node_scripts/task_executive.py
@@ -7,7 +7,9 @@
import rospy
from app_manager.msg import AppList
+from app_manager.msg import KeyValue
from app_manager.srv import StartApp
+from app_manager.srv import StartAppRequest
from app_manager.srv import StopApp
from std_srvs.srv import Empty
@@ -90,12 +92,16 @@ def available_apps(self):
         return map(lambda a: a.name,
                    self._latest_msg.available_apps)

-    def start_app(self, name):
+    def start_app(self, name, launch_args):
         if name in self.running_apps:
             raise RuntimeError("{} is already running".format(name))
         elif name not in self.available_apps:
             raise RuntimeError("{} is not available".format(name))
-        res = self._srv_start_app(name=name)
+        req = StartAppRequest()
+        req.name = name
+        for key, value in launch_args.items():
+            req.args.append(KeyValue(key=key, value=value))
+        res = self._srv_start_app(req)
         if res.started:
             rospy.loginfo("{} successfully started".format(name))
             return True
@@ -221,6 +227,12 @@ def dialog_cb(self, msg):
         try:
             params = json.loads(msg.parameters)
             rospy.set_param("/action/parameters", params)
+            # set launch_args
+            launch_args = {}
+            for key, value in params.items():
+                launch_args[key.encode('utf-8')] = value.encode('utf-8')
+        except AttributeError as e:
+            rospy.logerr(e)
         except ValueError:
             rospy.logerr(
                 "Failed to parse parameters of action '{}'".format(msg.action))
@@ -228,7 +240,7 @@ def dialog_cb(self, msg):
         rospy.loginfo(
             "Starting '{}' with parameters '{}'"
             .format(msg.action, msg.parameters))
-        self.app_manager.start_app(action)
+        self.app_manager.start_app(action, launch_args)

     def app_start_cb(self, name):
         rospy.loginfo("{} started".format(name))
diff --git a/google_chat_ros/scripts/google_chat_ros_node.py b/google_chat_ros/scripts/google_chat_ros_node.py
index 64e6d011f..43830281b 100644
--- a/google_chat_ros/scripts/google_chat_ros_node.py
+++ b/google_chat_ros/scripts/google_chat_ros_node.py
@@ -86,7 +86,7 @@ def __init__(self):
rospy.logwarn("You cannot recieve Google Chat event because HTTPS server or Google Cloud Pub/Sub is not running.")
else:
- rospy.logerr("Please choose receiving_mode param from dialogflow, https, pubsub, none.")
+ rospy.logerr("Please choose receiving_mode param from dialogflow, url, pubsub, none.")
def killhttpd(self):
self._server.kill()
diff --git a/respeaker_ros/CMakeLists.txt b/respeaker_ros/CMakeLists.txt
index 390a82e0f..b4009cd1a 100644
--- a/respeaker_ros/CMakeLists.txt
+++ b/respeaker_ros/CMakeLists.txt
@@ -35,7 +35,9 @@ catkin_install_python(PROGRAMS ${PYTHON_SCRIPTS}
 if(CATKIN_ENABLE_TESTING)
   find_package(rostest REQUIRED)
+  find_package(roslaunch REQUIRED)
   add_rostest(test/sample_respeaker.test
     DEPENDENCIES ${PROJECT_NAME}_generate_virtualenv
   )
+  roslaunch_add_file_check(launch/sample_respeaker.launch)
 endif()
diff --git a/respeaker_ros/README.md b/respeaker_ros/README.md
index e42ba1202..247168ba5 100644
--- a/respeaker_ros/README.md
+++ b/respeaker_ros/README.md
@@ -92,6 +92,151 @@ A ROS Package for Respeaker Mic Array
a: 0.3"
```
+## Parameters for respeaker_node.py
+
+  - ### Publishing topics
+
+    - `audio` (`audio_common_msgs/AudioData`)
+
+      Processed audio for ASR. 1 channel.
+
+    - `audio_info` (`audio_common_msgs/AudioInfo`)
+
+      Audio info with respect to `~audio`.
+
+    - `audio_raw` (`audio_common_msgs/AudioData`)
+
+      Raw microphone-array audio data with 4 channels. You may need to update the respeaker firmware.
+
+      If the firmware doesn't support it, this topic is not published.
+
+    - `audio_info_raw` (`audio_common_msgs/AudioInfo`)
+
+      Audio info with respect to `~audio_raw`.
+
+      If the firmware doesn't support it, this topic is not published.
+
+    - `speech_audio` (`audio_common_msgs/AudioData`)
+
+      Audio data captured while a person is speaking, as detected by the VAD function.
+
+    - `speech_audio_raw` (`audio_common_msgs/AudioData`)
+
+      4-channel raw audio data captured while a person is speaking, as detected by the VAD function.
+
+      If the firmware doesn't support it, this topic is not published.
+
+    - `audio_merged_playback` (`audio_common_msgs/AudioData`)
+
+      Audio that merges the microphone input with the speaker playback.
+
+      If the firmware doesn't support it, this topic is not published.
+
+      For more detail, please see https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/
+
+    - `~is_speeching` (`std_msgs/Bool`)
+
+      Whether someone is speaking, as detected by the VAD function.
+
+    - `~sound_direction` (`std_msgs/Int32`)
+
+      Direction of the sound.
+
+    - `~sound_localization` (`geometry_msgs/PoseStamped`)
+
+      Localized sound direction. The pose is placed in the estimated direction, at a distance of `~doa_xy_offset` from the sensor.
+
+  - ### Parameters
+
+    - `~update_rate` (`Double`, default: `10.0`)
+
+      Publishing rate [Hz] of info data such as `~is_speeching`, `~sound_direction`, `~sound_localization`, `~speech_audio` and `~speech_audio_raw`.
+
+    - `~sensor_frame_id` (`String`, default: `respeaker_base`)
+
+      Frame ID of the published messages.
+
+    - `~doa_xy_offset` (`Double`, default: `0.0`)
+
+      Radius offset applied to the estimated sound direction when computing `~sound_localization`.
+
+    - `~doa_yaw_offset` (`Double`, default: `90.0`)
+
+      Yaw angle offset [deg] applied to the estimated DoA.
+
+    - `~speech_prefetch` (`Double`, default: `0.5`)
+
+      Duration [sec] of audio kept in the prefetch buffer before speech is detected.
+
+    - `~speech_continuation` (`Double`, default: `0.5`)
+
+      If speech resumes within this duration [sec] after it stops, it is treated as one continuous utterance.
+
+    - `~speech_max_duration` (`Double`, default: `7.0`)
+
+    - `~speech_min_duration` (`Double`, default: `0.1`)
+
+      `~speech_audio` and `~speech_audio_raw` are published only if the utterance duration falls within these bounds [sec].
+
+    - `~suppress_pyaudio_error` (`Bool`, default: `True`)
+
+      If `True`, suppress error output from pyaudio.
+
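+As a quick check of the topics above, the following minimal sketch saves each utterance published on `speech_audio` to a WAV file (assuming the default 16 kHz, 16-bit mono format advertised on `audio_info`; the node name is just an example):
+
+```python
+#!/usr/bin/env python
+import wave
+
+import rospy
+from audio_common_msgs.msg import AudioData
+
+
+def callback(msg):
+    # each message contains one detected utterance (S16LE, mono)
+    wav = wave.open('/tmp/utterance.wav', 'wb')
+    wav.setnchannels(1)
+    wav.setsampwidth(2)      # 16-bit samples
+    wav.setframerate(16000)  # assumed sampling rate
+    wav.writeframes(msg.data)
+    wav.close()
+
+
+rospy.init_node('speech_audio_saver')
+rospy.Subscriber('speech_audio', AudioData, callback)
+rospy.spin()
+```
+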
+## Parameters for speech_to_text.py
+
+  - ### Publishing topics
+
+    - `~speech_to_text` (`speech_recognition_msgs/SpeechRecognitionCandidates`)
+
+      Recognized text.
+
+  - ### Subscribing topics
+
+    - `audio` (`audio_common_msgs/AudioData`)
+
+      Input audio.
+
+  - ### Parameters
+
+    - `~audio_info` (`String`, default: `""`)
+
+      Name of an audio info (`audio_common_msgs/AudioInfo`) topic. If specified, the `~sample_rate`, `~sample_width` and `~channels` parameters are obtained from that topic.
+
+    - `~sample_rate` (`Int`, default: `16000`)
+
+      Sampling rate [Hz].
+
+    - `~sample_width` (`Int`, default: `2`)
+
+      Sample width [bytes].
+
+    - `~channels` (`Int`, default: `1`)
+
+      Number of channels.
+
+    - `~target_channel` (`Int`, default: `0`)
+
+      Index of the channel used for recognition.
+
+    - `~language` (`String`, default: `ja-JP`)
+
+      Language for the speech-to-text service. For English, specify `en-US`.
+
+    - `~self_cancellation` (`Bool`, default: `True`)
+
+      If `True`, ignore voice input while the robot is speaking.
+
+    - `~tts_tolerance` (`Double`, default: `1.0`)
+
+      Duration [sec] to keep assuming the robot is speaking after the TTS action finishes.
+
+    - `~tts_action_names` (`List[String]`, default: `['sound_play']`)
+
+      Used when `~self_cancellation` is `True`.
+
+      While any of these actions is active, the callback subscribing to `audio` does nothing.
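+
+For example, to take the audio format from the `audio_info` topic and recognize English speech (a minimal sketch using the parameters above):
+
+```bash
+rosrun respeaker_ros speech_to_text.py _audio_info:=audio_info _language:=en-US
+```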
+
## Use cases
### Voice Recognition
diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch
index 31d083608..e2c43c557 100644
--- a/respeaker_ros/launch/sample_respeaker.launch
+++ b/respeaker_ros/launch/sample_respeaker.launch
@@ -13,6 +13,8 @@
+
+
+ respawn="true" respawn_delay="10" >
+
@@ -30,6 +33,7 @@
+ audio_info: $(arg audio_info)
language: $(arg language)
self_cancellation: $(arg self_cancellation)
tts_tolerance: 0.5
diff --git a/respeaker_ros/package.xml b/respeaker_ros/package.xml
index ac83b898a..be16789a2 100644
--- a/respeaker_ros/package.xml
+++ b/respeaker_ros/package.xml
@@ -17,6 +17,7 @@
   <run_depend>flac</run_depend>
   <run_depend>geometry_msgs</run_depend>
   <run_depend>std_msgs</run_depend>
+  <run_depend>sound_play</run_depend>
   <run_depend>speech_recognition_msgs</run_depend>
   <run_depend>tf</run_depend>
   <run_depend>python-numpy</run_depend>
diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py
index bf14ad478..6b351251c 100644
--- a/respeaker_ros/scripts/respeaker_node.py
+++ b/respeaker_ros/scripts/respeaker_node.py
@@ -16,6 +16,13 @@
 import sys
 import time
 from audio_common_msgs.msg import AudioData
+enable_audio_info = True
+try:
+    from audio_common_msgs.msg import AudioInfo
+except Exception as e:
+    rospy.logwarn('audio_common_msgs/AudioInfo message does not exist.'
+                  ' AudioInfo messages will not be published.')
+    enable_audio_info = False
 from geometry_msgs.msg import PoseStamped
 from std_msgs.msg import Bool, Int32, ColorRGBA
 from dynamic_reconfigure.server import Server
@@ -265,7 +272,6 @@ def __init__(self, on_audio, channel=0, suppress_error=True):
         if self.channels != 6:
             rospy.logwarn("%d channel is found for respeaker" % self.channels)
             rospy.logwarn("You may have to update firmware.")
-        self.channel = min(self.channels - 1, max(0, self.channel))

         self.stream = self.pyaudio.open(
             input=True, start=False,
@@ -295,9 +301,8 @@ def stream_callback(self, in_data, frame_count, time_info, status):
         data = np.frombuffer(in_data, dtype=np.int16)
         chunk_per_channel = int(len(data) / self.channels)
         data = np.reshape(data, (chunk_per_channel, self.channels))
-        chan_data = data[:, self.channel]
         # invoke callback
-        self.on_audio(chan_data.tobytes())
+        self.on_audio(data)
         return None, pyaudio.paContinue

     def start(self):
@@ -333,14 +338,24 @@ def __init__(self):
         self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
         self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
         self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
+        if enable_audio_info is True:
+            self.pub_audio_info = rospy.Publisher("audio_info", AudioInfo,
+                                                  queue_size=1, latch=True)
+            self.pub_audio_raw_info = rospy.Publisher("audio_info_raw", AudioInfo,
+                                                      queue_size=1, latch=True)
         self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
         # init config
         self.config = None
         self.dyn_srv = Server(RespeakerConfig, self.on_config)
         # start
         self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
+        self.n_channel = self.respeaker_audio.channels
+
         self.speech_prefetch_bytes = int(
-            self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
+            1
+            * self.speech_prefetch
+            * self.respeaker_audio.rate
+            * self.respeaker_audio.bitdepth / 8.0)
         self.speech_prefetch_buffer = b""
         self.respeaker_audio.start()
         self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
@@ -348,6 +363,58 @@ def __init__(self):
         self.timer_led = None
         self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)
+        # processed audio for ASR
+        if enable_audio_info is True:
+            info_msg = AudioInfo(
+                channels=1,
+                sample_rate=self.respeaker_audio.rate,
+                sample_format='S16LE',
+                bitrate=self.respeaker_audio.rate * self.respeaker_audio.bitdepth,
+                coding_format='WAVE')
+            self.pub_audio_info.publish(info_msg)
+
+        if self.n_channel > 1:
+            # The respeaker has 4 microphones.
+            # Multiple microphones can be used for
+            # beam forming (strengthening the sound in a specific direction)
+            # and sound localization (the respeaker outputs the azimuth
+            # direction, but the multichannel data can also estimate
+            # the elevation direction), etc.
+
+            # Channel 0: processed audio for ASR
+            # Channel 1: mic1 raw data
+            # Channel 2: mic2 raw data
+            # Channel 3: mic3 raw data
+            # Channel 4: mic4 raw data
+            # Channel 5: merged playback
+            # For more detail, please see
+            # https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/
+            # (self.n_channel - 2) = 4 channels are the raw microphones.
+            self.pub_audio_raw = rospy.Publisher("audio_raw", AudioData,
+                                                 queue_size=10)
+            self.pub_audio_merged_playback = rospy.Publisher(
+                "audio_merged_playback", AudioData,
+                queue_size=10)
+            if enable_audio_info is True:
+                info_raw_msg = AudioInfo(
+                    channels=self.n_channel - 2,
+                    sample_rate=self.respeaker_audio.rate,
+                    sample_format='S16LE',
+                    bitrate=(self.respeaker_audio.rate *
+                             self.respeaker_audio.bitdepth),
+                    coding_format='WAVE')
+                self.pub_audio_raw_info.publish(info_raw_msg)
+
+            self.speech_audio_raw_buffer = b""
+            self.speech_raw_prefetch_buffer = b""
+            self.pub_speech_audio_raw = rospy.Publisher(
+                "speech_audio_raw", AudioData, queue_size=10)
+            self.speech_raw_prefetch_bytes = int(
+                (self.n_channel - 2)
+                * self.speech_prefetch
+                * self.respeaker_audio.rate
+                * self.respeaker_audio.bitdepth / 8.0)
+
     def on_shutdown(self):
         self.info_timer.shutdown()
         try:
@@ -386,14 +453,30 @@ def on_status_led(self, msg):
                                          oneshot=True)

     def on_audio(self, data):
-        self.pub_audio.publish(AudioData(data=data))
+        # take processed audio for ASR.
+        processed_data = data[:, 0].tobytes()
+        self.pub_audio.publish(AudioData(data=processed_data))
+        if self.n_channel > 1:
+            raw_audio_data = data[:, 1:5].reshape(-1).tobytes()
+            self.pub_audio_raw.publish(
+                AudioData(data=raw_audio_data))
+            self.pub_audio_merged_playback.publish(
+                AudioData(data=data[:, 5].tobytes()))
         if self.is_speeching:
             if len(self.speech_audio_buffer) == 0:
                 self.speech_audio_buffer = self.speech_prefetch_buffer
-            self.speech_audio_buffer += data
+                if self.n_channel > 1:
+                    self.speech_audio_raw_buffer = self.speech_raw_prefetch_buffer
+            self.speech_audio_buffer += processed_data
+            if self.n_channel > 1:
+                self.speech_audio_raw_buffer += raw_audio_data
         else:
-            self.speech_prefetch_buffer += data
+            self.speech_prefetch_buffer += processed_data
             self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
+            if self.n_channel > 1:
+                self.speech_raw_prefetch_buffer += raw_audio_data
+                self.speech_raw_prefetch_buffer = self.speech_raw_prefetch_buffer[
+                    -self.speech_raw_prefetch_bytes:]

     def on_timer(self, event):
         stamp = event.current_real or rospy.Time.now()
@@ -433,13 +516,15 @@ def on_timer(self, event):
         elif self.is_speeching:
             buf = self.speech_audio_buffer
             self.speech_audio_buffer = b""
+            buf_raw = self.speech_audio_raw_buffer
+            self.speech_audio_raw_buffer = b""
             self.is_speeching = False
             duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
-            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
+            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth / self.n_channel
             rospy.loginfo("Speech detected for %.3f seconds" % duration)
             if self.speech_min_duration <= duration < self.speech_max_duration:
-
                 self.pub_speech_audio.publish(AudioData(data=buf))
+                self.pub_speech_audio_raw.publish(AudioData(data=buf_raw))

 if __name__ == '__main__':
diff --git a/respeaker_ros/scripts/speech_to_text.py b/respeaker_ros/scripts/speech_to_text.py
index 0974b2f65..6765e2e04 100644
--- a/respeaker_ros/scripts/speech_to_text.py
+++ b/respeaker_ros/scripts/speech_to_text.py
@@ -2,6 +2,10 @@
# -*- coding: utf-8 -*-
# Author: Yuki Furuta
+from __future__ import division
+
+import sys
+
import actionlib
import rospy
try:
@@ -9,8 +13,16 @@
 except ImportError as e:
     raise ImportError(str(e) + '\nplease try "pip install speechrecognition"')
+import numpy as np
 from actionlib_msgs.msg import GoalStatus, GoalStatusArray
 from audio_common_msgs.msg import AudioData
+enable_audio_info = True
+try:
+    from audio_common_msgs.msg import AudioInfo
+except Exception as e:
+    rospy.logwarn('audio_common_msgs/AudioInfo message does not exist.'
+                  ' The ~audio_info parameter cannot be used.')
+    enable_audio_info = False
 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
@@ -18,8 +30,32 @@
 class SpeechToText(object):
     def __init__(self):
         # format of input audio data
-        self.sample_rate = rospy.get_param("~sample_rate", 16000)
-        self.sample_width = rospy.get_param("~sample_width", 2)
+        audio_info_topic_name = rospy.get_param('~audio_info', '')
+        if len(audio_info_topic_name) > 0:
+            if enable_audio_info is False:
+                rospy.logerr(
+                    'audio_common_msgs/AudioInfo message does not exist.'
+                    ' Giving ~audio_info is not valid in your environment.')
+                sys.exit(1)
+            rospy.loginfo('Extract audio info params from {}'.format(
+                audio_info_topic_name))
+            audio_info_msg = rospy.wait_for_message(
+                audio_info_topic_name, AudioInfo)
+            self.sample_rate = audio_info_msg.sample_rate
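+            # AudioInfo has no explicit sample-width field; recover it from
+            # bitrate = sample_rate * sample_width * 8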
+            self.sample_width = audio_info_msg.bitrate // self.sample_rate // 8
+            self.channels = audio_info_msg.channels
+        else:
+            self.sample_rate = rospy.get_param("~sample_rate", 16000)
+            self.sample_width = rospy.get_param("~sample_width", 2)
+            self.channels = rospy.get_param("~channels", 1)
+        if self.sample_width == 2:
+            self.dtype = 'int16'
+        elif self.sample_width == 4:
+            self.dtype = 'int32'
+        else:
+            raise NotImplementedError('sample_width {} is not supported'
+                                      .format(self.sample_width))
+        self.target_channel = rospy.get_param("~target_channel", 0)
         # language of STT service
         self.language = rospy.get_param("~language", "ja-JP")
         # ignore voice input while the robot is speaking
@@ -78,7 +114,11 @@ def audio_cb(self, msg):
         if self.is_canceling:
             rospy.loginfo("Speech is cancelled")
             return
-        data = SR.AudioData(msg.data, self.sample_rate, self.sample_width)
+
+        data = SR.AudioData(
+            np.frombuffer(msg.data, dtype=self.dtype)[
+                self.target_channel::self.channels].tobytes(),
+            self.sample_rate, self.sample_width)
         try:
             rospy.loginfo("Waiting for result %d" % len(data.get_raw_data()))
             result = self.recognizer.recognize_google(
diff --git a/respeaker_ros/test/sample_respeaker.test b/respeaker_ros/test/sample_respeaker.test
index 5d51c220c..61f10fb7b 100644
--- a/respeaker_ros/test/sample_respeaker.test
+++ b/respeaker_ros/test/sample_respeaker.test
@@ -3,6 +3,7 @@
+
diff --git a/rostwitter/CMakeLists.txt b/rostwitter/CMakeLists.txt
index 39258afbd..82b81feae 100644
--- a/rostwitter/CMakeLists.txt
+++ b/rostwitter/CMakeLists.txt
@@ -40,7 +40,7 @@ else()
   )
 endif()

-install(DIRECTORY test resource
+install(DIRECTORY test resource launch
   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
   USE_SOURCE_PERMISSIONS
 )
diff --git a/rostwitter/README.md b/rostwitter/README.md
new file mode 100644
index 000000000..81750bf5b
--- /dev/null
+++ b/rostwitter/README.md
@@ -0,0 +1,116 @@
+# rostwitter
+
+This package is a ROS wrapper for Twitter. You can tweet via ROS.
+
+# How to use
+
+## Get an access key for the API
+
+Apply for access to the Twitter API; please refer to the following URL.
+
+https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api
+
+After that, save a YAML file in the following format:
+
+```
+CKEY:
+CSECRET:
+AKEY:
+ASECRET:
+```
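+
+Here `CKEY`/`CSECRET` are the Twitter API consumer key and secret, and `AKEY`/`ASECRET` are the access token and access token secret.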
+
+## Launch tweet node
+
+```
+roslaunch rostwitter tweet.launch account_info:=
+```
+
+## Tweet text
+
+You can tweet by simply publishing to the `/tweet` topic.
+
+```
+rostopic pub /tweet std_msgs/String "Hello. Tweet via rostwitter (https://github.com/jsk-ros-pkg/jsk_3rdparty)"
+```
+
+![](./doc/tweet-string.jpg)
+
+If the string to be tweeted exceeds 140 full-width or 280 half-width characters, it will be split and tweeted as a thread.
+
+```
+rostopic pub /tweet std_msgs/String """The Zen of Python, by Tim Peters
+
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!
+"""
+```
+
+![](./doc/tweet-string-thread.jpg)
+
+## Tweet text with image
+
+You can also tweet along with your images.
+
+If a base64-encoded image or an image file path appears in the text, the text is split at that point and continues in the next reply of the thread.
+
+### Image path
+
+```
+wget https://github.com/k-okada.png -O /tmp/k-okada.png
+rostopic pub /tweet std_msgs/String "/tmp/k-okada.png"
+```
+
+![](./doc/tweet-image-path.jpg)
+
+### Base64
+
+You can also tweet images by encoding them in base64. The following example is in Python.
+
+Do not concatenate multiple base64 images without spaces.
+
+
+```python
+import rospy
+import cv2
+import std_msgs.msg
+import numpy as np
+import matplotlib.cm
+
+from rostwitter.cv_util import encode_image_cv2
+
+rospy.init_node('rostwitter_sample')
+pub = rospy.Publisher('/tweet', std_msgs.msg.String, queue_size=1)
+rospy.sleep(3.0)
+
+colormap = matplotlib.cm.get_cmap('hsv')
+
+text = 'Tweet with images. (https://github.com/jsk-ros-pkg/jsk_3rdparty/pull/375)\n'
+N = 12
+for i in range(N):
+    text += str(i)
+    color = colormap(1.0 * i / N)[:3]
+    img = color * np.ones((10, 10, 3), dtype=np.uint8) * 255
+    img = np.array(img, dtype=np.uint8)
+    text += encode_image_cv2(img) + ' '
+pub.publish(text)
+```
+
+[The result of the tweet.](https://twitter.com/pr2jsk/status/1561995909524705280)
diff --git a/rostwitter/doc/tweet-image-path.jpg b/rostwitter/doc/tweet-image-path.jpg
new file mode 100644
index 000000000..dffc9baec
Binary files /dev/null and b/rostwitter/doc/tweet-image-path.jpg differ
diff --git a/rostwitter/doc/tweet-string-thread.jpg b/rostwitter/doc/tweet-string-thread.jpg
new file mode 100644
index 000000000..13783eaef
Binary files /dev/null and b/rostwitter/doc/tweet-string-thread.jpg differ
diff --git a/rostwitter/doc/tweet-string.jpg b/rostwitter/doc/tweet-string.jpg
new file mode 100644
index 000000000..c41daa779
Binary files /dev/null and b/rostwitter/doc/tweet-string.jpg differ
diff --git a/rostwitter/launch/tweet.launch b/rostwitter/launch/tweet.launch
new file mode 100644
index 000000000..1d202a05e
--- /dev/null
+++ b/rostwitter/launch/tweet.launch
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/rostwitter/python/rostwitter/cv_util.py b/rostwitter/python/rostwitter/cv_util.py
new file mode 100644
index 000000000..ad284bc63
--- /dev/null
+++ b/rostwitter/python/rostwitter/cv_util.py
@@ -0,0 +1,80 @@
+import base64
+import imghdr
+import os.path
+import re
+
+import cv2
+import numpy as np
+import rospy
+
+
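+# Matches either a base64-encoded JPEG ("/9j/" is the base64 encoding of the
+# 0xFFD8FF JPEG magic bytes) or a file path ending in a common image extension.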
+base64_and_filepath_image_pattern = re.compile(r'((?:/9j/)(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)? ?|/\S+\.(?:jpeg|jpg|png|gif))')
+
+
+def encode_image_cv2(img, quality=90):
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
+ result, encimg = cv2.imencode('.jpg', img, encode_param)
+ b64encoded = base64.b64encode(encimg).decode('ascii')
+ return b64encoded
+
+
+def decode_image_cv2(b64encoded):
+ bin = b64encoded.split(",")[-1]
+ bin = base64.b64decode(bin)
+ bin = np.frombuffer(bin, np.uint8)
+ img = cv2.imdecode(bin, cv2.IMREAD_COLOR)
+ return img
+
+
+def is_base64_image(b64encoded):
+ try:
+ decode_image_cv2(b64encoded)
+ except Exception as e:
+ rospy.logerr(str(e))
+ return False
+ return True
+
+
+def get_image_from_text(text):
+ if base64_and_filepath_image_pattern.match(text) is None:
+ return None
+
+ if os.path.exists(text):
+ path = text
+ if imghdr.what(path) in ['jpeg', 'png', 'gif']:
+ with open(path, 'rb') as f:
+ return f.read()
+ else:
+ succ = is_base64_image(text)
+ if succ:
+            buf = text.split(",")[-1]
+            buf = base64.b64decode(buf)
+            buf = np.frombuffer(buf, np.uint8)
+            return buf
+
+
+def extract_media_from_text(text):
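+    # Split ``text`` into text segments and groups of images.
+    # Returns (imgs_list, split_texts), where imgs_list[i] contains the
+    # images that immediately follow split_texts[i].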
+ texts = base64_and_filepath_image_pattern.split(text)
+ target_texts = list(filter(lambda x: x is not None and len(x.strip()) > 0, texts))
+
+    split_texts = ['']
+    imgs_list = []
+
+    imgs = []
+ for text in target_texts:
+ img = get_image_from_text(text)
+ if img is None:
+ split_texts.append(text)
+ imgs_list.append(imgs)
+ imgs = []
+ else:
+ imgs.append(img)
+
+ if len(imgs) > 0:
+ imgs_list.append(imgs)
+    if len(split_texts) > 0 and len(imgs_list) > 0:
+        if len(split_texts[0]) == 0 and len(imgs_list[0]) == 0:
+ split_texts = split_texts[1:]
+ imgs_list = imgs_list[1:]
+ return imgs_list, split_texts
diff --git a/rostwitter/python/rostwitter/twitter.py b/rostwitter/python/rostwitter/twitter.py
index cdb020e15..c56cf5289 100644
--- a/rostwitter/python/rostwitter/twitter.py
+++ b/rostwitter/python/rostwitter/twitter.py
@@ -1,16 +1,20 @@
# originally from https://raw.githubusercontent.com/bear/python-twitter/v1.1/twitter.py # NOQA
+import math
import json as simplejson
import requests
-from requests_oauthlib import OAuth1
-# https://stackoverflow.com/questions/11914472/stringio-in-python3
try:
- from StringIO import StringIO ## for Python 2
+ from itertools import zip_longest
except ImportError:
- from io import StringIO ## for Python 3
+ from itertools import izip_longest as zip_longest
+from requests_oauthlib import OAuth1
import rospy
+from rostwitter.util import count_tweet_text
+from rostwitter.util import split_tweet_text
+from rostwitter.cv_util import extract_media_from_text
+
class Twitter(object):
def __init__(
@@ -54,24 +58,80 @@ def _request_url(self, url, verb, data=None):
)
return 0 # if not a POST or GET request
- def post_update(self, status):
- if len(status) > 140:
- rospy.logwarn('tweet is too longer > 140 characters')
- status = status[:140]
- url = 'https://api.twitter.com/1.1/statuses/update.json'
- data = {'status': StringIO(status)}
- json = self._request_url(url, 'POST', data=data)
- data = simplejson.loads(json.content)
+ def _check_post_request(self, request):
+ valid = True
+ data = simplejson.loads(request.content)
+ if request.status_code != 200:
+ rospy.logwarn('post tweet failed. status_code: {}'
+ .format(request.status_code))
+ if 'errors' in data:
+ for error in data['errors']:
+ rospy.logwarn('Tweet error code: {}, message: {}'
+ .format(error['code'], error['message']))
+ valid = False
+ if valid:
+ return data
+
+ def _post_update_with_reply(self, texts, media_list=None,
+ in_reply_to_status_id=None):
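+        # Twitter allows at most 4 images per tweet, so split the media
+        # into chunks of 4 and post each chunk as a reply in the thread.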
+ split_media_list = []
+ media_list = media_list or []
+ for i in range(0, int(math.ceil(len(media_list) / 4.0))):
+ split_media_list.append(media_list[i * 4:(i + 1) * 4])
+        for text, media_chunk in zip_longest(texts, split_media_list):
+            text = text or ''
+            media_chunk = media_chunk or []
+            url = 'https://api.twitter.com/1.1/statuses/update.json'
+            data = {'status': text}
+            media_ids = self._upload_media(media_chunk)
+ if len(media_ids) > 0:
+ data['media_ids'] = media_ids
+ if in_reply_to_status_id is not None:
+ data['in_reply_to_status_id'] = in_reply_to_status_id
+ r = self._request_url(url, 'POST', data=data)
+ data = self._check_post_request(r)
+ if data is not None:
+ in_reply_to_status_id = data['id']
+ return data
+
+ def _upload_media(self, media_list):
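+        # upload each image via Twitter's media/upload endpoint and return
+        # the collected media ids as a comma-separated string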
+ url = 'https://upload.twitter.com/1.1/media/upload.json'
+ media_ids = []
+ for media in media_list:
+ data = {'media': media}
+ r = self._request_url(url, 'POST', data=data)
+ if r.status_code == 200:
+ rospy.loginfo('upload media success')
+ media_ids.append(str(r.json()['media_id']))
+ else:
+ rospy.logerr('upload media failed. status_code: {}'
+ .format(r.status_code))
+ media_ids = ','.join(media_ids)
+ return media_ids
+
+ def post_update(self, status, in_reply_to_status_id=None):
+ media_list, status_list = extract_media_from_text(status)
+ for text, mlist in zip_longest(status_list, media_list):
+ text = text or ''
+ texts = split_tweet_text(text)
+ data = self._post_update_with_reply(
+ texts,
+ media_list=mlist,
+ in_reply_to_status_id=in_reply_to_status_id)
+ if data is not None:
+ in_reply_to_status_id = data['id']
return data
- def post_media(self, status, media):
- # 116 = 140 - len("http://t.co/ssssssssss")
- if len(status) > 116:
- rospy.logwarn('tweet wit media is too longer > 116 characters')
- status = status[:116]
+ def post_media(self, status, media, in_reply_to_status_id=None):
+ texts = split_tweet_text(status)
+ status = texts[0]
url = 'https://api.twitter.com/1.1/statuses/update_with_media.json'
- data = {'status': StringIO(status)}
+ data = {'status': status}
-        data['media'] = open(str(media), 'rb').read()
+        with open(str(media), 'rb') as f:
+            data['media'] = f.read()
- json = self._request_url(url, 'POST', data=data)
- data = simplejson.loads(json.content)
+ r = self._request_url(url, 'POST', data=data)
+ data = self._check_post_request(r)
+        if data is not None and len(texts) > 1:
+ data = self._post_update_with_reply(
+ texts[1:],
+ in_reply_to_status_id=data['id'])
return data
diff --git a/rostwitter/python/rostwitter/util.py b/rostwitter/python/rostwitter/util.py
index 36a613b46..f5e51471c 100644
--- a/rostwitter/python/rostwitter/util.py
+++ b/rostwitter/python/rostwitter/util.py
@@ -1,4 +1,6 @@
import os
+import sys
+import unicodedata
import yaml
import rospy
@@ -16,9 +18,47 @@ def load_oauth_settings(yaml_path):
rospy.logerr("EOF")
return None, None, None, None
with open(yaml_path, 'r') as f:
- key = yaml.load(f)
+ key = yaml.load(f, Loader=yaml.SafeLoader)
ckey = key['CKEY']
csecret = key['CSECRET']
akey = key['AKEY']
asecret = key['ASECRET']
return ckey, csecret, akey, asecret
+
+
+def count_tweet_text(text):
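+    # weighted tweet length: characters whose east-asian width is
+    # Fullwidth, Wide or Ambiguous count as 2, everything else as 1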
+ count = 0
+ if sys.version_info.major <= 2:
+ text = text.decode('utf-8')
+ for c in text:
+ if unicodedata.east_asian_width(c) in 'FWA':
+ count += 2
+ else:
+ count += 1
+ return count
+
+
+def split_tweet_text(text, length=280):
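+    # split text into chunks that each fit within ``length`` weighted
+    # characters (full-width characters count as 2, as in count_tweet_text)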
+ texts = []
+ split_text = ''
+ count = 0
+ if sys.version_info.major <= 2:
+ text = text.decode('utf-8')
+ for c in text:
+        if count == length + 1:
+            # the last character was full-width (zenkaku) and overflowed
+            # the limit by one, so carry it over to the next tweet.
+            texts.append(split_text[:-1])
+            split_text = split_text[-1:]
+            count = 2
+        elif count == length:
+ texts.append(split_text)
+ split_text = ''
+ count = 0
+ split_text += c
+ if unicodedata.east_asian_width(c) in 'FWA':
+ count += 2
+ else:
+ count += 1
+ if count != 0:
+ texts.append(split_text)
+ return texts
diff --git a/rostwitter/scripts/tweet.py b/rostwitter/scripts/tweet.py
index d4b666959..50c44cf48 100755
--- a/rostwitter/scripts/tweet.py
+++ b/rostwitter/scripts/tweet.py
@@ -32,29 +32,9 @@ def tweet_cb(self, msg):
rospy.loginfo(rospy.get_name() + " sending %s",
''.join([message] if len(message) < 128 else message[0:128]+'......'))
- # search word start from / and end with {.jpeg,.jpg,.png,.gif}
- m = re.search('/\S+\.(jpeg|jpg|png|gif)', message)
- ret = None
- if m:
- filename = m.group(0)
- message = re.sub(filename, "", message)
- if os.path.exists(filename):
- rospy.loginfo(
- rospy.get_name() + " tweet %s with file %s",
- message, filename)
- # 140 - len("http://t.co/ssssssssss")
- ret = self.api.post_media(message[0:116], filename)
- if 'errors' in ret:
- rospy.logerr('Failed to post: {}'.format(ret))
- # ret = self.api.post_update(message)
- else:
- rospy.logerr(rospy.get_name() + " %s could not find", filename)
- else:
- ret = self.api.post_update(message[0:140])
- if 'errors' in ret:
- rospy.logerr('Failed to post: {}'.format(ret))
- # seg faults if message is longer than 140 byte ???
- rospy.loginfo(rospy.get_name() + " receiving %s", ret)
+ ret = self.api.post_update(message)
+ if ret is not None:
+ rospy.loginfo(rospy.get_name() + " receiving %s", ret)
if __name__ == '__main__':