diff --git a/3rdparty/voicevox/.gitignore b/3rdparty/voicevox/.gitignore
new file mode 100644
index 000000000..8cb8e60a3
--- /dev/null
+++ b/3rdparty/voicevox/.gitignore
@@ -0,0 +1,6 @@
+build
+dict
+lib
+node_scripts/voicevox_engine
+requirements.txt
+!.gitignore
diff --git a/3rdparty/voicevox/CMakeLists.txt b/3rdparty/voicevox/CMakeLists.txt
new file mode 100644
index 000000000..631126026
--- /dev/null
+++ b/3rdparty/voicevox/CMakeLists.txt
@@ -0,0 +1,73 @@
+cmake_minimum_required(VERSION 2.8.3)
+project(voicevox)
+
+find_package(catkin REQUIRED
+  COMPONENTS
+  catkin_virtualenv
+)
+
+catkin_python_setup()
+
+set(INSTALL_DIR ${PROJECT_SOURCE_DIR})
+
+catkin_package()
+
+catkin_generate_virtualenv(
+  INPUT_REQUIREMENTS requirements.in
+  PYTHON_INTERPRETER python3
+  USE_SYSTEM_PACKAGES FALSE
+)
+
+add_custom_command(
+  OUTPUT voicevox_model_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.model
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+
+add_custom_command(
+  OUTPUT voicevox_core_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.core
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+add_custom_command(
+  OUTPUT voicevox_engine_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.engine
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+add_custom_command(
+  OUTPUT open_jtalk_dic_installed
+  COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.open_jtalk_dic
+    MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum
+    INSTALL_DIR=${INSTALL_DIR}
+)
+
+add_custom_target(all_installed ALL DEPENDS
+  voicevox_model_installed
+  voicevox_core_installed
+  voicevox_engine_installed
+  open_jtalk_dic_installed)
+
+file(GLOB NODE_SCRIPTS_FILES node_scripts/*.py)
+catkin_install_python(
+  PROGRAMS ${NODE_SCRIPTS_FILES}
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}/node_scripts/
+)
+install(DIRECTORY node_scripts/voicevox_engine
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/catkin_virtualenv_scripts/
+  USE_SOURCE_PERMISSIONS)
+install(DIRECTORY launch dict
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+  USE_SOURCE_PERMISSIONS)
+install(PROGRAMS bin/text2wave
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin)
+
+install(DIRECTORY
+  ${INSTALL_DIR}/lib
+  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+  USE_SOURCE_PERMISSIONS)
diff --git a/3rdparty/voicevox/Makefile b/3rdparty/voicevox/Makefile
new file mode 100644
index 000000000..a2c90f3bb
--- /dev/null
+++ b/3rdparty/voicevox/Makefile
@@ -0,0 +1,11 @@
+all:
+	make -f Makefile.core
+	make -f Makefile.model
+	make -f Makefile.engine
+	make -f Makefile.open_jtalk_dic
+clean:
+	make -f Makefile.core clean
+	make -f Makefile.model clean
+	make -f Makefile.engine clean
+	make -f Makefile.open_jtalk_dic clean
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.core b/3rdparty/voicevox/Makefile.core
new file mode 100644
index 000000000..bac21eb0f
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.core
@@ -0,0 +1,28 @@
+# -*- makefile -*-
+
+all: installed.voicevox_core
+
+VERSION = 0.11.4
+FILENAME = core.zip
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/VOICEVOX/voicevox_core/releases/download/$(VERSION)/core.zip"
+SOURCE_DIR = build/core
+UNPACK_CMD = unzip
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.voicevox_core: $(SOURCE_DIR)/unpacked
+	mkdir -p $(INSTALL_DIR)/lib
+	cp build/core/lib*.so $(INSTALL_DIR)/lib/
+	cp build/core/*.bin $(INSTALL_DIR)/lib/
+	cp build/core/metas.json $(INSTALL_DIR)/lib/metas.json
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/lib
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.engine b/3rdparty/voicevox/Makefile.engine
new file mode 100644
index 000000000..b3d6899fa
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.engine
@@ -0,0 +1,24 @@
+# -*- makefile -*-
+
+all: installed.voicevox_engine
+
+VERSION = 0.11.4
+FILENAME = $(VERSION).tar.gz
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/VOICEVOX/voicevox_engine/archive/refs/tags/$(FILENAME)"
+SOURCE_DIR = build/voicevox_engine-$(VERSION)
+UNPACK_CMD = tar xvzf
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/voicevox_engine.tar.gz.md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.voicevox_engine: $(SOURCE_DIR)/unpacked
+	cp -r build/voicevox_engine-$(VERSION) $(INSTALL_DIR)/node_scripts/voicevox_engine
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/node_scripts/voicevox_engine
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.model b/3rdparty/voicevox/Makefile.model
new file mode 100644
index 000000000..004028105
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.model
@@ -0,0 +1,26 @@
+# -*- makefile -*-
+
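+# Note: despite the "model" name, this Makefile fetches the ONNX Runtime
+# shared libraries that voicevox_core needs at runtime.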
+all: installed.voicevox_model
+
+VERSION = 1.10.0
+FILENAME = onnxruntime-linux-x64-$(VERSION).tgz
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/microsoft/onnxruntime/releases/download/v$(VERSION)/$(FILENAME)"
+SOURCE_DIR = build/onnxruntime-linux-x64-$(VERSION)
+UNPACK_CMD = tar xvzf
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.voicevox_model: $(SOURCE_DIR)/unpacked
+	mkdir -p $(INSTALL_DIR)/lib
+	cp build/onnxruntime-linux-x64-$(VERSION)/lib/* $(INSTALL_DIR)/lib
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/lib
+	rm -rf build
diff --git a/3rdparty/voicevox/Makefile.open_jtalk_dic b/3rdparty/voicevox/Makefile.open_jtalk_dic
new file mode 100644
index 000000000..646921159
--- /dev/null
+++ b/3rdparty/voicevox/Makefile.open_jtalk_dic
@@ -0,0 +1,25 @@
+# -*- makefile -*-
+
+all: installed.open_jtalk_dic
+
+VERSION = 1.11.1
+FILENAME = open_jtalk_dic_utf_8-1.11.tar.gz
+TARBALL = build/$(FILENAME)
+TARBALL_URL = "https://github.com/r9y9/open_jtalk/releases/download/v$(VERSION)/$(FILENAME)"
+SOURCE_DIR = build/open_jtalk_dic_utf_8-1.11
+UNPACK_CMD = tar xvzf
+MD5SUM_DIR = $(CURDIR)/md5sum
+MD5SUM_FILE = $(MD5SUM_DIR)/open_jtalk_dic.tar.gz.md5sum
+include $(shell rospack find mk)/download_unpack_build.mk
+INSTALL_DIR = './'
+
+
+installed.open_jtalk_dic: $(SOURCE_DIR)/unpacked
+	mkdir -p $(INSTALL_DIR)/dict
+	cp -r build/open_jtalk_dic_utf_8-1.11 $(INSTALL_DIR)/dict
+
+clean:
+	rm -rf $(TARBALL)
+	rm -rf $(SOURCE_DIR)
+	rm -rf $(INSTALL_DIR)/dict/open_jtalk_dic_utf_8-1.11
+	rm -rf build
diff --git a/3rdparty/voicevox/README.md b/3rdparty/voicevox/README.md
new file mode 100644
index 000000000..d5602db71
--- /dev/null
+++ b/3rdparty/voicevox/README.md
@@ -0,0 +1,103 @@
+# voicevox
+
+ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis)
+
+## Terms of Use
+
+[VOICEVOX](https://voicevox.hiroshiba.jp/) is basically free to use, but please check its terms of use below.
+
+[Terms of Use](https://voicevox.hiroshiba.jp/term)
+
+Each voice synthesis character also has its own terms. Please use this package in accordance with them.
+
+| Character name | term link |
+| ---- | ---- |
+| 四国めたん | https://zunko.jp/con_ongen_kiyaku.html |
+| ずんだもん | https://zunko.jp/con_ongen_kiyaku.html |
+| 春日部つむぎ | https://tsukushinyoki10.wixsite.com/ktsumugiofficial/利用規約 |
+| 波音リツ | http://canon-voice.com/kiyaku.html |
+| 雨晴はう | https://amehau.com/?page_id=225 |
+| 玄野武宏 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
+| 白上虎太郎 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
+| 青山龍星 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 |
+| 冥鳴ひまり | https://kotoran8zunzun.wixsite.com/my-site/利用規約 |
+| 九州そら | https://zunko.jp/con_ongen_kiyaku.html |
+
+## Installation
+
+Build this package.
+
+```bash
+cd /path/to/catkin_workspace
+catkin build voicevox
+```
+
+## Usage
+
+### Launch sound_play with VOICEVOX Text-to-Speech
+
+```bash
+roslaunch voicevox voicevox_texttospeech.launch
+```
+
+
+### Say something
+
+#### For python users
+
+```python
+import rospy
+from sound_play.libsoundplay import SoundClient
+
+rospy.init_node('say_node')
+
+client = SoundClient(sound_action='robotsound_jp', sound_topic='robotsound_jp')
+
+client.say('こんにちは', voice='四国めたん-あまあま')
+```
+
+You can change the voice by passing a different voice name.
+You can also specify the speaker id instead; an example follows the table below.
+See the following table for details.
+
+| speaker_id | voice_name |
+| ---- | ---- |
+| 0 | 四国めたん-あまあま |
+| 1 | ずんだもん-あまあま |
+| 2 | 四国めたん-ノーマル |
+| 3 | ずんだもん-ノーマル |
+| 4 | 四国めたん-セクシー |
+| 5 | ずんだもん-セクシー |
+| 6 | 四国めたん-ツンツン |
+| 7 | ずんだもん-ツンツン |
+| 8 | 春日部つむぎ-ノーマル |
+| 9 | 波音リツ-ノーマル |
+| 10 | 雨晴はう-ノーマル |
+| 11 | 玄野武宏-ノーマル |
+| 12 | 白上虎太郎-ノーマル |
+| 13 | 青山龍星-ノーマル |
+| 14 | 冥鳴ひまり-ノーマル |
+| 15 | 九州そら-あまあま |
+| 16 | 九州そら-ノーマル |
+| 17 | 九州そら-セクシー |
+| 18 | 九州そら-ツンツン |
+| 19 | 九州そら-ささやき |
+
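+You can also pass the speaker id directly, e.g. `client.say('こんにちは', voice='3')` speaks as ずんだもん-ノーマル.
+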
+#### For roseus users
+
+```
+$ roseus
+(load "package://pr2eus/speak.l")
+
+(ros::roseus "say_node")
+
+(speak "JSKへようこそ。" :lang "波音リツ" :wait t :topic-name "robotsound_jp")
+```
+
+### Tips
+
+Normally, the speech synthesis server starts up at `http://localhost:50021`.
+You can change the host and port by setting the `VOICEVOX_TEXTTOSPEECH_URL` and `VOICEVOX_TEXTTOSPEECH_PORT` environment variables.
+
+You can also set the default character by setting `VOICEVOX_DEFAULT_SPEAKER_ID`.
+Please refer to [Say something](#say-something) for the speaker ids.
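+
+For example, to make ずんだもん-ノーマル the default voice (a minimal sketch; `3` is the speaker id from the table above):
+
+```bash
+export VOICEVOX_DEFAULT_SPEAKER_ID=3
+roslaunch voicevox voicevox_texttospeech.launch
+```
+
+If the synthesis server runs elsewhere, also export `VOICEVOX_TEXTTOSPEECH_URL` (host name) and `VOICEVOX_TEXTTOSPEECH_PORT` before launching.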
diff --git a/3rdparty/voicevox/bin/text2wave b/3rdparty/voicevox/bin/text2wave
new file mode 100755
index 000000000..ca9630f39
--- /dev/null
+++ b/3rdparty/voicevox/bin/text2wave
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import argparse
+import os
+import shutil
+import sys
+
+import requests
+
+from voicevox.filecheck_utils import checksum_md5
+from voicevox.filecheck_utils import get_cache_dir
+
+
+speaker_id_to_name = {
+    '0': '四国めたん-あまあま',
+    '1': 'ずんだもん-あまあま',
+    '2': '四国めたん-ノーマル',
+    '3': 'ずんだもん-ノーマル',
+    '4': '四国めたん-セクシー',
+    '5': 'ずんだもん-セクシー',
+    '6': '四国めたん-ツンツン',
+    '7': 'ずんだもん-ツンツン',
+    '8': '春日部つむぎ-ノーマル',
+    '9': '波音リツ-ノーマル',
+    '10': '雨晴はう-ノーマル',
+    '11': '玄野武宏-ノーマル',
+    '12': '白上虎太郎-ノーマル',
+    '13': '青山龍星-ノーマル',
+    '14': '冥鳴ひまり-ノーマル',
+    '15': '九州そら-あまあま',
+    '16': '九州そら-ノーマル',
+    '17': '九州そら-セクシー',
+    '18': '九州そら-ツンツン',
+    '19': '九州そら-ささやき',
+}
+
+name_to_speaker_id = {
+    b: a for a, b in speaker_id_to_name.items()
+}
+
+
+DEFAULT_SPEAKER_ID = os.environ.get(
+    'VOICEVOX_DEFAULT_SPEAKER_ID', '2')
+if not DEFAULT_SPEAKER_ID.isdigit():
+    DEFAULT_SPEAKER_ID = name_to_speaker_id[DEFAULT_SPEAKER_ID]
+VOICEVOX_TEXTTOSPEECH_URL = os.environ.get(
+    'VOICEVOX_TEXTTOSPEECH_URL', 'localhost')
+VOICEVOX_TEXTTOSPEECH_PORT = os.environ.get(
+    'VOICEVOX_TEXTTOSPEECH_PORT', 50021)
+cache_enabled = os.environ.get(
+    'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_ENABLED', True)
+cache_enabled = cache_enabled is True \
+    or cache_enabled == 'true'  # for launch env tag.
+
+
+def determine_voice_name(voice_name):
+    if len(voice_name) == 0:
+        speaker_id = DEFAULT_SPEAKER_ID
+    else:
+        if voice_name.isdigit():
+            if voice_name in speaker_id_to_name:
+                speaker_id = voice_name
+            else:
+                print(
+                    '[Text2Wave] Invalid speaker_id ({}). Use default voice.'
+                    .format(speaker_id_to_name[DEFAULT_SPEAKER_ID]))
+                speaker_id = DEFAULT_SPEAKER_ID
+        else:
+            candidates = list(filter(
+                lambda name: name.startswith(voice_name),
+                name_to_speaker_id))
+            if candidates:
+                speaker_id = name_to_speaker_id[candidates[0]]
+            else:
+                print('[Text2Wave] Invalid voice_name ({}). Use default voice.'
+                      .format(speaker_id_to_name[DEFAULT_SPEAKER_ID]))
+                speaker_id = DEFAULT_SPEAKER_ID
+    print('[Text2Wave] Speak using voice_name ({})..'.format(
+        speaker_id_to_name[speaker_id]))
+    return speaker_id
+
+
+def convert_to_str(x):
+    if isinstance(x, str):
+        pass
+    elif isinstance(x, bytes):
+        x = x.decode('utf-8')
+    else:
+        raise ValueError(
+            'Invalid input x type: {}'
+            .format(type(x)))
+    return x
+
+
+def request_synthesis(
+        sentence, output_path, speaker_id='1'):
+    headers = {'accept': 'application/json'}
+
+    sentence = convert_to_str(sentence)
+    speaker_id = convert_to_str(speaker_id)
+    params = {
+        'speaker': speaker_id,
+        'text': sentence,
+    }
+    base_url = 'http://{}:{}'.format(
+        VOICEVOX_TEXTTOSPEECH_URL,
+        VOICEVOX_TEXTTOSPEECH_PORT)
+    url = '{}/audio_query'.format(base_url)
+    response = requests.post(url, headers=headers,
+                             params=params)
+    data = response.json()
+    url = '{}/synthesis'.format(base_url)
+    response = requests.post(url, headers=headers,
+                             params=params,
+                             json=data)
+    with open(output_path, 'wb') as f:
+        f.write(response.content)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='')
+    parser.add_argument('-eval', '--evaluate')
+    parser.add_argument('-o', '--output')
+    parser.add_argument('text')
+    args = parser.parse_args()
+
+    with open(args.text, 'rb') as f:
+        speech_text = f.readline()
+
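+    # sound_play invokes this script like festival's text2wave and passes the
+    # voice name as "-eval (voice_name)", so strip the surrounding parentheses.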
+    speaker_id = determine_voice_name(
+        args.evaluate.lstrip('(').rstrip(')'))
+
+    if cache_enabled:
+        cache_dir = get_cache_dir()
+        md5 = checksum_md5(args.text)
+        cache_filename = os.path.join(
+            cache_dir,
+            '--'.join([md5, speaker_id])
+            + '.wav')
+        if os.path.exists(cache_filename):
+            print('[Text2Wave] Using cached sound file ({}) for {}'
+                  .format(cache_filename, speech_text.decode('utf-8')))
+            shutil.copy(cache_filename, args.output)
+            sys.exit(0)
+
+    request_synthesis(speech_text,
+                      args.output,
+                      speaker_id)
+    if cache_enabled:
+        text_cache_filename = os.path.splitext(cache_filename)[0] + '.txt'
+        shutil.copy(args.text, text_cache_filename)
+        shutil.copy(args.output, cache_filename)
diff --git a/3rdparty/voicevox/launch/voicevox_texttospeech.launch b/3rdparty/voicevox/launch/voicevox_texttospeech.launch
new file mode 100644
index 000000000..d42d1961f
--- /dev/null
+++ b/3rdparty/voicevox/launch/voicevox_texttospeech.launch
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/3rdparty/voicevox/md5sum/core.zip.md5sum b/3rdparty/voicevox/md5sum/core.zip.md5sum
new file mode 100644
index 000000000..f5b5ac439
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/core.zip.md5sum
@@ -0,0 +1 @@
+96149a074d8ee093039321a88e00076d core.zip
diff --git a/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum b/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum
new file mode 100644
index 000000000..817b68d89
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum
@@ -0,0 +1 @@
+9ca61e2009a16cf8a1e9ab9ad0655009 onnxruntime-linux-x64-1.10.0.tgz
diff --git a/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum b/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum
new file mode 100644
index 000000000..8ce4bb07b
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum
@@ -0,0 +1 @@
+ba02dac4143492c3790f949be224dfdf open_jtalk_dic_utf_8-1.11.tar.gz
diff --git a/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum b/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum
new file mode 100644
index 000000000..5947e3633
--- /dev/null
+++ b/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum
@@ -0,0 +1 @@
+997bf9e915f7d6288c923ab1ff5f4ff6 0.11.4.tar.gz
diff --git a/3rdparty/voicevox/node_scripts/server.py b/3rdparty/voicevox/node_scripts/server.py
new file mode 100644
index 000000000..add596aff
--- /dev/null
+++ b/3rdparty/voicevox/node_scripts/server.py
@@ -0,0 +1,573 @@
+#!/usr/bin/env python3
+
+# This code was created based on the following link's code.
+# https://github.com/VOICEVOX/voicevox_engine/blob/0.11.4/run.py
+
+import base64
+from distutils.version import LooseVersion
+from functools import lru_cache
+import imp
+import json
+import multiprocessing
+import os
+import os.path as osp
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from tempfile import TemporaryFile
+from typing import Dict
+from typing import List
+from typing import Optional
+import zipfile
+
+from fastapi import FastAPI
+from fastapi import HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.params import Query
+from fastapi import Response
+import rospkg
+import rospy
+import soundfile
+from starlette.responses import FileResponse
+import uvicorn
+
+
+PKG_NAME = 'voicevox'
+abs_path = osp.dirname(osp.abspath(__file__))
+voicevox_engine = imp.load_package(
+    'voicevox_engine', osp.join(abs_path, 'voicevox_engine/voicevox_engine'))
+rospack = rospkg.RosPack()
+voicevox_dir = rospack.get_path(PKG_NAME)
+voicevox_lib_dir = osp.join(voicevox_dir, 'lib')
+# set pyopenjtalk's dic.tar.gz file
+os.environ['OPEN_JTALK_DICT_DIR'] = osp.join(
+    voicevox_dir, 'dict', 'open_jtalk_dic_utf_8-1.11')
+
+
+from voicevox_engine import __version__
+from voicevox_engine.kana_parser import create_kana
+from voicevox_engine.kana_parser import parse_kana
+from voicevox_engine.model import AccentPhrase
+from voicevox_engine.model import AudioQuery
+from voicevox_engine.model import ParseKanaBadRequest
+from voicevox_engine.model import ParseKanaError
+from voicevox_engine.model import Speaker
+from voicevox_engine.model import SpeakerInfo
+from voicevox_engine.model import SupportedDevicesInfo
+from voicevox_engine.morphing import \
+    synthesis_morphing_parameter as _synthesis_morphing_parameter
+from voicevox_engine.morphing import synthesis_morphing
+from voicevox_engine.preset import Preset
+from voicevox_engine.preset import PresetLoader
+from voicevox_engine.synthesis_engine import make_synthesis_engines
+from voicevox_engine.synthesis_engine import SynthesisEngineBase
+from voicevox_engine.user_dict import user_dict_startup_processing
+from voicevox_engine.utility import connect_base64_waves
+from voicevox_engine.utility import ConnectBase64WavesException
+from voicevox_engine.utility import engine_root
+
+
+def b64encode_str(s):
+    return base64.b64encode(s).decode("utf-8")
+
+
+def generate_app(
+    synthesis_engines: Dict[str, SynthesisEngineBase], latest_core_version: str
+) -> FastAPI:
+    root_dir = engine_root()
+
+    default_sampling_rate = synthesis_engines[latest_core_version].default_sampling_rate
+
+    app = FastAPI(
+        title="VOICEVOX ENGINE",
+        description="The VOICEVOX speech synthesis engine.",
+        version=__version__,
+    )
+
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    preset_loader = PresetLoader(
+        preset_path=root_dir / "presets.yaml",
+    )
+
+    # Enable caching.
+    # lru_cache is applied here on the HTTP-server side because applying it inside the module makes the cache hard to control.
+    # TODO: provide module-side and HTTP-side APIs to manage the cache.
+    synthesis_morphing_parameter = lru_cache(maxsize=4)(_synthesis_morphing_parameter)
+
+    # @app.on_event("startup")
+    # async def start_catch_disconnection():
+    #     if args.enable_cancellable_synthesis:
+    #         loop = asyncio.get_event_loop()
+    #         _ = loop.create_task(cancellable_engine.catch_disconnection())
+
+    @app.on_event("startup")
+    def apply_user_dict():
+        user_dict_startup_processing()
+
+    def get_engine(core_version: Optional[str]) -> SynthesisEngineBase:
+        if core_version is None:
+            return synthesis_engines[latest_core_version]
+        if core_version in synthesis_engines:
+            return synthesis_engines[core_version]
+        raise HTTPException(status_code=422, detail="Unknown version")
+
+    @app.post(
+        "/audio_query",
+        response_model=AudioQuery,
+        tags=["Query Creation"],
+        summary="Create a query for speech synthesis",
+    )
+    def audio_query(text: str, speaker: int, core_version: Optional[str] = None):
+        """
+        Returns a query with default values filled in. The query can be passed to speech synthesis as is. See `Schemas` for the meaning of each value.
+        """
+        engine = get_engine(core_version)
+        accent_phrases = engine.create_accent_phrases(text, speaker_id=speaker)
+        return AudioQuery(
+            accent_phrases=accent_phrases,
+            speedScale=1,
+            pitchScale=0,
+            intonationScale=1,
+            volumeScale=1,
+            prePhonemeLength=0.1,
+            postPhonemeLength=0.1,
+            outputSamplingRate=default_sampling_rate,
+            outputStereo=False,
+            kana=create_kana(accent_phrases),
+        )
+
+    @app.post(
+        "/audio_query_from_preset",
+        response_model=AudioQuery,
+        tags=["Query Creation"],
+        summary="Create a query for speech synthesis from a preset",
+    )
+    def audio_query_from_preset(
+        text: str, preset_id: int, core_version: Optional[str] = None
+    ):
+        """
+        Returns a query with default values filled in. The query can be passed to speech synthesis as is. See `Schemas` for the meaning of each value.
+        """
+        engine = get_engine(core_version)
+        presets, err_detail = preset_loader.load_presets()
+        if err_detail:
+            raise HTTPException(status_code=422, detail=err_detail)
+        for preset in presets:
+            if preset.id == preset_id:
+                selected_preset = preset
+                break
+        else:
+            raise HTTPException(status_code=422, detail="No preset found for the given preset ID")
+
+        accent_phrases = engine.create_accent_phrases(
+            text, speaker_id=selected_preset.style_id
+        )
+        return AudioQuery(
+            accent_phrases=accent_phrases,
+            speedScale=selected_preset.speedScale,
+            pitchScale=selected_preset.pitchScale,
+            intonationScale=selected_preset.intonationScale,
+            volumeScale=selected_preset.volumeScale,
+            prePhonemeLength=selected_preset.prePhonemeLength,
+            postPhonemeLength=selected_preset.postPhonemeLength,
+            outputSamplingRate=default_sampling_rate,
+            outputStereo=False,
+            kana=create_kana(accent_phrases),
+        )
+
+    @app.post(
+        "/accent_phrases",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get accent phrases from text",
+        responses={
+            400: {
+                "description": "Failed to parse the kana reading",
+                "model": ParseKanaBadRequest,
+            }
+        },
+    )
+    def accent_phrases(
+        text: str,
+        speaker: int,
+        is_kana: bool = False,
+        core_version: Optional[str] = None,
+    ):
+        """
+        Gets accent phrases from text.
+        When is_kana is `true`, the text is treated as kana readings in the AquesTalk-like notation below. The default is `false`.
+        * All kana are written in katakana.
+        * Accent phrases are separated by `/` or `、`. A silent pause is inserted only when they are separated by `、`.
+        * Putting `_` before a kana devoices it.
+        * The accent position is specified with `'`. Every accent phrase must have exactly one accent position.
+        * Appending a full-width `?` to an accent phrase produces question intonation.
+        """
+        engine = get_engine(core_version)
+        if is_kana:
+            try:
+                accent_phrases = parse_kana(text)
+            except ParseKanaError as err:
+                raise HTTPException(
+                    status_code=400,
+                    detail=ParseKanaBadRequest(err).dict(),
+                )
+            accent_phrases = engine.replace_mora_data(
+                accent_phrases=accent_phrases, speaker_id=speaker
+            )
+
+            return accent_phrases
+        else:
+            return engine.create_accent_phrases(text, speaker_id=speaker)
+
+    @app.post(
+        "/mora_data",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get pitch and phoneme lengths from accent phrases",
+    )
+    def mora_data(
+        accent_phrases: List[AccentPhrase],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return engine.replace_mora_data(accent_phrases, speaker_id=speaker)
+
+    @app.post(
+        "/mora_length",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get phoneme lengths from accent phrases",
+    )
+    def mora_length(
+        accent_phrases: List[AccentPhrase],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return engine.replace_phoneme_length(
+            accent_phrases=accent_phrases, speaker_id=speaker
+        )
+
+    @app.post(
+        "/mora_pitch",
+        response_model=List[AccentPhrase],
+        tags=["Query Editing"],
+        summary="Get pitch from accent phrases",
+    )
+    def mora_pitch(
+        accent_phrases: List[AccentPhrase],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return engine.replace_mora_pitch(
+            accent_phrases=accent_phrases, speaker_id=speaker
+        )
+
+    @app.post(
+        "/synthesis",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["Speech Synthesis"],
+        summary="Synthesize speech",
+    )
+    def synthesis(
+        query: AudioQuery,
+        speaker: int,
+        enable_interrogative_upspeak: bool = Query(  # noqa: B008
+            default=True,
+            description="Automatically adjust the ending intonation when interrogative text is given",
+        ),
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        wave = engine.synthesis(
+            query=query,
+            speaker_id=speaker,
+            enable_interrogative_upspeak=enable_interrogative_upspeak,
+        )
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
+            )
+
+        return FileResponse(f.name, media_type="audio/wav")
+
+    @app.post(
+        "/multi_synthesis",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "application/zip": {
+                        "schema": {"type": "string", "format": "binary"}
+                    }
+                },
+            }
+        },
+        tags=["Speech Synthesis"],
+        summary="Synthesize speech for multiple queries at once",
+    )
+    def multi_synthesis(
+        queries: List[AudioQuery],
+        speaker: int,
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        sampling_rate = queries[0].outputSamplingRate
+
+        with NamedTemporaryFile(delete=False) as f:
+
+            with zipfile.ZipFile(f, mode="a") as zip_file:
+
+                for i in range(len(queries)):
+
+                    if queries[i].outputSamplingRate != sampling_rate:
+                        raise HTTPException(
+                            status_code=422, detail="Queries have different sampling rates"
+                        )
+
+                    with TemporaryFile() as wav_file:
+
+                        wave = engine.synthesis(query=queries[i], speaker_id=speaker)
+                        soundfile.write(
+                            file=wav_file,
+                            data=wave,
+                            samplerate=sampling_rate,
+                            format="WAV",
+                        )
+                        wav_file.seek(0)
+                        zip_file.writestr(f"{str(i + 1).zfill(3)}.wav", wav_file.read())
+
+        return FileResponse(f.name, media_type="application/zip")
+
+    @app.post(
+        "/synthesis_morphing",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["Speech Synthesis"],
+        summary="Synthesize speech morphed between two speakers",
+    )
+    def _synthesis_morphing(
+        query: AudioQuery,
+        base_speaker: int,
+        target_speaker: int,
+        morph_rate: float = Query(..., ge=0.0, le=1.0),  # noqa: B008
+        core_version: Optional[str] = None,
+    ):
+        """
+        Synthesizes speech with the two given speakers and morphs between them at the given rate.
+        The rate is specified by `morph_rate`: 0.0 is closest to the base speaker, 1.0 to the target speaker.
+        """
+        engine = get_engine(core_version)
+
+        # the generated parameters are cached
+        morph_param = synthesis_morphing_parameter(
+            engine=engine,
+            query=query,
+            base_speaker=base_speaker,
+            target_speaker=target_speaker,
+        )
+
+        morph_wave = synthesis_morphing(
+            morph_param=morph_param,
+            morph_rate=morph_rate,
+            output_stereo=query.outputStereo,
+        )
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f,
+                data=morph_wave,
+                samplerate=morph_param.fs,
+                format="WAV",
+            )
+
+        return FileResponse(f.name, media_type="audio/wav")
+
+    @app.post(
+        "/connect_waves",
+        response_class=FileResponse,
+        responses={
+            200: {
+                "content": {
+                    "audio/wav": {"schema": {"type": "string", "format": "binary"}}
+                },
+            }
+        },
+        tags=["Others"],
+        summary="Concatenate multiple base64-encoded wav files into one",
+    )
+    def connect_waves(waves: List[str]):
+        """
+        Concatenates the base64-encoded wav data and returns the result as a single wav file.
+        """
+        try:
+            waves_nparray, sampling_rate = connect_base64_waves(waves)
+        except ConnectBase64WavesException as err:
+            return HTTPException(status_code=422, detail=str(err))
+
+        with NamedTemporaryFile(delete=False) as f:
+            soundfile.write(
+                file=f,
+                data=waves_nparray,
+                samplerate=sampling_rate,
+                format="WAV",
+            )
+
+        return FileResponse(f.name, media_type="audio/wav")
+
+    @app.get("/presets", response_model=List[Preset], tags=["Others"])
+    def get_presets():
+        """
+        Returns the preset settings held by the engine.
+
+        Returns
+        -------
+        presets: List[Preset]
+            list of presets
+        """
+        presets, err_detail = preset_loader.load_presets()
+        if err_detail:
+            raise HTTPException(status_code=422, detail=err_detail)
+        return presets
+
+    @app.get("/version", tags=["Others"])
+    def version() -> str:
+        return __version__
+
+    @app.get("/core_versions", response_model=List[str], tags=["Others"])
+    def core_versions() -> List[str]:
+        return Response(
+            content=json.dumps(list(synthesis_engines.keys())),
+            media_type="application/json",
+        )
+
+    @app.get("/speakers", response_model=List[Speaker], tags=["Others"])
+    def speakers(
+        core_version: Optional[str] = None,
+    ):
+        engine = get_engine(core_version)
+        return Response(
+            content=engine.speakers,
+            media_type="application/json",
+        )
+
+    @app.get("/speaker_info", response_model=SpeakerInfo, tags=["Others"])
+    def speaker_info(speaker_uuid: str, core_version: Optional[str] = None):
+        """
+        Returns information about the given speaker_uuid as JSON.
+        Images and voice samples are returned base64-encoded.
+
+        Returns
+        -------
+        ret_data: SpeakerInfo
+        """
+        speakers = json.loads(get_engine(core_version).speakers)
+        for i in range(len(speakers)):
+            if speakers[i]["speaker_uuid"] == speaker_uuid:
+                speaker = speakers[i]
+                break
+        else:
+            raise HTTPException(status_code=404, detail="No matching speaker found")
+
+        try:
+            policy = (root_dir / f"speaker_info/{speaker_uuid}/policy.md").read_text(
+                "utf-8"
+            )
+            portrait = b64encode_str(
+                (root_dir / f"speaker_info/{speaker_uuid}/portrait.png").read_bytes()
+            )
+            style_infos = []
+            for style in speaker["styles"]:
+                id = style["id"]
+                icon = b64encode_str(
+                    (
+                        root_dir / f"speaker_info/{speaker_uuid}/icons/{id}.png"
+                    ).read_bytes()
+                )
+                voice_samples = [
+                    b64encode_str(
+                        (
+                            root_dir
+                            / "speaker_info/{}/voice_samples/{}_{}.wav".format(
+                                speaker_uuid, id, str(j + 1).zfill(3)
+                            )
+                        ).read_bytes()
+                    )
+                    for j in range(3)
+                ]
+                style_infos.append(
+                    {"id": id, "icon": icon, "voice_samples": voice_samples}
+                )
+        except FileNotFoundError:
+            import traceback
+
+            traceback.print_exc()
+            raise HTTPException(status_code=500, detail="The additional information was not found")
+
+        ret_data = {"policy": policy, "portrait": portrait, "style_infos": style_infos}
+        return ret_data
+
+    @app.get("/supported_devices", response_model=SupportedDevicesInfo, tags=["Others"])
+    def supported_devices(
+        core_version: Optional[str] = None,
+    ):
+        supported_devices = get_engine(core_version).supported_devices
+        if supported_devices is None:
+            raise HTTPException(status_code=422, detail="This feature is not supported.")
+        return Response(
+            content=supported_devices,
+            media_type="application/json",
+        )
+
+    return app
+
+
+if __name__ == "__main__":
+    multiprocessing.freeze_support()
+    rospy.init_node('voicevox_server')
+
+    voicelib_dir = [Path(voicevox_lib_dir)]
+    use_gpu = False
+    host = rospy.get_param('~host', "127.0.0.1")
+    port = rospy.get_param('~port', 50021)
+    cpu_num_threads = rospy.get_param('~cpu_num_threads', None)
+    if cpu_num_threads is None:
+        cpu_num_threads = multiprocessing.cpu_count()
+
+    synthesis_engines = make_synthesis_engines(
+        use_gpu=use_gpu,
+        voicelib_dirs=voicelib_dir,
+        cpu_num_threads=cpu_num_threads,
+    )
+    if len(synthesis_engines) == 0:
+        rospy.logerr("No speech synthesis engine is available.")
+    latest_core_version = str(max([LooseVersion(ver)
+                                   for ver in synthesis_engines]))
+
+    uvicorn.run(
+        generate_app(synthesis_engines, latest_core_version),
+        host=host,
+        port=port,
+    )
diff --git a/3rdparty/voicevox/package.xml b/3rdparty/voicevox/package.xml
new file mode 100644
index 000000000..5240c3468
--- /dev/null
+++ b/3rdparty/voicevox/package.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<package format="3">
+
+  <name>voicevox</name>
+  <version>0.0.1</version>
+  <description>VOICEVOX: AI speech synthesis</description>
+  <maintainer>Iori Yanokura</maintainer>
+
+  <license>MIT</license>
+
+  <url>http://ros.org/wiki/voicevox</url>
+
+  <author>Iori Yanokura</author>
+
+  <buildtool_depend>catkin</buildtool_depend>
+  <build_depend>catkin_virtualenv</build_depend>
+
+  <build_depend>mk</build_depend>
+  <build_depend>roslib</build_depend>
+  <build_depend>rospack</build_depend>
+  <build_depend>unzip</build_depend>
+  <build_depend>wget</build_depend>
+
+  <exec_depend>python3</exec_depend>
+  <exec_depend>python3-requests</exec_depend>
+  <exec_depend>sound_play</exec_depend>
+  <exec_depend>unzip</exec_depend>
+  <exec_depend>wget</exec_depend>
+
+  <export>
+    <pip_requirements>requirements.txt</pip_requirements>
+  </export>
+
+</package>
diff --git a/3rdparty/voicevox/python/voicevox/__init__.py b/3rdparty/voicevox/python/voicevox/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/3rdparty/voicevox/python/voicevox/filecheck_utils.py b/3rdparty/voicevox/python/voicevox/filecheck_utils.py
new file mode 100644
index 000000000..6c881b5f5
--- /dev/null
+++ b/3rdparty/voicevox/python/voicevox/filecheck_utils.py
@@ -0,0 +1,43 @@
+import hashlib
+import os
+
+
+def get_cache_dir():
+    """Return cache dir.
+
+    Returns
+    -------
+    cache_dir : str
+        cache directory.
+    """
+    ros_home = os.getenv('ROS_HOME', os.path.expanduser('~/.ros'))
+    pkg_ros_home = os.path.join(ros_home, 'voicevox_texttospeech')
+    default_cache_dir = os.path.join(pkg_ros_home, 'cache')
+    cache_dir = os.environ.get(
+        'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_DIR',
+        default_cache_dir)
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    return cache_dir
+
+
+def checksum_md5(filename, blocksize=8192):
+    """Calculate md5sum.
+
+    Parameters
+    ----------
+    filename : str or pathlib.Path
+        input filename.
+    blocksize : int
+        MD5 has 128-byte digest blocks (default: 8192 is 128x64).
+
+    Returns
+    -------
+    md5 : str
+        calculated md5sum.
+    """
+    filename = str(filename)
+    hash_factory = hashlib.md5()
+    with open(filename, 'rb') as f:
+        for chunk in iter(lambda: f.read(blocksize), b''):
+            hash_factory.update(chunk)
+    return hash_factory.hexdigest()
diff --git a/3rdparty/voicevox/requirements.in b/3rdparty/voicevox/requirements.in
new file mode 100644
index 000000000..c9cfd223a
--- /dev/null
+++ b/3rdparty/voicevox/requirements.in
@@ -0,0 +1,11 @@
+PyYAML
+aiofiles
+appdirs
+fastapi
+git+https://github.com/VOICEVOX/pyopenjtalk@a85521a0a0f298f08d9e9b24987b3c77eb4aaff5#egg=pyopenjtalk
+numpy
+python-multipart
+pyworld
+scipy
+soundfile
+uvicorn
diff --git a/3rdparty/voicevox/setup.py b/3rdparty/voicevox/setup.py
new file mode 100644
index 000000000..939174bc8
--- /dev/null
+++ b/3rdparty/voicevox/setup.py
@@ -0,0 +1,12 @@
+from distutils.core import setup
+
+from catkin_pkg.python_setup import generate_distutils_setup
+from setuptools import find_packages
+
+
+d = generate_distutils_setup(
+    packages=find_packages('python'),
+    package_dir={'': 'python'},
+)
+
+setup(**d)
diff --git a/dialogflow_task_executive/node_scripts/task_executive.py b/dialogflow_task_executive/node_scripts/task_executive.py
index 686327a55..e826d50d7 100644
--- a/dialogflow_task_executive/node_scripts/task_executive.py
+++ b/dialogflow_task_executive/node_scripts/task_executive.py
@@ -7,7 +7,9 @@
import rospy
from app_manager.msg import AppList
+from app_manager.msg import KeyValue
from app_manager.srv import StartApp
+from app_manager.srv import StartAppRequest
from app_manager.srv import StopApp
from std_srvs.srv import Empty
@@ -90,12 +92,16 @@ def available_apps(self):
         return map(lambda a: a.name,
                    self._latest_msg.available_apps)

-    def start_app(self, name):
+    def start_app(self, name, launch_args):
         if name in self.running_apps:
             raise RuntimeError("{} is already running".format(name))
         elif name not in self.available_apps:
             raise RuntimeError("{} is not available".format(name))
-        res = self._srv_start_app(name=name)
+        req = StartAppRequest()
+        req.name = name
+        for key, value in launch_args.items():
+            req.args.append(KeyValue(key=key, value=value))
+        res = self._srv_start_app(req)
         if res.started:
             rospy.loginfo("{} successfully started".format(name))
             return True
@@ -221,6 +227,12 @@ def dialog_cb(self, msg):
         try:
             params = json.loads(msg.parameters)
             rospy.set_param("/action/parameters", params)
+            # set launch_args
+            launch_args = {}
+            for key, value in params.items():
+                launch_args[key.encode('utf-8')] = value.encode('utf-8')
+        except AttributeError as e:
+            rospy.logerr(e)
         except ValueError:
             rospy.logerr(
                 "Failed to parse parameters of action '{}'".format(msg.action))
@@ -228,7 +240,7 @@ def dialog_cb(self, msg):
         rospy.loginfo(
             "Starting '{}' with parameters '{}'"
             .format(msg.action, msg.parameters))
-        self.app_manager.start_app(action)
+        self.app_manager.start_app(action, launch_args)

     def app_start_cb(self, name):
         rospy.loginfo("{} started".format(name))
diff --git a/google_chat_ros/scripts/google_chat_ros_node.py b/google_chat_ros/scripts/google_chat_ros_node.py
index 64e6d011f..43830281b 100644
--- a/google_chat_ros/scripts/google_chat_ros_node.py
+++ b/google_chat_ros/scripts/google_chat_ros_node.py
@@ -86,7 +86,7 @@ def __init__(self):
rospy.logwarn("You cannot recieve Google Chat event because HTTPS server or Google Cloud Pub/Sub is not running.")
else:
- rospy.logerr("Please choose receiving_mode param from dialogflow, https, pubsub, none.")
+ rospy.logerr("Please choose receiving_mode param from dialogflow, url, pubsub, none.")
def killhttpd(self):
self._server.kill()
diff --git a/respeaker_ros/CMakeLists.txt b/respeaker_ros/CMakeLists.txt
index 390a82e0f..b4009cd1a 100644
--- a/respeaker_ros/CMakeLists.txt
+++ b/respeaker_ros/CMakeLists.txt
@@ -35,7 +35,9 @@ catkin_install_python(PROGRAMS ${PYTHON_SCRIPTS}
 if(CATKIN_ENABLE_TESTING)
   find_package(rostest REQUIRED)
+  find_package(roslaunch REQUIRED)
   add_rostest(test/sample_respeaker.test
     DEPENDENCIES ${PROJECT_NAME}_generate_virtualenv
   )
+  roslaunch_add_file_check(launch/sample_respeaker.launch)
 endif()
diff --git a/respeaker_ros/README.md b/respeaker_ros/README.md
index e42ba1202..247168ba5 100644
--- a/respeaker_ros/README.md
+++ b/respeaker_ros/README.md
@@ -92,6 +92,151 @@ A ROS Package for Respeaker Mic Array
a: 0.3"
```
+## Parameters for respeaker_node.py
+
+  - ### Publishing topics
+
+    - `audio` (`audio_common_msgs/AudioData`)
+
+      Processed audio for ASR. 1 channel.
+
+    - `audio_info` (`audio_common_msgs/AudioInfo`)
+
+      Audio info with respect to `~audio`.
+
+    - `audio_raw` (`audio_common_msgs/AudioData`)
+
+      Raw microphone-array audio data with 4 channels. You may need to update the respeaker firmware.
+
+      If the firmware doesn't support it, this topic is not published.
+
+    - `audio_info_raw` (`audio_common_msgs/AudioInfo`)
+
+      Audio info with respect to `~audio_raw`.
+
+      If the firmware doesn't support it, this topic is not published.
+
+    - `speech_audio` (`audio_common_msgs/AudioData`)
+
+      Audio data captured while a person is speaking, as detected by the VAD function.
+
+    - `speech_audio_raw` (`audio_common_msgs/AudioData`)
+
+      4-channel raw audio data captured while a person is speaking, as detected by the VAD function.
+
+      If the firmware doesn't support it, this topic is not published.
+
+    - `audio_merged_playback` (`audio_common_msgs/AudioData`)
+
+      Audio that merges the microphone input with the speaker playback.
+
+      If the firmware doesn't support it, this topic is not published.
+
+      For more detail, please see https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/
+
+    - `~is_speeching` (`std_msgs/Bool`)
+
+      Whether someone is speaking, as detected by the VAD function.
+
+    - `~sound_direction` (`std_msgs/Int32`)
+
+      Direction of the sound.
+
+    - `~sound_localization` (`geometry_msgs/PoseStamped`)
+
+      Localized sound direction. The pose is placed in the estimated direction, at a distance of `~doa_xy_offset` from the sensor.
+
+  - ### Parameters
+
+    - `~update_rate` (`Double`, default: `10.0`)
+
+      Publishing rate [Hz] of info data such as `~is_speeching`, `~sound_direction`, `~sound_localization`, `~speech_audio` and `~speech_audio_raw`.
+
+    - `~sensor_frame_id` (`String`, default: `respeaker_base`)
+
+      Frame ID of the published messages.
+
+    - `~doa_xy_offset` (`Double`, default: `0.0`)
+
+      Radius offset applied to the estimated sound direction when computing `~sound_localization`.
+
+    - `~doa_yaw_offset` (`Double`, default: `90.0`)
+
+      Yaw angle offset [deg] applied to the estimated DoA.
+
+    - `~speech_prefetch` (`Double`, default: `0.5`)
+
+      Duration [sec] of audio kept in the prefetch buffer before speech is detected.
+
+    - `~speech_continuation` (`Double`, default: `0.5`)
+
+      If speech resumes within this duration [sec] after it stops, it is treated as one continuous utterance.
+
+    - `~speech_max_duration` (`Double`, default: `7.0`)
+
+    - `~speech_min_duration` (`Double`, default: `0.1`)
+
+      `~speech_audio` and `~speech_audio_raw` are published only if the utterance duration falls within these bounds [sec].
+
+    - `~suppress_pyaudio_error` (`Bool`, default: `True`)
+
+      If `True`, suppress error output from pyaudio.
+
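+As a quick check of the topics above, the following minimal sketch saves each utterance published on `speech_audio` to a WAV file (assuming the default 16 kHz, 16-bit mono format advertised on `audio_info`; the node name is just an example):
+
+```python
+#!/usr/bin/env python
+import wave
+
+import rospy
+from audio_common_msgs.msg import AudioData
+
+
+def callback(msg):
+    # each message contains one detected utterance (S16LE, mono)
+    wav = wave.open('/tmp/utterance.wav', 'wb')
+    wav.setnchannels(1)
+    wav.setsampwidth(2)      # 16-bit samples
+    wav.setframerate(16000)  # assumed sampling rate
+    wav.writeframes(msg.data)
+    wav.close()
+
+
+rospy.init_node('speech_audio_saver')
+rospy.Subscriber('speech_audio', AudioData, callback)
+rospy.spin()
+```
+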
+## Parameters for speech_to_text.py
+
+  - ### Publishing topics
+
+    - `~speech_to_text` (`speech_recognition_msgs/SpeechRecognitionCandidates`)
+
+      Recognized text.
+
+  - ### Subscribing topics
+
+    - `audio` (`audio_common_msgs/AudioData`)
+
+      Input audio.
+
+  - ### Parameters
+
+    - `~audio_info` (`String`, default: `""`)
+
+      Name of an audio info (`audio_common_msgs/AudioInfo`) topic. If specified, the `~sample_rate`, `~sample_width` and `~channels` parameters are obtained from that topic.
+
+    - `~sample_rate` (`Int`, default: `16000`)
+
+      Sampling rate [Hz].
+
+    - `~sample_width` (`Int`, default: `2`)
+
+      Sample width [bytes].
+
+    - `~channels` (`Int`, default: `1`)
+
+      Number of channels.
+
+    - `~target_channel` (`Int`, default: `0`)
+
+      Index of the channel used for recognition.
+
+    - `~language` (`String`, default: `ja-JP`)
+
+      Language for the speech-to-text service. For English, specify `en-US`.
+
+    - `~self_cancellation` (`Bool`, default: `True`)
+
+      If `True`, ignore voice input while the robot is speaking.
+
+    - `~tts_tolerance` (`Double`, default: `1.0`)
+
+      Duration [sec] to keep assuming the robot is speaking after the TTS action finishes.
+
+    - `~tts_action_names` (`List[String]`, default: `['sound_play']`)
+
+      Used when `~self_cancellation` is `True`.
+
+      While any of these actions is active, the callback subscribing to `audio` does nothing.
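+
+For example, to take the audio format from the `audio_info` topic and recognize English speech (a minimal sketch using the parameters above):
+
+```bash
+rosrun respeaker_ros speech_to_text.py _audio_info:=audio_info _language:=en-US
+```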
+
## Use cases
### Voice Recognition
diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch
index 31d083608..e2c43c557 100644
--- a/respeaker_ros/launch/sample_respeaker.launch
+++ b/respeaker_ros/launch/sample_respeaker.launch
@@ -13,6 +13,8 @@
+
+
+ respawn="true" respawn_delay="10" >
+
@@ -30,6 +33,7 @@
+ audio_info: $(arg audio_info)
language: $(arg language)
self_cancellation: $(arg self_cancellation)
tts_tolerance: 0.5
diff --git a/respeaker_ros/package.xml b/respeaker_ros/package.xml
index ac83b898a..be16789a2 100644
--- a/respeaker_ros/package.xml
+++ b/respeaker_ros/package.xml
@@ -17,6 +17,7 @@
   <run_depend>flac</run_depend>
   <run_depend>geometry_msgs</run_depend>
   <run_depend>std_msgs</run_depend>
+  <run_depend>sound_play</run_depend>
   <run_depend>speech_recognition_msgs</run_depend>
   <run_depend>tf</run_depend>
   <run_depend>python-numpy</run_depend>
diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py
index bf14ad478..6b351251c 100644
--- a/respeaker_ros/scripts/respeaker_node.py
+++ b/respeaker_ros/scripts/respeaker_node.py
@@ -16,6 +16,13 @@
 import sys
 import time
 from audio_common_msgs.msg import AudioData
+enable_audio_info = True
+try:
+    from audio_common_msgs.msg import AudioInfo
+except Exception as e:
+    rospy.logwarn('audio_common_msgs/AudioInfo message does not exist.'
+                  ' AudioInfo messages will not be published.')
+    enable_audio_info = False
 from geometry_msgs.msg import PoseStamped
 from std_msgs.msg import Bool, Int32, ColorRGBA
 from dynamic_reconfigure.server import Server
@@ -265,7 +272,6 @@ def __init__(self, on_audio, channel=0, suppress_error=True):
         if self.channels != 6:
             rospy.logwarn("%d channel is found for respeaker" % self.channels)
             rospy.logwarn("You may have to update firmware.")
-        self.channel = min(self.channels - 1, max(0, self.channel))

         self.stream = self.pyaudio.open(
             input=True, start=False,
@@ -295,9 +301,8 @@ def stream_callback(self, in_data, frame_count, time_info, status):
         data = np.frombuffer(in_data, dtype=np.int16)
         chunk_per_channel = int(len(data) / self.channels)
         data = np.reshape(data, (chunk_per_channel, self.channels))
-        chan_data = data[:, self.channel]
         # invoke callback
-        self.on_audio(chan_data.tobytes())
+        self.on_audio(data)
         return None, pyaudio.paContinue

     def start(self):
@@ -333,14 +338,24 @@ def __init__(self):
         self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
         self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
         self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
+        if enable_audio_info is True:
+            self.pub_audio_info = rospy.Publisher("audio_info", AudioInfo,
+                                                  queue_size=1, latch=True)
+            self.pub_audio_raw_info = rospy.Publisher("audio_info_raw", AudioInfo,
+                                                      queue_size=1, latch=True)
         self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
         # init config
         self.config = None
         self.dyn_srv = Server(RespeakerConfig, self.on_config)
         # start
         self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
+        self.n_channel = self.respeaker_audio.channels
+
         self.speech_prefetch_bytes = int(
-            self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
+            1
+            * self.speech_prefetch
+            * self.respeaker_audio.rate
+            * self.respeaker_audio.bitdepth / 8.0)
         self.speech_prefetch_buffer = b""
         self.respeaker_audio.start()
         self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
@@ -348,6 +363,58 @@ def __init__(self):
         self.timer_led = None
         self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)
+        # processed audio for ASR
+        if enable_audio_info is True:
+            info_msg = AudioInfo(
+                channels=1,
+                sample_rate=self.respeaker_audio.rate,
+                sample_format='S16LE',
+                bitrate=self.respeaker_audio.rate * self.respeaker_audio.bitdepth,
+                coding_format='WAVE')
+            self.pub_audio_info.publish(info_msg)
+
+        if self.n_channel > 1:
+            # The respeaker has 4 microphones.
+            # Multiple microphones can be used for
+            # beam forming (strengthening the sound in a specific direction)
+            # and sound localization (the respeaker outputs the azimuth
+            # direction, but the multichannel data can also estimate
+            # the elevation direction), etc.
+
+            # Channel 0: processed audio for ASR
+            # Channel 1: mic1 raw data
+            # Channel 2: mic2 raw data
+            # Channel 3: mic3 raw data
+            # Channel 4: mic4 raw data
+            # Channel 5: merged playback
+            # For more detail, please see
+            # https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/
+            # (self.n_channel - 2) = 4 channels are the raw microphones.
+            self.pub_audio_raw = rospy.Publisher("audio_raw", AudioData,
+                                                 queue_size=10)
+            self.pub_audio_merged_playback = rospy.Publisher(
+                "audio_merged_playback", AudioData,
+                queue_size=10)
+            if enable_audio_info is True:
+                info_raw_msg = AudioInfo(
+                    channels=self.n_channel - 2,
+                    sample_rate=self.respeaker_audio.rate,
+                    sample_format='S16LE',
+                    bitrate=(self.respeaker_audio.rate *
+                             self.respeaker_audio.bitdepth),
+                    coding_format='WAVE')
+                self.pub_audio_raw_info.publish(info_raw_msg)
+
+            self.speech_audio_raw_buffer = b""
+            self.speech_raw_prefetch_buffer = b""
+            self.pub_speech_audio_raw = rospy.Publisher(
+                "speech_audio_raw", AudioData, queue_size=10)
+            self.speech_raw_prefetch_bytes = int(
+                (self.n_channel - 2)
+                * self.speech_prefetch
+                * self.respeaker_audio.rate
+                * self.respeaker_audio.bitdepth / 8.0)
+
     def on_shutdown(self):
         self.info_timer.shutdown()
         try:
@@ -386,14 +453,30 @@ def on_status_led(self, msg):
                                          oneshot=True)

     def on_audio(self, data):
-        self.pub_audio.publish(AudioData(data=data))
+        # take processed audio for ASR.
+        processed_data = data[:, 0].tobytes()
+        self.pub_audio.publish(AudioData(data=processed_data))
+        if self.n_channel > 1:
+            raw_audio_data = data[:, 1:5].reshape(-1).tobytes()
+            self.pub_audio_raw.publish(
+                AudioData(data=raw_audio_data))
+            self.pub_audio_merged_playback.publish(
+                AudioData(data=data[:, 5].tobytes()))
         if self.is_speeching:
             if len(self.speech_audio_buffer) == 0:
                 self.speech_audio_buffer = self.speech_prefetch_buffer
-            self.speech_audio_buffer += data
+                if self.n_channel > 1:
+                    self.speech_audio_raw_buffer = self.speech_raw_prefetch_buffer
+            self.speech_audio_buffer += processed_data
+            if self.n_channel > 1:
+                self.speech_audio_raw_buffer += raw_audio_data
         else:
-            self.speech_prefetch_buffer += data
+            self.speech_prefetch_buffer += processed_data
             self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
+            if self.n_channel > 1:
+                self.speech_raw_prefetch_buffer += raw_audio_data
+                self.speech_raw_prefetch_buffer = self.speech_raw_prefetch_buffer[
+                    -self.speech_raw_prefetch_bytes:]

     def on_timer(self, event):
         stamp = event.current_real or rospy.Time.now()
@@ -433,13 +516,15 @@ def on_timer(self, event):
         elif self.is_speeching:
             buf = self.speech_audio_buffer
             self.speech_audio_buffer = b""
+            buf_raw = self.speech_audio_raw_buffer
+            self.speech_audio_raw_buffer = b""
             self.is_speeching = False
             duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
-            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
+            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth / self.n_channel
             rospy.loginfo("Speech detected for %.3f seconds" % duration)
             if self.speech_min_duration <= duration < self.speech_max_duration:
-
                 self.pub_speech_audio.publish(AudioData(data=buf))
+                self.pub_speech_audio_raw.publish(AudioData(data=buf_raw))

 if __name__ == '__main__':
diff --git a/respeaker_ros/scripts/speech_to_text.py b/respeaker_ros/scripts/speech_to_text.py
index 0974b2f65..6765e2e04 100644
--- a/respeaker_ros/scripts/speech_to_text.py
+++ b/respeaker_ros/scripts/speech_to_text.py
@@ -2,6 +2,10 @@
# -*- coding: utf-8 -*-
# Author: Yuki Furuta
+from __future__ import division
+
+import sys
+
import actionlib
import rospy
try:
@@ -9,8 +13,16 @@
 except ImportError as e:
     raise ImportError(str(e) + '\nplease try "pip install speechrecognition"')
+import numpy as np
 from actionlib_msgs.msg import GoalStatus, GoalStatusArray
 from audio_common_msgs.msg import AudioData
+enable_audio_info = True
+try:
+    from audio_common_msgs.msg import AudioInfo
+except Exception as e:
+    rospy.logwarn('audio_common_msgs/AudioInfo message does not exist.'
+                  ' The ~audio_info parameter cannot be used.')
+    enable_audio_info = False
 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
@@ -18,8 +30,32 @@
 class SpeechToText(object):
     def __init__(self):
         # format of input audio data
-        self.sample_rate = rospy.get_param("~sample_rate", 16000)
-        self.sample_width = rospy.get_param("~sample_width", 2)
+        audio_info_topic_name = rospy.get_param('~audio_info', '')
+        if len(audio_info_topic_name) > 0:
+            if enable_audio_info is False:
+                rospy.logerr(
+                    'audio_common_msgs/AudioInfo message does not exist.'
+                    ' Giving ~audio_info is not valid in your environment.')
+                sys.exit(1)
+            rospy.loginfo('Extract audio info params from {}'.format(
+                audio_info_topic_name))
+            audio_info_msg = rospy.wait_for_message(
+                audio_info_topic_name, AudioInfo)
+            self.sample_rate = audio_info_msg.sample_rate
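+            # AudioInfo has no explicit sample-width field; recover it from
+            # bitrate = sample_rate * sample_width * 8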
+            self.sample_width = audio_info_msg.bitrate // self.sample_rate // 8
+            self.channels = audio_info_msg.channels
+        else:
+            self.sample_rate = rospy.get_param("~sample_rate", 16000)
+            self.sample_width = rospy.get_param("~sample_width", 2)
+            self.channels = rospy.get_param("~channels", 1)
+        if self.sample_width == 2:
+            self.dtype = 'int16'
+        elif self.sample_width == 4:
+            self.dtype = 'int32'
+        else:
+            raise NotImplementedError('sample_width {} is not supported'
+                                      .format(self.sample_width))
+        self.target_channel = rospy.get_param("~target_channel", 0)
         # language of STT service
         self.language = rospy.get_param("~language", "ja-JP")
         # ignore voice input while the robot is speaking
@@ -78,7 +114,11 @@ def audio_cb(self, msg):
         if self.is_canceling:
             rospy.loginfo("Speech is cancelled")
             return
-        data = SR.AudioData(msg.data, self.sample_rate, self.sample_width)
+
+        data = SR.AudioData(
+            np.frombuffer(msg.data, dtype=self.dtype)[
+                self.target_channel::self.channels].tobytes(),
+            self.sample_rate, self.sample_width)
         try:
             rospy.loginfo("Waiting for result %d" % len(data.get_raw_data()))
             result = self.recognizer.recognize_google(
diff --git a/respeaker_ros/test/sample_respeaker.test b/respeaker_ros/test/sample_respeaker.test
index 5d51c220c..61f10fb7b 100644
--- a/respeaker_ros/test/sample_respeaker.test
+++ b/respeaker_ros/test/sample_respeaker.test
@@ -3,6 +3,7 @@
+
diff --git a/rostwitter/CMakeLists.txt b/rostwitter/CMakeLists.txt
index 39258afbd..82b81feae 100644
--- a/rostwitter/CMakeLists.txt
+++ b/rostwitter/CMakeLists.txt
@@ -40,7 +40,7 @@ else()
   )
 endif()

-install(DIRECTORY test resource
+install(DIRECTORY test resource launch
   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
   USE_SOURCE_PERMISSIONS
 )
diff --git a/rostwitter/README.md b/rostwitter/README.md
new file mode 100644
index 000000000..81750bf5b
--- /dev/null
+++ b/rostwitter/README.md
@@ -0,0 +1,116 @@
+# rostwitter
+
+This package is a ROS wrapper for Twitter. You can tweet via ROS.
+
+# How to use
+
+## Get an access key for the API
+
+Apply for access to the Twitter API; please refer to the following URL.
+
+https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api
+
+After that, save a YAML file in the following format:
+
+```
+CKEY:
+CSECRET:
+AKEY:
+ASECRET:
+```
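+
+Here `CKEY`/`CSECRET` are the Twitter API consumer key and secret, and `AKEY`/`ASECRET` are the access token and access token secret.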
+
+## Launch tweet node
+
+```
+roslaunch rostwitter tweet.launch account_info:=
+```
+
+## Tweet text
+
+You can tweet by simply publishing to the `/tweet` topic.
+
+```
+rostopic pub /tweet std_msgs/String "Hello. Tweet via rostwitter (https://github.com/jsk-ros-pkg/jsk_3rdparty)"
+```
+
+![](./doc/tweet-string.jpg)
+
+If the string to be tweeted exceeds 140 full-width or 280 half-width characters, it will be split and tweeted as a thread.
+
+```
+rostopic pub /tweet std_msgs/String """The Zen of Python, by Tim Peters
+
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!
+"""
+```
+
+![](./doc/tweet-string-thread.jpg)
+
+## Tweet text with image
+
+You can also tweet along with your images.
+
+If a base64-encoded image or an image file path appears in the text, the text is split at that point and continues in the next reply of the thread.
+
+### Image path
+
+```
+wget https://github.com/k-okada.png -O /tmp/k-okada.png
+rostopic pub /tweet std_msgs/String "/tmp/k-okada.png"
+```
+
+![](./doc/tweet-image-path.jpg)
+
+### Base64
+
+You can also tweet images by encoding them in base64. The following example is in Python.
+
+Do not concatenate multiple base64 images without spaces.
+
+
+```python
+import rospy
+import cv2
+import std_msgs.msg
+import numpy as np
+import matplotlib.cm
+
+from rostwitter.cv_util import encode_image_cv2
+
+rospy.init_node('rostwitter_sample')
+pub = rospy.Publisher('/tweet', std_msgs.msg.String, queue_size=1)
+rospy.sleep(3.0)
+
+colormap = matplotlib.cm.get_cmap('hsv')
+
+text = 'Tweet with images. (https://github.com/jsk-ros-pkg/jsk_3rdparty/pull/375)\n'
+N = 12
+for i in range(N):
+    text += str(i)
+    color = colormap(1.0 * i / N)[:3]
+    img = color * np.ones((10, 10, 3), dtype=np.uint8) * 255
+    img = np.array(img, dtype=np.uint8)
+    text += encode_image_cv2(img) + ' '
+pub.publish(text)
+```
+
+[The result of the tweet.](https://twitter.com/pr2jsk/status/1561995909524705280)
diff --git a/rostwitter/doc/tweet-image-path.jpg b/rostwitter/doc/tweet-image-path.jpg
new file mode 100644
index 000000000..dffc9baec
Binary files /dev/null and b/rostwitter/doc/tweet-image-path.jpg differ
diff --git a/rostwitter/doc/tweet-string-thread.jpg b/rostwitter/doc/tweet-string-thread.jpg
new file mode 100644
index 000000000..13783eaef
Binary files /dev/null and b/rostwitter/doc/tweet-string-thread.jpg differ
diff --git a/rostwitter/doc/tweet-string.jpg b/rostwitter/doc/tweet-string.jpg
new file mode 100644
index 000000000..c41daa779
Binary files /dev/null and b/rostwitter/doc/tweet-string.jpg differ
diff --git a/rostwitter/launch/tweet.launch b/rostwitter/launch/tweet.launch
new file mode 100644
index 000000000..1d202a05e
--- /dev/null
+++ b/rostwitter/launch/tweet.launch
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/rostwitter/python/rostwitter/cv_util.py b/rostwitter/python/rostwitter/cv_util.py
new file mode 100644
index 000000000..ad284bc63
--- /dev/null
+++ b/rostwitter/python/rostwitter/cv_util.py
@@ -0,0 +1,80 @@
+import base64
+import imghdr
+import os.path
+import re
+
+import cv2
+import numpy as np
+import rospy
+
+
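+# Matches either a base64-encoded JPEG ("/9j/" is the base64 encoding of the
+# 0xFFD8FF JPEG magic bytes) or a file path ending in a common image extension.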
+base64_and_filepath_image_pattern = re.compile(r'((?:/9j/)(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)? ?|/\S+\.(?:jpeg|jpg|png|gif))')
+
+
+def encode_image_cv2(img, quality=90):
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
+ result, encimg = cv2.imencode('.jpg', img, encode_param)
+ b64encoded = base64.b64encode(encimg).decode('ascii')
+ return b64encoded
+
+
+def decode_image_cv2(b64encoded):
+ bin = b64encoded.split(",")[-1]
+ bin = base64.b64decode(bin)
+ bin = np.frombuffer(bin, np.uint8)
+ img = cv2.imdecode(bin, cv2.IMREAD_COLOR)
+ return img
+
+
+def is_base64_image(b64encoded):
+ try:
+ decode_image_cv2(b64encoded)
+ except Exception as e:
+ rospy.logerr(str(e))
+ return False
+ return True
+
+
+def get_image_from_text(text):
+ if base64_and_filepath_image_pattern.match(text) is None:
+ return None
+
+ if os.path.exists(text):
+ path = text
+ if imghdr.what(path) in ['jpeg', 'png', 'gif']:
+ with open(path, 'rb') as f:
+ return f.read()
+ else:
+ succ = is_base64_image(text)
+ if succ:
+            buf = text.split(",")[-1]
+            buf = base64.b64decode(buf)
+            buf = np.frombuffer(buf, np.uint8)
+            return buf
+
+
+def extract_media_from_text(text):
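+    # Split ``text`` into text segments and groups of images.
+    # Returns (imgs_list, split_texts), where imgs_list[i] contains the
+    # images that immediately follow split_texts[i].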
+ texts = base64_and_filepath_image_pattern.split(text)
+ target_texts = list(filter(lambda x: x is not None and len(x.strip()) > 0, texts))
+
+    split_texts = ['']
+    imgs_list = []
+
+    imgs = []
+ for text in target_texts:
+ img = get_image_from_text(text)
+ if img is None:
+ split_texts.append(text)
+ imgs_list.append(imgs)
+ imgs = []
+ else:
+ imgs.append(img)
+
+ if len(imgs) > 0:
+ imgs_list.append(imgs)
+    if len(split_texts) > 0 and len(imgs_list) > 0:
+        if len(split_texts[0]) == 0 and len(imgs_list[0]) == 0:
+ split_texts = split_texts[1:]
+ imgs_list = imgs_list[1:]
+ return imgs_list, split_texts
diff --git a/rostwitter/python/rostwitter/twitter.py b/rostwitter/python/rostwitter/twitter.py
index cdb020e15..c56cf5289 100644
--- a/rostwitter/python/rostwitter/twitter.py
+++ b/rostwitter/python/rostwitter/twitter.py
@@ -1,16 +1,20 @@
# originally from https://raw.githubusercontent.com/bear/python-twitter/v1.1/twitter.py # NOQA
+import math
import json as simplejson
import requests
-from requests_oauthlib import OAuth1
-# https://stackoverflow.com/questions/11914472/stringio-in-python3
try:
- from StringIO import StringIO ## for Python 2
+ from itertools import zip_longest
except ImportError:
- from io import StringIO ## for Python 3
+ from itertools import izip_longest as zip_longest
+from requests_oauthlib import OAuth1
import rospy
+from rostwitter.util import count_tweet_text
+from rostwitter.util import split_tweet_text
+from rostwitter.cv_util import extract_media_from_text
+
class Twitter(object):
def __init__(
@@ -54,24 +58,80 @@ def _request_url(self, url, verb, data=None):
)
return 0 # if not a POST or GET request
- def post_update(self, status):
- if len(status) > 140:
- rospy.logwarn('tweet is too longer > 140 characters')
- status = status[:140]
- url = 'https://api.twitter.com/1.1/statuses/update.json'
- data = {'status': StringIO(status)}
- json = self._request_url(url, 'POST', data=data)
- data = simplejson.loads(json.content)
+ def _check_post_request(self, request):
+ valid = True
+ data = simplejson.loads(request.content)
+ if request.status_code != 200:
+ rospy.logwarn('post tweet failed. status_code: {}'
+ .format(request.status_code))
+ if 'errors' in data:
+ for error in data['errors']:
+ rospy.logwarn('Tweet error code: {}, message: {}'
+ .format(error['code'], error['message']))
+ valid = False
+ if valid:
+ return data
+
+ def _post_update_with_reply(self, texts, media_list=None,
+ in_reply_to_status_id=None):
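+        # Twitter allows at most 4 images per tweet, so split the media
+        # into chunks of 4 and post each chunk as a reply in the thread.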
+ split_media_list = []
+ media_list = media_list or []
+ for i in range(0, int(math.ceil(len(media_list) / 4.0))):
+ split_media_list.append(media_list[i * 4:(i + 1) * 4])
+        for text, media_chunk in zip_longest(texts, split_media_list):
+            text = text or ''
+            media_chunk = media_chunk or []
+            url = 'https://api.twitter.com/1.1/statuses/update.json'
+            data = {'status': text}
+            media_ids = self._upload_media(media_chunk)
+ if len(media_ids) > 0:
+ data['media_ids'] = media_ids
+ if in_reply_to_status_id is not None:
+ data['in_reply_to_status_id'] = in_reply_to_status_id
+ r = self._request_url(url, 'POST', data=data)
+ data = self._check_post_request(r)
+ if data is not None:
+ in_reply_to_status_id = data['id']
+ return data
+
+ def _upload_media(self, media_list):
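+        # upload each image via Twitter's media/upload endpoint and return
+        # the collected media ids as a comma-separated string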
+ url = 'https://upload.twitter.com/1.1/media/upload.json'
+ media_ids = []
+ for media in media_list:
+ data = {'media': media}
+ r = self._request_url(url, 'POST', data=data)
+ if r.status_code == 200:
+ rospy.loginfo('upload media success')
+ media_ids.append(str(r.json()['media_id']))
+ else:
+ rospy.logerr('upload media failed. status_code: {}'
+ .format(r.status_code))
+ media_ids = ','.join(media_ids)
+ return media_ids
+
+ def post_update(self, status, in_reply_to_status_id=None):
+ media_list, status_list = extract_media_from_text(status)
+ for text, mlist in zip_longest(status_list, media_list):
+ text = text or ''
+ texts = split_tweet_text(text)
+ data = self._post_update_with_reply(
+ texts,
+ media_list=mlist,
+ in_reply_to_status_id=in_reply_to_status_id)
+ if data is not None:
+ in_reply_to_status_id = data['id']
return data
- def post_media(self, status, media):
- # 116 = 140 - len("http://t.co/ssssssssss")
- if len(status) > 116:
- rospy.logwarn('tweet wit media is too longer > 116 characters')
- status = status[:116]
+ def post_media(self, status, media, in_reply_to_status_id=None):
+ texts = split_tweet_text(status)
+ status = texts[0]
url = 'https://api.twitter.com/1.1/statuses/update_with_media.json'
- data = {'status': StringIO(status)}
+ data = {'status': status}
-        data['media'] = open(str(media), 'rb').read()
+        with open(str(media), 'rb') as f:
+            data['media'] = f.read()
- json = self._request_url(url, 'POST', data=data)
- data = simplejson.loads(json.content)
+ r = self._request_url(url, 'POST', data=data)
+ data = self._check_post_request(r)
+        if data is not None and len(texts) > 1:
+ data = self._post_update_with_reply(
+ texts[1:],
+ in_reply_to_status_id=data['id'])
return data
diff --git a/rostwitter/python/rostwitter/util.py b/rostwitter/python/rostwitter/util.py
index 36a613b46..f5e51471c 100644
--- a/rostwitter/python/rostwitter/util.py
+++ b/rostwitter/python/rostwitter/util.py
@@ -1,4 +1,6 @@
import os
+import sys
+import unicodedata
import yaml
import rospy
@@ -16,9 +18,47 @@ def load_oauth_settings(yaml_path):
rospy.logerr("EOF")
return None, None, None, None
with open(yaml_path, 'r') as f:
- key = yaml.load(f)
+ key = yaml.load(f, Loader=yaml.SafeLoader)
ckey = key['CKEY']
csecret = key['CSECRET']
akey = key['AKEY']
asecret = key['ASECRET']
return ckey, csecret, akey, asecret
+
+
+def count_tweet_text(text):
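+    # weighted tweet length: characters whose east-asian width is
+    # Fullwidth, Wide or Ambiguous count as 2, everything else as 1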
+ count = 0
+ if sys.version_info.major <= 2:
+ text = text.decode('utf-8')
+ for c in text:
+ if unicodedata.east_asian_width(c) in 'FWA':
+ count += 2
+ else:
+ count += 1
+ return count
+
+
+def split_tweet_text(text, length=280):
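+    # split text into chunks that each fit within ``length`` weighted
+    # characters (full-width characters count as 2, as in count_tweet_text)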
+ texts = []
+ split_text = ''
+ count = 0
+ if sys.version_info.major <= 2:
+ text = text.decode('utf-8')
+ for c in text:
+        if count == length + 1:
+            # the last character was full-width (zenkaku) and overflowed
+            # the limit by one, so carry it over to the next tweet.
+            texts.append(split_text[:-1])
+            split_text = split_text[-1:]
+            count = 2
+        elif count == length:
+ texts.append(split_text)
+ split_text = ''
+ count = 0
+ split_text += c
+ if unicodedata.east_asian_width(c) in 'FWA':
+ count += 2
+ else:
+ count += 1
+ if count != 0:
+ texts.append(split_text)
+ return texts
diff --git a/rostwitter/scripts/tweet.py b/rostwitter/scripts/tweet.py
index d4b666959..50c44cf48 100755
--- a/rostwitter/scripts/tweet.py
+++ b/rostwitter/scripts/tweet.py
@@ -32,29 +32,9 @@ def tweet_cb(self, msg):
rospy.loginfo(rospy.get_name() + " sending %s",
''.join([message] if len(message) < 128 else message[0:128]+'......'))
- # search word start from / and end with {.jpeg,.jpg,.png,.gif}
- m = re.search('/\S+\.(jpeg|jpg|png|gif)', message)
- ret = None
- if m:
- filename = m.group(0)
- message = re.sub(filename, "", message)
- if os.path.exists(filename):
- rospy.loginfo(
- rospy.get_name() + " tweet %s with file %s",
- message, filename)
- # 140 - len("http://t.co/ssssssssss")
- ret = self.api.post_media(message[0:116], filename)
- if 'errors' in ret:
- rospy.logerr('Failed to post: {}'.format(ret))
- # ret = self.api.post_update(message)
- else:
- rospy.logerr(rospy.get_name() + " %s could not find", filename)
- else:
- ret = self.api.post_update(message[0:140])
- if 'errors' in ret:
- rospy.logerr('Failed to post: {}'.format(ret))
- # seg faults if message is longer than 140 byte ???
- rospy.loginfo(rospy.get_name() + " receiving %s", ret)
+ ret = self.api.post_update(message)
+ if ret is not None:
+ rospy.loginfo(rospy.get_name() + " receiving %s", ret)
if __name__ == '__main__':