diff --git a/3rdparty/voicevox/.gitignore b/3rdparty/voicevox/.gitignore new file mode 100644 index 000000000..8cb8e60a3 --- /dev/null +++ b/3rdparty/voicevox/.gitignore @@ -0,0 +1,6 @@ +build +dict +lib +node_scripts/voicevox_engine +requirements.txt +!.gitignore diff --git a/3rdparty/voicevox/CMakeLists.txt b/3rdparty/voicevox/CMakeLists.txt new file mode 100644 index 000000000..631126026 --- /dev/null +++ b/3rdparty/voicevox/CMakeLists.txt @@ -0,0 +1,73 @@ +cmake_minimum_required(VERSION 2.8.3) +project(voicevox) + +find_package(catkin REQUIRED + COMPONENTS + catkin_virtualenv +) + +catkin_python_setup() + +set(INSTALL_DIR ${PROJECT_SOURCE_DIR}) + +catkin_package() + +catkin_generate_virtualenv( + INPUT_REQUIREMENTS requirements.in + PYTHON_INTERPRETER python3 + USE_SYSTEM_PACKAGES FALSE +) + +add_custom_command( + OUTPUT voicevox_model_installed + COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.model + MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum + INSTALL_DIR=${INSTALL_DIR} +) + + +add_custom_command( + OUTPUT voicevox_core_installed + COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.core + MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum + INSTALL_DIR=${INSTALL_DIR} +) + +add_custom_command( + OUTPUT voicevox_engine_installed + COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.engine + MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum + INSTALL_DIR=${INSTALL_DIR} +) + +add_custom_command( + OUTPUT open_jtalk_dic_installed + COMMAND make -f ${PROJECT_SOURCE_DIR}/Makefile.open_jtalk_dic + MD5SUM_DIR=${PROJECT_SOURCE_DIR}/md5sum + INSTALL_DIR=${INSTALL_DIR} +) + +add_custom_target(all_installed ALL DEPENDS + voicevox_model_installed + voicevox_core_installed + voicevox_engine_installed + open_jtalk_dic_installed) + +file(GLOB NODE_SCRIPTS_FILES node_scripts/*.py) +catkin_install_python( + PROGRAMS ${NODE_SCRIPTS_FILES} + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}/node_scripts/ +) +install(DIRECTORY node_scripts/voicevox_engine + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/catkin_virtualenv_scripts/ + USE_SOURCE_PERMISSIONS) +install(DIRECTORY launch dict + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} + USE_SOURCE_PERMISSIONS) +install(PROGRAMS bin/text2wave + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin) + +install(DIRECTORY + ${INSTALL_DIR}/lib + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} + USE_SOURCE_PERMISSIONS) diff --git a/3rdparty/voicevox/Makefile b/3rdparty/voicevox/Makefile new file mode 100644 index 000000000..a2c90f3bb --- /dev/null +++ b/3rdparty/voicevox/Makefile @@ -0,0 +1,11 @@ +all: + make -f Makefile.core + make -f Makefile.model + make -f Makefile.engine + make -f Makefile.open_jtalk_dic +clean: + make -f Makefile.core clean + make -f Makefile.model clean + make -f Makefile.engine clean + make -f Makefile.open_jtalk_dic clean + rm -rf build diff --git a/3rdparty/voicevox/Makefile.core b/3rdparty/voicevox/Makefile.core new file mode 100644 index 000000000..bac21eb0f --- /dev/null +++ b/3rdparty/voicevox/Makefile.core @@ -0,0 +1,28 @@ +# -*- makefile -*- + +all: installed.viocevox_core + +VERSION = 0.11.4 +FILENAME = core.zip +TARBALL = build/$(FILENAME) +TARBALL_URL = "https://github.com/VOICEVOX/voicevox_core/releases/download/$(VERSION)/core.zip" +SOURCE_DIR = build/core +UNPACK_CMD = unzip +MD5SUM_DIR = $(CURDIR)/md5sum +MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum +SCRIPT_DIR = $( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +include $(shell rospack find mk)/download_unpack_build.mk +INSTALL_DIR = './' + + +installed.viocevox_core: 
$(SOURCE_DIR)/unpacked + mkdir -p $(INSTALL_DIR)/lib + cp build/core/lib*.so $(INSTALL_DIR)/lib/ + cp build/core/*.bin $(INSTALL_DIR)/lib/ + cp build/core/metas.json $(INSTALL_DIR)/lib/metas.json + +clean: + rm -rf $(TARBALL) + rm -rf $(SOURCE_DIR) + rm -rf $(INSTALL_DIR)/lib + rm -rf build diff --git a/3rdparty/voicevox/Makefile.engine b/3rdparty/voicevox/Makefile.engine new file mode 100644 index 000000000..b3d6899fa --- /dev/null +++ b/3rdparty/voicevox/Makefile.engine @@ -0,0 +1,24 @@ +# -*- makefile -*- + +all: installed.voicevox_engine + +VERSION = 0.11.4 +FILENAME = $(VERSION).tar.gz +TARBALL = build/$(FILENAME) +TARBALL_URL = "https://github.com/VOICEVOX/voicevox_engine/archive/refs/tags/$(FILENAME)" +SOURCE_DIR = build/voicevox_engine-$(VERSION) +UNPACK_CMD = tar xvzf +MD5SUM_DIR = $(CURDIR)/md5sum +MD5SUM_FILE = $(MD5SUM_DIR)/voicevox_engine.tar.gz.md5sum +include $(shell rospack find mk)/download_unpack_build.mk +INSTALL_DIR = './' + + +installed.voicevox_engine: $(SOURCE_DIR)/unpacked + cp -r build/voicevox_engine-$(VERSION) $(INSTALL_DIR)/node_scripts/voicevox_engine + +clean: + rm -rf $(TARBALL) + rm -rf $(SOURCE_DIR) + rm -rf $(INSTALL_DIR)/node_scripts/voicevox_engine + rm -rf build diff --git a/3rdparty/voicevox/Makefile.model b/3rdparty/voicevox/Makefile.model new file mode 100644 index 000000000..004028105 --- /dev/null +++ b/3rdparty/voicevox/Makefile.model @@ -0,0 +1,26 @@ +# -*- makefile -*- + +all: installed.voicevox_model + +VERSION = 1.10.0 +FILENAME = onnxruntime-linux-x64-$(VERSION).tgz +TARBALL = build/$(FILENAME) +TARBALL_URL = "https://github.com/microsoft/onnxruntime/releases/download/v$(VERSION)/$(FILENAME)" +SOURCE_DIR = build/onnxruntime-linux-x64-$(VERSION) +UNPACK_CMD = tar xvzf +MD5SUM_DIR = $(CURDIR)/md5sum +MD5SUM_FILE = $(MD5SUM_DIR)/$(FILENAME).md5sum +SCRIPT_DIR = $( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +include $(shell rospack find mk)/download_unpack_build.mk +INSTALL_DIR = './' + + +installed.voicevox_model: $(SOURCE_DIR)/unpacked + mkdir -p $(INSTALL_DIR)/lib + cp build/onnxruntime-linux-x64-$(VERSION)/lib/* $(INSTALL_DIR)/lib + +clean: + rm -rf $(TARBALL) + rm -rf $(SOURCE_DIR) + rm -rf $(INSTALL_DIR)/lib + rm -rf build diff --git a/3rdparty/voicevox/Makefile.open_jtalk_dic b/3rdparty/voicevox/Makefile.open_jtalk_dic new file mode 100644 index 000000000..646921159 --- /dev/null +++ b/3rdparty/voicevox/Makefile.open_jtalk_dic @@ -0,0 +1,25 @@ +# -*- makefile -*- + +all: installed.open_jtalk_dic + +VERSION = 1.11.1 +FILENAME = open_jtalk_dic_utf_8-1.11.tar.gz +TARBALL = build/$(FILENAME) +TARBALL_URL = "https://github.com/r9y9/open_jtalk/releases/download/v$(VERSION)/$(FILENAME)" +SOURCE_DIR = build/open_jtalk_dic_utf_8-1.11 +UNPACK_CMD = tar xvzf +MD5SUM_DIR = $(CURDIR)/md5sum +MD5SUM_FILE = $(MD5SUM_DIR)/open_jtalk_dic.tar.gz.md5sum +include $(shell rospack find mk)/download_unpack_build.mk +INSTALL_DIR = './' + + +installed.open_jtalk_dic: $(SOURCE_DIR)/unpacked + mkdir -p $(INSTALL_DIR)/dict + cp -r build/open_jtalk_dic_utf_8-1.11 $(INSTALL_DIR)/dict + +clean: + rm -rf $(TARBALL) + rm -rf $(SOURCE_DIR) + rm -rf $(INSTALL_DIR)/dict/open_jtalk_dic_utf_8-1.11 + rm -rf build diff --git a/3rdparty/voicevox/README.md b/3rdparty/voicevox/README.md new file mode 100644 index 000000000..d5602db71 --- /dev/null +++ b/3rdparty/voicevox/README.md @@ -0,0 +1,103 @@ +# voicevox + +ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis) + +## TERM + +[VOICEVOX](https://voicevox.hiroshiba.jp/) is 
basically free to use, but please check the terms of use below. + +[TERM](https://voicevox.hiroshiba.jp/term) + +Each voice synthesis character has its own rules. Please use this package according to those terms. + +| Character name | term link | +| ---- | ---- | +| 四国めたん | https://zunko.jp/con_ongen_kiyaku.html | +| ずんだもん | https://zunko.jp/con_ongen_kiyaku.html | +| 春日部つむぎ | https://tsukushinyoki10.wixsite.com/ktsumugiofficial/利用規約 | +| 波音リツ | http://canon-voice.com/kiyaku.html | +| 雨晴はう | https://amehau.com/?page_id=225 | +| 玄野武宏 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 | +| 白上虎太郎 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 | +| 青山龍星 | https://virvoxproject.wixsite.com/official/voicevoxの利用規約 | +| 冥鳴ひまり | https://kotoran8zunzun.wixsite.com/my-site/利用規約 | +| 九州そら | https://zunko.jp/con_ongen_kiyaku.html | + +## Installation + +Build this package. + +```bash +cd /path/to/catkin_workspace +catkin build voicevox +``` + +## Usage + +### Launch sound_play with VOICEVOX Text-to-Speech + +```bash +roslaunch voicevox voicevox_texttospeech.launch +``` + + +### Say something + +#### For python users + +```python +import rospy +from sound_play.libsoundplay import SoundClient + +rospy.init_node('say_node') + +client = SoundClient(sound_action='robotsound_jp', sound_topic='robotsound_jp') + +client.say('こんにちは', voice='四国めたん-あまあま') +``` + +You can change the voice by changing the voice_name. +You can also specify the speaker id. +Look at the following tables for further details. + +| speaker_id | voice_name | +| ---- | ---- | +| 0 | 四国めたん-あまあま | +| 1 | ずんだもん-あまあま | +| 2 | 四国めたん-ノーマル | +| 3 | ずんだもん-ノーマル | +| 4 | 四国めたん-セクシー | +| 5 | ずんだもん-セクシー | +| 6 | 四国めたん-ツンツン | +| 7 | ずんだもん-ツンツン | +| 8 | 春日部つむぎ-ノーマル | +| 9 | 波音リツ-ノーマル | +| 10 | 雨晴はう-ノーマル | +| 11 | 玄野武宏-ノーマル | +| 12 | 白上虎太郎-ノーマル | +| 13 | 青山龍星-ノーマル | +| 14 | 冥鳴ひまり-ノーマル | +| 15 | 九州そら-あまあま | +| 16 | 九州そら-ノーマル | +| 17 | 九州そら-セクシー | +| 18 | 九州そら-ツンツン | +| 19 | 九州そら-ささやき | + +#### For roseus users + +``` +$ roseus +(load "package://pr2eus/speak.l") + +(ros::roseus "say_node") + +(speak "JSKへようこそ。" :lang "波音リツ" :wait t :topic-name "robotsound_jp") +``` + +### Tips + +Normally, the server for speech synthesis starts up at `http://localhost:50021`. +You can change the url and port by setting values for `VOICEVOX_TEXTTOSPEECH_URL` and `VOICEVOX_TEXTTOSPEECH_PORT`. + +You can also set the default character by setting `VOICEVOX_DEFAULT_SPEAKER_ID`. +Please refer to [here](#saysomething) for the speaker id. 
diff --git a/3rdparty/voicevox/bin/text2wave b/3rdparty/voicevox/bin/text2wave new file mode 100755 index 000000000..ca9630f39 --- /dev/null +++ b/3rdparty/voicevox/bin/text2wave @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import argparse +import os +import shutil +import sys + +import requests + +from voicevox.filecheck_utils import checksum_md5 +from voicevox.filecheck_utils import get_cache_dir + + +speaker_id_to_name = { + '0': '四国めたん-あまあま', + '1': 'ずんだもん-あまあま', + '2': '四国めたん-ノーマル', + '3': 'ずんだもん-ノーマル', + '4': '四国めたん-セクシー', + '5': 'ずんだもん-セクシー', + '6': '四国めたん-ツンツン', + '7': 'ずんだもん-ツンツン', + '8': '春日部つむぎ-ノーマル', + '9': '波音リツ-ノーマル', + '10': '雨晴はう-ノーマル', + '11': '玄野武宏-ノーマル', + '12': '白上虎太郎-ノーマル', + '13': '青山龍星-ノーマル', + '14': '冥鳴ひまり-ノーマル', + '15': '九州そら-あまあま', + '16': '九州そら-ノーマル', + '17': '九州そら-セクシー', + '18': '九州そら-ツンツン', + '19': '九州そら-ささやき', +} + +name_to_speaker_id = { + b: a for a, b in speaker_id_to_name.items() +} + + +DEFAULT_SPEAKER_ID = os.environ.get( + 'VOICEVOX_DEFAULT_SPEAKER_ID', '2') +if not DEFAULT_SPEAKER_ID.isdigit(): + DEFAULT_SPEAKER_ID = name_to_speaker_id[DEFAULT_SPEAKER_ID] +VOICEVOX_TEXTTOSPEECH_URL = os.environ.get( + 'VOICEVOX_TEXTTOSPEECH_URL', 'localhost') +VOICEVOX_TEXTTOSPEECH_PORT = os.environ.get( + 'VOICEVOX_TEXTTOSPEECH_PORT', 50021) +cache_enabled = os.environ.get( + 'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_ENABLED', True) +cache_enabled = cache_enabled is True \ + or cache_enabled == 'true' # for launch env tag. + + +def determine_voice_name(voice_name): + if len(voice_name) == 0: + speaker_id = DEFAULT_SPEAKER_ID + else: + if voice_name.isdigit(): + if voice_name in speaker_id_to_name: + speaker_id = voice_name + else: + print( + '[Text2Wave] Invalid speaker_id ({}). Use default voice.' + .format(speaker_id_to_name[DEFAULT_SPEAKER_ID])) + speaker_id = DEFAULT_SPEAKER_ID + else: + candidates = list(filter( + lambda name: name.startswith(voice_name), + name_to_speaker_id)) + if candidates: + speaker_id = name_to_speaker_id[candidates[0]] + else: + print('[Text2Wave] Invalid voice_name ({}). Use default voice.' 
+ .format(speaker_id_to_name[DEFAULT_SPEAKER_ID])) + speaker_id = DEFAULT_SPEAKER_ID + print('[Text2Wave] Speak using voice_name ({})..'.format( + speaker_id_to_name[speaker_id])) + return speaker_id + + +def convert_to_str(x): + if isinstance(x, str): + pass + elif isinstance(x, bytes): + x = x.decode('utf-8') + else: + raise ValueError( + 'Invalid input x type: {}' + .format(type(x))) + return x + + +def request_synthesis( + sentence, output_path, speaker_id='1'): + headers = {'accept': 'application/json'} + + sentence = convert_to_str(sentence) + speaker_id = convert_to_str(speaker_id) + params = { + 'speaker': speaker_id, + 'text': sentence, + } + base_url = 'http://{}:{}'.format( + VOICEVOX_TEXTTOSPEECH_URL, + VOICEVOX_TEXTTOSPEECH_PORT) + url = '{}/audio_query'.format(base_url) + response = requests.post(url, headers=headers, + params=params) + data = response.json() + url = '{}/synthesis'.format(base_url) + response = requests.post(url, headers=headers, + params=params, + json=data) + with open(output_path, 'wb') as f: + f.write(response.content) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='') + parser.add_argument('-eval', '--evaluate') + parser.add_argument('-o', '--output') + parser.add_argument('text') + args = parser.parse_args() + + with open(args.text, 'rb') as f: + speech_text = f.readline() + + speaker_id = determine_voice_name( + args.evaluate.lstrip('(').rstrip(')')) + + if cache_enabled: + cache_dir = get_cache_dir() + md5 = checksum_md5(args.text) + cache_filename = os.path.join( + cache_dir, + '--'.join([md5, speaker_id]) + + '.wav') + if os.path.exists(cache_filename): + print('[Text2Wave] Using cached sound file ({}) for {}' + .format(cache_filename, speech_text.decode('utf-8'))) + shutil.copy(cache_filename, args.output) + sys.exit(0) + + request_synthesis(speech_text, + args.output, + speaker_id) + if cache_enabled: + text_cache_filename = os.path.splitext(cache_filename)[0] + '.txt' + shutil.copy(args.text, text_cache_filename) + shutil.copy(args.output, cache_filename) diff --git a/3rdparty/voicevox/launch/voicevox_texttospeech.launch b/3rdparty/voicevox/launch/voicevox_texttospeech.launch new file mode 100644 index 000000000..d42d1961f --- /dev/null +++ b/3rdparty/voicevox/launch/voicevox_texttospeech.launch @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/3rdparty/voicevox/md5sum/core.zip.md5sum b/3rdparty/voicevox/md5sum/core.zip.md5sum new file mode 100644 index 000000000..f5b5ac439 --- /dev/null +++ b/3rdparty/voicevox/md5sum/core.zip.md5sum @@ -0,0 +1 @@ +96149a074d8ee093039321a88e00076d core.zip diff --git a/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum b/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum new file mode 100644 index 000000000..817b68d89 --- /dev/null +++ b/3rdparty/voicevox/md5sum/onnxruntime-linux-x64-1.10.0.tgz.md5sum @@ -0,0 +1 @@ +9ca61e2009a16cf8a1e9ab9ad0655009 onnxruntime-linux-x64-1.10.0.tgz diff --git a/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum b/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum new file mode 100644 index 000000000..8ce4bb07b --- /dev/null +++ b/3rdparty/voicevox/md5sum/open_jtalk_dic.tar.gz.md5sum @@ -0,0 +1 @@ +ba02dac4143492c3790f949be224dfdf open_jtalk_dic_utf_8-1.11.tar.gz diff --git a/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum b/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum new file mode 100644 index 000000000..5947e3633 --- /dev/null +++ 
b/3rdparty/voicevox/md5sum/voicevox_engine.tar.gz.md5sum @@ -0,0 +1 @@ +997bf9e915f7d6288c923ab1ff5f4ff6 0.11.4.tar.gz diff --git a/3rdparty/voicevox/node_scripts/server.py b/3rdparty/voicevox/node_scripts/server.py new file mode 100644 index 000000000..add596aff --- /dev/null +++ b/3rdparty/voicevox/node_scripts/server.py @@ -0,0 +1,573 @@ +#!/usr/bin/env python3 + +# This code was created based on the following link's code. +# https://github.com/VOICEVOX/voicevox_engine/blob/0.11.4/run.py + +import base64 +from distutils.version import LooseVersion +from functools import lru_cache +import imp +import json +import multiprocessing +import os +import os.path as osp +from pathlib import Path +from tempfile import NamedTemporaryFile +from tempfile import TemporaryFile +from typing import Dict +from typing import List +from typing import Optional +import zipfile + +from fastapi import FastAPI +from fastapi import HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.params import Query +from fastapi import Response +import rospkg +import rospy +import soundfile +from starlette.responses import FileResponse +import uvicorn + + +PKG_NAME = 'voicevox' +abs_path = osp.dirname(osp.abspath(__file__)) +voicevox_engine = imp.load_package( + 'voicevox_engine', osp.join(abs_path, 'voicevox_engine/voicevox_engine')) +rospack = rospkg.RosPack() +voicevox_dir = rospack.get_path(PKG_NAME) +voicevox_lib_dir = osp.join(voicevox_dir, 'lib') +# set pyopenjtalk's dic.tar.gz file +os.environ['OPEN_JTALK_DICT_DIR'] = osp.join( + voicevox_dir, 'dict', 'open_jtalk_dic_utf_8-1.11') + + +from voicevox_engine import __version__ +from voicevox_engine.kana_parser import create_kana +from voicevox_engine.kana_parser import parse_kana +from voicevox_engine.model import AccentPhrase +from voicevox_engine.model import AudioQuery +from voicevox_engine.model import ParseKanaBadRequest +from voicevox_engine.model import ParseKanaError +from voicevox_engine.model import Speaker +from voicevox_engine.model import SpeakerInfo +from voicevox_engine.model import SupportedDevicesInfo +from voicevox_engine.morphing import \ + synthesis_morphing_parameter as _synthesis_morphing_parameter +from voicevox_engine.morphing import synthesis_morphing +from voicevox_engine.preset import Preset +from voicevox_engine.preset import PresetLoader +from voicevox_engine.synthesis_engine import make_synthesis_engines +from voicevox_engine.synthesis_engine import SynthesisEngineBase +from voicevox_engine.user_dict import user_dict_startup_processing +from voicevox_engine.utility import connect_base64_waves +from voicevox_engine.utility import ConnectBase64WavesException +from voicevox_engine.utility import engine_root + + +def b64encode_str(s): + return base64.b64encode(s).decode("utf-8") + + +def generate_app( + synthesis_engines: Dict[str, SynthesisEngineBase], latest_core_version: str +) -> FastAPI: + root_dir = engine_root() + + default_sampling_rate = synthesis_engines[latest_core_version].default_sampling_rate + + app = FastAPI( + title="VOICEVOX ENGINE", + description="VOICEVOXの音声合成エンジンです。", + version=__version__, + ) + + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + preset_loader = PresetLoader( + preset_path=root_dir / "presets.yaml", + ) + + # キャッシュを有効化 + # モジュール側でlru_cacheを指定するとキャッシュを制御しにくいため、HTTPサーバ側で指定する + # TODO: キャッシュを管理するモジュール側API・HTTP側APIを用意する + synthesis_morphing_parameter = 
lru_cache(maxsize=4)(_synthesis_morphing_parameter) + + # @app.on_event("startup") + # async def start_catch_disconnection(): + # if args.enable_cancellable_synthesis: + # loop = asyncio.get_event_loop() + # _ = loop.create_task(cancellable_engine.catch_disconnection()) + + @app.on_event("startup") + def apply_user_dict(): + user_dict_startup_processing() + + def get_engine(core_version: Optional[str]) -> SynthesisEngineBase: + if core_version is None: + return synthesis_engines[latest_core_version] + if core_version in synthesis_engines: + return synthesis_engines[core_version] + raise HTTPException(status_code=422, detail="不明なバージョンです") + + @app.post( + "/audio_query", + response_model=AudioQuery, + tags=["クエリ作成"], + summary="音声合成用のクエリを作成する", + ) + def audio_query(text: str, speaker: int, core_version: Optional[str] = None): + """ + クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。 + """ + engine = get_engine(core_version) + accent_phrases = engine.create_accent_phrases(text, speaker_id=speaker) + return AudioQuery( + accent_phrases=accent_phrases, + speedScale=1, + pitchScale=0, + intonationScale=1, + volumeScale=1, + prePhonemeLength=0.1, + postPhonemeLength=0.1, + outputSamplingRate=default_sampling_rate, + outputStereo=False, + kana=create_kana(accent_phrases), + ) + + @app.post( + "/audio_query_from_preset", + response_model=AudioQuery, + tags=["クエリ作成"], + summary="音声合成用のクエリをプリセットを用いて作成する", + ) + def audio_query_from_preset( + text: str, preset_id: int, core_version: Optional[str] = None + ): + """ + クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。 + """ + engine = get_engine(core_version) + presets, err_detail = preset_loader.load_presets() + if err_detail: + raise HTTPException(status_code=422, detail=err_detail) + for preset in presets: + if preset.id == preset_id: + selected_preset = preset + break + else: + raise HTTPException(status_code=422, detail="該当するプリセットIDが見つかりません") + + accent_phrases = engine.create_accent_phrases( + text, speaker_id=selected_preset.style_id + ) + return AudioQuery( + accent_phrases=accent_phrases, + speedScale=selected_preset.speedScale, + pitchScale=selected_preset.pitchScale, + intonationScale=selected_preset.intonationScale, + volumeScale=selected_preset.volumeScale, + prePhonemeLength=selected_preset.prePhonemeLength, + postPhonemeLength=selected_preset.postPhonemeLength, + outputSamplingRate=default_sampling_rate, + outputStereo=False, + kana=create_kana(accent_phrases), + ) + + @app.post( + "/accent_phrases", + response_model=List[AccentPhrase], + tags=["クエリ編集"], + summary="テキストからアクセント句を得る", + responses={ + 400: { + "description": "読み仮名のパースに失敗", + "model": ParseKanaBadRequest, + } + }, + ) + def accent_phrases( + text: str, + speaker: int, + is_kana: bool = False, + core_version: Optional[str] = None, + ): + """ + テキストからアクセント句を得ます。 + is_kanaが`true`のとき、テキストは次のようなAquesTalkライクな記法に従う読み仮名として処理されます。デフォルトは`false`です。 + * 全てのカナはカタカナで記述される + * アクセント句は`/`または`、`で区切る。`、`で区切った場合に限り無音区間が挿入される。 + * カナの手前に`_`を入れるとそのカナは無声化される + * アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。 + * アクセント句末に`?`(全角)を入れることにより疑問文の発音ができる。 + """ + engine = get_engine(core_version) + if is_kana: + try: + accent_phrases = parse_kana(text) + except ParseKanaError as err: + raise HTTPException( + status_code=400, + detail=ParseKanaBadRequest(err).dict(), + ) + accent_phrases = engine.replace_mora_data( + accent_phrases=accent_phrases, speaker_id=speaker + ) + + return accent_phrases + else: + return engine.create_accent_phrases(text, speaker_id=speaker) + + 
@app.post( + "/mora_data", + response_model=List[AccentPhrase], + tags=["クエリ編集"], + summary="アクセント句から音高・音素長を得る", + ) + def mora_data( + accent_phrases: List[AccentPhrase], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) + return engine.replace_mora_data(accent_phrases, speaker_id=speaker) + + @app.post( + "/mora_length", + response_model=List[AccentPhrase], + tags=["クエリ編集"], + summary="アクセント句から音素長を得る", + ) + def mora_length( + accent_phrases: List[AccentPhrase], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) + return engine.replace_phoneme_length( + accent_phrases=accent_phrases, speaker_id=speaker + ) + + @app.post( + "/mora_pitch", + response_model=List[AccentPhrase], + tags=["クエリ編集"], + summary="アクセント句から音高を得る", + ) + def mora_pitch( + accent_phrases: List[AccentPhrase], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) + return engine.replace_mora_pitch( + accent_phrases=accent_phrases, speaker_id=speaker + ) + + @app.post( + "/synthesis", + response_class=FileResponse, + responses={ + 200: { + "content": { + "audio/wav": {"schema": {"type": "string", "format": "binary"}} + }, + } + }, + tags=["音声合成"], + summary="音声合成する", + ) + def synthesis( + query: AudioQuery, + speaker: int, + enable_interrogative_upspeak: bool = Query( # noqa: B008 + default=True, + description="疑問系のテキストが与えられたら語尾を自動調整する", + ), + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) + wave = engine.synthesis( + query=query, + speaker_id=speaker, + enable_interrogative_upspeak=enable_interrogative_upspeak, + ) + + with NamedTemporaryFile(delete=False) as f: + soundfile.write( + file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV" + ) + + return FileResponse(f.name, media_type="audio/wav") + + @app.post( + "/multi_synthesis", + response_class=FileResponse, + responses={ + 200: { + "content": { + "application/zip": { + "schema": {"type": "string", "format": "binary"} + } + }, + } + }, + tags=["音声合成"], + summary="複数まとめて音声合成する", + ) + def multi_synthesis( + queries: List[AudioQuery], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) + sampling_rate = queries[0].outputSamplingRate + + with NamedTemporaryFile(delete=False) as f: + + with zipfile.ZipFile(f, mode="a") as zip_file: + + for i in range(len(queries)): + + if queries[i].outputSamplingRate != sampling_rate: + raise HTTPException( + status_code=422, detail="サンプリングレートが異なるクエリがあります" + ) + + with TemporaryFile() as wav_file: + + wave = engine.synthesis(query=queries[i], speaker_id=speaker) + soundfile.write( + file=wav_file, + data=wave, + samplerate=sampling_rate, + format="WAV", + ) + wav_file.seek(0) + zip_file.writestr(f"{str(i + 1).zfill(3)}.wav", wav_file.read()) + + return FileResponse(f.name, media_type="application/zip") + + @app.post( + "/synthesis_morphing", + response_class=FileResponse, + responses={ + 200: { + "content": { + "audio/wav": {"schema": {"type": "string", "format": "binary"}} + }, + } + }, + tags=["音声合成"], + summary="2人の話者でモーフィングした音声を合成する", + ) + def _synthesis_morphing( + query: AudioQuery, + base_speaker: int, + target_speaker: int, + morph_rate: float = Query(..., ge=0.0, le=1.0), # noqa: B008 + core_version: Optional[str] = None, + ): + """ + 指定された2人の話者で音声を合成、指定した割合でモーフィングした音声を得ます。 + モーフィングの割合は`morph_rate`で指定でき、0.0でベースの話者、1.0でターゲットの話者に近づきます。 + """ + engine = get_engine(core_version) + + # 生成したパラメータはキャッシュされる + 
morph_param = synthesis_morphing_parameter( + engine=engine, + query=query, + base_speaker=base_speaker, + target_speaker=target_speaker, + ) + + morph_wave = synthesis_morphing( + morph_param=morph_param, + morph_rate=morph_rate, + output_stereo=query.outputStereo, + ) + + with NamedTemporaryFile(delete=False) as f: + soundfile.write( + file=f, + data=morph_wave, + samplerate=morph_param.fs, + format="WAV", + ) + + return FileResponse(f.name, media_type="audio/wav") + + @app.post( + "/connect_waves", + response_class=FileResponse, + responses={ + 200: { + "content": { + "audio/wav": {"schema": {"type": "string", "format": "binary"}} + }, + } + }, + tags=["その他"], + summary="base64エンコードされた複数のwavデータを一つに結合する", + ) + def connect_waves(waves: List[str]): + """ + base64エンコードされたwavデータを一纏めにし、wavファイルで返します。 + """ + try: + waves_nparray, sampling_rate = connect_base64_waves(waves) + except ConnectBase64WavesException as err: + return HTTPException(status_code=422, detail=str(err)) + + with NamedTemporaryFile(delete=False) as f: + soundfile.write( + file=f, + data=waves_nparray, + samplerate=sampling_rate, + format="WAV", + ) + + return FileResponse(f.name, media_type="audio/wav") + + @app.get("/presets", response_model=List[Preset], tags=["その他"]) + def get_presets(): + """ + エンジンが保持しているプリセットの設定を返します + + Returns + ------- + presets: List[Preset] + プリセットのリスト + """ + presets, err_detail = preset_loader.load_presets() + if err_detail: + raise HTTPException(status_code=422, detail=err_detail) + return presets + + @app.get("/version", tags=["その他"]) + def version() -> str: + return __version__ + + @app.get("/core_versions", response_model=List[str], tags=["その他"]) + def core_versions() -> List[str]: + return Response( + content=json.dumps(list(synthesis_engines.keys())), + media_type="application/json", + ) + + @app.get("/speakers", response_model=List[Speaker], tags=["その他"]) + def speakers( + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) + return Response( + content=engine.speakers, + media_type="application/json", + ) + + @app.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"]) + def speaker_info(speaker_uuid: str, core_version: Optional[str] = None): + """ + 指定されたspeaker_uuidに関する情報をjson形式で返します。 + 画像や音声はbase64エンコードされたものが返されます。 + + Returns + ------- + ret_data: SpeakerInfo + """ + speakers = json.loads(get_engine(core_version).speakers) + for i in range(len(speakers)): + if speakers[i]["speaker_uuid"] == speaker_uuid: + speaker = speakers[i] + break + else: + raise HTTPException(status_code=404, detail="該当する話者が見つかりません") + + try: + policy = (root_dir / f"speaker_info/{speaker_uuid}/policy.md").read_text( + "utf-8" + ) + portrait = b64encode_str( + (root_dir / f"speaker_info/{speaker_uuid}/portrait.png").read_bytes() + ) + style_infos = [] + for style in speaker["styles"]: + id = style["id"] + icon = b64encode_str( + ( + root_dir / f"speaker_info/{speaker_uuid}/icons/{id}.png" + ).read_bytes() + ) + voice_samples = [ + b64encode_str( + ( + root_dir + / "speaker_info/{}/voice_samples/{}_{}.wav".format( + speaker_uuid, id, str(j + 1).zfill(3) + ) + ).read_bytes() + ) + for j in range(3) + ] + style_infos.append( + {"id": id, "icon": icon, "voice_samples": voice_samples} + ) + except FileNotFoundError: + import traceback + + traceback.print_exc() + raise HTTPException(status_code=500, detail="追加情報が見つかりませんでした") + + ret_data = {"policy": policy, "portrait": portrait, "style_infos": style_infos} + return ret_data + + @app.get("/supported_devices", 
response_model=SupportedDevicesInfo, tags=["その他"]) + def supported_devices( + core_version: Optional[str] = None, + ): + supported_devices = get_engine(core_version).supported_devices + if supported_devices is None: + raise HTTPException(status_code=422, detail="非対応の機能です。") + return Response( + content=supported_devices, + media_type="application/json", + ) + + return app + + +if __name__ == "__main__": + multiprocessing.freeze_support() + rospy.init_node('voicevox_server') + + voicelib_dir = [Path(voicevox_lib_dir)] + use_gpu = False + host = rospy.get_param('~host', "127.0.0.1") + port = rospy.get_param('~port', 50021) + cpu_num_threads = rospy.get_param('~cpu_num_threads', None) + if cpu_num_threads is None: + cpu_num_threads = multiprocessing.cpu_count() + + synthesis_engines = make_synthesis_engines( + use_gpu=use_gpu, + voicelib_dirs=voicelib_dir, + cpu_num_threads=cpu_num_threads, + ) + if len(synthesis_engines) == 0: + rospy.logerr("音声合成エンジンがありません。") + latest_core_version = str(max([LooseVersion(ver) + for ver in synthesis_engines])) + + uvicorn.run( + generate_app(synthesis_engines, latest_core_version), + host=host, + port=port, + ) diff --git a/3rdparty/voicevox/package.xml b/3rdparty/voicevox/package.xml new file mode 100644 index 000000000..5240c3468 --- /dev/null +++ b/3rdparty/voicevox/package.xml @@ -0,0 +1,36 @@ + + + + voicevox + 0.0.1 + VOICEVOX: AI speech synthesis + Iori Yanokura + + MIT + + http://ros.org/wiki/voicevox + + Iori Yanokura + + catkin + catkin_virtualenv + + mk + roslib + rospack + unzip + wget + + python3 + python3-requests + sound_play + unzip + wget + + + requirements.txt + + + diff --git a/3rdparty/voicevox/python/voicevox/__init__.py b/3rdparty/voicevox/python/voicevox/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/3rdparty/voicevox/python/voicevox/filecheck_utils.py b/3rdparty/voicevox/python/voicevox/filecheck_utils.py new file mode 100644 index 000000000..6c881b5f5 --- /dev/null +++ b/3rdparty/voicevox/python/voicevox/filecheck_utils.py @@ -0,0 +1,43 @@ +import hashlib +import os + + +def get_cache_dir(): + """Return cache dir. + + Returns + ------- + cache_dir : str + cache directory. + """ + ros_home = os.getenv('ROS_HOME', os.path.expanduser('~/.ros')) + pkg_ros_home = os.path.join(ros_home, 'voicevox_texttospeech') + default_cache_dir = os.path.join(pkg_ros_home, 'cache') + cache_dir = os.environ.get( + 'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_DIR', + default_cache_dir) + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + return cache_dir + + +def checksum_md5(filename, blocksize=8192): + """Calculate md5sum. + + Parameters + ---------- + filename : str or pathlib.Path + input filename. + blocksize : int + MD5 has 128-byte digest blocks (default: 8192 is 128x64). + Returns + ------- + md5 : str + calculated md5sum. 
+ """ + filename = str(filename) + hash_factory = hashlib.md5() + with open(filename, 'rb') as f: + for chunk in iter(lambda: f.read(blocksize), b''): + hash_factory.update(chunk) + return hash_factory.hexdigest() diff --git a/3rdparty/voicevox/requirements.in b/3rdparty/voicevox/requirements.in new file mode 100644 index 000000000..c9cfd223a --- /dev/null +++ b/3rdparty/voicevox/requirements.in @@ -0,0 +1,11 @@ +PyYAML +aiofiles +appdirs +fastapi +git+https://github.com/VOICEVOX/pyopenjtalk@a85521a0a0f298f08d9e9b24987b3c77eb4aaff5#egg=pyopenjtalk +numpy +python-multipart +pyworld +scipy +soundfile +uvicorn diff --git a/3rdparty/voicevox/setup.py b/3rdparty/voicevox/setup.py new file mode 100644 index 000000000..939174bc8 --- /dev/null +++ b/3rdparty/voicevox/setup.py @@ -0,0 +1,12 @@ +from distutils.core import setup + +from catkin_pkg.python_setup import generate_distutils_setup +from setuptools import find_packages + + +d = generate_distutils_setup( + packages=find_packages('python'), + package_dir={'': 'python'}, +) + +setup(**d) diff --git a/dialogflow_task_executive/node_scripts/task_executive.py b/dialogflow_task_executive/node_scripts/task_executive.py index 686327a55..e826d50d7 100644 --- a/dialogflow_task_executive/node_scripts/task_executive.py +++ b/dialogflow_task_executive/node_scripts/task_executive.py @@ -7,7 +7,9 @@ import rospy from app_manager.msg import AppList +from app_manager.msg import KeyValue from app_manager.srv import StartApp +from app_manager.srv import StartAppRequest from app_manager.srv import StopApp from std_srvs.srv import Empty @@ -90,12 +92,16 @@ def available_apps(self): return map(lambda a: a.name, self._latest_msg.available_apps) - def start_app(self, name): + def start_app(self, name, launch_args): if name in self.running_apps: raise RuntimeError("{} is already running".format(name)) elif name not in self.available_apps: raise RuntimeError("{} is not available".format(name)) - res = self._srv_start_app(name=name) + req = StartAppRequest() + req.name = name + for key, value in launch_args.items(): + req.args.append(KeyValue(key=key, value=value)) + res = self._srv_start_app(req) if res.started: rospy.loginfo("{} successfully started".format(name)) return True @@ -221,6 +227,12 @@ def dialog_cb(self, msg): try: params = json.loads(msg.parameters) rospy.set_param("/action/parameters", params) + # set launch_args + launch_args = {} + for key, value in params.items(): + launch_args[key.encode('utf-8')] = value.encode('utf-8') + except AttributeError as e: + rospy.logerr(e) except ValueError: rospy.logerr( "Failed to parse parameters of action '{}'".format(msg.action)) @@ -228,7 +240,7 @@ def dialog_cb(self, msg): rospy.loginfo( "Starting '{}' with parameters '{}'" .format(msg.action, msg.parameters)) - self.app_manager.start_app(action) + self.app_manager.start_app(action, launch_args) def app_start_cb(self, name): rospy.loginfo("{} started".format(name)) diff --git a/google_chat_ros/scripts/google_chat_ros_node.py b/google_chat_ros/scripts/google_chat_ros_node.py index 64e6d011f..43830281b 100644 --- a/google_chat_ros/scripts/google_chat_ros_node.py +++ b/google_chat_ros/scripts/google_chat_ros_node.py @@ -86,7 +86,7 @@ def __init__(self): rospy.logwarn("You cannot recieve Google Chat event because HTTPS server or Google Cloud Pub/Sub is not running.") else: - rospy.logerr("Please choose receiving_mode param from dialogflow, https, pubsub, none.") + rospy.logerr("Please choose receiving_mode param from dialogflow, url, pubsub, none.") def 
killhttpd(self): self._server.kill() diff --git a/respeaker_ros/CMakeLists.txt b/respeaker_ros/CMakeLists.txt index 390a82e0f..b4009cd1a 100644 --- a/respeaker_ros/CMakeLists.txt +++ b/respeaker_ros/CMakeLists.txt @@ -35,7 +35,9 @@ catkin_install_python(PROGRAMS ${PYTHON_SCRIPTS} if(CATKIN_ENABLE_TESTING) find_package(rostest REQUIRED) + find_package(roslaunch REQUIRED) add_rostest(test/sample_respeaker.test DEPENDENCIES ${PROJECT_NAME}_generate_virtualenv ) + roslaunch_add_file_check(launch/sample_respeaker.launch) endif() diff --git a/respeaker_ros/README.md b/respeaker_ros/README.md index e42ba1202..247168ba5 100644 --- a/respeaker_ros/README.md +++ b/respeaker_ros/README.md @@ -92,6 +92,151 @@ A ROS Package for Respeaker Mic Array a: 0.3" ``` +## Parameters for respeaker_node.py + + - ### Publishing topics + + - `audio` (`audio_common_msgs/AudioData`) + + Processed audio for ASR. 1 channel. + + - `audio_info` (`audio_common_msgs/AudioInfo`) + + Audio info with respect to `~audio`. + + - `audio_raw` (`audio_common_msgs/AudioData`) + + Micarray audio data has 4-channels. Maybe you need to update respeaker firmware. + + If the firmware isn't supported, this will not be output. + + - `audio_info_raw` (`audio_common_msgs/AudioInfo`) + + Audio info with respect to `~audio_raw`. + + If the firmware isn't supported, this will not be output. + + - `speech_audio` (`audio_common_msgs/AudioData`) + + Audio data while a person is speaking using the VAD function. + + - `speech_audio_raw` (`audio_common_msgs/AudioData`) + + Audio data has 4-channels while a person is speaking using the VAD function. + + If the firmware isn't supported, this will not be output. + + - `audio_merged_playback` (`audio_common_msgs/AudioData`) + + Data that combines the sound of mic and speaker. + + If the firmware isn't supported, this will not be output. + + For more detail, please see https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/ + + - `~is_speeching` (`std_msgs/Bool`) + + Using VAD function, publish whether someone is speaking. + + - `~sound_direction` (`std_msgs/Int32`) + + Direction of sound. + + - `~sound_localization` (`geometry_msgs/PoseStamped`) + + Localized Sound Direction. The value of the position in the estimated direction with `~doa_offset` as the radius is obtained. + + - ### Parameters + + - `~update_rate` (`Double`, default: `10.0`) + + Publishing info data such as `~is_speeching`, `~sound_direction`, `~sound_localization`, `~speech_audio` and `~speech_audio_raw`. + + - `~sensor_frame_id` (`String`, default: `respeaker_base`) + + Frame id. + + - `~doa_xy_offset` (`Double`, default: `0.0`) + + `~doa_offset` is a estimated sound direction's radius. + + - `~doa_yaw_offset` (`Double`, default: `90.0`) + + Estimated DoA angle offset. + + - `~speech_prefetch` (`Double`, default: `0.5`) + + Time to represent how long speech is pre-stored in buffer. + + - `~speech_continuation` (`Double`, default: `0.5`) + + If the time between the current time and the time when the speech is stopped is shorter than this time, + it is assumed that someone is speaking. + + - `~speech_max_duration` (`Double`, default: `7.0`) + + - `~speech_min_duration` (`Double`, default: `0.1`) + + If the speaking interval is within these times, `~speech_audio` and `~speech_audio_raw` will be published. + + - `~suppress_pyaudio_error` (`Bool`, default: `True`) + + If this value is `True`, suppress error from pyaudio. 
+ +## Parameters for speech_to_text.py + + - ### Publishing topics + + - `~speech_to_text` (`speech_recognition_msgs/SpeechRecognitionCandidates`) + + Recognized text. + + - ### Subscribing topics + + - `audio` (`audio_common_msgs/AudioData`) + + Input audio. + + - ### Parameters + + - `~audio_info` (`String`, default: ``) + + audio_info (`audio_common_msgs/AudioInfo`) topic. If this value is specified, `~sample_rate`, `~sample_width` and `~channels` parameters are obtained from the topic. + + - `~sample_rate` (`Int`, default: `16000`) + + Sampling rate. + + - `~sample_width` (`Int`, default: `2`) + + Sample with. + + - `~channels` (`Int`, default: `1`) + + Number of channels. + + - `~target_channel` (`Int`, default: `0`) + + Target number of channel. + + - `~language` (`String`, default: `ja-JP`) + + language of speech to text service. For English users, you can specify `en-US`. + + - `~self_cancellation` (`Bool`, default: `True`) + + ignore voice input while the robot is speaking. + + - `~tts_tolerance` (`String`, default: `1.0`) + + time to assume as SPEAKING after tts service is finished. + + - `~tts_action_names` (`List[String]`, default: `['sound_play']`) + + If `~self_chancellation` is `True`, this value will be used. + + When the actions are active, do nothing with the callback that subscribes to `audio`. + ## Use cases ### Voice Recognition diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch index 31d083608..e2c43c557 100644 --- a/respeaker_ros/launch/sample_respeaker.launch +++ b/respeaker_ros/launch/sample_respeaker.launch @@ -13,6 +13,8 @@ + + + respawn="true" respawn_delay="10" > + @@ -30,6 +33,7 @@ + audio_info: $(arg audio_info) language: $(arg language) self_cancellation: $(arg self_cancellation) tts_tolerance: 0.5 diff --git a/respeaker_ros/package.xml b/respeaker_ros/package.xml index ac83b898a..be16789a2 100644 --- a/respeaker_ros/package.xml +++ b/respeaker_ros/package.xml @@ -17,6 +17,7 @@ flac geometry_msgs std_msgs + sound_play speech_recognition_msgs tf python-numpy diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index bf14ad478..6b351251c 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -16,6 +16,13 @@ import sys import time from audio_common_msgs.msg import AudioData +enable_audio_info = True +try: + from audio_common_msgs.msg import AudioInfo +except Exception as e: + rospy.logwarn('audio_common_msgs/AudioInfo message is not exists.' 
+ ' AudioInfo message will not be published.') + enable_audio_info = False from geometry_msgs.msg import PoseStamped from std_msgs.msg import Bool, Int32, ColorRGBA from dynamic_reconfigure.server import Server @@ -265,7 +272,6 @@ def __init__(self, on_audio, channel=0, suppress_error=True): if self.channels != 6: rospy.logwarn("%d channel is found for respeaker" % self.channels) rospy.logwarn("You may have to update firmware.") - self.channel = min(self.channels - 1, max(0, self.channel)) self.stream = self.pyaudio.open( input=True, start=False, @@ -295,9 +301,8 @@ def stream_callback(self, in_data, frame_count, time_info, status): data = np.frombuffer(in_data, dtype=np.int16) chunk_per_channel = int(len(data) / self.channels) data = np.reshape(data, (chunk_per_channel, self.channels)) - chan_data = data[:, self.channel] # invoke callback - self.on_audio(chan_data.tobytes()) + self.on_audio(data) return None, pyaudio.paContinue def start(self): @@ -333,14 +338,24 @@ def __init__(self): self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True) self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True) self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10) + if enable_audio_info is True: + self.pub_audio_info = rospy.Publisher("audio_info", AudioInfo, + queue_size=1, latch=True) + self.pub_audio_raw_info = rospy.Publisher("audio_info_raw", AudioInfo, + queue_size=1, latch=True) self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10) # init config self.config = None self.dyn_srv = Server(RespeakerConfig, self.on_config) # start self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error) + self.n_channel = self.respeaker_audio.channels + self.speech_prefetch_bytes = int( - self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0) + 1 + * self.speech_prefetch + * self.respeaker_audio.rate + * self.respeaker_audio.bitdepth / 8.0) self.speech_prefetch_buffer = b"" self.respeaker_audio.start() self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate), @@ -348,6 +363,58 @@ def __init__(self): self.timer_led = None self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led) + # processed audio for ASR + if enable_audio_info is True: + info_msg = AudioInfo( + channels=1, + sample_rate=self.respeaker_audio.rate, + sample_format='S16LE', + bitrate=self.respeaker_audio.rate * self.respeaker_audio.bitdepth, + coding_format='WAVE') + self.pub_audio_info.publish(info_msg) + + if self.n_channel > 1: + # The respeaker has 4 microphones. + # Multiple microphones can be used for + # beam forming (strengthening the sound in a specific direction) + # and sound localization (the respeaker outputs the azimuth + # direction, but the multichannel can estimate + # the elevation direction). etc. + + # Channel 0: processed audio for ASR + # Channel 1: mic1 raw data + # Channel 2: mic2 raw data + # Channel 3: mic3 raw data + # Channel 4: mic4 raw data + # Channel 5: merged playback + # For more detail, please see + # https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/ + # (self.n_channel - 2) = 4 channels are multiple microphones. 
+ self.pub_audio_raw = rospy.Publisher("audio_raw", AudioData, + queue_size=10) + self.pub_audio_merged_playback = rospy.Publisher( + "audio_merged_playback", AudioData, + queue_size=10) + if enable_audio_info is True: + info_raw_msg = AudioInfo( + channels=self.n_channel - 2, + sample_rate=self.respeaker_audio.rate, + sample_format='S16LE', + bitrate=(self.respeaker_audio.rate * + self.respeaker_audio.bitdepth), + coding_format='WAVE') + self.pub_audio_raw_info.publish(info_raw_msg) + + self.speech_audio_raw_buffer = b"" + self.speech_raw_prefetch_buffer = b"" + self.pub_speech_audio_raw = rospy.Publisher( + "speech_audio_raw", AudioData, queue_size=10) + self.speech_raw_prefetch_bytes = int( + (self.n_channel - 2) + * self.speech_prefetch + * self.respeaker_audio.rate + * self.respeaker_audio.bitdepth / 8.0) + def on_shutdown(self): self.info_timer.shutdown() try: @@ -386,14 +453,30 @@ def on_status_led(self, msg): oneshot=True) def on_audio(self, data): - self.pub_audio.publish(AudioData(data=data)) + # take processed audio for ASR. + processed_data = data[:, 0].tobytes() + self.pub_audio.publish(AudioData(data=processed_data)) + if self.n_channel > 1: + raw_audio_data = data[:, 1:5].reshape(-1).tobytes() + self.pub_audio_raw.publish( + AudioData(data=raw_audio_data)) + self.pub_audio_merged_playback.publish( + AudioData(data=data[:, 5].tobytes())) if self.is_speeching: if len(self.speech_audio_buffer) == 0: self.speech_audio_buffer = self.speech_prefetch_buffer - self.speech_audio_buffer += data + if self.n_channel > 1: + self.speech_audio_raw_buffer = self.speech_raw_prefetch_buffer + self.speech_audio_buffer += processed_data + if self.n_channel > 1: + self.speech_audio_raw_buffer += raw_audio_data else: - self.speech_prefetch_buffer += data + self.speech_prefetch_buffer += processed_data self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:] + if self.n_channel > 1: + self.speech_raw_prefetch_buffer += raw_audio_data + self.speech_raw_prefetch_buffer = self.speech_raw_prefetch_buffer[ + -self.speech_raw_prefetch_bytes:] def on_timer(self, event): stamp = event.current_real or rospy.Time.now() @@ -433,13 +516,15 @@ def on_timer(self, event): elif self.is_speeching: buf = self.speech_audio_buffer self.speech_audio_buffer = b"" + buf_raw = self.speech_audio_raw_buffer + self.speech_audio_raw_buffer = b"" self.is_speeching = False duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth - duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth + duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth / self.n_channel rospy.loginfo("Speech detected for %.3f seconds" % duration) if self.speech_min_duration <= duration < self.speech_max_duration: - self.pub_speech_audio.publish(AudioData(data=buf)) + self.pub_speech_audio_raw.publish(AudioData(data=buf_raw)) if __name__ == '__main__': diff --git a/respeaker_ros/scripts/speech_to_text.py b/respeaker_ros/scripts/speech_to_text.py index 0974b2f65..6765e2e04 100644 --- a/respeaker_ros/scripts/speech_to_text.py +++ b/respeaker_ros/scripts/speech_to_text.py @@ -2,6 +2,10 @@ # -*- coding: utf-8 -*- # Author: Yuki Furuta +from __future__ import division + +import sys + import actionlib import rospy try: @@ -9,8 +13,16 @@ except ImportError as e: raise ImportError(str(e) + '\nplease try "pip install speechrecognition"') +import numpy as np from actionlib_msgs.msg import GoalStatus, GoalStatusArray from audio_common_msgs.msg import AudioData +enable_audio_info = True 
+try: + from audio_common_msgs.msg import AudioInfo +except Exception as e: + rospy.logwarn('audio_common_msgs/AudioInfo message is not exists.' + ' AudioInfo message will not be published.') + enable_audio_info = False from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal from speech_recognition_msgs.msg import SpeechRecognitionCandidates @@ -18,8 +30,32 @@ class SpeechToText(object): def __init__(self): # format of input audio data - self.sample_rate = rospy.get_param("~sample_rate", 16000) - self.sample_width = rospy.get_param("~sample_width", 2) + audio_info_topic_name = rospy.get_param('~audio_info', '') + if len(audio_info_topic_name) > 0: + if enable_audio_info is False: + rospy.logerr( + 'audio_common_msgs/AudioInfo message is not exists.' + ' Giving ~audio_info is not valid in your environment.') + sys.exit(1) + rospy.loginfo('Extract audio info params from {}'.format( + audio_info_topic_name)) + audio_info_msg = rospy.wait_for_message( + audio_info_topic_name, AudioInfo) + self.sample_rate = audio_info_msg.sample_rate + self.sample_width = audio_info_msg.bitrate // self.sample_rate // 8 + self.channels = audio_info_msg.channels + else: + self.sample_rate = rospy.get_param("~sample_rate", 16000) + self.sample_width = rospy.get_param("~sample_width", 2) + self.channels = rospy.get_param("~channels", 1) + if self.sample_width == 2: + self.dtype = 'int16' + elif self.sample_width == 4: + self.dtype = 'int32' + else: + raise NotImplementedError('sample_width {} is not supported' + .format(self.sample_width)) + self.target_channel = rospy.get_param("~target_channel", 0) # language of STT service self.language = rospy.get_param("~language", "ja-JP") # ignore voice input while the robot is speaking @@ -78,7 +114,11 @@ def audio_cb(self, msg): if self.is_canceling: rospy.loginfo("Speech is cancelled") return - data = SR.AudioData(msg.data, self.sample_rate, self.sample_width) + + data = SR.AudioData( + np.frombuffer(msg.data, dtype=self.dtype)[ + self.target_channel::self.channels].tobytes(), + self.sample_rate, self.sample_width) try: rospy.loginfo("Waiting for result %d" % len(data.get_raw_data())) result = self.recognizer.recognize_google( diff --git a/respeaker_ros/test/sample_respeaker.test b/respeaker_ros/test/sample_respeaker.test index 5d51c220c..61f10fb7b 100644 --- a/respeaker_ros/test/sample_respeaker.test +++ b/respeaker_ros/test/sample_respeaker.test @@ -3,6 +3,7 @@ + diff --git a/rostwitter/CMakeLists.txt b/rostwitter/CMakeLists.txt index 39258afbd..82b81feae 100644 --- a/rostwitter/CMakeLists.txt +++ b/rostwitter/CMakeLists.txt @@ -40,7 +40,7 @@ else() ) endif() -install(DIRECTORY test resource +install(DIRECTORY test resource launch DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} USE_SOURCE_PERMISSIONS ) diff --git a/rostwitter/README.md b/rostwitter/README.md new file mode 100644 index 000000000..81750bf5b --- /dev/null +++ b/rostwitter/README.md @@ -0,0 +1,116 @@ +# rostwitter + +This package is a ROS wrapper for Twitter. You can tweet via ROS. + +# How to use + +## Get access key for API. + +Please get access to the Twitter API. Please refer to the following URL. + +https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api + +After that, save the yaml file in the following format. + +``` +CKEY: +CSECRET: +AKEY: +ASECRET: +``` + +## Launch tweet node + +``` +roslaunch rostwitter tweet.launch account_info:= +``` + +## Tweet text + +You can tweet by simply publish on the `/tweet` topic. 
+ +``` +rostopic pub /tweet std_msgs/String "Hello. Tweet via rostwitter (https://github.com/jsk-ros-pkg/jsk_3rdparty)" +``` + +![](./doc/tweet-string.jpg) + +If the string to be tweeted exceeds 140 full-width characters or 280 half-width characters, it will be tweeted in the "thread" display. + +``` +rostopic pub /tweet std_msgs/String """The Zen of Python, by Tim Peters + +Beautiful is better than ugly. +Explicit is better than implicit. +Simple is better than complex. +Complex is better than complicated. +Flat is better than nested. +Sparse is better than dense. +Readability counts. +Special cases aren't special enough to break the rules. +Although practicality beats purity. +Errors should never pass silently. +Unless explicitly silenced. +In the face of ambiguity, refuse the temptation to guess. +There should be one-- and preferably only one --obvious way to do it. +Although that way may not be obvious at first unless you're Dutch. +Now is better than never. +Although never is often better than *right* now. +If the implementation is hard to explain, it's a bad idea. +If the implementation is easy to explain, it may be a good idea. +Namespaces are one honking great idea -- let's do more of those! +""" +``` + +![](./doc/tweet-string-thread.jpg) + +## Tweet text with image + +You can also tweet along with your images. + +If a base64 or image path is inserted in the text, it will jump to the next reply in that section. + +### Image path + +``` +wget https://github.com/k-okada.png -O /tmp/k-okada.png +rostopic pub /tweet std_msgs/String "/tmp/k-okada.png" +``` + +![](./doc/tweet-image-path.jpg) + +### Base64 + +You can even tweet the image by encoding in base64. The following example is in python. + +Do not concatenate multiple base64 images without spaces. + + +```python +import rospy +import cv2 +import std_msgs.msg +import numpy as np +import matplotlib.cm + +from rostwitter.cv_util import extract_media_from_text +from rostwitter.cv_util import encode_image_cv2 + +rospy.init_node('rostwitter_sample') +pub = rospy.Publisher('/tweet', std_msgs.msg.String, queue_size=1) +rospy.sleep(3.0) + +colormap = matplotlib.cm.get_cmap('hsv') + +text = 'Tweet with images. 
(https://github.com/jsk-ros-pkg/jsk_3rdparty/pull/375)\n' +N = 12 +for i in range(N): + text += str(i) + color = colormap(1.0 * i / N)[:3] + img = color * np.ones((10, 10, 3), dtype=np.uint8) * 255 + img = np.array(img, dtype=np.uint8) + text += encode_image_cv2(img) + ' ' +pub.publish(text) +``` + +[The result of the tweet.](https://twitter.com/pr2jsk/status/1561995909524705280) diff --git a/rostwitter/doc/tweet-image-path.jpg b/rostwitter/doc/tweet-image-path.jpg new file mode 100644 index 000000000..dffc9baec Binary files /dev/null and b/rostwitter/doc/tweet-image-path.jpg differ diff --git a/rostwitter/doc/tweet-string-thread.jpg b/rostwitter/doc/tweet-string-thread.jpg new file mode 100644 index 000000000..13783eaef Binary files /dev/null and b/rostwitter/doc/tweet-string-thread.jpg differ diff --git a/rostwitter/doc/tweet-string.jpg b/rostwitter/doc/tweet-string.jpg new file mode 100644 index 000000000..c41daa779 Binary files /dev/null and b/rostwitter/doc/tweet-string.jpg differ diff --git a/rostwitter/launch/tweet.launch b/rostwitter/launch/tweet.launch new file mode 100644 index 000000000..1d202a05e --- /dev/null +++ b/rostwitter/launch/tweet.launch @@ -0,0 +1,12 @@ + + + + + + + + + + diff --git a/rostwitter/python/rostwitter/cv_util.py b/rostwitter/python/rostwitter/cv_util.py new file mode 100644 index 000000000..ad284bc63 --- /dev/null +++ b/rostwitter/python/rostwitter/cv_util.py @@ -0,0 +1,80 @@ +import base64 +import imghdr +import os.path +import re + +import cv2 +import numpy as np +import rospy + + +base64_and_filepath_image_pattern = re.compile(r'((?:/9j/)(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)? ?|/\S+\.(?:jpeg|jpg|png|gif))') + + +def encode_image_cv2(img, quality=90): + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality] + result, encimg = cv2.imencode('.jpg', img, encode_param) + b64encoded = base64.b64encode(encimg).decode('ascii') + return b64encoded + + +def decode_image_cv2(b64encoded): + bin = b64encoded.split(",")[-1] + bin = base64.b64decode(bin) + bin = np.frombuffer(bin, np.uint8) + img = cv2.imdecode(bin, cv2.IMREAD_COLOR) + return img + + +def is_base64_image(b64encoded): + try: + decode_image_cv2(b64encoded) + except Exception as e: + rospy.logerr(str(e)) + return False + return True + + +def get_image_from_text(text): + if base64_and_filepath_image_pattern.match(text) is None: + return None + + if os.path.exists(text): + path = text + if imghdr.what(path) in ['jpeg', 'png', 'gif']: + with open(path, 'rb') as f: + return f.read() + else: + succ = is_base64_image(text) + if succ: + bin = text.split(",")[-1] + bin = base64.b64decode(bin) + bin = np.frombuffer(bin, np.uint8) + return bin + + +def extract_media_from_text(text): + texts = base64_and_filepath_image_pattern.split(text) + target_texts = list(filter(lambda x: x is not None and len(x.strip()) > 0, texts)) + + split_texts = [''] + imgs_list = [] + + texts = [] + imgs = [] + for text in target_texts: + img = get_image_from_text(text) + if img is None: + split_texts.append(text) + imgs_list.append(imgs) + imgs = [] + else: + imgs.append(img) + + if len(imgs) > 0: + imgs_list.append(imgs) + if len(split_texts) > 0: + if len(split_texts[0]) == 0 and len(imgs_list[0]) == 0: + split_texts = split_texts[1:] + imgs_list = imgs_list[1:] + return imgs_list, split_texts diff --git a/rostwitter/python/rostwitter/twitter.py b/rostwitter/python/rostwitter/twitter.py index cdb020e15..c56cf5289 100644 --- a/rostwitter/python/rostwitter/twitter.py +++ 
b/rostwitter/python/rostwitter/twitter.py @@ -1,16 +1,20 @@ # originally from https://raw.githubusercontent.com/bear/python-twitter/v1.1/twitter.py # NOQA +import math import json as simplejson import requests -from requests_oauthlib import OAuth1 -# https://stackoverflow.com/questions/11914472/stringio-in-python3 try: - from StringIO import StringIO ## for Python 2 + from itertools import zip_longest except ImportError: - from io import StringIO ## for Python 3 + from itertools import izip_longest as zip_longest +from requests_oauthlib import OAuth1 import rospy +from rostwitter.util import count_tweet_text +from rostwitter.util import split_tweet_text +from rostwitter.cv_util import extract_media_from_text + class Twitter(object): def __init__( @@ -54,24 +58,80 @@ def _request_url(self, url, verb, data=None): ) return 0 # if not a POST or GET request - def post_update(self, status): - if len(status) > 140: - rospy.logwarn('tweet is too longer > 140 characters') - status = status[:140] - url = 'https://api.twitter.com/1.1/statuses/update.json' - data = {'status': StringIO(status)} - json = self._request_url(url, 'POST', data=data) - data = simplejson.loads(json.content) + def _check_post_request(self, request): + valid = True + data = simplejson.loads(request.content) + if request.status_code != 200: + rospy.logwarn('post tweet failed. status_code: {}' + .format(request.status_code)) + if 'errors' in data: + for error in data['errors']: + rospy.logwarn('Tweet error code: {}, message: {}' + .format(error['code'], error['message'])) + valid = False + if valid: + return data + + def _post_update_with_reply(self, texts, media_list=None, + in_reply_to_status_id=None): + split_media_list = [] + media_list = media_list or [] + for i in range(0, int(math.ceil(len(media_list) / 4.0))): + split_media_list.append(media_list[i * 4:(i + 1) * 4]) + for text, media_list in zip_longest(texts, split_media_list): + text = text or '' + media_list = media_list or [] + url = 'https://api.twitter.com/1.1/statuses/update.json' + data = {'status': text} + media_ids = self._upload_media(media_list) + if len(media_ids) > 0: + data['media_ids'] = media_ids + if in_reply_to_status_id is not None: + data['in_reply_to_status_id'] = in_reply_to_status_id + r = self._request_url(url, 'POST', data=data) + data = self._check_post_request(r) + if data is not None: + in_reply_to_status_id = data['id'] + return data + + def _upload_media(self, media_list): + url = 'https://upload.twitter.com/1.1/media/upload.json' + media_ids = [] + for media in media_list: + data = {'media': media} + r = self._request_url(url, 'POST', data=data) + if r.status_code == 200: + rospy.loginfo('upload media success') + media_ids.append(str(r.json()['media_id'])) + else: + rospy.logerr('upload media failed. 
status_code: {}' + .format(r.status_code)) + media_ids = ','.join(media_ids) + return media_ids + + def post_update(self, status, in_reply_to_status_id=None): + media_list, status_list = extract_media_from_text(status) + for text, mlist in zip_longest(status_list, media_list): + text = text or '' + texts = split_tweet_text(text) + data = self._post_update_with_reply( + texts, + media_list=mlist, + in_reply_to_status_id=in_reply_to_status_id) + if data is not None: + in_reply_to_status_id = data['id'] return data - def post_media(self, status, media): - # 116 = 140 - len("http://t.co/ssssssssss") - if len(status) > 116: - rospy.logwarn('tweet wit media is too longer > 116 characters') - status = status[:116] + def post_media(self, status, media, in_reply_to_status_id=None): + texts = split_tweet_text(status) + status = texts[0] url = 'https://api.twitter.com/1.1/statuses/update_with_media.json' - data = {'status': StringIO(status)} + data = {'status': status} data['media'] = open(str(media), 'rb').read() - json = self._request_url(url, 'POST', data=data) - data = simplejson.loads(json.content) + r = self._request_url(url, 'POST', data=data) + data = self._check_post_request(r) + if len(texts) > 1: + data = self._post_update_with_reply( + texts[1:], + in_reply_to_status_id=data['id']) return data diff --git a/rostwitter/python/rostwitter/util.py b/rostwitter/python/rostwitter/util.py index 36a613b46..f5e51471c 100644 --- a/rostwitter/python/rostwitter/util.py +++ b/rostwitter/python/rostwitter/util.py @@ -1,4 +1,6 @@ import os +import sys +import unicodedata import yaml import rospy @@ -16,9 +18,47 @@ def load_oauth_settings(yaml_path): rospy.logerr("EOF") return None, None, None, None with open(yaml_path, 'r') as f: - key = yaml.load(f) + key = yaml.load(f, Loader=yaml.SafeLoader) ckey = key['CKEY'] csecret = key['CSECRET'] akey = key['AKEY'] asecret = key['ASECRET'] return ckey, csecret, akey, asecret + + +def count_tweet_text(text): + count = 0 + if sys.version_info.major <= 2: + text = text.decode('utf-8') + for c in text: + if unicodedata.east_asian_width(c) in 'FWA': + count += 2 + else: + count += 1 + return count + + +def split_tweet_text(text, length=280): + texts = [] + split_text = '' + count = 0 + if sys.version_info.major <= 2: + text = text.decode('utf-8') + for c in text: + if count == 281: + # last word is zenkaku. 
+ texts.append(split_text[:-1]) + split_text = split_text[-1:] + count = 2 + elif count == 280: + texts.append(split_text) + split_text = '' + count = 0 + split_text += c + if unicodedata.east_asian_width(c) in 'FWA': + count += 2 + else: + count += 1 + if count != 0: + texts.append(split_text) + return texts diff --git a/rostwitter/scripts/tweet.py b/rostwitter/scripts/tweet.py index d4b666959..50c44cf48 100755 --- a/rostwitter/scripts/tweet.py +++ b/rostwitter/scripts/tweet.py @@ -32,29 +32,9 @@ def tweet_cb(self, msg): rospy.loginfo(rospy.get_name() + " sending %s", ''.join([message] if len(message) < 128 else message[0:128]+'......')) - # search word start from / and end with {.jpeg,.jpg,.png,.gif} - m = re.search('/\S+\.(jpeg|jpg|png|gif)', message) - ret = None - if m: - filename = m.group(0) - message = re.sub(filename, "", message) - if os.path.exists(filename): - rospy.loginfo( - rospy.get_name() + " tweet %s with file %s", - message, filename) - # 140 - len("http://t.co/ssssssssss") - ret = self.api.post_media(message[0:116], filename) - if 'errors' in ret: - rospy.logerr('Failed to post: {}'.format(ret)) - # ret = self.api.post_update(message) - else: - rospy.logerr(rospy.get_name() + " %s could not find", filename) - else: - ret = self.api.post_update(message[0:140]) - if 'errors' in ret: - rospy.logerr('Failed to post: {}'.format(ret)) - # seg faults if message is longer than 140 byte ??? - rospy.loginfo(rospy.get_name() + " receiving %s", ret) + ret = self.api.post_update(message) + if ret is not None: + rospy.loginfo(rospy.get_name() + " receiving %s", ret) if __name__ == '__main__':
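
As a quick illustration of the splitting behaviour added in `rostwitter/python/rostwitter/util.py` above, the sketch below shows how `count_tweet_text` and `split_tweet_text` count full-width characters as two units and break a long message into thread-sized chunks. This is a minimal sketch, assuming Python 3 and that the `rostwitter` Python package is importable; the sample string and the printed values are illustrative only.

```python
# Minimal sketch (assumes Python 3 and rostwitter on the PYTHONPATH).
from rostwitter.util import count_tweet_text, split_tweet_text

text = u'あ' * 150                            # 150 full-width characters = 300 width units
print(count_tweet_text(text))                 # -> 300

chunks = split_tweet_text(text)
print(len(chunks))                            # -> 2 tweets in the thread
print([count_tweet_text(c) for c in chunks])  # -> [280, 20]
```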