Skip to content

Commit

Permalink
✨ tts、stt
Browse files Browse the repository at this point in the history
  • Loading branch information
twelvet-s committed Dec 20, 2024
1 parent 5ae31a7 commit f2bb9e9
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ target/
*.ipr

# log
/logs
logs

### NetBeans ###
nbproject/private/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package com.twelvet.server.ai.controller;

import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse;
import com.twelvet.api.ai.domain.dto.MessageDTO;
import com.twelvet.api.ai.domain.vo.MessageVO;
import com.twelvet.server.ai.service.AIChatService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Flux;

Expand All @@ -34,7 +37,7 @@ public class AIChatController {
// Chat endpoint: produces MediaType.TEXT_EVENT_STREAM_VALUE and delegates to aiChatService.chatStream.
// Access is gated by the @PreAuthorize expression below.
@Operation(summary = "回答用户问题")
@PreAuthorize("@role.hasPermi('ai:chat')")
@PostMapping(produces = MediaType.TEXT_EVENT_STREAM_VALUE)
// Diff view: the next two lines are the two versions of the same signature (appears to be before / after).
public Flux<MessageVO> chatStream(@RequestBody MessageDTO messageDTO) {
// This version adds @Validated to the request body — presumably to trigger bean validation of
// MessageDTO; confirm that MessageDTO actually declares constraints.
public Flux<MessageVO> chatStream(@Validated @RequestBody MessageDTO messageDTO) {
return aiChatService.chatStream(messageDTO);
}

Expand All @@ -46,7 +49,7 @@ public Flux<MessageVO> chatStream(@RequestBody MessageDTO messageDTO) {
// Text-to-speech endpoint mapped to POST "/tts"; delegates to aiChatService.tts.
// NOTE(review): the @Operation summary below is the same text used on chatStream — looks copy-pasted; confirm.
@Operation(summary = "回答用户问题")
@PreAuthorize("@role.hasPermi('ai:chat')")
@PostMapping("/tts")
// Diff view: the next two lines are the two versions of the same signature (appears to be before / after).
public Flux<MessageVO> tts(@RequestBody MessageDTO messageDTO) {
// This version returns the SpeechSynthesisResponse from the service instead of a Flux<MessageVO>.
// NOTE(review): unlike chatStream, the request body is not annotated with @Validated.
public SpeechSynthesisResponse tts(@RequestBody MessageDTO messageDTO) {
return aiChatService.tts(messageDTO);
}

Expand All @@ -58,7 +61,7 @@ public Flux<MessageVO> tts(@RequestBody MessageDTO messageDTO) {
// Speech-to-text endpoint mapped to POST "/stt"; delegates to aiChatService.stt.
// NOTE(review): the @Operation summary below is the same text used on chatStream — looks copy-pasted; confirm.
@Operation(summary = "回答用户问题")
@PreAuthorize("@role.hasPermi('ai:chat')")
@PostMapping("/stt")
// Diff view: the next two lines are the two versions of the same signature (appears to be before / after).
public Flux<MessageVO> stt(@RequestBody MessageDTO messageDTO) {
// This version returns the AudioTranscriptionResponse from the service instead of a Flux<MessageVO>.
// NOTE(review): unlike chatStream, the request body is not annotated with @Validated.
public AudioTranscriptionResponse stt(@RequestBody MessageDTO messageDTO) {
return aiChatService.stt(messageDTO);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package com.twelvet.server.ai.service;

import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse;
import com.twelvet.api.ai.domain.dto.MessageDTO;
import com.twelvet.api.ai.domain.vo.MessageVO;
import com.twelvet.server.ai.fun.vo.ActorsFilms;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
import reactor.core.publisher.Flux;

import java.util.List;

/**
* AI助手服务
*
Expand All @@ -28,13 +27,13 @@ public interface AIChatService {
* @param messageDTO MessageDTO
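* @return the speech synthesis response (the SpeechSynthesisResponse declaration replaces the earlier Flux-based one, so this is no longer a stream)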
*/
Flux<MessageVO> tts(MessageDTO messageDTO);
SpeechSynthesisResponse tts(MessageDTO messageDTO);

/**
* STT: speech-to-text.
* @param messageDTO MessageDTO
* @return the transcription result; the first declaration below returns a stream of MessageVO,
* the second returns a single AudioTranscriptionResponse
*/
// Diff view: the two lines below are the two versions of the same declaration (appears to be before / after).
Flux<MessageVO> stt(MessageDTO messageDTO);
AudioTranscriptionResponse stt(MessageDTO messageDTO);

}
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package com.twelvet.server.ai.service.impl;

import cn.hutool.core.collection.CollectionUtil;
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioSpeechOptions;
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioTranscriptionOptions;
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisModel;
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisPrompt;
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse;
import com.alibaba.cloud.ai.dashscope.audio.transcription.AudioTranscriptionModel;
import com.alibaba.cloud.ai.dashscope.chat.DashScopeChatModel;
import org.springframework.core.io.UrlResource;
import com.baomidou.dynamic.datasource.toolkit.DynamicDataSourceContextHolder;
import com.github.yitter.idgen.YitIdHelper;
import com.twelvet.api.ai.constant.RAGEnums;
Expand All @@ -24,8 +29,11 @@
import com.twelvet.server.ai.mapper.AiModelMapper;
import com.twelvet.server.ai.service.AIChatService;
import com.twelvet.server.ai.service.IAiChatHistoryService;
import lombok.SneakyThrows;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.messages.Message;
Expand Down Expand Up @@ -69,14 +77,25 @@ public class AIChatServiceImpl implements AIChatService {

private final IAiChatHistoryService aiChatHistoryService;

// Speech synthesis model; tts calls it with a SpeechSynthesisPrompt built from the message content.
private final SpeechSynthesisModel speechSynthesisModel;

// Audio transcription model; stt calls it with an AudioTranscriptionPrompt.
private final AudioTranscriptionModel transcriptionModel;

/**
* URL of the audio used for STT (speech-to-text).
* stt wraps this fixed URL in a UrlResource and sends it to the transcription model.
* NOTE(review): hard-coded address of what looks like a sample WAV file — confirm whether
* the audio source is meant to come from the request instead.
*/
private static final String AUDIO_RESOURCES_URL = "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav";

// Constructor: stores each argument in the field of the same name.
// Diff view: two versions of the parameter list are shown back to back (appears to be before / after).
public AIChatServiceImpl(DashScopeChatModel dashScopeChatModel, VectorStore vectorStore,
// Version whose parameter list ends at aiChatHistoryService:
AiModelMapper aiModelMapper, AiDocSliceMapper aiDocSliceMapper,
IAiChatHistoryService aiChatHistoryService) {
// Version that additionally takes the speech synthesis and audio transcription models:
AiModelMapper aiModelMapper, AiDocSliceMapper aiDocSliceMapper, IAiChatHistoryService aiChatHistoryService,
SpeechSynthesisModel speechSynthesisModel, AudioTranscriptionModel transcriptionModel) {
this.dashScopeChatModel = dashScopeChatModel;
this.vectorStore = vectorStore;
this.aiModelMapper = aiModelMapper;
this.aiDocSliceMapper = aiDocSliceMapper;
this.aiChatHistoryService = aiChatHistoryService;
// Assignments for the two model parameters that only the longer parameter list declares.
this.speechSynthesisModel = speechSynthesisModel;
this.transcriptionModel = transcriptionModel;
}

/**
Expand Down Expand Up @@ -277,23 +296,26 @@ else if (RAGEnums.UserTypeEnums.AI.equals(createByType)) {
* @return the speech synthesis response returned by the model call (a single response object, not a stream)
*/
@Override
// Diff view: two versions of this method are interleaved below (appears to be before / after).
// First version: builds DashScopeAudioSpeechOptions (model "cosyvoice-v1", text = message content)
// but never uses it and returns null.
public Flux<MessageVO> tts(MessageDTO messageDTO) {
DashScopeAudioSpeechOptions dashScopeAudioSpeechOptions = new DashScopeAudioSpeechOptions.Builder()
.withModel("cosyvoice-v1")
.withText(messageDTO.getContent())
.build();
// Second version: wraps the message content in a SpeechSynthesisPrompt, calls the injected
// speechSynthesisModel, and returns its response as-is.
// NOTE(review): messageDTO.getContent() is passed through without a null/blank check here —
// confirm it is validated upstream.
public SpeechSynthesisResponse tts(MessageDTO messageDTO) {
SpeechSynthesisResponse response = speechSynthesisModel
.call(new SpeechSynthesisPrompt(messageDTO.getContent()));

// The "return null" line belongs to the first version; "return response" to the second.
return null;
return response;
}

/**
* STT: speech-to-text.
* @param messageDTO MessageDTO (not read by either version of the body shown below)
* @return the first version returns null; the second returns the AudioTranscriptionResponse
* produced by transcriptionModel
*/
// @SneakyThrows: presumably present because constructing a UrlResource from a String can throw
// a checked exception — confirm.
@SneakyThrows
@Override
// Diff view: two versions of this method are interleaved below (appears to be before / after).
// First version: stub that returns null.
public Flux<MessageVO> stt(MessageDTO messageDTO) {
return null;
// Second version: transcribes the audio at the fixed AUDIO_RESOURCES_URL using the
// "sensevoice-v1" model option.
// NOTE(review): the audio source is the hard-coded constant, not anything taken from messageDTO.
public AudioTranscriptionResponse stt(MessageDTO messageDTO) {
AudioTranscriptionResponse response = transcriptionModel
.call(new AudioTranscriptionPrompt(new UrlResource(AUDIO_RESOURCES_URL),
DashScopeAudioTranscriptionOptions.builder().withModel("sensevoice-v1").build()));

return response;
}

}
4 changes: 2 additions & 2 deletions twelvet/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@
<idempotent-spring-boot-starter.version>0.4.0</idempotent-spring-boot-starter.version>
<sensitive-word.version>0.22.0</sensitive-word.version>

<spring-ai-bom.version>1.0.0-M3</spring-ai-bom.version>
<ai-alibaba.version>1.0.0-M3.1</ai-alibaba.version>
<spring-ai-bom.version>1.0.0-M4</spring-ai-bom.version>
<ai-alibaba.version>1.0.0-M3.2</ai-alibaba.version>

<docker.plugin.version>0.32.0</docker.plugin.version>
<spring.checkstyle.plugin>0.0.39</spring.checkstyle.plugin>
Expand Down

0 comments on commit f2bb9e9

Please sign in to comment.