Commit de9fb195 by lijinqi

TTS websocket 测试提交

parent 8ed3fd93
...@@ -21,6 +21,9 @@ public class TenantContextWebFilter extends OncePerRequestFilter { ...@@ -21,6 +21,9 @@ public class TenantContextWebFilter extends OncePerRequestFilter {
@Override @Override
protected void doFilterInternal(HttpServletRequest request, HttpServletResponse response, FilterChain chain) protected void doFilterInternal(HttpServletRequest request, HttpServletResponse response, FilterChain chain)
throws ServletException, IOException { throws ServletException, IOException {
if (request.getRequestURI().startsWith("/ws")) {
TenantContextHolder.setIgnore(true);
}
// 设置 // 设置
Long tenantId = WebFrameworkUtils.getTenantId(request); Long tenantId = WebFrameworkUtils.getTenantId(request);
if (tenantId != null) { if (tenantId != null) {
......
...@@ -38,6 +38,8 @@ ...@@ -38,6 +38,8 @@
<artifactId>spring-boot-starter-websocket</artifactId> <artifactId>spring-boot-starter-websocket</artifactId>
</dependency> </dependency>
<!-- 消息队列相关 --> <!-- 消息队列相关 -->
<dependency> <dependency>
<groupId>com.luhu</groupId> <groupId>com.luhu</groupId>
...@@ -70,4 +72,4 @@ ...@@ -70,4 +72,4 @@
</dependency> </dependency>
</dependencies> </dependencies>
</project> </project>
\ No newline at end of file
...@@ -29,6 +29,6 @@ public class WebSocketProperties { ...@@ -29,6 +29,6 @@ public class WebSocketProperties {
* 可选值:local、redis、rocketmq、kafka、rabbitmq * 可选值:local、redis、rocketmq、kafka、rabbitmq
*/ */
@NotNull(message = "WebSocket 的消息发送者不能为空") @NotNull(message = "WebSocket 的消息发送者不能为空")
private String senderType = "local"; private String senderType = "redis";
} }
...@@ -27,6 +27,12 @@ ...@@ -27,6 +27,12 @@
<artifactId>spring-boot-starter-webflux</artifactId> <artifactId>spring-boot-starter-webflux</artifactId>
</dependency> </dependency>
<!-- WebSocket server side (for TTS streaming endpoint) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-websocket</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId> <artifactId>spring-boot-starter-validation</artifactId>
...@@ -80,5 +86,17 @@ ...@@ -80,5 +86,17 @@
<artifactId>computility-module-apihub-api</artifactId> <artifactId>computility-module-apihub-api</artifactId>
<version>${revision}</version> <version>${revision}</version>
</dependency> </dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>dashscope-sdk-java</artifactId>
<version>2.22.7</version>
</dependency>
<dependency>
<groupId>com.openai</groupId>
<artifactId>openai-java</artifactId>
<version>3.5.0</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
package com.luhu.computility.module.external.test;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Base64;
/**
* @version 1.0
* @Author ljq
* @Date 2026/3/30
* @注释
*/
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Command-line demo that creates a custom voice through the DashScope voice-design HTTP API and
 * writes the returned preview audio to a WAV file in the working directory.
 *
 * <p>The API key is read from the {@code DASHSCOPE_API_KEY} environment variable so that no
 * credential is stored in source control. Keys differ between the Beijing and Singapore regions;
 * see https://help.aliyun.com/zh/model-studio/get-api-key for how to obtain one.
 */
public class CreateVoice {

    /** Name of the environment variable that holds the DashScope API key. */
    private static final String API_KEY_ENV = "DASHSCOPE_API_KEY";

    /**
     * Beijing-region endpoint. For Singapore-region models use
     * https://dashscope-intl.aliyuncs.com/api/v1/services/audio/tts/customization instead.
     */
    private static final String CUSTOMIZATION_URL =
            "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/customization";

    public static void main(String[] args) {
        CreateVoice example = new CreateVoice();
        example.createVoice();
    }

    /**
     * Sends the "create voice" request, prints the resulting voice name and saves the preview
     * audio as {@code <voice>_preview.wav}. Failures are reported on the console; nothing is
     * thrown to the caller.
     */
    public void createVoice() {
        String apiKey = System.getenv(API_KEY_ENV);
        if (apiKey == null || apiKey.trim().isEmpty()) {
            System.err.println("请求发生错误: 未配置环境变量 " + API_KEY_ENV);
            return;
        }

        // JSON request body.
        String jsonBody = "{\n" +
                " \"model\": \"qwen-voice-design\",\n" +
                " \"input\": {\n" +
                " \"action\": \"create\",\n" +
                " \"target_model\": \"qwen3-tts-vd-realtime-2026-01-15\",\n" +
                " \"voice_prompt\": \"和理学大师朱熹的声音一样,大概在70岁左右说古文。古代老夫子理学思想阐述、书院授课、古籍朗读等场景,声音低沉,不怒自威。\",\n" +
                " \"preview_text\": \"吾名朱熹,字元晦,号晦庵,别称紫阳先生,南宋著名理学家、思想家、教育家,闽学派的代表人物,世称“朱子”。是中国封建社会后期影响最大的哲学家和思想家之一,其学说对后世影响深远,特别是在元、明、清三代,朱熹的理学被确立为官方哲学。\",\n" +
                " \"preferred_name\": \"announcer\",\n" +
                " \"language\": \"zh\"\n" +
                " },\n" +
                " \"parameters\": {\n" +
                " \"sample_rate\": 24000,\n" +
                " \"response_format\": \"wav\"\n" +
                " }\n" +
                "}";

        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(CUSTOMIZATION_URL).openConnection();
            connection.setRequestMethod("POST");
            connection.setRequestProperty("Authorization", "Bearer " + apiKey);
            connection.setRequestProperty("Content-Type", "application/json");
            connection.setDoOutput(true);
            connection.setDoInput(true);

            // Send the request body.
            try (OutputStream os = connection.getOutputStream()) {
                os.write(jsonBody.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                os.flush();
            }

            int responseCode = connection.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK) {
                System.out.println("请求失败,状态码: " + responseCode);
                System.out.println("错误响应: " + readBody(connection.getErrorStream()));
                return;
            }

            // Parse the JSON response.
            JsonObject jsonResponse =
                    JsonParser.parseString(readBody(connection.getInputStream())).getAsJsonObject();
            JsonObject outputObj = jsonResponse.getAsJsonObject("output");
            JsonObject previewAudioObj = outputObj.getAsJsonObject("preview_audio");

            String voiceName = outputObj.get("voice").getAsString();
            System.out.println("音色名称: " + voiceName);

            // The preview audio arrives Base64-encoded.
            byte[] audioBytes = Base64.getDecoder().decode(previewAudioObj.get("data").getAsString());
            String filename = voiceName + "_preview.wav";
            saveAudioToFile(audioBytes, filename);
            System.out.println("音频已保存到本地文件: " + filename);
        } catch (Exception e) {
            System.err.println("请求发生错误: " + e.getMessage());
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Reads a whole HTTP body as UTF-8 text, trimming each line before appending it.
     *
     * @param stream body stream; may be {@code null} (HttpURLConnection returns no error stream
     *               when the server sent no body), in which case an empty string is returned
     */
    private static String readBody(InputStream stream) throws IOException {
        if (stream == null) {
            return "";
        }
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line.trim());
            }
        }
        return body.toString();
    }

    /** Writes the audio bytes to {@code filename}, reporting (not throwing) I/O failures. */
    private void saveAudioToFile(byte[] audioBytes, String filename) {
        try {
            File file = new File(filename);
            try (FileOutputStream fos = new FileOutputStream(file)) {
                fos.write(audioBytes);
            }
            System.out.println("音频已保存到: " + file.getAbsolutePath());
        } catch (IOException e) {
            System.err.println("保存音频文件时发生错误: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
package com.luhu.computility.module.external.test;
/**
* @version 1.0
* @Author ljq
* @Date 2026/3/30
* @注释
*/
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Command-line demo that lists the custom voices of the current account through the DashScope
 * voice-design HTTP API and prints them to the console.
 *
 * <p>The API key is read from the {@code DASHSCOPE_API_KEY} environment variable. Keys differ
 * between the Beijing and Singapore regions; see
 * https://help.aliyun.com/zh/model-studio/get-api-key for how to obtain one.
 */
public class GetVoiceList {

    public static void main(String[] args) {
        String apiKey = System.getenv("DASHSCOPE_API_KEY");
        if (apiKey == null || apiKey.trim().isEmpty()) {
            System.err.println("未配置环境变量 DASHSCOPE_API_KEY");
            return;
        }

        // Beijing-region endpoint. For Singapore-region models use
        // https://dashscope-intl.aliyuncs.com/api/v1/services/audio/tts/customization instead.
        String apiUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/customization";

        // JSON request body (built by concatenation; no text blocks on older Java versions).
        String jsonPayload =
                "{"
                        + "\"model\": \"qwen-voice-design\"," // do not change this value
                        + "\"input\": {"
                        + "\"action\": \"list\","
                        + "\"page_size\": 10,"
                        + "\"page_index\": 0"
                        + "}"
                        + "}";

        HttpURLConnection con = null;
        try {
            con = (HttpURLConnection) new URL(apiUrl).openConnection();
            con.setRequestMethod("POST");
            con.setRequestProperty("Authorization", "Bearer " + apiKey);
            con.setRequestProperty("Content-Type", "application/json");
            con.setDoOutput(true);
            try (OutputStream os = con.getOutputStream()) {
                os.write(jsonPayload.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }

            int status = con.getResponseCode();
            boolean success = status >= 200 && status < 300;
            // The error stream is null when the server sent no body with the error status.
            java.io.InputStream bodyStream = success ? con.getInputStream() : con.getErrorStream();
            StringBuilder response = new StringBuilder();
            if (bodyStream != null) {
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(bodyStream, java.nio.charset.StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = br.readLine()) != null) {
                        response.append(line);
                    }
                }
            }
            System.out.println("HTTP 状态码: " + status);
            System.out.println("返回 JSON: " + response.toString());

            if (status == 200) {
                Gson gson = new Gson();
                JsonObject jsonObj = gson.fromJson(response.toString(), JsonObject.class);
                JsonArray voiceList = jsonObj.getAsJsonObject("output").getAsJsonArray("voice_list");
                System.out.println("\n 查询到的音色列表:");
                for (int i = 0; i < voiceList.size(); i++) {
                    JsonObject voiceItem = voiceList.get(i).getAsJsonObject();
                    String voice = voiceItem.get("voice").getAsString();
                    String gmtCreate = voiceItem.get("gmt_create").getAsString();
                    String targetModel = voiceItem.get("target_model").getAsString();
                    System.out.printf("- 音色: %s 创建时间: %s 模型: %s\n",
                            voice, gmtCreate, targetModel);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (con != null) {
                con.disconnect();
            }
        }
    }
}
package com.luhu.computility.module.external.test;
/**
* @version 1.0
* @Author ljq
* @Date 2026/3/30
* @注释
*/
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.utils.Constants;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
/**
 * Demo of a blocking (non-streaming) CosyVoice synthesis call: the text is synthesized in one
 * request and the returned audio is written to {@code output.mp3} in the working directory.
 *
 * <p>The API key is read from the {@code DASHSCOPE_API_KEY} environment variable. Keys differ
 * between the Beijing and Singapore regions; see
 * https://help.aliyun.com/zh/model-studio/get-api-key for how to obtain one.
 */
public class LibaiV2NoStream {
    // Model name.
    private static final String MODEL = "cosyvoice-v2";
    // Voice name.
    private static final String VOICE = "libai_v2";

    /**
     * Synthesizes a fixed sentence and stores the audio in {@code output.mp3}.
     *
     * @throws RuntimeException wrapping any failure of the synthesis call or of the file write
     */
    public static void streamAudioDataToSpeaker() {
        // Request parameters.
        SpeechSynthesisParam param =
                SpeechSynthesisParam.builder()
                        .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                        .model(MODEL)
                        .voice(VOICE)
                        .build();

        // Synchronous mode: no callback (second argument is null).
        SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null);
        ByteBuffer audio = null;
        try {
            // Blocks until the audio is returned.
            audio = synthesizer.call("今天天气怎么样?");
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Close the websocket connection once the task has ended.
            synthesizer.getDuplexApi().close(1000, "bye");
        }

        if (audio == null) {
            return;
        }

        File file = new File("output.mp3");
        // The first text sent has to establish the websocket connection, so the first-package
        // delay includes the connection set-up time.
        System.out.println(
                "[Metric] requestId为:"
                        + synthesizer.getLastRequestId()
                        + "首包延迟(毫秒)为:"
                        + synthesizer.getFirstPackageDelay());
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(audio.array());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) {
        // Beijing-region URL. For Singapore-region models use
        // wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference instead.
        Constants.baseWebsocketApiUrl = "wss://dashscope.aliyuncs.com/api-ws/v1/inference";
        streamAudioDataToSpeaker();
        System.exit(0);
    }
}
package com.luhu.computility.module.external.test;
/**
* @version 1.0
* @Author ljq
* @Date 2026/3/30
* @注释
*/
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.common.ResultCallback;
import com.alibaba.dashscope.utils.Constants;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.concurrent.CountDownLatch;
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.common.ResultCallback;
import com.alibaba.dashscope.utils.Constants;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.concurrent.CountDownLatch;
/** Small helper that renders the current local time for console log lines. */
class TimeUtils {
    /** Millisecond-precision pattern shared by every log line. */
    private static final DateTimeFormatter TIMESTAMP_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS");

    /**
     * @return the current local date-time formatted as {@code yyyy-MM-dd HH:mm:ss.SSS}
     */
    public static String getTimestamp() {
        LocalDateTime now = LocalDateTime.now();
        return TIMESTAMP_FORMAT.format(now);
    }
}
/**
 * Demo of a callback-driven (streaming) CosyVoice synthesis call: every audio frame delivered to
 * the callback is appended to a PCM file under {@code <user.dir>/generated_audio/}.
 *
 * <p>The API key is read from the {@code DASHSCOPE_API_KEY} environment variable. Keys differ
 * between the Beijing and Singapore regions; see
 * https://help.aliyun.com/zh/model-studio/get-api-key for how to obtain one.
 */
public class LibaiV2SimpleStream {
    // Model name.
    private static String model = "cosyvoice-v2";
    // Voice name.
    private static String voice = "libai_v2";

    /**
     * Synthesizes a fixed paragraph, waits until the callback reports completion or an error,
     * then closes the websocket and prints request metrics.
     *
     * @throws RuntimeException wrapping any failure of the call or an interruption while waiting
     */
    public static void streamAudioDataToSpeaker() {
        CountDownLatch latch = new CountDownLatch(1);

        ResultCallback<SpeechSynthesisResult> callback = new ResultCallback<SpeechSynthesisResult>() {
            // Stays null when the output file could not be created.
            private FileOutputStream audioFileOutputStream;
            private String audioFilePath;

            {
                // Initialisation: create the target directory and open the output file.
                try {
                    String audioDir = System.getProperty("user.dir") + "/generated_audio/";
                    File dir = new File(audioDir);
                    if (!dir.exists()) {
                        dir.mkdirs();
                    }
                    String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss_SSS").format(new Date());
                    audioFilePath = audioDir + "tts_audio_" + timestamp + ".pcm";
                    audioFileOutputStream = new FileOutputStream(audioFilePath);
                    System.out.println(TimeUtils.getTimestamp() + " 音频文件将保存到: " + audioFilePath);
                } catch (FileNotFoundException e) {
                    System.err.println("创建音频文件失败: " + e.getMessage());
                }
            }

            @Override
            public void onEvent(SpeechSynthesisResult result) {
                ByteBuffer audioBuffer = result.getAudioFrame();
                if (audioBuffer == null) {
                    return;
                }
                // Copy the readable bytes out of the frame buffer.
                byte[] audioData = new byte[audioBuffer.remaining()];
                audioBuffer.get(audioData);
                if (audioFileOutputStream == null) {
                    // The file could not be opened earlier; report instead of failing with an NPE.
                    System.err.println("保存音频数据失败: 音频文件未打开");
                    return;
                }
                try {
                    audioFileOutputStream.write(audioData);
                    audioFileOutputStream.flush();
                    System.out.println(TimeUtils.getTimestamp() + " 收到音频,大小: " + audioData.length + " bytes");
                } catch (IOException e) {
                    System.err.println("保存音频数据失败: " + e.getMessage());
                }
            }

            @Override
            public void onComplete() {
                try {
                    System.out.println(TimeUtils.getTimestamp() + " 收到Complete,语音合成结束");
                    if (closeAudioFile()) {
                        System.out.println(TimeUtils.getTimestamp() + " 音频文件已保存完成: " + audioFilePath);
                    }
                } finally {
                    // Always release the waiting caller, even if reporting failed.
                    latch.countDown();
                }
            }

            @Override
            public void onError(Exception e) {
                try {
                    System.out.println("出现异常:" + e.toString());
                    closeAudioFile();
                } finally {
                    latch.countDown();
                }
            }

            /** Closes the output file if it was opened; returns true when the close succeeded. */
            private boolean closeAudioFile() {
                if (audioFileOutputStream == null) {
                    return false;
                }
                try {
                    audioFileOutputStream.close();
                    return true;
                } catch (IOException ioException) {
                    System.err.println("关闭文件流失败: " + ioException.getMessage());
                    return false;
                }
            }
        };

        // Request parameters.
        SpeechSynthesisParam param =
                SpeechSynthesisParam.builder()
                        .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                        .model(model)
                        .voice(voice)
                        .build();

        // Passing a callback as the second argument enables asynchronous mode.
        SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback);
        try {
            // Audio is delivered to the callback's onEvent method as it is produced.
            synthesizer.call("吾名朱熹,字元晦,号晦庵,别称紫阳先生,南宋著名理学家、思想家、教育家,闽学派的代表人物,世称“朱子”。是中国封建社会后期影响最大的哲学家和思想家之一,其学说对后世影响深远,特别是在元、明、清三代,朱熹的理学被确立为官方哲学。");
            // Wait until synthesis has completed (or failed).
            latch.await();
        } catch (InterruptedException e) {
            // Preserve the interrupt status for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Close the websocket connection once the task has ended.
            synthesizer.getDuplexApi().close(1000, "bye");
        }

        // The first text sent has to establish the websocket connection, so the first-package
        // delay includes the connection set-up time.
        System.out.println(
                "[Metric] requestId为:"
                        + synthesizer.getLastRequestId()
                        + ",首包延迟(毫秒)为:"
                        + synthesizer.getFirstPackageDelay());
    }

    public static void main(String[] args) {
        // Beijing-region URL. For Singapore-region models use
        // wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference instead.
        Constants.baseWebsocketApiUrl = "wss://dashscope.aliyuncs.com/api-ws/v1/inference";
        streamAudioDataToSpeaker();
        System.exit(0);
    }
}
package com.luhu.computility.module.external.test;
import com.alibaba.dashscope.audio.qwen_tts_realtime.*;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.google.gson.JsonObject;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.SourceDataLine;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.AudioSystem;
import java.io.*;
import java.util.Base64;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Demo of the Qwen realtime TTS websocket API: text fragments are appended to a session, the
 * Base64 PCM deltas received from the server are played on the default audio output, and the
 * complete audio is written to a file on shutdown.
 *
 * <p>The API key is read from the {@code DASHSCOPE_API_KEY} environment variable. Keys differ
 * between the Beijing and Singapore regions; see
 * https://help.aliyun.com/zh/model-studio/get-api-key for how to obtain one.
 */
public class TestVoice {
    static String[] textToSynthesize = {
            "吾名朱熹,字元晦,号晦庵,“," ,
            "别称紫阳先生,南宋著名理学家、思想家、教育家",
            "闽学派的代表人物,世称“朱子”。",
            "是中国封建社会后期影响最大的哲学家和思想家之一,",
            "其学说对后世影响深远,特别是在元、明、清三代,朱熹的理学被确立为官方哲学。"
    };

    public static QwenTtsRealtimeAudioFormat ttsFormat = QwenTtsRealtimeAudioFormat.PCM_24000HZ_MONO_16BIT;

    /**
     * Realtime PCM player. One thread decodes queued Base64 chunks, a second thread writes the
     * decoded bytes to the audio line. Both threads are started by the constructor and run until
     * {@link #shutdown()} is called.
     */
    public static class RealtimePcmPlayer {
        /** How long a worker sleeps when its queue is empty. */
        private static final long IDLE_POLL_MILLIS = 100;

        private final int sampleRate;
        private final SourceDataLine line;
        private final AudioFormat audioFormat;
        private final Thread decoderThread;
        private final Thread playerThread;
        private final AtomicBoolean stopped = new AtomicBoolean(false);
        private final Queue<String> b64AudioBuffer = new ConcurrentLinkedQueue<>();
        private final Queue<byte[]> rawAudioBuffer = new ConcurrentLinkedQueue<>();
        // Written only by the decoder thread; read in shutdown() after that thread was joined.
        private final ByteArrayOutputStream totalAudioStream = new ByteArrayOutputStream();

        /**
         * Opens a 16-bit mono little-endian signed PCM line and starts the worker threads.
         *
         * @param sampleRate sample rate in Hz
         * @throws LineUnavailableException if no matching audio line can be opened
         */
        public RealtimePcmPlayer(int sampleRate) throws LineUnavailableException {
            this.sampleRate = sampleRate;
            this.audioFormat = new AudioFormat(this.sampleRate, 16, 1, true, false);
            DataLine.Info info = new DataLine.Info(SourceDataLine.class, audioFormat);
            line = (SourceDataLine) AudioSystem.getLine(info);
            line.open(audioFormat);
            line.start();
            decoderThread = new Thread(this::decodeLoop);
            playerThread = new Thread(this::playLoop);
            decoderThread.start();
            playerThread.start();
        }

        /** Decoder worker: Base64 chunk -> raw PCM, queued for playback and kept for the file. */
        private void decodeLoop() {
            while (!stopped.get()) {
                String b64Audio = b64AudioBuffer.poll();
                if (b64Audio == null) {
                    if (!idle()) {
                        return;
                    }
                    continue;
                }
                byte[] rawAudio = Base64.getDecoder().decode(b64Audio);
                rawAudioBuffer.add(rawAudio);
                totalAudioStream.write(rawAudio, 0, rawAudio.length);
            }
        }

        /** Player worker: plays queued raw PCM chunks one after another. */
        private void playLoop() {
            while (!stopped.get()) {
                byte[] rawAudio = rawAudioBuffer.poll();
                if (rawAudio == null) {
                    if (!idle()) {
                        return;
                    }
                    continue;
                }
                try {
                    playChunk(rawAudio);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }

        /** Sleeps for one poll interval; returns false if the thread was interrupted. */
        private static boolean idle() {
            try {
                Thread.sleep(IDLE_POLL_MILLIS);
                return true;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return false;
            }
        }

        /** Writes one chunk to the line, then waits roughly as long as the chunk takes to play. */
        private void playChunk(byte[] chunk) throws InterruptedException {
            if (chunk == null || chunk.length == 0) return;
            int bytesWritten = 0;
            while (bytesWritten < chunk.length) {
                bytesWritten += line.write(chunk, bytesWritten, chunk.length - bytesWritten);
            }
            // 2 bytes per sample, mono: bytes per millisecond = sampleRate * 2 / 1000.
            long audioLengthMillis = chunk.length / (this.sampleRate * 2 / 1000);
            // Chunks shorter than 10 ms would give a negative value, which Thread.sleep rejects
            // with IllegalArgumentException; clamp to zero instead.
            Thread.sleep(Math.max(0L, audioLengthMillis - 10));
        }

        /** Queues one Base64-encoded PCM chunk for decoding and playback. */
        public void write(String b64Audio) {
            b64AudioBuffer.add(b64Audio);
        }

        /** Drops every chunk that has not been decoded or played yet. */
        public void cancel() {
            b64AudioBuffer.clear();
            rawAudioBuffer.clear();
        }

        /** Blocks until both queues are empty and the line has drained. */
        public void waitForComplete() throws InterruptedException {
            while (!b64AudioBuffer.isEmpty() || !rawAudioBuffer.isEmpty()) {
                Thread.sleep(100);
            }
            line.drain();
        }

        /**
         * Stops both workers, writes everything decoded so far to
         * {@code TotalAudio_<sampleRate>.<format>} and releases the audio line.
         */
        public void shutdown() throws InterruptedException, IOException {
            stopped.set(true);
            decoderThread.join();
            playerThread.join();
            try {
                // Save the complete audio.
                File file = new File("TotalAudio_"+ttsFormat.getSampleRate()+"."+ttsFormat.getFormat());
                try (FileOutputStream fos = new FileOutputStream(file)) {
                    fos.write(totalAudioStream.toByteArray());
                }
            } finally {
                // Release the line whenever it is open, not only while data is flowing.
                if (line.isOpen()) {
                    line.drain();
                    line.stop();
                    line.close();
                }
            }
        }
    }

    public static void main(String[] args) throws InterruptedException, LineUnavailableException, IOException {
        QwenTtsRealtimeParam param = QwenTtsRealtimeParam.builder()
                // For instruction control, switch the model to qwen3-tts-instruct-flash-realtime.
                .model("qwen3-tts-vd-realtime-2026-01-15")
                // Beijing-region URL. For Singapore-region models use
                // wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime instead.
                .url("wss://dashscope.aliyuncs.com/api-ws/v1/realtime")
                .apikey(System.getenv("DASHSCOPE_API_KEY"))
                .build();

        final CountDownLatch completeLatch = new CountDownLatch(1);

        // Realtime audio player.
        RealtimePcmPlayer audioPlayer = new RealtimePcmPlayer(24000);

        QwenTtsRealtime qwenTtsRealtime = new QwenTtsRealtime(param, new QwenTtsRealtimeCallback() {
            @Override
            public void onOpen() {
                // Connection established; nothing to do.
            }

            @Override
            public void onEvent(JsonObject message) {
                String type = message.get("type").getAsString();
                switch (type) {
                    case "session.created":
                        if (message.has("session")) {
                            String eventId = message.get("event_id").getAsString();
                            String sessionId = message.get("session").getAsJsonObject().get("id").getAsString();
                            System.out.println("[onEvent] session.created, session_id: "
                                    + sessionId + ", event_id: " + eventId);
                        }
                        break;
                    case "response.audio.delta":
                        // Play the audio as it arrives.
                        audioPlayer.write(message.get("delta").getAsString());
                        break;
                    case "response.done":
                        // Response finished; nothing to do.
                        break;
                    case "session.finished":
                        // Session finished: release the main thread.
                        completeLatch.countDown();
                        break;
                    default:
                        break;
                }
            }

            @Override
            public void onClose(int code, String reason) {
                // Connection closed; nothing to do.
            }
        });

        try {
            qwenTtsRealtime.connect();
        } catch (NoApiKeyException e) {
            throw new RuntimeException(e);
        }

        QwenTtsRealtimeConfig config = QwenTtsRealtimeConfig.builder()
                .voice("qwen-tts-vd-announcer-voice-20260330204711322-1e5e")
                .responseFormat(ttsFormat)
                .mode("server_commit")
                // For instruction control, also set .instructions("...") and
                // .optimizeInstructions(true), and switch the model as noted above.
                .build();
        qwenTtsRealtime.updateSession(config);

        for (String text : textToSynthesize) {
            qwenTtsRealtime.appendText(text);
            Thread.sleep(100);
        }
        qwenTtsRealtime.finish();
        completeLatch.await();
        qwenTtsRealtime.close();

        // Wait for playback to finish, then release the player.
        audioPlayer.waitForComplete();
        audioPlayer.shutdown();
        System.exit(0);
    }
}
package com.luhu.computility.module.external.test;
/**
* @version 1.0
* @Author ljq
* @Date 2026/3/30
* @注释
*/
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import okhttp3.*;
import org.apache.commons.lang3.StringUtils;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Base64;
import java.util.Date;
/**
 * Sample client for the short-text synthesis WebSocket API.
 *
 * <p>Notes:
 * <ol>
 *   <li>The demo shows how to append the received audio stream to a local file.</li>
 *   <li>Only the basic call flow is implemented; retry on failure, token refresh and proper
 *       logging are left to the integrator.</li>
 * </ol>
 *
 * @author data-baker
 */
public class TtsWebSocketDemo extends WebSocketListener {
    /**
     * Credentials; obtain them from the open platform (https://ai.data-baker.com/).
     */
    private static final String clientId = "YOUR_CLIENT_ID";
    private static final String clientSecret = "YOUR_CLIENT_SECRET";
    /**
     * Token endpoint; the two placeholders are client secret and client id, in that order.
     */
    public static String tokenUrl = "https://openapi.data-baker.com/oauth/2.0/token?grant_type=client_credentials&client_secret=%s&client_id=%s";
    private static final String hostUrl = "ws://127.0.0.1:48080/ws/wsapi";
    //private static final String hostUrl = "ws://42.192.64.253:48080/ws/wsapi";
    private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");
    /**
     * Moment the websocket was opened; used to report the total duration.
     */
    private Date startTime;
    private String accessToken = getAccessToken();
    /**
     * UTF-8 encoded, at most 300 Chinese characters (i.e. 900 bytes).
     */
    private static Integer MAX_BYTE_LENGTH = 900;
    /**
     * Text to synthesize.
     */
    private String text;
    /**
     * Voice name.
     */
    private String voiceName;
    /**
     * File the received audio is appended to; adjust to the local environment.
     */
    private File resultFile;

    public TtsWebSocketDemo(File resultFile) {
        this.resultFile = resultFile;
    }

    public TtsWebSocketDemo(String text, File resultFile) {
        this.text = text;
        this.resultFile = resultFile;
    }

    public TtsWebSocketDemo(String text, String voiceName, File resultFile) {
        this.text = text;
        this.voiceName = voiceName;
        this.resultFile = resultFile;
    }

    /** Records the start time and sends the synthesis request on a separate thread. */
    @Override
    public void onOpen(WebSocket webSocket, Response response) {
        super.onOpen(webSocket, response);
        this.startTime = new Date();
        new Thread(() -> {
            // Connected: send the request.
            String request = buildRequest().toString();
            System.out.println("dataSent:" + text);
            webSocket.send(request);
            System.out.println("all data is send");
        }).start();
    }

    /** Builds the JSON request carrying the access token and the tts_params object. */
    private JSONObject buildRequest() {
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("access_token", accessToken);
        jsonObject.put("version", "2.1");
        JSONObject ttsParams = new JSONObject();
        // domain is mandatory, fixed value 1.
        ttsParams.put("domain", 1);
        ttsParams.put("language", "ZH");
        // Audio format; only PCM is supported.
        ttsParams.put("audio_fmt", "PCM");
        ttsParams.put("speed", 5.0);
        ttsParams.put("volume", 5);
        ttsParams.put("sample_rate", 16000);
        ttsParams.put("voice_name", voiceName);
        ttsParams.put("text", text);
        jsonObject.put("tts_params", ttsParams);
        return jsonObject;
    }

    /**
     * Handles one server frame: reports errors, appends any audio to the result file and closes
     * the socket once {@code end_flag} is 1. Missing {@code err_no} / {@code end_flag} fields are
     * treated as "no error" / "not finished" instead of failing on a null unboxing.
     */
    @Override
    public void onMessage(WebSocket webSocket, String text) {
        super.onMessage(webSocket, text);
        JSONObject resp = JSON.parseObject(text);
        if (resp == null) {
            return;
        }
        Integer errNo = resp.getInteger("err_no");
        if (errNo != null && errNo != 0) {
            // Synthesis failed; see the error code.
            System.out.println("Tts synthesis fail,err_no=" + errNo + ",err_msg=" + resp.getString("err_msg") + ",log_id=" + resp.getString("log_id"));
            webSocket.close(1000, "");
            System.out.println("发生错误,关闭连接");
            return;
        }
        JSONObject dataObject = resp.getJSONObject("result");
        if (dataObject == null) {
            return;
        }
        String audioData = dataObject.getString("audio_data");
        if (StringUtils.isNotEmpty(audioData)) {
            appendAudio(audioData);
        }
        Integer endFlag = dataObject.getInteger("end_flag");
        if (endFlag != null && endFlag == 1) {
            // All data has been delivered: report timing and release the connection.
            Date endTime = new Date();
            Date begin = startTime != null ? startTime : endTime;
            System.out.println("session end,tts finished.开始时间:" + sdf.format(begin) + ",结束时间:" + sdf.format(endTime) + ",耗时:" + (endTime.getTime() - begin.getTime()) + "ms");
            webSocket.close(1000, "");
        }
    }

    /** Decodes one Base64 audio chunk and appends it to the result file. */
    private void appendAudio(String base64Audio) {
        try (FileOutputStream out = new FileOutputStream(resultFile, true)) {
            out.write(Base64.getDecoder().decode(base64Audio));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Reports connection failures, including those that carry no HTTP response. */
    @Override
    public void onFailure(WebSocket webSocket, Throwable t, Response response) {
        super.onFailure(webSocket, t, response);
        System.out.println("onFailure error:" + t);
        try {
            if (null != response) {
                int code = response.code();
                System.out.println("onFailure code:" + code);
                System.out.println("onFailure body:" + response.body().string());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Test entry point.
     *
     * @param args unused
     * @throws Exception on unexpected failure
     */
    public static void main(String[] args) throws Exception {
        OkHttpClient client = new OkHttpClient.Builder().build();
        Request request = new Request.Builder().url(hostUrl).build();
        File file = new File("/Users/jackey/Downloads/tts/test.pcm");
        // Test text.
        String ttsTestText = "吾名朱熹,字元晦,号晦庵,别称紫阳先生,南宋著名理学家、思想家、教育家,闽学派的代表人物,世称“朱子”。是中国封建社会后期影响最大的哲学家和思想家之一,其学说对后世影响深远,特别是在元、明、清三代,朱熹的理学被确立为官方哲学";
        if ((ttsTestText.getBytes(Charset.forName("UTF-8"))).length > MAX_BYTE_LENGTH) {
            // A single call must not exceed 300 Chinese characters (900 bytes). This demo simply
            // returns; a real integration could split the text instead.
            System.out.println("文本不能超过300个汉字");
            return;
        }
        client.newWebSocket(request, new TtsWebSocketDemo(ttsTestText, "libai_v2", file));
        // Let the process exit once the socket has finished instead of waiting for idle threads.
        client.dispatcher().executorService().shutdown();
    }

    /**
     * Requests an access token from {@link #tokenUrl}.
     *
     * @return the token, or an empty string when the request fails or is not successful
     */
    public static String getAccessToken() {
        String accessToken = "";
        OkHttpClient client = new OkHttpClient();
        // Request defaults to GET.
        String url = String.format(tokenUrl, clientSecret, clientId);
        Request request = new Request.Builder().url(url).build();
        // try-with-resources closes the response body and releases the connection.
        try (Response response = client.newCall(request).execute()) {
            if (response.isSuccessful()) {
                String resultJson = response.body().string();
                JSONObject jsonObject = JSON.parseObject(resultJson);
                accessToken = jsonObject.getString("access_token");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return accessToken;
    }
}
package com.luhu.computility.module.external.tts;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.validation.annotation.Validated;
import javax.validation.constraints.NotEmpty;
@Data
@Validated
@ConfigurationProperties(prefix = "external.tts")
public class TtsProperties {
/**
* DashScope API Key. If empty, falls back to env var DASHSCOPE_API_KEY.
*/
private String apiKey;
/**
* DashScope websocket base URL.
*/
@NotEmpty
private String wsBaseUrl = "wss://dashscope.aliyuncs.com/api-ws/v1/inference";
/**
* Model name, e.g. cosyvoice-v2
*/
@NotEmpty
private String model = "cosyvoice-v2";
/**
* Default voice, e.g. libai_v2
*/
@NotEmpty
private String defaultVoice = "libai_v2";
}
package com.luhu.computility.module.external.tts;
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.common.ResultCallback;
import com.alibaba.dashscope.utils.Constants;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.concurrent.CountDownLatch;
import java.util.function.Consumer;
/**
 * Thin wrapper around the DashScope {@link SpeechSynthesizer} that forwards audio frames and
 * terminal events to caller-supplied callbacks and closes the underlying websocket when the
 * synthesis has ended.
 */
@RequiredArgsConstructor
@Slf4j
public class TtsSynthesisService {
    private final TtsProperties properties;

    /**
     * Start streaming synthesis. Invokes onChunk for every audio frame, and onComplete when finished.
     * onError is called if an exception occurs. Nothing is thrown to the caller.
     *
     * @param text       text to synthesize
     * @param voiceName  voice to use; the configured default voice is used when null or empty
     * @param onChunk    receives the bytes of each audio frame
     * @param onComplete invoked when the SDK reports completion
     * @param onError    receives any failure, including exceptions thrown by the other callbacks
     */
    public void synthesizeStream(String text,
                                 String voiceName,
                                 Consumer<byte[]> onChunk,
                                 Runnable onComplete,
                                 Consumer<Exception> onError) {
        // The callback is created before the synthesizer, so it reaches it through this holder.
        final java.util.concurrent.atomic.AtomicReference<SpeechSynthesizer> synthesizerRef =
                new java.util.concurrent.atomic.AtomicReference<>();
        try {
            // Configure DashScope base websocket URL (a static setting of the SDK).
            Constants.baseWebsocketApiUrl = properties.getWsBaseUrl();
            String apiKey = resolveApiKey();
            if (apiKey == null || apiKey.isEmpty()) {
                throw new IllegalStateException("DashScope API key not configured (external.tts.api-key or DASHSCOPE_API_KEY)");
            }
            String voice = (voiceName == null || voiceName.isEmpty()) ? properties.getDefaultVoice() : voiceName;
            SpeechSynthesisParam param = SpeechSynthesisParam.builder()
                    .apiKey(apiKey)
                    .model(properties.getModel())
                    .voice(voice)
                    .build();
            ResultCallback<SpeechSynthesisResult> callback = new ResultCallback<SpeechSynthesisResult>() {
                @Override
                public void onEvent(SpeechSynthesisResult result) {
                    try {
                        if (result.getAudioFrame() != null) {
                            ByteBuffer buffer = result.getAudioFrame();
                            byte[] audioData = new byte[buffer.remaining()];
                            buffer.get(audioData);
                            onChunk.accept(audioData);
                        }
                    } catch (Exception e) {
                        onError.accept(e);
                    }
                }

                @Override
                public void onComplete() {
                    try {
                        onComplete.run();
                    } catch (Exception e) {
                        onError.accept(e);
                    } finally {
                        // Task ended: release the websocket connection.
                        TtsSynthesisService.closeQuietly(synthesizerRef.getAndSet(null));
                    }
                }

                @Override
                public void onError(Exception e) {
                    try {
                        onError.accept(e);
                    } finally {
                        TtsSynthesisService.closeQuietly(synthesizerRef.getAndSet(null));
                    }
                }
            };
            SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback);
            synthesizerRef.set(synthesizer);
            // Frames and terminal events are delivered through the callback.
            synthesizer.call(text);
        } catch (Exception e) {
            closeQuietly(synthesizerRef.getAndSet(null));
            onError.accept(e);
        }
    }

    /**
     * Closes the synthesizer's websocket, logging instead of propagating any failure.
     *
     * @param synthesizer synthesizer to close; {@code null} is ignored (already closed or never created)
     */
    private static void closeQuietly(SpeechSynthesizer synthesizer) {
        if (synthesizer == null) {
            return;
        }
        try {
            if (synthesizer.getDuplexApi() != null) {
                synthesizer.getDuplexApi().close(1000, "bye");
            }
        } catch (Exception e) {
            log.warn("Failed to close DashScope TTS websocket: {}", e.getMessage());
        }
    }

    /** Returns the configured API key, else the DASHSCOPE_API_KEY env var, else an empty string. */
    private String resolveApiKey() {
        if (properties.getApiKey() != null && !properties.getApiKey().isEmpty()) {
            return properties.getApiKey();
        }
        String env = System.getenv("DASHSCOPE_API_KEY");
        return Objects.toString(env, "");
    }
}
package com.luhu.computility.module.external.tts;
import com.luhu.computility.framework.common.util.json.JsonUtils;
import com.luhu.computility.module.external.tts.dto.TtsClientMessage;
import com.luhu.computility.module.external.tts.dto.TtsParams;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.socket.CloseStatus;
import org.springframework.web.socket.TextMessage;
import org.springframework.web.socket.WebSocketSession;
import org.springframework.web.socket.handler.TextWebSocketHandler;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
@RequiredArgsConstructor
@Slf4j
public class TtsWebSocketHandler extends TextWebSocketHandler {
private final TtsSynthesisService ttsSynthesisService;
private static final String ATTR_IN_PROGRESS = "ttsInProgress";
@Override
public void afterConnectionEstablished(WebSocketSession session) {
session.getAttributes().put(ATTR_IN_PROGRESS, new AtomicBoolean(false));
}
@Override
protected void handleTextMessage(WebSocketSession session, TextMessage message) {
// Only accept the first payload to start synthesis.
AtomicBoolean inProgress = (AtomicBoolean) session.getAttributes().get(ATTR_IN_PROGRESS);
if (inProgress == null) {
inProgress = new AtomicBoolean(false);
session.getAttributes().put(ATTR_IN_PROGRESS, inProgress);
}
if (inProgress.get()) {
// Ignore subsequent messages during an active synthesis
return;
}
TtsClientMessage clientMessage;
try {
clientMessage = JsonUtils.parseObject(message.getPayload(), TtsClientMessage.class);
} catch (Exception ex) {
sendErrorAndClose(session, 400, "invalid_request", ex);
return;
}
if (clientMessage == null || clientMessage.getTts_params() == null) {
sendErrorAndClose(session, 400, "missing_tts_params", null);
return;
}
final TtsParams p = clientMessage.getTts_params();
if (p.getText() == null || p.getText().isEmpty()) {
sendErrorAndClose(session, 400, "empty_text", null);
return;
}
inProgress.set(true);
final WebSocketSession ws = session;
ttsSynthesisService.synthesizeStream(
p.getText(),
p.getVoice_name(),
chunk -> {
try {
Map<String, Object> result = new HashMap<>();
String audioB64 = Base64.getEncoder().encodeToString(chunk);
Map<String, Object> payload = new HashMap<>();
payload.put("err_no", 0);
result.put("audio_data", audioB64);
result.put("end_flag", 0);
payload.put("result", result);
synchronized (ws) {
if (ws.isOpen()) {
ws.sendMessage(new TextMessage(JsonUtils.toJsonString(payload)));
}
}
} catch (Exception e) {
log.warn("TTS stream send error: {}", e.getMessage());
}
},
() -> {
try {
Map<String, Object> payload = new HashMap<>();
Map<String, Object> result = new HashMap<>();
payload.put("err_no", 0);
result.put("audio_data", "");
result.put("end_flag", 1);
payload.put("result", result);
synchronized (ws) {
if (ws.isOpen()) {
ws.sendMessage(new TextMessage(JsonUtils.toJsonString(payload)));
ws.close(CloseStatus.NORMAL);
}
}
} catch (Exception e) {
log.warn("TTS complete send/close error: {}", e.getMessage());
}
},
ex -> sendErrorAndClose(ws, 500, "synthesis_error", ex)
);
}
private void sendErrorAndClose(WebSocketSession session, int code, String msg, Exception ex) {
try {
Map<String, Object> payload = new HashMap<>();
payload.put("err_no", code);
payload.put("err_msg", msg);
payload.put("log_id", "");
synchronized (session) {
if (session.isOpen()) {
session.sendMessage(new TextMessage(JsonUtils.toJsonString(payload)));
session.close(new CloseStatus(code, msg));
}
}
} catch (Exception e) {
// ignore
}
}
}
package com.luhu.computility.module.external.tts.dto;
import lombok.Data;
/**
 * Top-level request message a client sends over the TTS WebSocket.
 *
 * <p>NOTE(review): field names are snake_case, presumably so they match the JSON keys sent
 * by the client; Lombok derives accessor names such as {@code getTts_params()} from them.
 * Confirm the JSON mapping before renaming anything.
 */
@Data
public class TtsClientMessage {
// Accepted from the client but not used.
private String access_token; // ignored
// Accepted from the client but not used.
private String version; // ignored
// Synthesis parameters for this request.
private TtsParams tts_params;
}
package com.luhu.computility.module.external.tts.dto;
import lombok.Data;
/**
 * Synthesis parameters carried inside a TTS client message.
 *
 * <p>NOTE(review): field names are snake_case, presumably to match the client's JSON keys;
 * confirm the JSON mapping before renaming anything.
 */
@Data
public class TtsParams {
private Integer domain; // required by client, not used
private String language; // e.g. ZH
private String audio_fmt; // e.g. PCM
private Double speed; // optional
private Integer volume; // optional
private Integer sample_rate; // optional
private String voice_name; // maps to DashScope voice
private String text; // input text
}
package com.luhu.computility.module.external.websocket;
import com.luhu.computility.module.external.tts.TtsProperties;
import com.luhu.computility.module.external.tts.TtsSynthesisService;
import com.luhu.computility.module.external.tts.TtsWebSocketHandler;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.annotation.Order;
import org.springframework.http.server.ServerHttpRequest;
import org.springframework.http.server.ServerHttpResponse;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.web.SecurityFilterChain;
import org.springframework.web.socket.WebSocketHandler;
import org.springframework.web.socket.config.annotation.EnableWebSocket;
import org.springframework.web.socket.config.annotation.WebSocketConfigurer;
import org.springframework.web.socket.config.annotation.WebSocketHandlerRegistry;
import org.springframework.web.socket.server.HandshakeInterceptor;
import java.util.Map;
/**
 * Spring configuration for the TTS streaming WebSocket endpoint.
 *
 * <p>Registers the handler at {@code /ws/wsapi}, wires the synthesis service, and declares a
 * dedicated security filter chain that permits every request to that path.
 *
 * <p>NOTE(review): the endpoint accepts any origin and is fully unauthenticated. That matches
 * the original behavior but should be confirmed as intentional before production use.
 */
@Slf4j
@Configuration
@EnableWebSocket
@EnableConfigurationProperties(TtsProperties.class)
public class TtsWebSocketConfig {

    /** Path of the TTS WebSocket endpoint; shared by handler registration and security matcher. */
    private static final String TTS_ENDPOINT = "/ws/wsapi";

    /**
     * Registers the TTS handler on {@code /ws/wsapi}, allowing all origins.
     *
     * @param ttsWebSocketHandler the handler bean that serves TTS sessions
     * @return a configurer that performs the registration
     */
    @Bean
    public WebSocketConfigurer ttsWebSocketConfigurer(WebSocketHandler ttsWebSocketHandler) {
        log.info("===== TTS WebSocket 配置已加载 =====");
        return registry -> {
            registry.addHandler(ttsWebSocketHandler, TTS_ENDPOINT)
                    .setAllowedOrigins("*")
                    // The interceptor only observes handshakes; it does not affect security.
                    .addInterceptors(new TtsHandshakeInterceptor());
            log.info("===== WebSocket 端点 /ws/wsapi 已注册 =====");
        };
    }

    /** Creates the synthesis service from the bound TTS properties. */
    @Bean
    public TtsSynthesisService ttsSynthesisService(TtsProperties properties) {
        return new TtsSynthesisService(properties);
    }

    /** Creates the WebSocket handler that streams synthesized audio to clients. */
    @Bean(name = "ttsWebSocketHandler")
    public WebSocketHandler ttsWebSocketHandler(TtsSynthesisService ttsSynthesisService) {
        return new TtsWebSocketHandler(ttsSynthesisService);
    }

    /**
     * Dedicated security filter chain for the TTS endpoint: every request to
     * {@code /ws/wsapi} is permitted and CSRF protection is disabled for that path.
     *
     * <p>{@code @Order(1)} gives this chain high precedence so that it is consulted before
     * the application's main security configuration.
     *
     * @param http the security builder supplied by Spring Security
     * @return the filter chain that matches only the TTS endpoint
     * @throws Exception if the chain cannot be built
     */
    @Bean
    @Order(1)
    public SecurityFilterChain ttsWebSocketSecurityFilterChain(HttpSecurity http) throws Exception {
        http.securityMatcher(TTS_ENDPOINT)
                .authorizeHttpRequests(authorize -> authorize.anyRequest().permitAll())
                // Lambda DSL replaces the deprecated chained csrf().disable() form.
                .csrf(csrf -> csrf.disable());
        return http.build();
    }

    /**
     * Handshake interceptor that accepts every handshake and logs it at debug level.
     *
     * <p>It does not bypass Spring Security: the security filter chain has already run by
     * the time a handshake interceptor is invoked. Access is opened by
     * {@link #ttsWebSocketSecurityFilterChain(HttpSecurity)}.
     */
    public static class TtsHandshakeInterceptor implements HandshakeInterceptor {

        @Override
        public boolean beforeHandshake(ServerHttpRequest request, ServerHttpResponse response,
                                       WebSocketHandler wsHandler, Map<String, Object> attributes) {
            // Always proceed with the handshake.
            log.debug("===== TTS WebSocket 握手请求已放行 =====");
            return true;
        }

        @Override
        public void afterHandshake(ServerHttpRequest request, ServerHttpResponse response,
                                   WebSocketHandler wsHandler, Exception exception) {
            // Nothing to do after the handshake completes.
        }
    }
}
...@@ -439,3 +439,7 @@ text-to-image: ...@@ -439,3 +439,7 @@ text-to-image:
season: ${text-to-image.base-url}/v2/t2i/txt2Img/ season: ${text-to-image.base-url}/v2/t2i/txt2Img/
file-name: ${text-to-image.base-url}/user/images/ file-name: ${text-to-image.base-url}/user/images/
poetry: ${text-to-image.base-url}/v2/t2i/getPoetryImg poetry: ${text-to-image.base-url}/v2/t2i/getPoetryImg
external:
  tts:
    # Do not commit a real key here. Supply it through the DASHSCOPE_API_KEY environment
    # variable; it resolves to empty when unset, in which case the service falls back to
    # reading the same variable itself.
    # SECURITY: the key previously committed on this line is exposed in version control
    # and must be revoked and rotated.
    api-key: ${DASHSCOPE_API_KEY:}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment