From 1860fac87857e73da977385e1339fd4b1d9eced5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 18:31:13 +0000
Subject: [PATCH 1/2] Initial plan
From 43e3ea2340eaea729edcb8a9039880a2d237122d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 18:42:25 +0000
Subject: [PATCH 2/2] Implement OpenAI TTS functionality with UI settings and
selected text support
Co-authored-by: PeterDaveHello <3691490+PeterDaveHello@users.noreply.github.com>
---
src/_locales/en/main.json | 9 +-
src/_locales/zh-hans/main.json | 9 +-
src/components/ReadButton/index.jsx | 82 +++++++++++++++-
src/config/index.mjs | 6 ++
src/content-script/menu-tools/index.mjs | 56 +++++++++++
src/popup/sections/GeneralPart.jsx | 82 ++++++++++++++++
src/services/openai-tts.mjs | 119 ++++++++++++++++++++++++
7 files changed, 356 insertions(+), 7 deletions(-)
create mode 100644 src/services/openai-tts.mjs
diff --git a/src/_locales/en/main.json b/src/_locales/en/main.json
index 174f99af..da6f211e 100644
--- a/src/_locales/en/main.json
+++ b/src/_locales/en/main.json
@@ -160,5 +160,12 @@
"Type": "Type",
"Mode": "Mode",
"Custom": "Custom",
- "Crop Text to ensure the input tokens do not exceed the model's limit": "Crop Text to ensure the input tokens do not exceed the model's limit"
+ "Crop Text to ensure the input tokens do not exceed the model's limit": "Crop Text to ensure the input tokens do not exceed the model's limit",
+ "Text-to-Speech Settings": "Text-to-Speech Settings",
+ "Enable OpenAI TTS (requires API key)": "Enable OpenAI TTS (requires API key)",
+ "TTS Voice": "TTS Voice",
+ "TTS Model": "TTS Model",
+ "TTS Speed": "TTS Speed",
+ "Read Aloud": "Read Aloud",
+ "Read Selected Text": "Read Selected Text"
}
diff --git a/src/_locales/zh-hans/main.json b/src/_locales/zh-hans/main.json
index 80d06c85..3614ff7d 100644
--- a/src/_locales/zh-hans/main.json
+++ b/src/_locales/zh-hans/main.json
@@ -166,5 +166,12 @@
"ChatGLM (Emohaa)": "ChatGLM (Emohaa, 专业情绪咨询)",
"ChatGLM (CharGLM-3)": "ChatGLM (CharGLM-3, 角色扮演)",
"Crop Text to ensure the input tokens do not exceed the model's limit": "裁剪文本以确保输入token不超过模型限制",
- "Thinking Content": "思考内容"
+ "Thinking Content": "思考内容",
+ "Text-to-Speech Settings": "语音朗读设置",
+ "Enable OpenAI TTS (requires API key)": "启用 OpenAI TTS (需要 API 密钥)",
+ "TTS Voice": "TTS 声音",
+ "TTS Model": "TTS 模型",
+ "TTS Speed": "朗读速度",
+ "Read Aloud": "朗读",
+ "Read Selected Text": "朗读选中文本"
}
diff --git a/src/components/ReadButton/index.jsx b/src/components/ReadButton/index.jsx
index 5ca051c2..e90cc41b 100644
--- a/src/components/ReadButton/index.jsx
+++ b/src/components/ReadButton/index.jsx
@@ -1,8 +1,9 @@
-import { useState } from 'react'
+import { useState, useEffect } from 'react'
import { MuteIcon, UnmuteIcon } from '@primer/octicons-react'
import PropTypes from 'prop-types'
import { useTranslation } from 'react-i18next'
import { useConfig } from '../../hooks/use-config.mjs'
+import { speakText, isTtsAvailable } from '../../services/openai-tts.mjs'
ReadButton.propTypes = {
contentFn: PropTypes.func.isRequired,
@@ -15,9 +16,62 @@ const synth = window.speechSynthesis
function ReadButton({ className, contentFn, size }) {
const { t } = useTranslation()
const [speaking, setSpeaking] = useState(false)
+ const [loading, setLoading] = useState(false)
+ const [useOpenAiTts, setUseOpenAiTts] = useState(false)
+ const [currentAudio, setCurrentAudio] = useState(null)
const config = useConfig()
- const startSpeak = () => {
+  // Re-check whether OpenAI TTS can be used when the component mounts and
+  // whenever the enable flag or the OpenAI API key in the config changes.
+  useEffect(() => {
+    // Set by the cleanup below so a check that resolves after a newer one has
+    // started (or after unmount) does not overwrite state.
+    let cancelled = false
+    const checkTtsAvailability = async () => {
+      try {
+        const available = await isTtsAvailable()
+        if (!cancelled) setUseOpenAiTts(available)
+      } catch (error) {
+        // A failed check is treated as "unavailable" so the system
+        // synthesizer is used instead of leaving a rejected promise unhandled.
+        console.error('Failed to check OpenAI TTS availability:', error)
+        if (!cancelled) setUseOpenAiTts(false)
+      }
+    }
+    checkTtsAvailability()
+    return () => {
+      cancelled = true
+    }
+  }, [config.enableOpenAiTts, config.apiKey])
+
+  // Speaks the text returned by contentFn() through OpenAI TTS using the
+  // configured voice, model and speed. If fetching the audio or starting
+  // playback throws, the system speech synthesizer is used instead.
+  // NOTE(review): speakText is assumed to resolve to an HTMLAudioElement-like
+  // object (play(), onended, onerror) - confirm against openai-tts.mjs.
+  const startOpenAiTtsSpeak = async () => {
+    try {
+      setLoading(true)
+      setSpeaking(true)
+
+      const text = contentFn()
+      const audio = await speakText(text, {
+        voice: config.openAiTtsVoice,
+        model: config.openAiTtsModel,
+        speed: config.openAiTtsSpeed,
+      })
+
+      // Register the handlers before play() so neither an 'ended' nor an
+      // 'error' event can fire while nothing is listening.
+      audio.onended = () => {
+        setSpeaking(false)
+        setCurrentAudio(null)
+      }
+
+      audio.onerror = () => {
+        setSpeaking(false)
+        setCurrentAudio(null)
+        setLoading(false)
+        console.error('Audio playback error')
+      }
+
+      // Keep a handle on the element so stopSpeak can pause it.
+      setCurrentAudio(audio)
+      setLoading(false)
+
+      // Play the audio
+      await audio.play()
+    } catch (error) {
+      console.error('OpenAI TTS error:', error)
+      setLoading(false)
+      setSpeaking(false)
+      setCurrentAudio(null)
+
+      // Fallback to system TTS on error
+      startSystemTtsSpeak()
+    }
+  }
+
+ const startSystemTtsSpeak = () => {
synth.cancel()
const text = contentFn()
@@ -46,18 +100,36 @@ function ReadButton({ className, contentFn, size }) {
setSpeaking(true)
}
+  // Starts playback with OpenAI TTS when it was detected as available,
+  // otherwise with the system speech synthesizer.
+  const startSpeak = () => {
+    const speak = useOpenAiTts ? startOpenAiTtsSpeak : startSystemTtsSpeak
+    speak()
+  }
+
+ // Stops playback from either backend and clears the speaking/loading state.
const stopSpeak = () => {
+ // Pause and rewind the OpenAI audio element if one is currently tracked.
+ // NOTE(review): currentAudio is only set after speakText resolves, so a stop
+ // issued while the audio is still loading does not cancel that request.
+ if (currentAudio) {
+ currentAudio.pause()
+ currentAudio.currentTime = 0
+ setCurrentAudio(null)
+ }
synth.cancel()
setSpeaking(false)
+ setLoading(false)
}
+ // Show loading state or speaking state
+ const isActive = speaking || loading
+
return (
- {speaking ? : }
+ {isActive ? : }
)
}
diff --git a/src/config/index.mjs b/src/config/index.mjs
index fb504aee..e0a40ea7 100644
--- a/src/config/index.mjs
+++ b/src/config/index.mjs
@@ -476,6 +476,12 @@ export const defaultConfig = {
alwaysPinWindow: false,
focusAfterAnswer: true,
+ // TTS settings
+ enableOpenAiTts: false,
+ openAiTtsVoice: 'alloy',
+ openAiTtsModel: 'tts-1',
+ openAiTtsSpeed: 1.0,
+
apiKey: '', // openai ApiKey
azureApiKey: '',
diff --git a/src/content-script/menu-tools/index.mjs b/src/content-script/menu-tools/index.mjs
index 0f46392e..04cb50f8 100644
--- a/src/content-script/menu-tools/index.mjs
+++ b/src/content-script/menu-tools/index.mjs
@@ -2,6 +2,7 @@ import { getCoreContentText } from '../../utils/get-core-content-text'
import Browser from 'webextension-polyfill'
import { getUserConfig } from '../../config/index.mjs'
import { openUrl } from '../../utils/open-url'
+import { speakText, isTtsAvailable } from '../../services/openai-tts.mjs'
export const config = {
newChat: {
@@ -16,6 +17,61 @@ export const config = {
return `You are an expert summarizer. Carefully analyze the following web page content and provide a concise summary focusing on the key points:\n${getCoreContentText()}`
},
},
+  readSelectedText: {
+    label: 'Read Selected Text',
+    /**
+     * Reads the text currently selected on the page aloud.
+     *
+     * Uses OpenAI TTS when isTtsAvailable() reports it as usable and falls
+     * back to the browser speech synthesizer otherwise, or when the OpenAI
+     * path throws. Shows an alert when nothing is selected or when reading
+     * fails altogether.
+     *
+     * @param {*} fromBackground - forwarded by the menu dispatcher; only logged here.
+     * @returns {Promise<void>}
+     */
+    action: async (fromBackground) => {
+      console.debug('read selected text action from background', fromBackground)
+
+      const selection = window.getSelection()
+      const selectedText = selection ? selection.toString().trim() : ''
+
+      if (!selectedText) {
+        alert('Please select some text first')
+        return
+      }
+
+      // Speaks selectedText with the browser speech synthesizer, choosing a
+      // voice from the preferred language and the browser locale.
+      const speakWithSystemTts = (userConfig) => {
+        const synth = window.speechSynthesis
+        synth.cancel()
+
+        const utterance = new SpeechSynthesisUtterance(selectedText)
+        const voices = synth.getVoices()
+
+        let voice
+        if (userConfig.preferredLanguage.includes('en') && navigator.language.includes('en'))
+          voice = voices.find((v) => v.name.toLowerCase().includes('microsoft aria'))
+        else if (userConfig.preferredLanguage.includes('zh') || navigator.language.includes('zh'))
+          voice = voices.find((v) => v.name.toLowerCase().includes('xiaoyi'))
+        else if (userConfig.preferredLanguage.includes('ja') || navigator.language.includes('ja'))
+          voice = voices.find((v) => v.name.toLowerCase().includes('nanami'))
+        if (!voice)
+          voice = voices.find((v) => v.lang.substring(0, 2) === userConfig.preferredLanguage)
+        if (!voice) voice = voices.find((v) => v.lang === navigator.language)
+
+        if (voice) utterance.voice = voice
+        utterance.rate = 1
+        utterance.volume = 1
+
+        synth.speak(utterance)
+      }
+
+      try {
+        // Named userConfig so it does not shadow this module's exported `config`.
+        const userConfig = await getUserConfig()
+        const useTts = await isTtsAvailable()
+
+        if (useTts) {
+          try {
+            // speakText only hands back the audio element (ReadButton calls
+            // play() on it as well), so playback has to be started here.
+            const audio = await speakText(selectedText, {
+              voice: userConfig.openAiTtsVoice,
+              model: userConfig.openAiTtsModel,
+              speed: userConfig.openAiTtsSpeed,
+            })
+            await audio.play()
+            return
+          } catch (error) {
+            // Same policy as ReadButton: log and fall back to system TTS.
+            console.error('OpenAI TTS error:', error)
+          }
+        }
+
+        speakWithSystemTts(userConfig)
+      } catch (error) {
+        console.error('Error reading selected text:', error)
+        alert(`Error reading selected text: ${error.message}`)
+      }
+    },
+  },
openConversationPage: {
label: 'Open Conversation Page',
action: async (fromBackground) => {
diff --git a/src/popup/sections/GeneralPart.jsx b/src/popup/sections/GeneralPart.jsx
index 9af6e542..06fadd37 100644
--- a/src/popup/sections/GeneralPart.jsx
+++ b/src/popup/sections/GeneralPart.jsx
@@ -631,6 +631,88 @@ export function GeneralPart({ config, updateConfig, setTabIndex }) {
/>
{t("Crop Text to ensure the input tokens do not exceed the model's limit")}
+
+ {/* Text-to-Speech Settings */}
+
+