From 1860fac87857e73da977385e1339fd4b1d9eced5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:31:13 +0000 Subject: [PATCH 1/2] Initial plan From 43e3ea2340eaea729edcb8a9039880a2d237122d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:42:25 +0000 Subject: [PATCH 2/2] Implement OpenAI TTS functionality with UI settings and selected text support Co-authored-by: PeterDaveHello <3691490+PeterDaveHello@users.noreply.github.com> --- src/_locales/en/main.json | 9 +- src/_locales/zh-hans/main.json | 9 +- src/components/ReadButton/index.jsx | 82 +++++++++++++++- src/config/index.mjs | 6 ++ src/content-script/menu-tools/index.mjs | 56 +++++++++++ src/popup/sections/GeneralPart.jsx | 82 ++++++++++++++++ src/services/openai-tts.mjs | 119 ++++++++++++++++++++++++ 7 files changed, 356 insertions(+), 7 deletions(-) create mode 100644 src/services/openai-tts.mjs diff --git a/src/_locales/en/main.json b/src/_locales/en/main.json index 174f99af..da6f211e 100644 --- a/src/_locales/en/main.json +++ b/src/_locales/en/main.json @@ -160,5 +160,12 @@ "Type": "Type", "Mode": "Mode", "Custom": "Custom", - "Crop Text to ensure the input tokens do not exceed the model's limit": "Crop Text to ensure the input tokens do not exceed the model's limit" + "Crop Text to ensure the input tokens do not exceed the model's limit": "Crop Text to ensure the input tokens do not exceed the model's limit", + "Text-to-Speech Settings": "Text-to-Speech Settings", + "Enable OpenAI TTS (requires API key)": "Enable OpenAI TTS (requires API key)", + "TTS Voice": "TTS Voice", + "TTS Model": "TTS Model", + "TTS Speed": "TTS Speed", + "Read Aloud": "Read Aloud", + "Read Selected Text": "Read Selected Text" } diff --git a/src/_locales/zh-hans/main.json b/src/_locales/zh-hans/main.json index 80d06c85..3614ff7d 100644 --- a/src/_locales/zh-hans/main.json +++ b/src/_locales/zh-hans/main.json @@ -166,5 +166,12 @@ "ChatGLM (Emohaa)": "ChatGLM (Emohaa, 专业情绪咨询)", "ChatGLM (CharGLM-3)": "ChatGLM (CharGLM-3, 角色扮演)", "Crop Text to ensure the input tokens do not exceed the model's limit": "裁剪文本以确保输入token不超过模型限制", - "Thinking Content": "思考内容" + "Thinking Content": "思考内容", + "Text-to-Speech Settings": "语音朗读设置", + "Enable OpenAI TTS (requires API key)": "启用 OpenAI TTS (需要 API 密钥)", + "TTS Voice": "TTS 声音", + "TTS Model": "TTS 模型", + "TTS Speed": "朗读速度", + "Read Aloud": "朗读", + "Read Selected Text": "朗读选中文本" } diff --git a/src/components/ReadButton/index.jsx b/src/components/ReadButton/index.jsx index 5ca051c2..e90cc41b 100644 --- a/src/components/ReadButton/index.jsx +++ b/src/components/ReadButton/index.jsx @@ -1,8 +1,9 @@ -import { useState } from 'react' +import { useState, useEffect } from 'react' import { MuteIcon, UnmuteIcon } from '@primer/octicons-react' import PropTypes from 'prop-types' import { useTranslation } from 'react-i18next' import { useConfig } from '../../hooks/use-config.mjs' +import { speakText, isTtsAvailable } from '../../services/openai-tts.mjs' ReadButton.propTypes = { contentFn: PropTypes.func.isRequired, @@ -15,9 +16,62 @@ const synth = window.speechSynthesis function ReadButton({ className, contentFn, size }) { const { t } = useTranslation() const [speaking, setSpeaking] = useState(false) + const [loading, setLoading] = useState(false) + const [useOpenAiTts, setUseOpenAiTts] = useState(false) + const [currentAudio, setCurrentAudio] = useState(null) const config = useConfig() - const startSpeak = () => { + // Check if OpenAI TTS is available on component mount and config changes + useEffect(() => { + const checkTtsAvailability = async () => { + const available = await isTtsAvailable() + setUseOpenAiTts(available) + } + checkTtsAvailability() + }, [config.enableOpenAiTts, config.apiKey]) + + const startOpenAiTtsSpeak = async () => { + try { + setLoading(true) + setSpeaking(true) + + const text = contentFn() + const audio = await speakText(text, { + voice: config.openAiTtsVoice, + model: config.openAiTtsModel, + speed: config.openAiTtsSpeed, + }) + + setCurrentAudio(audio) + setLoading(false) + + // Play the audio + await audio.play() + + // Handle audio end + audio.onended = () => { + setSpeaking(false) + setCurrentAudio(null) + } + + audio.onerror = () => { + setSpeaking(false) + setCurrentAudio(null) + setLoading(false) + console.error('Audio playback error') + } + } catch (error) { + console.error('OpenAI TTS error:', error) + setLoading(false) + setSpeaking(false) + setCurrentAudio(null) + + // Fallback to system TTS on error + startSystemTtsSpeak() + } + } + + const startSystemTtsSpeak = () => { synth.cancel() const text = contentFn() @@ -46,18 +100,36 @@ function ReadButton({ className, contentFn, size }) { setSpeaking(true) } + const startSpeak = () => { + if (useOpenAiTts) { + startOpenAiTtsSpeak() + } else { + startSystemTtsSpeak() + } + } + const stopSpeak = () => { + if (currentAudio) { + currentAudio.pause() + currentAudio.currentTime = 0 + setCurrentAudio(null) + } synth.cancel() setSpeaking(false) + setLoading(false) } + // Show loading state or speaking state + const isActive = speaking || loading + return ( - {speaking ? : } + {isActive ? : } ) } diff --git a/src/config/index.mjs b/src/config/index.mjs index fb504aee..e0a40ea7 100644 --- a/src/config/index.mjs +++ b/src/config/index.mjs @@ -476,6 +476,12 @@ export const defaultConfig = { alwaysPinWindow: false, focusAfterAnswer: true, + // TTS settings + enableOpenAiTts: false, + openAiTtsVoice: 'alloy', + openAiTtsModel: 'tts-1', + openAiTtsSpeed: 1.0, + apiKey: '', // openai ApiKey azureApiKey: '', diff --git a/src/content-script/menu-tools/index.mjs b/src/content-script/menu-tools/index.mjs index 0f46392e..04cb50f8 100644 --- a/src/content-script/menu-tools/index.mjs +++ b/src/content-script/menu-tools/index.mjs @@ -2,6 +2,7 @@ import { getCoreContentText } from '../../utils/get-core-content-text' import Browser from 'webextension-polyfill' import { getUserConfig } from '../../config/index.mjs' import { openUrl } from '../../utils/open-url' +import { speakText, isTtsAvailable } from '../../services/openai-tts.mjs' export const config = { newChat: { @@ -16,6 +17,61 @@ export const config = { return `You are an expert summarizer. Carefully analyze the following web page content and provide a concise summary focusing on the key points:\n${getCoreContentText()}` }, }, + readSelectedText: { + label: 'Read Selected Text', + action: async (fromBackground) => { + console.debug('read selected text action from background', fromBackground) + + const selection = window.getSelection() + const selectedText = selection ? selection.toString().trim() : '' + + if (!selectedText) { + alert('Please select some text first') + return + } + + try { + const config = await getUserConfig() + const useTts = await isTtsAvailable() + + if (useTts) { + // Use OpenAI TTS + await speakText(selectedText, { + voice: config.openAiTtsVoice, + model: config.openAiTtsModel, + speed: config.openAiTtsSpeed, + }) + } else { + // Fallback to system TTS + const synth = window.speechSynthesis + synth.cancel() + + const utterance = new SpeechSynthesisUtterance(selectedText) + const voices = synth.getVoices() + + let voice + if (config.preferredLanguage.includes('en') && navigator.language.includes('en')) + voice = voices.find((v) => v.name.toLowerCase().includes('microsoft aria')) + else if (config.preferredLanguage.includes('zh') || navigator.language.includes('zh')) + voice = voices.find((v) => v.name.toLowerCase().includes('xiaoyi')) + else if (config.preferredLanguage.includes('ja') || navigator.language.includes('ja')) + voice = voices.find((v) => v.name.toLowerCase().includes('nanami')) + if (!voice) + voice = voices.find((v) => v.lang.substring(0, 2) === config.preferredLanguage) + if (!voice) voice = voices.find((v) => v.lang === navigator.language) + + if (voice) utterance.voice = voice + utterance.rate = 1 + utterance.volume = 1 + + synth.speak(utterance) + } + } catch (error) { + console.error('Error reading selected text:', error) + alert('Error reading selected text: ' + error.message) + } + }, + }, openConversationPage: { label: 'Open Conversation Page', action: async (fromBackground) => { diff --git a/src/popup/sections/GeneralPart.jsx b/src/popup/sections/GeneralPart.jsx index 9af6e542..06fadd37 100644 --- a/src/popup/sections/GeneralPart.jsx +++ b/src/popup/sections/GeneralPart.jsx @@ -631,6 +631,88 @@ export function GeneralPart({ config, updateConfig, setTabIndex }) { /> {t("Crop Text to ensure the input tokens do not exceed the model's limit")} + + {/* Text-to-Speech Settings */} +
+
+ {t('Text-to-Speech Settings')} + + {config.enableOpenAiTts && ( + <> + + + + + )} +