From 35b39c6eecc5e2d070ff93e688811a0cdc9b72c0 Mon Sep 17 00:00:00 2001 From: Silberengel Date: Fri, 13 Feb 2026 19:51:51 +0100 Subject: [PATCH] add piper tts --- .gitignore | 5 + DEPLOYMENT.md | 179 ++++ Dockerfile | 36 +- docker-compose.yml | 45 + docker-entrypoint.sh | 59 +- download-voices.sh | 193 ++++ package-lock.json | 178 +++- package.json | 5 +- public/healthz.json | 4 +- setup-wyoming-piper.sh | 19 + src/app.css | 14 +- src/lib/components/content/TTSControls.svelte | 233 +++- src/lib/services/tts/tts-service.ts | 663 ++++++++---- src/lib/services/tts/types.ts | 4 +- src/routes/api/piper-tts/+server.ts | 995 ++++++++++++++++++ svelte.config.js | 9 +- 16 files changed, 2358 insertions(+), 283 deletions(-) create mode 100644 DEPLOYMENT.md create mode 100755 download-voices.sh create mode 100755 setup-wyoming-piper.sh create mode 100644 src/routes/api/piper-tts/+server.ts diff --git a/.gitignore b/.gitignore index 6635cf5..22c97cf 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,8 @@ node_modules !.env.example vite.config.js.timestamp-* vite.config.ts.timestamp-* +wyoming-piper +# Piper voice files (can be large) +piper-data/*.onnx +piper-data/*.onnx.json +piper-data/voices/ \ No newline at end of file diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..1cd0567 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,179 @@ +# Deployment Guide + +This guide explains how to deploy aitherboard with Piper TTS on a remote server with Apache. + +## Architecture + +``` +Internet → Apache (443) → aitherboard container (9876) + ↓ + piper-tts container (10200, internal) +``` + +- **Apache** proxies HTTP/WebSocket requests to the aitherboard container +- **aitherboard** container connects to **piper-tts** container via Docker internal networking +- **piper-tts** is not exposed to the host - only accessible from within Docker network + +## Docker Compose Setup + +The `docker-compose.yml` is ready to use as-is. It: + +1. 
Creates a Docker network (`aitherboard-network`) for container communication +2. Exposes aitherboard on port `9876` (for Apache to proxy to) +3. Keeps piper-tts internal (port `10200` only accessible from Docker network) +4. Uses service names (`piper-tts`) for internal communication + +## Apache Configuration + +Your Apache configuration should proxy to `localhost:9876` (where aitherboard runs): + +```apache +ProxyPreserveHost On +ProxyRequests Off + +# WebSocket upgrade handling - CRITICAL for Nostr apps +RewriteEngine On +RewriteCond %{HTTP:Upgrade} websocket [NC] +RewriteCond %{HTTP:Connection} upgrade [NC] +RewriteRule ^/?(.*) "ws://127.0.0.1:9876/$1" [P,L] + +# Regular HTTP proxy for static files and API calls (catch-all - MUST come LAST) +ProxyPass / http://127.0.0.1:9876/ +ProxyPassReverse / http://127.0.0.1:9876/ + +# Headers for WebSocket compatibility +ProxyAddHeaders On +Header always set X-Forwarded-Proto "https" +Header always set X-Forwarded-Port "443" +``` + +**Important**: Apache only needs to proxy to aitherboard. It does NOT need to route to piper-tts - that's handled internally by Docker. + +## Deployment Steps + +1. **Clone and prepare the repository:** + ```bash + git clone + cd aitherboard + ``` + +2. **Set up Wyoming Piper:** + ```bash + ./setup-wyoming-piper.sh + ``` + +3. **Download voices (optional, but recommended):** + ```bash + ./download-voices.sh + ``` + +4. **Start the containers:** + ```bash + docker-compose up -d --build + ``` + +5. **Verify containers are running:** + ```bash + docker-compose ps + ``` + +6. 
**Check logs if needed:** + ```bash + docker-compose logs aitherboard + docker-compose logs piper-tts + ``` + +## Container Communication + +The aitherboard container connects to piper-tts using: + +- **Hostname**: `piper-tts` (Docker service name) +- **Port**: `10200` (internal Docker network) + +This is configured via environment variables in `docker-compose.yml`: +- `PIPER_TTS_HOST=piper-tts` +- `PIPER_TTS_PORT=10200` + +You can override these if needed, but the defaults work for Docker Compose. + +## Network Flow + +1. **User request** → Apache (port 443) +2. **Apache** → aitherboard container (localhost:9876) +3. **aitherboard** → piper-tts container (piper-tts:10200 via Docker network) +4. **piper-tts** → returns audio to aitherboard +5. **aitherboard** → returns audio to Apache +6. **Apache** → returns audio to user + +## Troubleshooting + +### Piper TTS not working + +1. **Check if containers are on the same network:** + ```bash + docker network inspect aitherboard_aitherboard-network + ``` + +2. **Test connection from aitherboard to piper-tts:** + ```bash + docker exec aitherboard ping piper-tts + ``` + +3. **Check piper-tts logs:** + ```bash + docker-compose logs piper-tts + ``` + +4. **Verify voices are available:** + ```bash + docker exec piper-tts ls -la /data/voices/ + ``` + +### Apache can't connect to aitherboard + +1. **Check if aitherboard is listening:** + ```bash + curl http://localhost:9876/healthz.json + ``` + +2. **Check aitherboard logs:** + ```bash + docker-compose logs aitherboard + ``` + +3. 
**Verify port mapping:** + ```bash + docker-compose ps + # Should show: 0.0.0.0:9876->9876/tcp + ``` + +## Environment Variables + +You can customize the setup via environment variables in `docker-compose.yml`: + +- `PIPER_TTS_HOST`: Override Piper hostname (default: `piper-tts`) +- `PIPER_TTS_PORT`: Override Piper port (default: `10200`) +- `NODE_ENV`: Set to `production` (already set) + +## Security Notes + +- **piper-tts** is NOT exposed to the internet - only accessible from Docker network +- Only **aitherboard** port `9876` is exposed to the host +- Apache handles SSL/TLS termination +- All internal communication happens over Docker's bridge network + +## Updating + +To update the containers: + +```bash +docker-compose pull # If using pre-built images +docker-compose up -d --build # Rebuild and restart +``` + +To update voices: + +```bash +./download-voices.sh +docker-compose restart piper-tts # Restart to pick up new voices +``` diff --git a/Dockerfile b/Dockerfile index d7a4734..2b10ce6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,29 +12,25 @@ ENV VITE_DEFAULT_RELAYS=${VITE_DEFAULT_RELAYS} ENV VITE_THREAD_TIMEOUT_DAYS=${VITE_THREAD_TIMEOUT_DAYS} RUN npm run build -FROM httpd:alpine -RUN apk add --no-cache gettext && \ - mkdir -p /usr/local/apache2/logs && \ - chown -R daemon:daemon /usr/local/apache2/logs -COPY --from=builder /app/build /usr/local/apache2/htdocs/ -# Ensure healthz.json exists (SvelteKit copies public/healthz.json to build/) -# If it doesn't exist for some reason, create a default one -RUN if [ ! -f /usr/local/apache2/htdocs/healthz.json ]; then \ - echo '{"status":"ok","service":"aitherboard","version":"unknown","buildTime":"'$(date -Iseconds)'","timestamp":'$(date +%s)'}' > /usr/local/apache2/htdocs/healthz.json && \ - echo "Created default healthz.json"; \ - else \ - echo "healthz.json found in build output"; \ - fi -# Verify 200.html exists (required for SPA routing) -RUN if [ ! 
-f /usr/local/apache2/htdocs/200.html ]; then \ - echo "ERROR: 200.html not found! SPA routing will not work." && exit 1; \ - else \ - echo "200.html found - SPA routing configured correctly"; \ - fi -COPY httpd.conf.template /usr/local/apache2/conf/httpd.conf.template +# Production stage - Node.js runtime +FROM node:20-alpine +WORKDIR /app + +# Copy package files and install production dependencies only +COPY package*.json ./ +RUN npm ci --only=production && npm cache clean --force + +# Copy built application from builder stage +COPY --from=builder /app/build ./build + +# Copy entrypoint script COPY docker-entrypoint.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/docker-entrypoint.sh + ARG PORT=9876 ENV PORT=${PORT} +ENV NODE_ENV=production +ENV HOST=0.0.0.0 + EXPOSE ${PORT} ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index 41a0908..9cfffbe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,4 +12,49 @@ services: - "9876:9876" environment: - PORT=9876 + - NODE_ENV=production + - PIPER_TTS_HOST=piper-tts + - PIPER_TTS_PORT=10200 restart: unless-stopped + depends_on: + piper-tts: + condition: service_healthy + networks: + - aitherboard-network + + piper-tts: + container_name: piper-tts + # Using official Wyoming Piper from source + # To build locally: git clone https://github.com/rhasspy/wyoming-piper.git wyoming-piper + # Then change build.context to ./wyoming-piper + build: + context: ./wyoming-piper + dockerfile: Dockerfile + # Port 10200 is only used internally by aitherboard container + # No need to expose it to the host + expose: + - "10200" + command: + # --voice is required at startup but can be overridden in synthesize messages + # This is the default/fallback voice + - --voice + - en_US-lessac-medium + - --uri + - tcp://0.0.0.0:10200 + - --data-dir + - /data + volumes: + - ./piper-data:/data + restart: unless-stopped + networks: + - aitherboard-network + healthcheck: + test: 
["CMD-SHELL", "timeout 3 bash -c 'echo > /dev/tcp/localhost/10200' || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + +networks: + aitherboard-network: + driver: bridge \ No newline at end of file diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 7a049f0..d2c6c00 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -6,40 +6,35 @@ if ! [ "$PORT" -ge 1 ] 2>/dev/null || ! [ "$PORT" -le 65535 ] 2>/dev/null; then PORT=9876 fi -echo "Generating Apache configuration with PORT=$PORT" -envsubst '${PORT}' < /usr/local/apache2/conf/httpd.conf.template > /usr/local/apache2/conf/httpd.conf - -echo "Testing Apache configuration..." -if ! httpd -t; then - echo "ERROR: Apache configuration test failed!" - echo "Configuration file contents:" - cat /usr/local/apache2/conf/httpd.conf +echo "Starting SvelteKit Node.js server on port $PORT..." + +# Check if build directory exists +if [ ! -d "/app/build" ]; then + echo "ERROR: Build directory not found at /app/build" + exit 1 +fi + +# Check if server entry point exists +if [ ! -f "/app/build/index.js" ] && [ ! -f "/app/build/server.js" ]; then + echo "ERROR: Server entry point not found. Expected /app/build/index.js or /app/build/server.js" + echo "Build directory contents:" + ls -la /app/build/ exit 1 fi -echo "Checking htdocs directory..." -ls -la /usr/local/apache2/htdocs/ | head -20 -echo "File count: $(find /usr/local/apache2/htdocs -type f | wc -l)" - -echo "Checking if port $PORT is available..." -# Use ss (socket statistics) which is available in Alpine, fallback to netstat if available -if command -v ss >/dev/null 2>&1; then - if ! ss -tuln 2>/dev/null | grep -q ":$PORT "; then - echo "Port $PORT appears to be available" - else - echo "WARNING: Port $PORT might be in use" - fi -elif command -v netstat >/dev/null 2>&1; then - if ! 
netstat -tuln 2>/dev/null | grep -q ":$PORT "; then - echo "Port $PORT appears to be available" - else - echo "WARNING: Port $PORT might be in use" - fi -else - echo "Port check skipped (ss/netstat not available)" +# Determine server entry point +SERVER_FILE="/app/build/index.js" +if [ ! -f "$SERVER_FILE" ]; then + SERVER_FILE="/app/build/server.js" fi -echo "Starting Apache on port $PORT..." -echo "Apache will run with PID: $$" -# Run httpd in foreground with error logging, redirect stderr to stdout -exec httpd -D FOREGROUND -e info 2>&1 +echo "Using server file: $SERVER_FILE" +echo "Working directory: $(pwd)" +echo "Node version: $(node --version)" +echo "NPM version: $(npm --version)" + +# Set PORT environment variable for the Node.js server +export PORT + +# Start the Node.js server +exec node "$SERVER_FILE" diff --git a/download-voices.sh b/download-voices.sh new file mode 100755 index 0000000..6201e9e --- /dev/null +++ b/download-voices.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Script to download Piper TTS voices and place them in the correct structure +# Voices are downloaded from Hugging Face: https://huggingface.co/rhasspy/piper-voices + +# Don't exit on error - we want to continue downloading other voices even if one fails +set +e + +PIPER_DATA_DIR="./piper-data" +VOICES_BASE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main" + +# Colors for output +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +echo -e "${GREEN}Piper TTS Voice Downloader${NC}" +echo "================================" +echo "" + +# Create piper-data directory if it doesn't exist +mkdir -p "$PIPER_DATA_DIR" + +# Function to download a voice +download_voice() { + local lang=$1 + local locale=$2 + local voice=$3 + local quality=$4 + + local voice_name="${locale}-${voice}-${quality}" + local voice_dir="${PIPER_DATA_DIR}/voices/${locale}/${voice}/${quality}" + local onnx_file="${voice_dir}/${voice_name}.onnx" + local json_file="${voice_dir}/${voice_name}.onnx.json" 
+ + # Create directory structure + mkdir -p "$voice_dir" + + # Check if voice already exists + if [ -f "$onnx_file" ] && [ -f "$json_file" ]; then + echo -e "${YELLOW}Voice ${voice_name} already exists, skipping...${NC}" + return 0 + fi + + echo "Downloading ${voice_name}..." + + # Download .onnx.json file + local json_url="${VOICES_BASE_URL}/${lang}/${locale}/${voice}/${quality}/${voice_name}.onnx.json" + local curl_output=$(curl -L -f -w "\n%{http_code}" -o "$json_file" "$json_url" 2>&1) + local http_code=$(echo "$curl_output" | tail -n1) + local curl_error=$(echo "$curl_output" | head -n-1) + + if [ "$http_code" = "200" ] && [ -f "$json_file" ] && [ -s "$json_file" ]; then + echo " ✓ Downloaded ${voice_name}.onnx.json" + else + echo " ✗ Failed to download ${voice_name}.onnx.json" + echo " URL: ${json_url}" + echo " HTTP Code: ${http_code:-unknown}" + if [ -n "$curl_error" ]; then + echo " Error: $(echo "$curl_error" | head -n1)" + fi + echo " This quality level may not be available for this voice." + rm -f "$json_file" + return 1 + fi + + # Download .onnx file + local onnx_url="${VOICES_BASE_URL}/${lang}/${locale}/${voice}/${quality}/${voice_name}.onnx" + curl_output=$(curl -L -f -w "\n%{http_code}" -o "$onnx_file" "$onnx_url" 2>&1) + http_code=$(echo "$curl_output" | tail -n1) + curl_error=$(echo "$curl_output" | head -n-1) + + if [ "$http_code" = "200" ] && [ -f "$onnx_file" ] && [ -s "$onnx_file" ]; then + local file_size=$(stat -c%s "$onnx_file" 2>/dev/null || echo "0") + local file_size_mb=$(echo "scale=2; $file_size / 1024 / 1024" | bc 2>/dev/null || echo "?") + echo " ✓ Downloaded ${voice_name}.onnx (${file_size_mb} MB)" + else + echo " ✗ Failed to download ${voice_name}.onnx" + echo " URL: ${onnx_url}" + echo " HTTP Code: ${http_code:-unknown}" + if [ -n "$curl_error" ]; then + echo " Error: $(echo "$curl_error" | head -n1)" + fi + echo " This quality level may not be available for this voice." 
+ rm -f "$onnx_file" "$json_file" + return 1 + fi + + echo -e "${GREEN} ✓ Successfully downloaded ${voice_name}${NC}" + return 0 +} + +# List of voices to download (based on the language detection function) +# Format: language_code locale voice quality +VOICES=( + # English (US) - all quality levels + "en en_US lessac low" + "en en_US lessac medium" + "en en_US lessac high" + # English (GB) + "en en_GB alba medium" + + # German + "de de_DE thorsten medium" + "de de_DE thorsten low" + + # French + "fr fr_FR siwis medium" + "fr fr_FR siwis low" + + # Spanish + "es es_ES davefx medium" + # Note: es_ES-davefx-low doesn't exist + + # Italian - riccardo doesn't exist, removing + # "it it_IT riccardo medium" - not available + # "it it_IT riccardo low" - not available + + # Russian + "ru ru_RU ruslan medium" + # Note: ru_RU-ruslan-low doesn't exist + + # Chinese + "zh zh_CN huayan medium" + + # Arabic - hafez doesn't exist, removing + # "ar ar_SA hafez medium" - not available + + # Polish + "pl pl_PL darkman medium" + + # Portuguese - edresson doesn't exist, removing + # "pt pt_BR edresson medium" - not available + + # Dutch + "nl nl_NL mls medium" + + # Czech + "cs cs_CZ jirka medium" + + # Turkish + "tr tr_TR dfki medium" + + # Japanese - nanami doesn't exist, removing + # "ja ja_JP nanami medium" - not available + + # Korean - kyungha doesn't exist, removing + # "ko ko_KR kyungha medium" - not available +) + +# Check if specific voices are requested +if [ $# -gt 0 ]; then + VOICES=("$@") +fi + +echo "Downloading ${#VOICES[@]} voice(s)..." 
+echo "" + +SUCCESS=0 +FAILED=0 + +for voice_spec in "${VOICES[@]}"; do + # Parse voice specification + read -r lang locale voice_name quality <<< "$voice_spec" + + # Validate voice specification + if [ -z "$lang" ] || [ -z "$locale" ] || [ -z "$voice_name" ] || [ -z "$quality" ]; then + echo -e "${YELLOW}⚠ Skipping invalid voice specification: '${voice_spec}'${NC}" + ((FAILED++)) + echo "" + continue + fi + + if download_voice "$lang" "$locale" "$voice_name" "$quality"; then + ((SUCCESS++)) + else + ((FAILED++)) + fi + echo "" +done + +echo "================================" +echo -e "${GREEN}Download complete!${NC}" +echo "Successfully downloaded: $SUCCESS" +if [ $FAILED -gt 0 ]; then + echo -e "${YELLOW}Failed: $FAILED${NC}" +fi +echo "" +echo "Voices are now in: $PIPER_DATA_DIR" +echo "This directory is mounted into the Docker container at /data" +echo "" +echo "To use these voices, restart your Docker containers:" +echo " docker-compose down" +echo " docker-compose up --build" diff --git a/package-lock.json b/package-lock.json index eda012c..79fef9b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,7 @@ "@sveltejs/kit": "^2.0.0", "@sveltejs/vite-plugin-svelte": "^4.0.0-next.6", "@tanstack/svelte-virtual": "^3.0.0", + "@types/ws": "^8.18.1", "asciidoctor": "3.0.x", "blurhash": "^2.0.5", "codemirror-asciidoc": "^2.0.1", @@ -33,9 +34,11 @@ "marked": "^11.1.1", "nostr-tools": "^2.22.1", "svelte": "^5.0.0", - "unicode-emoji-json": "^0.8.0" + "unicode-emoji-json": "^0.8.0", + "ws": "^8.19.0" }, "devDependencies": { + "@sveltejs/adapter-node": "^5.5.2", "@sveltejs/adapter-static": "^3.0.0", "@types/dompurify": "^3.0.5", "@types/marked": "^6.0.0", @@ -2799,6 +2802,95 @@ "integrity": "sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==", "license": "MIT" }, + "node_modules/@rollup/plugin-commonjs": { + "version": "28.0.9", + "resolved": 
"https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-28.0.9.tgz", + "integrity": "sha512-PIR4/OHZ79romx0BVVll/PkwWpJ7e5lsqFa3gFfcrFPWwLXLV39JVUzQV9RKjWerE7B845Hqjj9VYlQeieZ2dA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.0.1", + "commondir": "^1.0.1", + "estree-walker": "^2.0.2", + "fdir": "^6.2.0", + "is-reference": "1.2.1", + "magic-string": "^0.30.3", + "picomatch": "^4.0.2" + }, + "engines": { + "node": ">=16.0.0 || 14 >= 14.17" + }, + "peerDependencies": { + "rollup": "^2.68.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/is-reference": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-1.2.1.tgz", + "integrity": "sha512-U82MsXXiFIrjCK4otLT+o2NA2Cd2g5MLoOVXUZjIOhLurrRxpEXzI8O0KZHr3IjLvlAH1kTPYSuqer5T9ZVBKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "*" + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/picomatch": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/@rollup/plugin-json": { + "version": "6.1.0", + "resolved": 
"https://registry.npmjs.org/@rollup/plugin-json/-/plugin-json-6.1.0.tgz", + "integrity": "sha512-EGI2te5ENk1coGeADSIwZ7G2Q8CJS2sF120T7jLw4xFw9n7wIOXHo+kIYRAoVpJAN+kmqZSoO3Fp4JtoNF4ReA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.1.0" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, "node_modules/@rollup/plugin-node-resolve": { "version": "15.3.1", "resolved": "https://registry.npmjs.org/@rollup/plugin-node-resolve/-/plugin-node-resolve-15.3.1.tgz", @@ -3282,6 +3374,47 @@ "acorn": "^8.9.0" } }, + "node_modules/@sveltejs/adapter-node": { + "version": "5.5.2", + "resolved": "https://registry.npmjs.org/@sveltejs/adapter-node/-/adapter-node-5.5.2.tgz", + "integrity": "sha512-L15Djwpr7HrSAPj/Z8PYfc0pa9A1tllrr18phKI0WJHJeoWw45yinPf0IGgVTmakqx1B3JQ+C/OFl9ZwmxHU1Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rollup/plugin-commonjs": "^28.0.1", + "@rollup/plugin-json": "^6.1.0", + "@rollup/plugin-node-resolve": "^16.0.0", + "rollup": "^4.9.5" + }, + "peerDependencies": { + "@sveltejs/kit": "^2.4.0" + } + }, + "node_modules/@sveltejs/adapter-node/node_modules/@rollup/plugin-node-resolve": { + "version": "16.0.3", + "resolved": "https://registry.npmjs.org/@rollup/plugin-node-resolve/-/plugin-node-resolve-16.0.3.tgz", + "integrity": "sha512-lUYM3UBGuM93CnMPG1YocWu7X802BrNF3jW2zny5gQyLQgRFJhV1Sq0Zi74+dh/6NBx1DxFC4b4GXg9wUCG5Qg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.0.1", + "@types/resolve": "1.20.2", + "deepmerge": "^4.2.2", + "is-module": "^1.0.0", + "resolve": "^1.22.1" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^2.78.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, "node_modules/@sveltejs/adapter-static": { "version": "3.0.10", 
"resolved": "https://registry.npmjs.org/@sveltejs/adapter-static/-/adapter-static-3.0.10.tgz", @@ -3443,8 +3576,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.0.tgz", "integrity": "sha512-DZ8VwRFUNzuqJ5khrvwMXHmvPe+zGayJhr2CDNiKB1WBE1ST8Djl00D0IC4vvNmHMdj6DlbYRIaFE7WHjlDl5w==", "license": "MIT", - "optional": true, - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -3470,6 +3601,15 @@ "devOptional": true, "license": "MIT" }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-6.21.0.tgz", @@ -4365,6 +4505,13 @@ "node": ">=4.0.0" } }, + "node_modules/commondir": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/commondir/-/commondir-1.0.1.tgz", + "integrity": "sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg==", + "dev": true, + "license": "MIT" + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -8963,9 +9110,7 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "license": "MIT", - "optional": true, - "peer": true + "license": "MIT" }, "node_modules/unicode-canonical-property-names-ecmascript": { "version": "2.0.1", @@ -9867,6 +10012,27 @@ "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "license": "ISC" }, + "node_modules/ws": { + "version": "8.19.0", + 
"resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", + "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", diff --git a/package.json b/package.json index 4f5215f..dacef21 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "@sveltejs/kit": "^2.0.0", "@sveltejs/vite-plugin-svelte": "^4.0.0-next.6", "@tanstack/svelte-virtual": "^3.0.0", + "@types/ws": "^8.18.1", "asciidoctor": "3.0.x", "blurhash": "^2.0.5", "codemirror-asciidoc": "^2.0.1", @@ -47,9 +48,11 @@ "marked": "^11.1.1", "nostr-tools": "^2.22.1", "svelte": "^5.0.0", - "unicode-emoji-json": "^0.8.0" + "unicode-emoji-json": "^0.8.0", + "ws": "^8.19.0" }, "devDependencies": { + "@sveltejs/adapter-node": "^5.5.2", "@sveltejs/adapter-static": "^3.0.0", "@types/dompurify": "^3.0.5", "@types/marked": "^6.0.0", diff --git a/public/healthz.json b/public/healthz.json index 33e50a0..8fa95ea 100644 --- a/public/healthz.json +++ b/public/healthz.json @@ -2,7 +2,7 @@ "status": "ok", "service": "aitherboard", "version": "0.3.1", - "buildTime": "2026-02-12T17:56:54.795Z", + "buildTime": "2026-02-13T04:20:13.671Z", "gitCommit": "unknown", - "timestamp": 1770919014796 + "timestamp": 1770956413671 } \ No newline at end of file diff --git a/setup-wyoming-piper.sh b/setup-wyoming-piper.sh new file mode 100755 index 0000000..62a82ee --- /dev/null +++ b/setup-wyoming-piper.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Setup script to clone and prepare Wyoming Piper from official source + +set -e + +WYOMING_PIPER_DIR="./wyoming-piper" + +if [ -d "$WYOMING_PIPER_DIR" ]; then + echo "Wyoming Piper 
directory already exists. Updating..." + cd "$WYOMING_PIPER_DIR" + git pull + cd .. +else + echo "Cloning Wyoming Piper from official repository..." + git clone https://github.com/rhasspy/wyoming-piper.git "$WYOMING_PIPER_DIR" +fi + +echo "Wyoming Piper setup complete!" +echo "You can now run: docker-compose up --build" diff --git a/src/app.css b/src/app.css index fac8908..521ed23 100644 --- a/src/app.css +++ b/src/app.css @@ -157,7 +157,7 @@ h1 { } :global(.dark) h1 { - color: var(--fog-dark-text, #cbd5e1); + color: #f1f5f9; /* Brighter for better contrast in fog dark mode */ } h2 { @@ -170,7 +170,7 @@ h2 { } :global(.dark) h2 { - color: var(--fog-dark-text, #cbd5e1); + color: #e2e8f0; /* Brighter for better contrast in fog dark mode */ } h3 { @@ -183,7 +183,7 @@ h3 { } :global(.dark) h3 { - color: var(--fog-dark-text, #cbd5e1); + color: #cbd5e1; /* Slightly brighter than regular text for better contrast */ } h4, h5, h6 { @@ -191,6 +191,14 @@ h4, h5, h6 { line-height: 1.4; margin-bottom: 0.625rem; margin-top: 0; + font-weight: 600; + color: var(--fog-text, #475569); +} + +:global(.dark) h4, +:global(.dark) h5, +:global(.dark) h6 { + color: #cbd5e1; /* Same as h3 for consistency */ } /* Common main container */ diff --git a/src/lib/components/content/TTSControls.svelte b/src/lib/components/content/TTSControls.svelte index 857ebec..e4722ab 100644 --- a/src/lib/components/content/TTSControls.svelte +++ b/src/lib/components/content/TTSControls.svelte @@ -13,8 +13,11 @@ let { text, autoStart = false }: Props = $props(); const ttsService = getTTSService(); + // @ts-ignore - Svelte 5 rune let state = $state('idle'); + // @ts-ignore - Svelte 5 rune let voices = $state([]); + // @ts-ignore - Svelte 5 rune let selectedVoice = $state(null); let speed = $state(1.0); let volume = $state(1.0); @@ -22,6 +25,108 @@ let extractedText = $state(''); let available = $state(false); + // Score voice quality (higher = better) + function getVoiceQualityScore(voice: TTSVoice): number { + 
const name = voice.name.toLowerCase(); + let score = 0; + + // Google voices are usually best quality + if (name.includes('google')) score += 100; + // Neural voices are high quality + if (name.includes('neural')) score += 80; + // Premium voices + if (name.includes('premium')) score += 60; + // Avoid robotic voices + if (name.includes('sapi')) score -= 50; + if (name.includes('microsoft zira') || name.includes('microsoft david')) score -= 30; + // Prefer English voices + if (voice.lang.startsWith('en')) score += 20; + + return score; + } + + // Sort voices by quality + function sortVoicesByQuality(voicesList: TTSVoice[]): TTSVoice[] { + return [...voicesList].sort((a, b) => { + const scoreA = getVoiceQualityScore(a); + const scoreB = getVoiceQualityScore(b); + if (scoreB !== scoreA) { + return scoreB - scoreA; // Higher score first + } + // If same score, sort by name + return a.name.localeCompare(b.name); + }); + } + + // Check if voice is recommended + function isRecommendedVoice(voice: TTSVoice): boolean { + const score = getVoiceQualityScore(voice); + return score >= 80; // Google or neural voices + } + + // Simple language detection based on character patterns + function detectLanguage(text: string): string { + if (!text || text.length === 0) return 'en'; + + const sample = text.substring(0, Math.min(500, text.length)); + + // German: ä, ö, ü, ß + const germanChars = (sample.match(/[äöüßÄÖÜ]/g) || []).length; + // French: é, è, ê, ç, à, etc. 
+ const frenchChars = (sample.match(/[éèêëàâäçôùûüÉÈÊËÀÂÄÇÔÙÛÜ]/g) || []).length; + // Spanish: ñ, á, é, í, ó, ú, ¿, ¡ + const spanishChars = (sample.match(/[ñáéíóúüÑÁÉÍÓÚÜ¿¡]/g) || []).length; + // Italian: à, è, é, ì, ò, ù + const italianChars = (sample.match(/[àèéìòùÀÈÉÌÒÙ]/g) || []).length; + // Russian/Cyrillic + const cyrillicChars = (sample.match(/[а-яёА-ЯЁ]/g) || []).length; + // Chinese/Japanese/Korean (CJK) + const cjkChars = (sample.match(/[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) || []).length; + // Arabic + const arabicChars = (sample.match(/[\u0600-\u06ff]/g) || []).length; + + const total = sample.length; + const germanRatio = germanChars / total; + const frenchRatio = frenchChars / total; + const spanishRatio = spanishChars / total; + const italianRatio = italianChars / total; + const cyrillicRatio = cyrillicChars / total; + const cjkRatio = cjkChars / total; + const arabicRatio = arabicChars / total; + + if (cyrillicRatio > 0.1) return 'ru'; + if (cjkRatio > 0.1) return 'zh'; + if (arabicRatio > 0.1) return 'ar'; + if (germanRatio > 0.02) return 'de'; + if (frenchRatio > 0.02) return 'fr'; + if (spanishRatio > 0.02) return 'es'; + if (italianRatio > 0.02) return 'it'; + + return 'en'; + } + + // Map language code to voice ID + function getVoiceIdForLanguage(lang: string): string { + const voiceMap: Record = { + 'en': 'en_US-lessac-medium', + 'de': 'de_DE-thorsten-medium', + 'fr': 'fr_FR-siwis-medium', + 'es': 'es_ES-davefx-medium', + // 'it': 'it_IT-riccardo-medium', // Italian - not available + 'ru': 'ru_RU-ruslan-medium', + 'zh': 'zh_CN-huayan-medium', + // 'ar': 'ar_SA-hafez-medium', // Arabic - not available + 'pl': 'pl_PL-darkman-medium', + // 'pt': 'pt_BR-edresson-medium', // Portuguese - not available + 'nl': 'nl_NL-mls-medium', + 'cs': 'cs_CZ-jirka-medium', + 'tr': 'tr_TR-dfki-medium', + // 'ja': 'ja_JP-nanami-medium', // Japanese - not available + // 'ko': 'ko_KR-kyungha-medium', // Korean - not available + }; + return 
voiceMap[lang] || voiceMap['en']; + } + onMount(async () => { // Check availability available = await ttsService.isAvailable(); @@ -32,12 +137,9 @@ // Load voices try { const loadedVoices = await ttsService.getVoices(); - voices = loadedVoices; - // Select default voice (prefer English) - const defaultVoice = loadedVoices.find(v => v.lang.startsWith('en')) || loadedVoices[0]; - if (defaultVoice) { - selectedVoice = defaultVoice; - } + // Sort voices by quality (best first) + const sortedVoices = sortVoicesByQuality(loadedVoices); + voices = sortedVoices; } catch (error) { console.error('Failed to load voices:', error); } @@ -49,6 +151,16 @@ extractedText = extractTextForTTS(text); } + // Auto-detect language and preset voice + if (extractedText && voices.length > 0) { + const detectedLang = detectLanguage(extractedText); + const voiceId = getVoiceIdForLanguage(detectedLang); + const presetVoice = voices.find((v: TTSVoice) => v.id === voiceId) || voices[0]; + selectedVoice = presetVoice; + } else if (voices.length > 0) { + selectedVoice = voices[0]; + } + // Set up callbacks ttsService.setCallbacks({ onStateChange: (newState) => { @@ -79,14 +191,29 @@ async function handlePlay() { if (!extractedText) return; - if (state === 'paused') { - await ttsService.resume(); - } else { - await ttsService.speak(extractedText, { - voice: selectedVoice || undefined, - speed, - volume - }); + try { + if (state === 'paused') { + await ttsService.resume(); + } else { + // Ensure we have a valid voice selected + let voiceToUse = selectedVoice; + if (!voiceToUse && voices.length > 0) { + voiceToUse = voices[0]; + selectedVoice = voiceToUse; + } + + console.log('TTS: Using voice:', voiceToUse?.id || 'default'); + + await ttsService.speak(extractedText, { + voice: voiceToUse || undefined, + speed, + volume + }); + } + } catch (error) { + console.error('TTS error:', error); + // Error is already handled by the service's onError callback + // The service will automatically fall back to 
Web Speech API for quota errors } } @@ -122,13 +249,21 @@ function handleVoiceChange(e: Event) { const target = e.target as HTMLSelectElement; const voiceId = target.value; - selectedVoice = voices.find(v => v.id === voiceId) || null; + const voice = voices.find((v: TTSVoice) => v.id === voiceId); + if (voice) { + selectedVoice = voice; + } // If playing, restart with new voice if (state === 'playing' || state === 'paused') { handleStop(); setTimeout(() => handlePlay(), 100); } } + + // Check if controls should be disabled (when playing/paused/synthesizing) + // @ts-ignore - Svelte 5 rune + let controlsDisabled = $derived(state === 'playing' || state === 'paused' || state === 'synthesizing'); + {#if available} @@ -136,7 +271,11 @@
- {#if state === 'playing'} + {#if state === 'synthesizing'} + + {:else if state === 'playing'} @@ -145,18 +284,36 @@ ▶ Resume {:else} - {/if} - {#if state === 'playing' || state === 'paused'} + {#if state === 'playing' || state === 'paused' || state === 'synthesizing'} {/if}
+ + {#if voices.length > 0} +
+ + +
+ {/if} +
@@ -168,6 +325,7 @@ step="0.1" value={speed} oninput={handleSpeedChange} + disabled={controlsDisabled} class="tts-slider" /> {speed.toFixed(1)}x @@ -184,27 +342,12 @@ step="0.1" value={volume} oninput={handleVolumeChange} + disabled={controlsDisabled} class="tts-slider" /> {Math.round(volume * 100)}%
- - {#if voices.length > 0} -
- - -
- {/if}
@@ -313,18 +456,38 @@ background: var(--fog-surface, #f8fafc); border: 1px solid var(--fog-border, #e5e7eb); border-radius: 0.25rem; - padding: 0.25rem 0.5rem; + padding: 0.5rem; font-size: 0.875rem; color: var(--fog-text, #475569); + cursor: pointer; min-width: 200px; } + .tts-select:hover:not(:disabled) { + background: var(--fog-highlight, #f3f4f6); + } + + .tts-select:disabled { + opacity: 0.5; + cursor: not-allowed; + } + :global(.dark) .tts-select { background: var(--fog-dark-surface, #1e293b); border-color: var(--fog-dark-border, #475569); color: var(--fog-dark-text, #cbd5e1); } + :global(.dark) .tts-select:hover:not(:disabled) { + background: var(--fog-dark-highlight, #374151); + } + + .tts-slider:disabled { + opacity: 0.5; + cursor: not-allowed; + } + + .tts-progress { margin-top: 1rem; height: 4px; diff --git a/src/lib/services/tts/tts-service.ts b/src/lib/services/tts/tts-service.ts index f3282b9..08e103a 100644 --- a/src/lib/services/tts/tts-service.ts +++ b/src/lib/services/tts/tts-service.ts @@ -5,6 +5,95 @@ import type { TTSProvider, TTSProviderInterface, TTSOptions, TTSState, TTSVoice, TTSEventCallbacks } from './types.js'; +/** + * Base class for audio-based TTS providers + */ +abstract class AudioProvider implements TTSProviderInterface { + protected state: TTSState = 'idle'; + protected callbacks: TTSEventCallbacks = {}; + protected audioElement: HTMLAudioElement | null = null; + + abstract readonly name: string; + abstract readonly type: TTSProvider; + + protected updateState(newState: TTSState): void { + if (this.state !== newState) { + this.state = newState; + this.callbacks.onStateChange?.(newState); + } + } + + protected setupAudioElement(audioUrl: string, volume: number = 1.0): void { + this.audioElement = new Audio(audioUrl); + this.audioElement.volume = volume; + + this.audioElement.onplay = () => this.updateState('playing'); + this.audioElement.onpause = () => this.updateState('paused'); + this.audioElement.onended = () => { + 
this.updateState('idle'); + URL.revokeObjectURL(audioUrl); + this.audioElement = null; + this.callbacks.onEnd?.(); + }; + this.audioElement.onerror = () => { + this.updateState('error'); + this.callbacks.onError?.(new Error('Audio playback failed')); + }; + this.audioElement.ontimeupdate = () => { + if (this.audioElement?.duration) { + this.callbacks.onProgress?.(this.audioElement.currentTime / this.audioElement.duration); + } + }; + } + + async pause(): Promise { + if (this.audioElement && this.state === 'playing') { + this.audioElement.pause(); + } + } + + async resume(): Promise { + if (this.audioElement && this.state === 'paused') { + await this.audioElement.play(); + } + } + + async stop(): Promise { + if (this.audioElement) { + this.audioElement.pause(); + this.audioElement.currentTime = 0; + this.audioElement = null; + } + this.updateState('idle'); + } + + getState(): TTSState { + return this.state; + } + + async getProgress(): Promise { + if (this.audioElement?.duration) { + return this.audioElement.currentTime / this.audioElement.duration; + } + return 0; + } + + async setProgress(position: number): Promise { + if (this.audioElement?.duration) { + this.audioElement.currentTime = position * this.audioElement.duration; + } + } + + setCallbacks(callbacks: TTSEventCallbacks): void { + this.callbacks = { ...this.callbacks, ...callbacks }; + } + + abstract isAvailable(): Promise; + abstract getVoices(): Promise; + abstract speak(text: string, options?: TTSOptions): Promise; + abstract destroy(): void; +} + /** * Web Speech API TTS Provider */ @@ -16,27 +105,27 @@ class WebSpeechProvider implements TTSProviderInterface { private utterance: SpeechSynthesisUtterance | null = null; private state: TTSState = 'idle'; private callbacks: TTSEventCallbacks = {}; - private currentText = ''; - private currentOptions: TTSOptions = {}; - private voices: TTSVoice[] = []; - private voiceLoaded = false; - + constructor() { if (typeof window === 'undefined' || 
!('speechSynthesis' in window)) { throw new Error('Web Speech API not available'); } this.synth = window.speechSynthesis; + this.synth.onvoiceschanged = () => this.loadVoices(); this.loadVoices(); - - // Reload voices when they become available (some browsers load them asynchronously) - this.synth.onvoiceschanged = () => { - this.loadVoices(); - }; } - + private loadVoices(): void { - const browserVoices = this.synth.getVoices(); - this.voices = browserVoices.map(voice => ({ + // Voices loaded asynchronously + } + + async isAvailable(): Promise { + return typeof window !== 'undefined' && 'speechSynthesis' in window; + } + + async getVoices(): Promise { + const voices = this.synth.getVoices(); + return voices.map(voice => ({ id: voice.voiceURI, name: voice.name, lang: voice.lang, @@ -44,127 +133,355 @@ class WebSpeechProvider implements TTSProviderInterface { voice.name.toLowerCase().includes('male') ? 'male' : 'neutral', provider: 'webspeech' })); - this.voiceLoaded = true; - } - - async isAvailable(): Promise { - return typeof window !== 'undefined' && 'speechSynthesis' in window; } - - async getVoices(): Promise { - if (!this.voiceLoaded) { - await new Promise(resolve => setTimeout(resolve, 100)); - this.loadVoices(); - } - return this.voices; + + private getBestVoice(lang: string = 'en'): SpeechSynthesisVoice | null { + const voices = this.synth.getVoices(); + const langPrefix = lang.split('-')[0]; + const langVoices = voices.filter(v => v.lang.startsWith(langPrefix)); + + if (langVoices.length === 0) return voices[0] || null; + + return langVoices.find(v => + v.name.toLowerCase().includes('google') || + v.voiceURI.toLowerCase().includes('google') + ) || langVoices.find(v => v.name.toLowerCase().includes('neural')) || langVoices[0]; } - + async speak(text: string, options?: TTSOptions): Promise { - // Stop any current speech this.stop(); - - if (!text.trim()) { - return; - } - - this.currentText = text; - this.currentOptions = options || {}; - - // Create 
utterance + if (!text.trim()) return; + this.utterance = new SpeechSynthesisUtterance(text); - // Set voice - if (options?.voice && options.voice.provider === 'webspeech') { - const browserVoice = this.synth.getVoices().find(v => v.voiceURI === options.voice!.id); - if (browserVoice) { - this.utterance.voice = browserVoice; - } + if (options?.voice?.provider === 'webspeech') { + const voice = this.synth.getVoices().find(v => v.voiceURI === options.voice!.id); + if (voice) this.utterance.voice = voice; + } else { + const bestVoice = this.getBestVoice(); + if (bestVoice) this.utterance.voice = bestVoice; } - - // Set options + this.utterance.rate = options?.speed ?? 1.0; this.utterance.pitch = options?.pitch ?? 1.0; this.utterance.volume = options?.volume ?? 1.0; - - // Set up event handlers - this.utterance.onstart = () => { - this.state = 'playing'; - this.callbacks.onStateChange?.(this.state); - }; - + if (this.utterance.voice) { + this.utterance.lang = this.utterance.voice.lang; + } + + this.utterance.onstart = () => this.updateState('playing'); this.utterance.onend = () => { - this.state = 'idle'; + this.updateState('idle'); this.utterance = null; - this.callbacks.onStateChange?.(this.state); this.callbacks.onEnd?.(); }; - this.utterance.onerror = (event) => { - this.state = 'error'; - const error = new Error(`Speech synthesis error: ${event.error}`); - this.callbacks.onError?.(error); - this.callbacks.onStateChange?.(this.state); + this.updateState('error'); + this.callbacks.onError?.(new Error(`Speech synthesis error: ${event.error}`)); }; - - // Speak + this.synth.speak(this.utterance); - this.state = 'playing'; - this.callbacks.onStateChange?.(this.state); + this.updateState('playing'); } - + async pause(): Promise { if (this.state === 'playing' && this.synth.speaking) { this.synth.pause(); - this.state = 'paused'; - this.callbacks.onStateChange?.(this.state); + this.updateState('paused'); } } - + async resume(): Promise { if (this.state === 'paused' && 
this.synth.paused) { this.synth.resume(); - this.state = 'playing'; - this.callbacks.onStateChange?.(this.state); + this.updateState('playing'); } } - + async stop(): Promise { if (this.synth.speaking || this.synth.paused) { this.synth.cancel(); } this.utterance = null; - this.state = 'idle'; - this.callbacks.onStateChange?.(this.state); + this.updateState('idle'); } - + getState(): TTSState { return this.state; } - + async getProgress(): Promise { - // Web Speech API doesn't provide progress, so we estimate based on time - // This is a simplified implementation - return 0; + return 0; // Web Speech API doesn't provide progress } - + async setProgress(position: number): Promise { - // Web Speech API doesn't support seeking - // Would need to stop and restart at new position - if (this.currentText && position >= 0 && position <= 1) { - const charIndex = Math.floor(this.currentText.length * position); - const newText = this.currentText.substring(charIndex); - await this.stop(); - await this.speak(newText, this.currentOptions); + // Not supported - would need to stop and restart + } + + destroy(): void { + this.stop(); + this.callbacks = {}; + } + + setCallbacks(callbacks: TTSEventCallbacks): void { + this.callbacks = { ...this.callbacks, ...callbacks }; + } + + private updateState(newState: TTSState): void { + if (this.state !== newState) { + this.state = newState; + this.callbacks.onStateChange?.(newState); } } +} + +/** + * OpenAI TTS Provider + */ +class OpenAIProvider extends AudioProvider { + readonly name = 'OpenAI TTS'; + readonly type: TTSProvider = 'openai'; + private apiKey: string | null = null; + + constructor(apiKey?: string) { + super(); + this.apiKey = apiKey || null; + } + + setApiKey(apiKey: string): void { + this.apiKey = apiKey; + } + + async isAvailable(): Promise { + return this.apiKey !== null && typeof window !== 'undefined'; + } + + async getVoices(): Promise { + return [ + { id: 'alloy', name: 'Alloy', lang: 'en', provider: 'openai' }, + { 
id: 'echo', name: 'Echo', lang: 'en', provider: 'openai' }, + { id: 'fable', name: 'Fable', lang: 'en', provider: 'openai' }, + { id: 'onyx', name: 'Onyx', lang: 'en', provider: 'openai' }, + { id: 'nova', name: 'Nova', lang: 'en', provider: 'openai' }, + { id: 'shimmer', name: 'Shimmer', lang: 'en', provider: 'openai' } + ]; + } + + async speak(text: string, options?: TTSOptions): Promise { + if (!this.apiKey) { + throw new Error('OpenAI API key not set'); + } + + this.stop(); + if (!text.trim()) return; + + const voice = options?.voice || (await this.getVoices())[0]; + const speed = Math.max(0.25, Math.min(4.0, options?.speed ?? 1.0)); + + try { + this.updateState('synthesizing'); + + const response = await fetch('https://api.openai.com/v1/audio/speech', { + method: 'POST', + headers: { + 'Authorization': `Bearer ${this.apiKey}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + model: 'tts-1', + input: text, + voice: voice.id, + speed + }) + }); + + if (!response.ok) { + const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } })); + const errorMessage = error.error?.message || response.statusText; + + if (response.status === 429 || errorMessage.toLowerCase().includes('quota')) { + const quotaError = new Error(`OpenAI TTS quota exceeded: ${errorMessage}`); + (quotaError as any).isQuotaError = true; + throw quotaError; + } + + throw new Error(`OpenAI TTS error: ${errorMessage}`); + } + + const audioBlob = await response.blob(); + const audioUrl = URL.createObjectURL(audioBlob); + + this.setupAudioElement(audioUrl, options?.volume ?? 1.0); + await this.audioElement!.play(); + this.updateState('playing'); + } catch (error) { + this.updateState('error'); + const err = error instanceof Error ? 
error : new Error('Failed to speak text'); + this.callbacks.onError?.(err); + throw err; + } + } + destroy(): void { this.stop(); this.callbacks = {}; + this.apiKey = null; } +} + +/** + * Piper TTS Provider + */ +class PiperProvider extends AudioProvider { + readonly name = 'Piper TTS'; + readonly type: TTSProvider = 'piper'; - setCallbacks(callbacks: TTSEventCallbacks): void { - this.callbacks = { ...this.callbacks, ...callbacks }; + private serverUrl: string | null = null; + private abortController: AbortController | null = null; + private timeoutId: ReturnType | null = null; + + private readonly defaultVoices: TTSVoice[] = [ + // English (US) - all quality levels + { id: 'en_US-lessac-low', name: 'English (US) - Lessac Low', lang: 'en-US', provider: 'piper' }, + { id: 'en_US-lessac-medium', name: 'English (US) - Lessac Medium', lang: 'en-US', provider: 'piper' }, + { id: 'en_US-lessac-high', name: 'English (US) - Lessac High', lang: 'en-US', provider: 'piper' }, + // English (GB) + { id: 'en_GB-alba-medium', name: 'English (GB) - Alba Medium', lang: 'en-GB', provider: 'piper' }, + // German + { id: 'de_DE-thorsten-low', name: 'German - Thorsten Low', lang: 'de-DE', provider: 'piper' }, + { id: 'de_DE-thorsten-medium', name: 'German - Thorsten Medium', lang: 'de-DE', provider: 'piper' }, + // French + { id: 'fr_FR-siwis-low', name: 'French - Siwis Low', lang: 'fr-FR', provider: 'piper' }, + { id: 'fr_FR-siwis-medium', name: 'French - Siwis Medium', lang: 'fr-FR', provider: 'piper' }, + // Spanish + { id: 'es_ES-davefx-medium', name: 'Spanish - Davefx Medium', lang: 'es-ES', provider: 'piper' }, + // Italian - riccardo voices not available + // Russian + { id: 'ru_RU-ruslan-medium', name: 'Russian - Ruslan Medium', lang: 'ru-RU', provider: 'piper' }, + // Chinese + { id: 'zh_CN-huayan-medium', name: 'Chinese - Huayan Medium', lang: 'zh-CN', provider: 'piper' }, + // Arabic - hafez voice not available + // Polish + { id: 'pl_PL-darkman-medium', name: 'Polish - 
Darkman Medium', lang: 'pl-PL', provider: 'piper' }, + // Portuguese - edresson voice not available + // Dutch + { id: 'nl_NL-mls-medium', name: 'Dutch - MLS Medium', lang: 'nl-NL', provider: 'piper' }, + // Czech + { id: 'cs_CZ-jirka-medium', name: 'Czech - Jirka Medium', lang: 'cs-CZ', provider: 'piper' }, + // Turkish + { id: 'tr_TR-dfki-medium', name: 'Turkish - DFKI Medium', lang: 'tr-TR', provider: 'piper' }, + // Japanese - nanami voice not available + // Korean - kyungha voice not available + ]; + + constructor(serverUrl?: string) { + super(); + this.serverUrl = serverUrl || null; + } + + async isAvailable(): Promise { + if (typeof window === 'undefined') return false; + return this.serverUrl !== null || typeof Worker !== 'undefined'; + } + + async getVoices(): Promise { + return this.defaultVoices; + } + + async initialize(): Promise { + // Server-based synthesis doesn't need initialization + } + + async speak(text: string, options?: TTSOptions): Promise { + if (!text.trim()) return; + + this.stop(); + this.updateState('synthesizing'); + + if (!this.serverUrl) { + throw new Error('Piper TTS server URL not configured'); + } + + const voice = options?.voice || this.defaultVoices[0]; + const speed = Math.max(0.25, Math.min(2.0, options?.speed ?? 
1.0)); + + console.log('PiperProvider: Using voice:', voice.id, 'from options:', options?.voice?.id || 'default'); + + try { + // Create abort controller for cancellation + this.abortController = new AbortController(); + this.timeoutId = setTimeout(() => { + console.log('Piper TTS: Request timeout'); + this.abortController?.abort(); + }, 300000); // 5 minutes + + const response = await fetch('/api/piper-tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text, voice: voice.id, speed }), + signal: this.abortController.signal, + }); + + // Clear timeout on success + if (this.timeoutId) { + clearTimeout(this.timeoutId); + this.timeoutId = null; + } + + if (!response.ok) { + const errorText = await response.text().catch(() => response.statusText); + throw new Error(`Piper TTS server error: ${response.status} ${errorText}`); + } + + const audioBlob = await response.blob(); + + if (audioBlob.size === 0) { + throw new Error('Received empty audio blob from Piper TTS server'); + } + + const audioUrl = URL.createObjectURL(audioBlob); + this.setupAudioElement(audioUrl, options?.volume ?? 1.0); + await this.audioElement!.play(); + this.updateState('playing'); + } catch (error) { + // Clear abort controller and timeout + this.abortController = null; + if (this.timeoutId) { + clearTimeout(this.timeoutId); + this.timeoutId = null; + } + + if (error instanceof Error && error.name === 'AbortError') { + console.log('Piper TTS: Request cancelled'); + this.updateState('idle'); + return; // Don't throw on cancellation + } + + this.updateState('error'); + const err = error instanceof Error ? 
error : new Error('Failed to speak text'); + this.callbacks.onError?.(err); + throw err; + } + } + + async stop(): Promise { + // Abort ongoing fetch request + if (this.abortController) { + console.log('Piper TTS: Aborting request'); + this.abortController.abort(); + this.abortController = null; + } + if (this.timeoutId) { + clearTimeout(this.timeoutId); + this.timeoutId = null; + } + await super.stop(); + } + + destroy(): void { + this.stop(); + this.callbacks = {}; } } @@ -176,146 +493,143 @@ export class TTSService { private provider: TTSProviderInterface | null = null; private providerType: TTSProvider = 'webspeech'; private callbacks: TTSEventCallbacks = {}; - - /** - * Initialize TTS service with a provider - */ + async initialize(providerType: TTSProvider = 'webspeech'): Promise { - // Cleanup existing provider if (this.provider) { this.provider.destroy(); } - + this.providerType = providerType; - - // Create provider + if (providerType === 'webspeech') { - try { - this.provider = new WebSpeechProvider(); - const available = await this.provider.isAvailable(); - if (!available) { - throw new Error('Web Speech API not available'); - } - if (this.provider.setCallbacks) { - this.provider.setCallbacks(this.callbacks); - } - } catch (error) { - console.error('Failed to initialize Web Speech API:', error); - throw error; - } + this.provider = new WebSpeechProvider(); } else if (providerType === 'openai') { - // TODO: Implement OpenAI provider - throw new Error('OpenAI TTS provider not yet implemented'); - } else if (providerType === 'elevenlabs') { - // TODO: Implement ElevenLabs provider - throw new Error('ElevenLabs TTS provider not yet implemented'); + const { loadEncryptedApiKey } = await import('../security/api-key-storage.js'); + const password = prompt('Enter your password to access OpenAI API key:'); + if (!password) { + throw new Error('Password required to access OpenAI API key'); + } + + const apiKey = await loadEncryptedApiKey('tts.openai', password); + 
if (!apiKey || !apiKey.startsWith('sk-')) { + throw new Error('Invalid or missing OpenAI API key'); + } + + this.provider = new OpenAIProvider(apiKey); + } else if (providerType === 'piper') { + const serverUrl = localStorage.getItem('piper_tts_server_url') || 'http://localhost:5000'; + this.provider = new PiperProvider(serverUrl); + await (this.provider as PiperProvider).initialize(); } else { throw new Error(`Unknown TTS provider: ${providerType}`); } + + if (this.provider.setCallbacks) { + this.provider.setCallbacks(this.callbacks); + } } - - /** - * Check if current provider is available - */ + async isAvailable(): Promise { if (!this.provider) { - await this.initialize(); + // Try to auto-initialize with best available provider + try { + const { hasApiKey } = await import('../security/api-key-storage.js'); + if (await hasApiKey('tts.openai')) { + try { + await this.initialize('openai'); + return true; + } catch { + // Fall through + } + } + } catch { + // Ignore + } + + try { + const serverUrl = localStorage.getItem('piper_tts_server_url') || 'http://localhost:5000'; + const provider = new PiperProvider(serverUrl); + if (await provider.isAvailable()) { + await this.initialize('piper'); + return true; + } + } catch { + // Fall through + } + + await this.initialize('webspeech'); } + return this.provider ? await this.provider.isAvailable() : false; } - - /** - * Get available voices - */ + async getVoices(): Promise { if (!this.provider) { await this.initialize(); } return this.provider ? 
await this.provider.getVoices() : []; } - - /** - * Speak text - */ + async speak(text: string, options?: TTSOptions): Promise { if (!this.provider) { await this.initialize(); } + if (this.provider) { - await this.provider.speak(text, options); + try { + await this.provider.speak(text, options); + } catch (error) { + // Auto-fallback from OpenAI to Web Speech on quota error + if (error instanceof Error && (error as any).isQuotaError && this.providerType === 'openai') { + console.warn('OpenAI TTS quota exceeded, falling back to Web Speech API'); + await this.initialize('webspeech'); + if (this.provider) { + await this.provider.speak(text, options); + return; + } + } + throw error; + } } } - - /** - * Pause playback - */ + async pause(): Promise { - if (this.provider) { - await this.provider.pause(); - } + await this.provider?.pause(); } - - /** - * Resume playback - */ + async resume(): Promise { - if (this.provider) { - await this.provider.resume(); - } + await this.provider?.resume(); } - - /** - * Stop playback - */ + async stop(): Promise { - if (this.provider) { - await this.provider.stop(); - } + await this.provider?.stop(); } - - /** - * Get current state - */ + getState(): TTSState { - return this.provider ? this.provider.getState() : 'idle'; + return this.provider?.getState() ?? 'idle'; } - - /** - * Get current progress - */ + async getProgress(): Promise { return this.provider ? 
await this.provider.getProgress() : 0; } - - /** - * Set playback position - */ + async setProgress(position: number): Promise { if (this.provider) { await this.provider.setProgress(position); } } - - /** - * Set event callbacks - */ + setCallbacks(callbacks: TTSEventCallbacks): void { this.callbacks = { ...this.callbacks, ...callbacks }; - if (this.provider && this.provider.setCallbacks) { + if (this.provider?.setCallbacks) { this.provider.setCallbacks(this.callbacks); } } - - /** - * Get current provider type - */ + getProviderType(): TTSProvider { return this.providerType; } - - /** - * Cleanup - */ + destroy(): void { if (this.provider) { this.provider.destroy(); @@ -328,9 +642,6 @@ export class TTSService { // Singleton instance let ttsServiceInstance: TTSService | null = null; -/** - * Get TTS service instance - */ export function getTTSService(): TTSService { if (!ttsServiceInstance) { ttsServiceInstance = new TTSService(); diff --git a/src/lib/services/tts/types.ts b/src/lib/services/tts/types.ts index ef75a6e..3c063bd 100644 --- a/src/lib/services/tts/types.ts +++ b/src/lib/services/tts/types.ts @@ -5,7 +5,7 @@ /** * TTS provider type */ -export type TTSProvider = 'webspeech' | 'openai' | 'elevenlabs'; +export type TTSProvider = 'webspeech' | 'openai' | 'elevenlabs' | 'piper'; /** * TTS voice configuration @@ -21,7 +21,7 @@ export interface TTSVoice { /** * TTS playback state */ -export type TTSState = 'idle' | 'playing' | 'paused' | 'error'; +export type TTSState = 'idle' | 'synthesizing' | 'playing' | 'paused' | 'error'; /** * TTS options diff --git a/src/routes/api/piper-tts/+server.ts b/src/routes/api/piper-tts/+server.ts new file mode 100644 index 0000000..71647e0 --- /dev/null +++ b/src/routes/api/piper-tts/+server.ts @@ -0,0 +1,995 @@ +import type { RequestHandler } from './$types'; + +interface TTSRequest { + text: string; + voice?: string; + speed?: number; +} + +/** + * Proxy endpoint for Piper TTS using Wyoming protocol (TCP) + * Wyoming 
protocol: JSON messages newline-delimited, then raw binary audio + */ +export const POST: RequestHandler = async ({ request }) => { + console.log('Piper TTS API: Request received'); + try { + const body: TTSRequest = await request.json(); + const { text, voice, speed } = body; + + console.log('Piper TTS API: Processing request', { textLength: text?.length, voice, speed, voiceType: typeof voice, voiceValue: voice }); + + if (!text?.trim()) { + console.error('Piper TTS API: Missing text field'); + return errorResponse(400, 'Missing required field: text'); + } + + // Filter and prepare text + const filteredText = filterCryptographicContent(text); + if (!filteredText.trim()) { + console.warn('Piper TTS API: Text is empty after filtering'); + return errorResponse(400, 'Text contains only cryptographic addresses/IDs that cannot be read aloud'); + } + + const sentences = splitIntoSentences(filteredText); + const fullText = sentences.filter(s => s.trim().length > 0).join(' '); + console.log(`Piper TTS API: Processing ${sentences.length} sentences, total length: ${fullText.length}`); + + // Use provided voice, or auto-detect language and select voice if not provided + let selectedVoice = voice; + if (!selectedVoice || selectedVoice.trim() === '') { + const detectedLang = detectLanguage(fullText); + selectedVoice = getVoiceForLanguage(detectedLang); + console.log(`Piper TTS API: No voice provided, auto-detected language: ${detectedLang}, selected voice: ${selectedVoice}`); + } else { + console.log(`Piper TTS API: Using provided voice: ${selectedVoice}`); + } + + // Stream audio response with cancellation support + const abortController = new AbortController(); + let wyomingCleanup: (() => void) | null = null; + + const stream = new ReadableStream({ + async start(controller) { + try { + const audioChunks: Uint8Array[] = []; + let audioFormat: { rate: number; width: number; channels: number } | null = null; + let totalBytes = 0; + + const tcpConfig = getTcpConfig(); + 
console.log('Piper TTS API: Connecting to Wyoming server at', tcpConfig.hostname, 'port', tcpConfig.port); + + await synthesizeWithWyoming( + tcpConfig, + fullText, + selectedVoice, + speed, + abortController.signal, + (cleanup) => { + wyomingCleanup = cleanup; + }, + (chunk: Uint8Array, format?: { rate: number; width: number; channels: number }) => { + if (abortController.signal.aborted) return; + + if (format && !audioFormat) { + audioFormat = format; + console.log('Piper TTS API: Received audio format:', format); + } + if (chunk.length > 0) { + audioChunks.push(chunk); + totalBytes += chunk.length; + } + } + ); + + if (abortController.signal.aborted) { + console.log('Piper TTS API: Synthesis aborted'); + controller.close(); + return; + } + + if (!audioFormat || totalBytes === 0) { + throw new Error('No audio data received from Wyoming server'); + } + + console.log('Piper TTS API: Collected audio, total size:', totalBytes, 'bytes'); + + const format = audioFormat as { rate: number; width: number; channels: number }; + const wavHeader = createWavHeader(format.rate, format.width, format.channels, totalBytes); + controller.enqueue(wavHeader); + + for (const chunk of audioChunks) { + if (abortController.signal.aborted) break; + controller.enqueue(chunk); + } + + controller.close(); + } catch (error) { + if (abortController.signal.aborted) { + console.log('Piper TTS API: Operation cancelled'); + controller.close(); + } else { + console.error('Piper TTS API: Streaming error:', error); + controller.error(error); + } + } + }, + cancel() { + console.log('Piper TTS API: Stream cancelled by client'); + abortController.abort(); + if (wyomingCleanup) { + wyomingCleanup(); + } + } + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'audio/wav', + 'Transfer-Encoding': 'chunked', + 'Access-Control-Allow-Origin': '*', + }, + }); + } catch (error) { + const message = error instanceof Error ? 
error.message : 'Unknown error'; + console.error('Piper TTS API error:', message); + return errorResponse(500, message); + } +}; + +/** + * Synthesize speech using Wyoming protocol + * Protocol flow (standard): + * 1. Send: {"type":"synthesize","data":{"text":"..."}}\n + * 2. Receive format: {"rate":22050,"width":2,"channels":1}\n + * 3. Receive raw binary audio (no delimiters) + * 4. Optionally receive: {"type":"done"}\n or connection closes + * + * Some implementations may send audio-chunk messages: + * - {"type":"audio-chunk","payload_length":N}\n followed by N bytes of binary audio + * - These may arrive before or after the format message + * - We handle both standard and audio-chunk variants for compatibility + */ +async function synthesizeWithWyoming( + config: { hostname: string; port: number }, + text: string, + voice: string | undefined, + speed: number | undefined, + abortSignal: AbortSignal, + onCleanup: (cleanup: () => void) => void, + onChunk: (chunk: Uint8Array, format?: { rate: number; width: number; channels: number }) => void +): Promise<void> { + const net = await import('net'); + + return new Promise((resolve, reject) => { + let socket: import('net').Socket | null = null; + let buffer = Buffer.alloc(0); + let audioFormat: { rate: number; width: number; channels: number } | null = null; + let hasReceivedAudio = false; + let isResolved = false; + let lastDataTime = Date.now(); + let completionTimer: NodeJS.Timeout | null = null; + const preFormatAudioChunks: Uint8Array[] = []; // Buffer audio chunks received before format + let hasProcessedAudioChunks = false; // Track if we've processed audio-chunk messages + + console.log('Wyoming: Creating TCP connection to', config.hostname, 'port', config.port); + + const cleanup = () => { + if (socket && !socket.destroyed) { + console.log('Wyoming: Cleaning up TCP connection'); + socket.destroy(); + } + }; + + // Register cleanup function + onCleanup(cleanup); + + // Check if already aborted + if
(abortSignal.aborted) { + console.log('Wyoming: Abort signal already set, not connecting'); + reject(new Error('Operation cancelled')); + return; + } + + // Listen for abort signal + const abortHandler = () => { + console.log('Wyoming: Abort signal received, cleaning up'); + if (completionTimer) { + clearTimeout(completionTimer); + completionTimer = null; + } + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + reject(new Error('Operation cancelled')); + } + }; + abortSignal.addEventListener('abort', abortHandler); + + const timeout = setTimeout(() => { + cleanup(); + if (!isResolved) { + isResolved = true; + console.error('Wyoming: Timeout after 5 minutes'); + reject(new Error('Wyoming protocol timeout')); + } + }, 300000); // 5 minutes + + try { + socket = net.createConnection(config.port, config.hostname, () => { + console.log('Wyoming: TCP connected successfully'); + // Send synthesize request + // Wyoming protocol expects voice as an object with 'name' property, not a plain string + const message = { + type: 'synthesize', + data: { + text, + ...(voice ? { voice: { name: voice } } : {}), + ...(speed !== undefined && speed !== 1.0 ? { speed } : {}), + } + }; + const messageStr = JSON.stringify(message) + '\n'; + console.log('Wyoming: Sending synthesize message, text length:', text.length, 'voice:', voice ? `{name: "${voice}"}` : 'none (will use default)'); + console.log('Wyoming: Full message:', messageStr.trim()); + try { + socket!.write(messageStr); + console.log('Wyoming: Synthesize message sent'); + } catch (writeError) { + console.error('Wyoming: Failed to write message:', writeError); + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + reject(new Error(`Failed to send message: ${writeError instanceof Error ? 
writeError.message : String(writeError)}`)); + } + } + }); + } catch (error) { + console.error('Wyoming: Failed to create connection:', error); + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + reject(new Error(`Failed to create connection: ${error instanceof Error ? error.message : String(error)}`)); + } + return; + } + + socket.on('data', (data: Buffer) => { + // Check if aborted + if (abortSignal.aborted) { + console.log('Wyoming: Aborted, ignoring data'); + return; + } + + lastDataTime = Date.now(); + + // Clear completion timer since we're receiving data + if (completionTimer) { + clearTimeout(completionTimer); + completionTimer = null; + } + + console.log('Wyoming: Received data, size:', data.length, 'bytes, audioFormat:', audioFormat ? 'received' : 'not received'); + buffer = Buffer.concat([buffer, data]); + + // Process buffer + while (buffer.length > 0) { + // Check if aborted during processing + if (abortSignal.aborted) { + console.log('Wyoming: Aborted during buffer processing'); + break; + } + + // After format received, check for "done" message, audio-chunk messages, or process as raw audio + if (audioFormat) { + // Check if buffer starts with JSON (for done/error/audio-chunk messages) + if (buffer.length > 0 && buffer[0] === 0x7b) { // '{' byte + const newlineIndex = buffer.indexOf('\n'); + if (newlineIndex !== -1) { + try { + const line = buffer.subarray(0, newlineIndex).toString('utf8').trim(); + const message = JSON.parse(line); + + if (message.type === 'done') { + console.log('Wyoming: Received done message'); + if (completionTimer) { + clearTimeout(completionTimer); + completionTimer = null; + } + buffer = buffer.subarray(newlineIndex + 1); + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + resolve(); + } + return; + } + + if (message.type === 'error') { + console.error('Wyoming: Received error message:', message.message); + buffer = buffer.subarray(newlineIndex + 1); + cleanup(); + 
clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + reject(new Error(message.message || 'Wyoming protocol error')); + } + return; + } + + if (message.type === 'audio-stop') { + console.log('Wyoming: Received audio-stop message'); + buffer = buffer.subarray(newlineIndex + 1); + if (completionTimer) { + clearTimeout(completionTimer); + completionTimer = null; + } + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + resolve(); + } + return; + } + + // Handle audio-chunk messages after format + if (message.type === 'audio-chunk' && typeof message.payload_length === 'number') { + const payloadLength = message.payload_length; + const messageEnd = newlineIndex + 1; + // If data_length is specified, there's additional JSON data before the payload + const dataLength = typeof message.data_length === 'number' ? message.data_length : 0; + const payloadStart = messageEnd + dataLength; + const payloadEnd = payloadStart + payloadLength; + + if (buffer.length >= payloadEnd) { + const audioPayload = new Uint8Array(buffer.subarray(payloadStart, payloadEnd)); + onChunk(audioPayload); + hasReceivedAudio = true; + hasProcessedAudioChunks = true; + buffer = buffer.subarray(payloadEnd); + continue; // Continue processing loop + } else { + // Don't have full payload yet - wait for more data + break; + } + } + } catch (error) { + // Not valid JSON - treat as raw audio + // Fall through to raw audio processing + } + } else { + // No newline yet - might be incomplete JSON, wait for more data + break; + } + } + + // No JSON message found - process all buffer as raw audio + if (buffer.length > 0) { + onChunk(new Uint8Array(buffer)); + hasReceivedAudio = true; + buffer = Buffer.alloc(0); + } + + // If we've received audio and buffer is empty, set a completion timer + // This handles cases where the server doesn't send "done" or close connection + if (hasReceivedAudio && buffer.length === 0 && !completionTimer) { + completionTimer = setTimeout(() => { 
+ if (!isResolved && hasReceivedAudio && !abortSignal.aborted) { + console.log('Wyoming: No data received for 500ms after audio, assuming completion'); + cleanup(); + clearTimeout(timeout); + isResolved = true; + resolve(); + } + }, 500); // 500ms timeout after last data + } + + // Break and wait for more data (could be more audio or "done" message) + break; + } + + // Before format: scan buffer for JSON format message + // Look for '{' followed by newline-delimited JSON + let formatFound = false; + let searchStart = 0; + + while (searchStart < buffer.length && !formatFound) { + const braceIndex = buffer.indexOf(0x7b, searchStart); // '{' byte + if (braceIndex === -1) { + // No more '{' found - this is all binary data, buffer it + break; + } + + // Look for newline after this '{' + const newlineIndex = buffer.indexOf('\n', braceIndex); + if (newlineIndex === -1) { + // No newline yet - wait for more data + break; + } + + // Try to parse as JSON + const lineBytes = buffer.subarray(braceIndex, newlineIndex); + const line = lineBytes.toString('utf8').trim(); + + if (line.endsWith('}')) { + try { + const message = JSON.parse(line); + console.log('Wyoming: Received message:', JSON.stringify(message)); + + // Check for audio-start message (contains format info) + if (message.type === 'audio-start' && (message.rate !== undefined || message.channels !== undefined)) { + audioFormat = { + rate: message.rate, + width: message.width || 2, + channels: message.channels, + }; + console.log('Wyoming: Audio format from audio-start:', audioFormat); + + // Send format notification + onChunk(new Uint8Array(0), audioFormat); + + // Process any buffered audio chunks + if (preFormatAudioChunks.length > 0) { + console.log('Wyoming: Processing', preFormatAudioChunks.length, 'buffered audio chunks after audio-start'); + for (const chunk of preFormatAudioChunks) { + onChunk(chunk); + hasReceivedAudio = true; + } + preFormatAudioChunks.length = 0; + hasProcessedAudioChunks = true; + } + + 
buffer = buffer.subarray(newlineIndex + 1); + searchStart = 0; + continue; + } + + // Check for format message (can be a standalone format object or embedded in other messages) + if (message.rate !== undefined || message.channels !== undefined) { + audioFormat = { + rate: message.rate, + width: message.width || 2, + channels: message.channels, + }; + console.log('Wyoming: Audio format:', audioFormat); + + // Remove everything up to and including the format message + const dataAfterFormat = buffer.subarray(newlineIndex + 1); + + // Send format notification first + onChunk(new Uint8Array(0), audioFormat); + + // Process any buffered audio chunks received before format + if (preFormatAudioChunks.length > 0) { + console.log('Wyoming: Processing', preFormatAudioChunks.length, 'buffered audio chunks'); + for (const chunk of preFormatAudioChunks) { + onChunk(chunk); + hasReceivedAudio = true; + } + preFormatAudioChunks.length = 0; // Clear the buffer + hasProcessedAudioChunks = true; + } + + // Process any raw data before format as audio (protocol violation, but handle it) + // BUT: Skip this if we've already processed audio-chunk messages, as that data + // is likely protocol overhead or corrupted, not actual audio + if (braceIndex > 0 && !hasProcessedAudioChunks) { + const preFormatData = buffer.subarray(0, braceIndex); + // Only process if it's not empty and looks like audio (not JSON) + // Also check that it's a reasonable size (not just a few bytes of protocol overhead) + if (preFormatData.length > 0 && preFormatData[0] !== 0x7b && preFormatData.length > 100) { + console.warn('Wyoming: Processing', braceIndex, 'bytes of raw data received before format message as audio'); + onChunk(new Uint8Array(preFormatData)); + hasReceivedAudio = true; + } else if (preFormatData.length > 0 && preFormatData.length <= 100) { + console.warn('Wyoming: Skipping', preFormatData.length, 'bytes of data before format (likely protocol overhead)'); + } + } else if (braceIndex > 0 && 
hasProcessedAudioChunks) { + console.warn('Wyoming: Skipping', braceIndex, 'bytes of data before format (audio-chunk messages already processed)'); + } + + // Process data after format as audio + if (dataAfterFormat.length > 0) { + onChunk(new Uint8Array(dataAfterFormat)); + hasReceivedAudio = true; + } + + buffer = Buffer.alloc(0); + formatFound = true; + continue; // Continue processing loop + } + + // Check for done/error messages + if (message.type === 'done') { + console.log('Wyoming: Received done message'); + buffer = buffer.subarray(newlineIndex + 1); + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + if (hasReceivedAudio) { + resolve(); + } else { + reject(new Error('No audio data received')); + } + } + return; + } + + if (message.type === 'error') { + console.error('Wyoming: Received error message:', message.message); + buffer = buffer.subarray(newlineIndex + 1); + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + reject(new Error(message.message || 'Wyoming protocol error')); + } + return; + } + + // Handle audio-stop message + if (message.type === 'audio-stop') { + console.log('Wyoming: Received audio-stop message'); + buffer = buffer.subarray(newlineIndex + 1); + + // If we have buffered audio chunks but no format, use default format + if (preFormatAudioChunks.length > 0 && !audioFormat) { + console.warn('Wyoming: Format message never received, using default format for', preFormatAudioChunks.length, 'buffered chunks'); + // Default Piper TTS format: 22050 Hz, 16-bit (width=2), mono (channels=1) + audioFormat = { + rate: 22050, + width: 2, + channels: 1, + }; + console.log('Wyoming: Using default audio format:', audioFormat); + + // Send format notification + onChunk(new Uint8Array(0), audioFormat); + + // Process buffered chunks + for (const chunk of preFormatAudioChunks) { + onChunk(chunk); + hasReceivedAudio = true; + } + preFormatAudioChunks.length = 0; + hasProcessedAudioChunks = true; + 
} + + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + if (hasReceivedAudio) { + resolve(); + } else { + reject(new Error('No audio data received')); + } + } + return; + } + + // Handle audio-chunk messages + if (message.type === 'audio-chunk' && typeof message.payload_length === 'number') { + const payloadLength = message.payload_length; + const messageEnd = newlineIndex + 1; + // If data_length is specified, there's additional JSON data before the payload + const dataLength = typeof message.data_length === 'number' ? message.data_length : 0; + const payloadStart = messageEnd + dataLength; + const payloadEnd = payloadStart + payloadLength; + + console.log('Wyoming: Processing audio-chunk, payload_length:', payloadLength, 'data_length:', dataLength, 'buffer length:', buffer.length, 'payloadStart:', payloadStart, 'payloadEnd:', payloadEnd); + + // Check if we have the full payload + if (buffer.length >= payloadEnd) { + // If there's data_length, try to parse the format from that data + if (dataLength > 0 && !audioFormat) { + const dataBytes = buffer.subarray(messageEnd, payloadStart); + try { + const dataStr = dataBytes.toString('utf8'); + const formatData = JSON.parse(dataStr); + if (formatData.rate !== undefined || formatData.channels !== undefined) { + audioFormat = { + rate: formatData.rate, + width: formatData.width || 2, + channels: formatData.channels, + }; + console.log('Wyoming: Found format in data section:', audioFormat); + onChunk(new Uint8Array(0), audioFormat); + } + } catch (e) { + console.warn('Wyoming: Failed to parse data section as JSON:', e); + } + } + + // Extract the audio payload (after the data section) + const audioPayload = new Uint8Array(buffer.subarray(payloadStart, payloadEnd)); + console.log('Wyoming: Extracted audio payload:', audioPayload.length, 'bytes, first 8 bytes:', Array.from(audioPayload.slice(0, 8)).map(b => '0x' + b.toString(16).padStart(2, '0')).join(' ')); + + // Check if format is embedded in 
the audio-chunk message itself + if (!audioFormat && (message.rate !== undefined || message.channels !== undefined)) { + audioFormat = { + rate: message.rate || 22050, + width: message.width || 2, + channels: message.channels || 1, + }; + console.log('Wyoming: Found format in audio-chunk message:', audioFormat); + onChunk(new Uint8Array(0), audioFormat); + } + + // If we have format, process it as audio; otherwise buffer it + if (audioFormat) { + onChunk(audioPayload); + hasReceivedAudio = true; + hasProcessedAudioChunks = true; + } else { + // Buffer audio chunks until we get format + preFormatAudioChunks.push(audioPayload); + console.log('Wyoming: Buffering audio-chunk payload of', payloadLength, 'bytes (format not yet received)'); + hasProcessedAudioChunks = true; // Mark that we've seen audio-chunk messages + } + + // Remove the message and payload from buffer + buffer = buffer.subarray(payloadEnd); + searchStart = 0; // Reset search to start of buffer + continue; + } else { + // Don't have full payload yet - wait for more data + console.log('Wyoming: Waiting for more data, need', payloadEnd, 'have', buffer.length); + break; + } + } + + // Other JSON message - skip it and continue searching + searchStart = newlineIndex + 1; + } catch (error) { + // Not valid JSON - continue searching + searchStart = braceIndex + 1; + } + } else { + // Incomplete JSON - continue searching + searchStart = braceIndex + 1; + } + } + + // If we found format, continue processing; otherwise wait for more data + if (!formatFound) { + break; + } + } + }); + + socket.on('error', (error: Error) => { + console.error('Wyoming: TCP error:', error.message); + abortSignal.removeEventListener('abort', abortHandler); + cleanup(); + clearTimeout(timeout); + if (!isResolved) { + isResolved = true; + reject(new Error(`TCP error: ${error.message}`)); + } + }); + + socket.on('close', () => { + console.log('Wyoming: Connection closed, hasReceivedAudio:', hasReceivedAudio, 'buffer length:', 
buffer.length, 'buffered chunks:', preFormatAudioChunks.length); + if (completionTimer) { + clearTimeout(completionTimer); + completionTimer = null; + } + abortSignal.removeEventListener('abort', abortHandler); + cleanup(); + clearTimeout(timeout); + + // If we have buffered audio chunks but no format, use default format + if (!abortSignal.aborted && preFormatAudioChunks.length > 0 && !audioFormat) { + console.warn('Wyoming: Format message never received before connection close, using default format for', preFormatAudioChunks.length, 'buffered chunks'); + // Default Piper TTS format: 22050 Hz, 16-bit (width=2), mono (channels=1) + audioFormat = { + rate: 22050, + width: 2, + channels: 1, + }; + console.log('Wyoming: Using default audio format:', audioFormat); + + // Send format notification + onChunk(new Uint8Array(0), audioFormat); + + // Process buffered chunks + for (const chunk of preFormatAudioChunks) { + onChunk(chunk); + hasReceivedAudio = true; + } + preFormatAudioChunks.length = 0; + hasProcessedAudioChunks = true; + } + + // Only process remaining buffer if not aborted + if (!abortSignal.aborted && buffer.length > 0 && audioFormat) { + console.log('Wyoming: Streaming remaining buffer:', buffer.length, 'bytes'); + onChunk(new Uint8Array(buffer)); + hasReceivedAudio = true; + } + + if (!isResolved) { + isResolved = true; + if (abortSignal.aborted) { + console.log('Wyoming: Connection closed after abort'); + reject(new Error('Operation cancelled')); + } else if (hasReceivedAudio) { + console.log('Wyoming: Resolving - audio received'); + resolve(); + } else { + console.error('Wyoming: Rejecting - no audio received'); + reject(new Error('Connection closed without audio data')); + } + } + }); + }); +} + +function getTcpConfig(): { hostname: string; port: number } { + // Allow override via environment variable + const piperHost = process.env.PIPER_TTS_HOST || process.env.PIPER_HOST; + const piperPort = process.env.PIPER_TTS_PORT || process.env.PIPER_PORT; + + if 
(piperHost && piperPort) { + return { + hostname: piperHost, + port: parseInt(piperPort, 10), + }; + } + + // Default: use Docker service name in production, localhost in development + const isDevelopment = process.env.NODE_ENV === 'development'; + return { + hostname: isDevelopment ? 'localhost' : 'piper-tts', + port: 10200, + }; +} + +function createWavHeader(sampleRate: number, bytesPerSample: number, channels: number, dataSize: number): Uint8Array { + const header = new ArrayBuffer(44); + const view = new DataView(header); + + // RIFF header + view.setUint8(0, 0x52); // 'R' + view.setUint8(1, 0x49); // 'I' + view.setUint8(2, 0x46); // 'F' + view.setUint8(3, 0x46); // 'F' + view.setUint32(4, 36 + dataSize, true); // File size - 8 + + // WAVE header + view.setUint8(8, 0x57); // 'W' + view.setUint8(9, 0x41); // 'A' + view.setUint8(10, 0x56); // 'V' + view.setUint8(11, 0x45); // 'E' + + // fmt chunk + view.setUint8(12, 0x66); // 'f' + view.setUint8(13, 0x6D); // 'm' + view.setUint8(14, 0x74); // 't' + view.setUint8(15, 0x20); // ' ' + view.setUint32(16, 16, true); // fmt chunk size + view.setUint16(20, 1, true); // Audio format (1 = PCM) + view.setUint16(22, channels, true); // Number of channels + view.setUint32(24, sampleRate, true); // Sample rate + view.setUint32(28, sampleRate * channels * bytesPerSample, true); // Byte rate + view.setUint16(32, channels * bytesPerSample, true); // Block align + view.setUint16(34, bytesPerSample * 8, true); // Bits per sample + + // data chunk + view.setUint8(36, 0x64); // 'd' + view.setUint8(37, 0x61); // 'a' + view.setUint8(38, 0x74); // 't' + view.setUint8(39, 0x61); // 'a' + view.setUint32(40, dataSize, true); // Data size + + return new Uint8Array(header); +} + +function filterCryptographicContent(text: string): string { + let filtered = text; + + // Remove URLs + filtered = filtered.replace(/https?:\/\/[^\s]+/gi, ''); + filtered = filtered.replace(/www\.[^\s]+/gi, ''); + + // Remove Nostr URIs and bech32 addresses + 
filtered = filtered.replace(/nostr:[^\s]+/gi, ''); + filtered = filtered.replace(/\b(npub|note|nevent|naddr|nprofile|nsec|ncryptsec)1[a-z0-9]{20,}\b/gi, ''); + + // Remove hex strings + filtered = filtered.replace(/\b[0-9a-f]{64}\b/gi, ''); + filtered = filtered.replace(/\b[0-9a-f]{32,63}\b/gi, ''); + + // Remove emojis + filtered = filtered.replace(/[\u{1F300}-\u{1F9FF}]/gu, ''); + filtered = filtered.replace(/[\u{1F600}-\u{1F64F}]/gu, ''); + filtered = filtered.replace(/[\u{2600}-\u{26FF}]/gu, ''); + filtered = filtered.replace(/[\u{2700}-\u{27BF}]/gu, ''); + + // Remove markdown and asciidoc markup + + // Code blocks (markdown and asciidoc) + filtered = filtered.replace(/```[\s\S]*?```/g, ''); + filtered = filtered.replace(/`[^`]+`/g, ''); + filtered = filtered.replace(/----[\s\S]*?----/g, ''); // AsciiDoc code blocks + filtered = filtered.replace(/\[source[^\]]*\][\s\S]*?----/g, ''); // AsciiDoc source blocks + + // Headers (markdown and asciidoc) + filtered = filtered.replace(/^#+\s+/gm, ''); // Markdown headers at start of line + filtered = filtered.replace(/\s+#+\s+/g, ' '); // Markdown headers in middle of text + filtered = filtered.replace(/^=+\s*$/gm, ''); // AsciiDoc headers (single line) + filtered = filtered.replace(/^=+\s+/gm, ''); // AsciiDoc headers at start of line + filtered = filtered.replace(/\s+=+\s+/g, ' '); // AsciiDoc headers in middle of text + + // Links (markdown and asciidoc) + filtered = filtered.replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1'); // Markdown links + filtered = filtered.replace(/\[\[([^\]]+)\]\]/g, '$1'); // AsciiDoc links + filtered = filtered.replace(/link:([^\[]+)\[([^\]]+)\]/g, '$2'); // AsciiDoc link: syntax + + // Images (markdown and asciidoc) + filtered = filtered.replace(/!\[([^\]]*)\]\([^\)]+\)/g, ''); // Markdown images + filtered = filtered.replace(/image::?[^\[]+\[([^\]]*)\]/g, '$1'); // AsciiDoc images + + // Emphasis and formatting + filtered = filtered.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold markdown + filtered 
= filtered.replace(/\*([^*]+)\*/g, '$1'); // Italic markdown + filtered = filtered.replace(/__([^_]+)__/g, '$1'); // Bold markdown (underscore) + filtered = filtered.replace(/_([^_]+)_/g, '$1'); // Italic markdown (underscore) + filtered = filtered.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold asciidoc + filtered = filtered.replace(/\*([^*]+)\*/g, '$1'); // Italic asciidoc + filtered = filtered.replace(/\+\+([^+]+)\+\+/g, '$1'); // Monospace asciidoc + filtered = filtered.replace(/~~([^~]+)~~/g, '$1'); // Strikethrough markdown + + // Lists (markdown and asciidoc) + filtered = filtered.replace(/^[\*\-\+]\s+/gm, ''); // Markdown unordered lists + filtered = filtered.replace(/^\d+\.\s+/gm, ''); // Markdown ordered lists + filtered = filtered.replace(/^\.\s+/gm, ''); // AsciiDoc unordered lists + filtered = filtered.replace(/^\d+\.\s+/gm, ''); // AsciiDoc ordered lists + + // Blockquotes + filtered = filtered.replace(/^>\s+/gm, ''); // Markdown blockquotes + filtered = filtered.replace(/^\[quote[^\]]*\][\s\S]*?\[quote\]/g, ''); // AsciiDoc quotes + + // Horizontal rules + filtered = filtered.replace(/^[-*_]{3,}\s*$/gm, ''); // Markdown horizontal rules + filtered = filtered.replace(/^'''+\s*$/gm, ''); // AsciiDoc horizontal rules + + // Tables (markdown and asciidoc) + filtered = filtered.replace(/\|/g, ' '); // Remove table separators + filtered = filtered.replace(/^\|.+\|\s*$/gm, ''); // Remove table rows + filtered = filtered.replace(/^\[cols?=[^\]]*\][\s\S]*?\|===\s*$/gm, ''); // AsciiDoc tables + + // Other asciidoc syntax + filtered = filtered.replace(/\[\[([^\]]+)\]\]/g, ''); // AsciiDoc anchors + filtered = filtered.replace(/\[NOTE\]/gi, ''); + filtered = filtered.replace(/\[TIP\]/gi, ''); + filtered = filtered.replace(/\[WARNING\]/gi, ''); + filtered = filtered.replace(/\[IMPORTANT\]/gi, ''); + filtered = filtered.replace(/\[CAUTION\]/gi, ''); + filtered = filtered.replace(/\[source[^\]]*\]/gi, ''); + filtered = filtered.replace(/\[caption[^\]]*\]/gi, ''); + + 
// Clean up whitespace + filtered = filtered.replace(/\s+/g, ' ').trim(); + + return filtered; +} + +function splitIntoSentences(text: string): string[] { + const cleaned = text + .replace(/^#+\s+/gm, '') + .replace(/\n+/g, ' ') + .trim(); + + const sentences: string[] = []; + const regex = /([.!?]+)\s+/g; + let lastIndex = 0; + let match; + + while ((match = regex.exec(cleaned)) !== null) { + const sentence = cleaned.substring(lastIndex, match.index + match[1].length).trim(); + if (sentence.length > 0) { + sentences.push(sentence); + } + lastIndex = match.index + match[0].length; + } + + const remaining = cleaned.substring(lastIndex).trim(); + if (remaining.length > 0) { + sentences.push(remaining); + } + + return sentences.length > 0 ? sentences : [cleaned]; +} + +function errorResponse(status: number, message: string): Response { + return new Response(JSON.stringify({ error: message }), { + status, + headers: { 'Content-Type': 'application/json' }, + }); +} + +/** + * Simple language detection based on character patterns + * Returns language code (e.g., 'en', 'de', 'fr', 'es', etc.) + */ +function detectLanguage(text: string): string { + if (!text || text.length === 0) return 'en'; + + // Count character patterns to detect language + const sample = text.substring(0, Math.min(500, text.length)); + + // German: ä, ö, ü, ß + const germanChars = (sample.match(/[äöüßÄÖÜ]/g) || []).length; + // French: é, è, ê, ç, à, etc. 
+ const frenchChars = (sample.match(/[éèêëàâäçôùûüÉÈÊËÀÂÄÇÔÙÛÜ]/g) || []).length; + // Spanish: ñ, á, é, í, ó, ú, ¿, ¡ + const spanishChars = (sample.match(/[ñáéíóúüÑÁÉÍÓÚÜ¿¡]/g) || []).length; + // Italian: à, è, é, ì, ò, ù + const italianChars = (sample.match(/[àèéìòùÀÈÉÌÒÙ]/g) || []).length; + // Russian/Cyrillic + const cyrillicChars = (sample.match(/[а-яёА-ЯЁ]/g) || []).length; + // Chinese/Japanese/Korean (CJK) + const cjkChars = (sample.match(/[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) || []).length; + // Arabic + const arabicChars = (sample.match(/[\u0600-\u06ff]/g) || []).length; + + // Calculate ratios + const total = sample.length; + const germanRatio = germanChars / total; + const frenchRatio = frenchChars / total; + const spanishRatio = spanishChars / total; + const italianRatio = italianChars / total; + const cyrillicRatio = cyrillicChars / total; + const cjkRatio = cjkChars / total; + const arabicRatio = arabicChars / total; + + // Detect based on highest ratio + if (cyrillicRatio > 0.1) return 'ru'; + if (cjkRatio > 0.1) return 'zh'; // Default to Chinese for CJK + if (arabicRatio > 0.1) return 'ar'; + if (germanRatio > 0.02) return 'de'; + if (frenchRatio > 0.02) return 'fr'; + if (spanishRatio > 0.02) return 'es'; + if (italianRatio > 0.02) return 'it'; + + // Default to English + return 'en'; +} + +/** + * Map language code to Piper voice name + * Returns voice name (always returns a value, defaults to English) + * Voice names follow pattern: {lang}_{locale}-{voice}-{quality} + * + * Note: These are common voice names. You may need to adjust based on + * which voices are actually available in your piper-data directory. + * To see available voices, check the piper-data folder or Wyoming server logs. 
+ */ +function getVoiceForLanguage(lang: string): string { + // Common voice mappings - adjust based on available voices in your piper-data directory + const voiceMap: Record<string, string> = { + 'en': 'en_US-lessac-medium', // Default English voice + 'de': 'de_DE-thorsten-medium', // German + 'fr': 'fr_FR-siwis-medium', // French + 'es': 'es_ES-davefx-medium', // Spanish + // 'it': 'it_IT-riccardo-medium', // Italian - not available + 'ru': 'ru_RU-ruslan-medium', // Russian + 'zh': 'zh_CN-huayan-medium', // Chinese + // 'ar': 'ar_SA-hafez-medium', // Arabic - not available + 'pl': 'pl_PL-darkman-medium', // Polish + // 'pt': 'pt_BR-edresson-medium', // Portuguese - not available + 'nl': 'nl_NL-mls-medium', // Dutch + 'cs': 'cs_CZ-jirka-medium', // Czech + 'tr': 'tr_TR-dfki-medium', // Turkish + // 'ja': 'ja_JP-nanami-medium', // Japanese - not available + // 'ko': 'ko_KR-kyungha-medium', // Korean - not available + }; + + return voiceMap[lang] || voiceMap['en']; // Fall back to English +} diff --git a/svelte.config.js b/svelte.config.js index a3848ff..3fd1dae 100644 --- a/svelte.config.js +++ b/svelte.config.js @@ -1,4 +1,4 @@ -import adapter from '@sveltejs/adapter-static'; +import adapter from '@sveltejs/adapter-node'; import { vitePreprocess } from '@sveltejs/vite-plugin-svelte'; /** @type {import('@sveltejs/kit').Config} */ @@ -11,11 +11,8 @@ const config = { kit: { adapter: adapter({ - pages: 'build', - assets: 'build', - fallback: '200.html', - precompress: true, - strict: true + out: 'build', + precompress: true }), prerender: { handleUnseenRoutes: 'ignore'