Skillsbench audiobook
Create audiobooks from web content or text files. Handles content fetching, text processing, and TTS conversion with automatic fallback between ElevenLabs, OpenAI TTS, and gTTS.
install
source · Clone the upstream repo
git clone https://github.com/benchflow-ai/skillsbench
Claude Code · Install into ~/.claude/skills/
T=$(mktemp -d) && git clone --depth=1 https://github.com/benchflow-ai/skillsbench "$T" && mkdir -p ~/.claude/skills && cp -r "$T/tasks/pg-essay-to-audiobook/environment/skills/audiobook" ~/.claude/skills/benchflow-ai-skillsbench-audiobook && rm -rf "$T"
manifest:
tasks/pg-essay-to-audiobook/environment/skills/audiobook/SKILL.md
source content
Audiobook Creation Guide
Create audiobooks from web articles, essays, or text files. This skill covers the full pipeline: content fetching, text processing, and audio generation.
Quick Start
import os


# 1. Check which TTS API is available
def get_tts_provider():
    """Return the name of the TTS backend to use.

    Providers are checked in priority order; the first one whose API key
    environment variable is set to a non-empty value wins. When no key is
    set, the keyless "gtts" backend is returned.
    """
    keyed_providers = (
        ("ELEVENLABS_API_KEY", "elevenlabs"),
        ("OPENAI_API_KEY", "openai"),
    )
    for env_var, provider_name in keyed_providers:
        if os.environ.get(env_var):
            return provider_name
    return "gtts"  # Free, no API key needed


provider = get_tts_provider()
print(f"Using TTS provider: {provider}")
Step 1: Fetching Web Content
IMPORTANT: Verify fetched content is complete
WebFetch and similar tools may return summaries instead of full text. Always verify:
import re
import subprocess


def fetch_article_content(url):
    """Fetch article content using curl for reliability.

    Args:
        url: Address of the page to download.

    Returns:
        The page text with <script>/<style> blocks and all remaining tags
        removed and whitespace collapsed to single spaces. An empty string
        is returned when curl produced no output.
    """
    # Use curl to get raw HTML - more reliable than web fetch tools
    result = subprocess.run(
        ["curl", "-s", url],
        capture_output=True,
        text=True,
    )
    page = result.stdout

    # Strip HTML tags (basic approach). Every pass must consume the output
    # of the previous one; feeding the raw page into the <style> pass would
    # throw away the <script> removal and leak JavaScript into the text.
    block_flags = re.DOTALL | re.IGNORECASE
    text = re.sub(r'<script[^>]*>.*?</script>', '', page, flags=block_flags)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=block_flags)
    text = re.sub(r'<[^>]+>', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
Content verification checklist
Before converting to audio, verify:
- Text length is reasonable for the source (articles typically 1,000-10,000+ words)
- Content includes actual article text, not just navigation/headers
- No "summary" or "key points" headers that indicate truncation
def verify_content(text, expected_min_chars=1000):
    """Run basic sanity checks on fetched text before narrating it.

    Args:
        text: The fetched article text.
        expected_min_chars: Minimum length for the text to count as complete.

    Returns:
        False (after printing a warning) when the text is shorter than
        expected_min_chars, or when "summary" or "key points" occurs within
        the first 500 characters of the lowercased text; True otherwise.
    """
    char_count = len(text)
    if char_count < expected_min_chars:
        print(f"WARNING: Content may be truncated ({char_count} chars)")
        return False

    # Only the opening of the document is inspected for summary markers.
    opening = text.lower()[:500]
    summary_markers = ("summary", "key points")
    if any(marker in opening for marker in summary_markers):
        print("WARNING: Content appears to be a summary, not full text")
        return False

    return True
Step 2: Text Processing
Clean and prepare text for TTS
import re
import textwrap

# Typographic quotes mapped to their ASCII equivalents so that contractions
# and quotations survive the special-character filter below.
_SMART_QUOTES = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
})


def clean_text_for_tts(text):
    """Clean text for better TTS output.

    Removes URLs and numeric footnote markers, converts typographic quotes
    to ASCII, drops characters outside word characters, whitespace and basic
    punctuation, and finally collapses whitespace.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with no leading/trailing whitespace.
    """
    # Remove URLs
    text = re.sub(r'http[s]?://\S+', '', text)

    # Remove footnote markers like [1], [2]
    text = re.sub(r'\[\d+\]', '', text)

    # Keep apostrophes/quotes: without this "don’t" would become "dont"
    text = text.translate(_SMART_QUOTES)

    # Remove special characters that confuse TTS
    text = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)

    # Normalize whitespace last so that the removals above cannot leave
    # runs of spaces behind.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()


def chunk_text(text, max_chars=4000):
    """Split text into chunks at sentence boundaries.

    Sentences are packed greedily into chunks. A single sentence longer than
    max_chars is further split at whitespace (or mid-word when it contains
    no whitespace) so that no chunk exceeds max_chars.

    Args:
        text: Text to split.
        max_chars: Upper bound on the length of each chunk.

    Returns:
        A list of non-empty chunks; an empty list for empty or
        whitespace-only text.

    Raises:
        ValueError: If max_chars is less than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    parts = []
    # Length of the pending chunk counted as each part plus one separator.
    pending_len = 0

    for sentence in re.split(r'(?<=[.!?])\s+', text):
        if not sentence.strip():
            # Empty fragments (e.g. from empty input) must not become chunks.
            continue

        if len(sentence) > max_chars:
            pieces = textwrap.wrap(
                sentence, width=max_chars, break_on_hyphens=False
            )
        else:
            pieces = [sentence]

        for piece in pieces:
            if parts and pending_len + len(piece) >= max_chars:
                chunks.append(" ".join(parts).strip())
                parts = []
                pending_len = 0
            parts.append(piece)
            pending_len += len(piece) + 1

    if parts:
        chunks.append(" ".join(parts).strip())

    return chunks
Step 3: TTS Conversion with Fallback
Automatic provider selection
import os
import subprocess


def create_audiobook(text, output_path):
    """Convert text to an audiobook, choosing the TTS provider automatically.

    ElevenLabs is used when ELEVENLABS_API_KEY is set, otherwise OpenAI when
    OPENAI_API_KEY is set, otherwise gTTS.

    Args:
        text: Text to narrate.
        output_path: Destination path passed through to the provider function.

    Returns:
        Whatever the selected provider function returns.
    """
    # Check available providers
    elevenlabs_ready = bool(os.environ.get("ELEVENLABS_API_KEY"))
    openai_ready = bool(os.environ.get("OPENAI_API_KEY"))

    if elevenlabs_ready:
        print("Using ElevenLabs TTS (highest quality)")
        return create_with_elevenlabs(text, output_path)

    if openai_ready:
        print("Using OpenAI TTS (high quality)")
        return create_with_openai(text, output_path)

    print("Using gTTS (free, no API key required)")
    return create_with_gtts(text, output_path)
ElevenLabs implementation
import os

import requests


def create_with_elevenlabs(text, output_path):
    """Generate audiobook using ElevenLabs API.

    The text is split with chunk_text, each chunk is synthesized to
    /tmp/chunk_NNN.mp3, and the pieces are merged by concatenate_audio.

    Args:
        text: Text to narrate.
        output_path: Destination path for the merged audio.

    Returns:
        The result of concatenate_audio on success; False when any request
        fails or returns a non-200 status. On failure the chunk files written
        so far are removed.
    """
    api_key = os.environ.get("ELEVENLABS_API_KEY")
    voice_id = "21m00Tcm4TlvDq8ikWAM"  # Rachel - calm female voice
    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    request_headers = {
        "xi-api-key": api_key,
        "Content-Type": "application/json"
    }

    chunks = chunk_text(text, max_chars=4500)
    audio_files = []

    for i, chunk in enumerate(chunks):
        chunk_file = f"/tmp/chunk_{i:03d}.mp3"
        failure = None

        try:
            # requests never times out by default; bound connect and read
            # so a stalled connection cannot hang the whole conversion.
            response = requests.post(
                endpoint,
                headers=request_headers,
                json={
                    "text": chunk,
                    "model_id": "eleven_turbo_v2_5",
                    "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}
                },
                timeout=(10, 120),
            )
        except requests.RequestException as exc:
            failure = f"Error: request failed - {exc}"
        else:
            if response.status_code != 200:
                failure = f"Error: {response.status_code} - {response.text}"

        if failure is not None:
            print(failure)
            # Do not leave partial output behind in /tmp.
            for stale_file in audio_files:
                try:
                    os.unlink(stale_file)
                except OSError:
                    pass  # best-effort cleanup
            return False

        with open(chunk_file, "wb") as f:
            f.write(response.content)
        audio_files.append(chunk_file)

    return concatenate_audio(audio_files, output_path)
OpenAI TTS implementation
def create_with_openai(text, output_path):
    """Generate audiobook using OpenAI TTS API.

    The text is split with chunk_text, each chunk is synthesized to
    /tmp/chunk_NNN.mp3, and the pieces are merged by concatenate_audio.

    Args:
        text: Text to narrate.
        output_path: Destination path for the merged audio.

    Returns:
        The result of concatenate_audio on success; False when any request
        fails or returns a non-200 status. On failure the chunk files written
        so far are removed.
    """
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    chunks = chunk_text(text, max_chars=4000)
    audio_files = []

    for i, chunk in enumerate(chunks):
        chunk_file = f"/tmp/chunk_{i:03d}.mp3"
        failure = None

        try:
            # requests never times out by default; bound connect and read
            # so a stalled connection cannot hang the whole conversion.
            response = requests.post(
                "https://api.openai.com/v1/audio/speech",
                headers=request_headers,
                json={
                    "model": "tts-1",
                    "input": chunk,
                    "voice": "onyx",  # Deep male voice, good for essays
                    "response_format": "mp3"
                },
                timeout=(10, 120),
            )
        except requests.RequestException as exc:
            failure = f"Error: request failed - {exc}"
        else:
            if response.status_code != 200:
                failure = f"Error: {response.status_code} - {response.text}"

        if failure is not None:
            print(failure)
            # Do not leave partial output behind in /tmp.
            for stale_file in audio_files:
                try:
                    os.unlink(stale_file)
                except OSError:
                    pass  # best-effort cleanup
            return False

        with open(chunk_file, "wb") as f:
            f.write(response.content)
        audio_files.append(chunk_file)

    return concatenate_audio(audio_files, output_path)
gTTS implementation (free fallback)
def create_with_gtts(text, output_path):
    """Generate audiobook using gTTS (free, no API key).

    The text is split with chunk_text, each chunk is saved to
    /tmp/chunk_NNN.mp3, and the pieces are merged by concatenate_audio.

    Args:
        text: Text to narrate.
        output_path: Destination path for the merged audio.

    Returns:
        The result of concatenate_audio on success; False when gTTS reports
        an error for any chunk. On failure the chunk files written so far
        are removed.
    """
    import os

    # Only gTTS itself is needed here; merging is delegated to
    # concatenate_audio, so no other audio library is imported.
    from gtts import gTTS, gTTSError

    chunks = chunk_text(text, max_chars=4500)
    audio_files = []

    for i, chunk in enumerate(chunks):
        chunk_file = f"/tmp/chunk_{i:03d}.mp3"
        # Track the path before saving so a partially written file is
        # cleaned up too.
        audio_files.append(chunk_file)

        try:
            tts = gTTS(text=chunk, lang='en', slow=False)
            tts.save(chunk_file)
        except gTTSError as exc:
            print(f"Error: gTTS failed - {exc}")
            for stale_file in audio_files:
                try:
                    os.unlink(stale_file)
                except OSError:
                    pass  # best-effort cleanup
            return False

    return concatenate_audio(audio_files, output_path)
Audio concatenation
def concatenate_audio(audio_files, output_path):
    """Concatenate multiple audio files using ffmpeg.

    Writes an ffmpeg concat list to a uniquely named temporary file, runs
    ffmpeg with stream copy, and always removes the input files and the list
    file afterwards, whether or not ffmpeg succeeded.

    Args:
        audio_files: Paths of the audio files to join, in order.
        output_path: Destination path for the merged audio.

    Returns:
        True when ffmpeg exits with status 0; False when audio_files is
        empty, ffmpeg cannot be started, or ffmpeg exits with an error.
    """
    import os
    import subprocess
    import tempfile

    if not audio_files:
        return False

    # Create file list for ffmpeg. A unique name avoids clashes between
    # concurrent runs.
    list_fd, list_file = tempfile.mkstemp(prefix="audio_list_", suffix=".txt")

    try:
        with os.fdopen(list_fd, "w", encoding="utf-8") as f:
            for audio_file in audio_files:
                # The concat demuxer resolves relative paths against the
                # list file's directory, so write absolute paths. A single
                # quote inside a quoted entry is written as '\''.
                entry = os.path.abspath(audio_file).replace("'", "'\\''")
                f.write(f"file '{entry}'\n")

        # Concatenate with ffmpeg
        try:
            result = subprocess.run([
                "ffmpeg", "-y", "-f", "concat", "-safe", "0",
                "-i", list_file, "-c", "copy", output_path
            ], capture_output=True)
        except OSError:
            # ffmpeg is missing or not executable.
            return False

        return result.returncode == 0
    finally:
        # Cleanup temp files
        for leftover in [*audio_files, list_file]:
            try:
                os.unlink(leftover)
            except FileNotFoundError:
                pass  # already gone
Complete Example
#!/usr/bin/env python3
"""Create audiobook from web articles."""

import os
import re
import subprocess

import requests

# ... include all helper functions above ...


def main():
    """Fetch the configured articles and narrate them into one audiobook.

    Each URL is fetched and checked with verify_content (a warning is printed
    when the check fails, but the text is still used). The combined text is
    cleaned and passed to create_audiobook, and the outcome is printed.
    """
    # Fetch articles
    urls = [
        "https://example.com/article1",
        "https://example.com/article2"
    ]

    sections = []
    for url in urls:
        print(f"Fetching: {url}")
        article = fetch_article_content(url)
        if not verify_content(article):
            print(f"WARNING: Content from {url} may be incomplete")
        sections.append(f"\n\n{article}")
    all_text = "".join(sections)

    # Clean and convert
    narration = clean_text_for_tts(all_text)
    print(f"Total text: {len(narration)} characters")

    # Create audiobook
    success = create_audiobook(narration, "/root/audiobook.mp3")
    outcome = "Audiobook created successfully!" if success else "Failed to create audiobook"
    print(outcome)


if __name__ == "__main__":
    main()
TTS Provider Comparison
| Provider | Quality | Cost | API Key Required | Best For |
|---|---|---|---|---|
| ElevenLabs | Excellent | Paid | Yes | Professional audiobooks |
| OpenAI TTS | Very Good | Paid | Yes | General purpose |
| gTTS | Good | Free | No | Testing, budget projects |
Troubleshooting
"Content appears to be a summary"
- Use `curl` directly instead of web fetch tools
- Verify the URL is correct and accessible
- Check if the site requires JavaScript rendering
"API key not found"
- Check environment variables: `echo $OPENAI_API_KEY`
- Ensure keys are exported in the shell
- Fall back to gTTS if no paid API keys available
"Audio chunks don't sound continuous"
- Ensure chunking happens at sentence boundaries
- Consider adding small pauses between sections
- Use consistent voice settings across all chunks