Files
xmpp-radio-tower/ytdlp_handler.py
2025-12-13 18:26:54 +00:00

337 lines
11 KiB
Python

#!/usr/bin/env python3
"""
YouTube-DLP Integration Module.
Provides async wrappers for searching and downloading content via yt-dlp.
Includes parsing logic for yt-dlp's JSON output and progress updates.
"""
import os
import asyncio
import subprocess
import json
import re
from pathlib import Path
from typing import Optional, List, Dict, Callable
from dataclasses import dataclass
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@dataclass
class SearchResult:
    """Represents a single search result from yt-dlp.

    Attributes:
        id: Identifier of the entry.
        title: Title of the entry.
        duration: Length of the entry in seconds.
        uploader: Name of the uploader.
        url: URL of the entry.
        thumbnail: Thumbnail URL, or ``None`` when not available.
        view_count: Number of views (defaults to 0).
    """

    id: str
    title: str
    duration: float
    uploader: str
    url: str
    thumbnail: Optional[str] = None
    view_count: int = 0

    @property
    def duration_str(self) -> str:
        """Format the duration in seconds as ``M:SS`` or ``H:MM:SS``.

        A falsy duration (``0`` or ``None``) is rendered as ``"0:00"``.
        ``"??:??"`` is returned when the duration is negative or cannot be
        converted to a whole number of seconds.
        """
        try:
            total_seconds = int(self.duration) if self.duration else 0
        except (TypeError, ValueError, OverflowError):
            # int() raises ValueError for NaN and OverflowError for +/-inf.
            return "??:??"
        if total_seconds < 0:
            return "??:??"
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        if hours > 0:
            return f"{hours}:{minutes:02d}:{seconds:02d}"
        return f"{minutes}:{seconds:02d}"

    def to_dict(self) -> Dict:
        """Return all fields plus the formatted ``duration_str`` as a dict."""
        return {
            'id': self.id,
            'title': self.title,
            'duration': self.duration,
            'duration_str': self.duration_str,
            'uploader': self.uploader,
            'url': self.url,
            'thumbnail': self.thumbnail,
            'view_count': self.view_count
        }
@dataclass
class DownloadProgress:
    """Snapshot of the state of a download that is currently running.

    Attributes:
        filename: Name of the file the update refers to.
        status: Short state label for the download.
        percent: Completion percentage (defaults to 0.0).
        speed: Transfer speed as text; empty when unknown.
        eta: Estimated time remaining as text; empty when unknown.
        error: Error description; empty when there is none.
    """

    filename: str
    status: str
    percent: float = 0.0
    speed: str = ""
    eta: str = ""
    error: str = ""
class YtDlpHandler:
    """Manages yt-dlp subprocesses.

    Offers coroutine wrappers that run the yt-dlp executable to search,
    fetch metadata and download audio, and keeps track of the download
    processes that are currently running so they can be cancelled.
    """

    def __init__(
        self,
        download_directory: str,
        audio_format: str = "bestaudio",
        max_filesize: int = 50,
        max_search_results: int = 10
    ):
        """Prepare the download directory and locate the yt-dlp executable.

        Args:
            download_directory: Directory downloads are written to; it is
                created (including parents) when missing.
            audio_format: Format selector passed to yt-dlp's ``-f`` option.
            max_filesize: Size limit in MiB; ``0`` or a negative value
                disables the limit.
            max_search_results: Number of results requested per search.

        Raises:
            RuntimeError: If no usable yt-dlp executable is found.
        """
        self.download_directory = Path(download_directory)
        self.download_directory.mkdir(parents=True, exist_ok=True)
        self.audio_format = audio_format
        # Stored in bytes; 0 means "no limit".
        self.max_filesize = max_filesize * 1024 * 1024 if max_filesize > 0 else 0
        self.max_search_results = max_search_results
        self._ytdlp_path = self._find_ytdlp()
        # Running download processes, keyed by the URL being downloaded.
        self._active_downloads: Dict[str, asyncio.subprocess.Process] = {}

    def _find_ytdlp(self) -> str:
        """Locates the yt-dlp binary in the system path.

        Each candidate name is probed with ``--version``; the first one that
        exits successfully is returned.

        Raises:
            RuntimeError: If none of the candidates can be run.
        """
        for name in ['yt-dlp', 'yt-dlp.exe', 'youtube-dl']:
            try:
                result = subprocess.run(
                    [name, '--version'],
                    capture_output=True,
                    text=True,
                    timeout=30
                )
            except (OSError, subprocess.SubprocessError):
                # Missing, not executable, or hung: try the next candidate.
                continue
            if result.returncode == 0:
                logger.info(f"Found yt-dlp: {name} (version {result.stdout.strip()})")
                return name
        raise RuntimeError("yt-dlp not found. Install with: pip install yt-dlp")

    @staticmethod
    def _stop_process(process, force: bool = False) -> None:
        """Signal *process* to stop if it is still running.

        Does nothing when *process* is ``None`` or has already exited, and
        ignores the race where it exits just before the signal is sent.
        """
        if process is None or process.returncode is not None:
            return
        try:
            if force:
                process.kill()
            else:
                process.terminate()
        except ProcessLookupError:
            pass

    @staticmethod
    def _result_from_json(data: Dict) -> "SearchResult":
        """Build a SearchResult from one decoded yt-dlp JSON object."""
        duration = data.get('duration')
        if duration is None:
            duration = 0
        else:
            try:
                duration = float(duration)
            except (TypeError, ValueError):
                duration = 0
        # ``or`` also covers keys that are present but null in the JSON.
        return SearchResult(
            id=data.get('id') or '',
            title=data.get('title') or 'Unknown',
            duration=duration,
            uploader=data.get('uploader') or data.get('channel') or 'Unknown',
            url=data.get('url') or data.get('webpage_url') or '',
            thumbnail=data.get('thumbnail'),
            view_count=int(data.get('view_count', 0) or 0)
        )

    async def search(self, query: str, source: str = "youtube") -> List["SearchResult"]:
        """Performs a non-download search using yt-dlp's internal search operators.

        Args:
            query: Free-text search terms.
            source: One of ``"youtube"``, ``"soundcloud"`` or ``"bandcamp"``;
                any other value falls back to the YouTube prefix.

        Returns:
            The parsed results. An empty list is returned when the search
            times out (60 seconds) or fails.
        """
        # NOTE(review): confirm the installed yt-dlp supports every prefix
        # listed here (in particular "bcsearch").
        search_prefix = {
            "youtube": "ytsearch",
            "soundcloud": "scsearch",
            "bandcamp": "bcsearch"
        }.get(source, "ytsearch")
        search_query = f"{search_prefix}{self.max_search_results}:{query}"
        cmd = [
            self._ytdlp_path,
            search_query,
            '--dump-json',
            '--no-download',
            '--flat-playlist',
            '--ignore-errors',
            '--no-warnings'
        ]
        process = None
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, _ = await asyncio.wait_for(
                process.communicate(),
                timeout=60
            )
            results = []
            # yt-dlp prints one JSON object per line.
            for line in stdout.decode(errors='replace').strip().split('\n'):
                if not line:
                    continue
                try:
                    results.append(self._result_from_json(json.loads(line)))
                except json.JSONDecodeError:
                    continue
                except Exception as e:
                    logger.debug(f"Error parsing result: {e}")
                    continue
            return results
        except asyncio.TimeoutError:
            logger.error("Search timed out")
            return []
        except Exception as e:
            logger.error(f"Search error: {e}")
            return []
        finally:
            # After a timeout or cancellation the child is still running.
            self._stop_process(process, force=True)

    @staticmethod
    def _extract_output_path(line: str) -> Optional[str]:
        """Return the file path announced by a yt-dlp output line, if any."""
        if '[download] Destination:' in line:
            return line.split('Destination:', 1)[1].strip()
        if 'has already been downloaded' in line:
            match = re.search(r'\[download\] (.+) has already been downloaded', line)
            return match.group(1) if match else None
        if '[ExtractAudio] Destination:' in line:
            return line.split('Destination:', 1)[1].strip()
        return None

    async def download(
        self,
        url: str,
        progress_callback: Optional[Callable[["DownloadProgress"], None]] = None
    ) -> Optional[str]:
        """Downloads audio and converts it to Opus.

        Args:
            url: URL to download. Also used as the key under which the
                running process is tracked for cancellation.
            progress_callback: Optional callable invoked with a
                DownloadProgress for every output line that carries progress.

        Returns:
            Path of the resulting file, or ``None`` when the download failed.

        Raises:
            asyncio.CancelledError: Re-raised after the subprocess has been
                told to terminate.
        """
        output_template = str(self.download_directory / "%(title)s.%(ext)s")
        cmd = [
            self._ytdlp_path,
            '-x',
            '--audio-format', 'opus',
            '--audio-quality', '0',
            '-o', output_template,
            '--no-playlist',
            '--no-warnings',
            '--newline',
            '--restrict-filenames',
        ]
        if self.max_filesize > 0:
            cmd.extend(['--max-filesize', str(self.max_filesize)])
        cmd.extend(['-f', self.audio_format])
        # "--" ends option parsing, so a URL that starts with "-" cannot be
        # interpreted as a yt-dlp option.
        cmd.extend(['--', url])

        # Bound before the try block so every handler below can rely on it.
        download_id = url
        process = None
        stderr_task = None
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            self._active_downloads[download_id] = process
            # Drain stderr while stdout is consumed; otherwise the child can
            # block once the stderr pipe buffer fills up.
            stderr_task = asyncio.ensure_future(process.stderr.read())
            output_file = None
            async for raw_line in process.stdout:
                line = raw_line.decode(errors='replace').strip()
                logger.debug(f"yt-dlp: {line}")
                if progress_callback:
                    progress = self._parse_progress(line)
                    if progress:
                        progress_callback(progress)
                # Parse stdout to find where the file is being saved
                announced = self._extract_output_path(line)
                if announced is not None:
                    output_file = announced
            await process.wait()
            stderr_output = await stderr_task
            if process.returncode == 0:
                if not output_file or not os.path.exists(output_file):
                    # Fallback: check most recent file in download dir.
                    # NOTE(review): this can pick an unrelated file if yt-dlp
                    # exits with 0 without producing new output.
                    recent_files = sorted(
                        self.download_directory.glob('*.opus'),
                        key=lambda x: x.stat().st_mtime,
                        reverse=True
                    )
                    if recent_files:
                        output_file = str(recent_files[0])
                if output_file and os.path.exists(output_file):
                    logger.info(f"Downloaded: {output_file}")
                    return output_file
            logger.error(f"Download failed: {stderr_output.decode(errors='replace')}")
            return None
        except asyncio.CancelledError:
            # Cleanup happens in ``finally``; never swallow cancellation.
            raise
        except Exception as e:
            logger.error(f"Download error: {e}")
            return None
        finally:
            # Only drop the entry if it still refers to this process.
            if process is not None and self._active_downloads.get(download_id) is process:
                del self._active_downloads[download_id]
            self._stop_process(process)
            if stderr_task is not None and not stderr_task.done():
                stderr_task.cancel()

    def _parse_progress(self, line: str) -> Optional["DownloadProgress"]:
        """Parses standard yt-dlp stdout progress lines.

        Returns:
            A "downloading" update for ``[download] N% ...`` lines, a
            "converting" update for post-processing lines, otherwise ``None``.
        """
        # Only "[download]" lines carry progress; a bare "N%" elsewhere
        # (for example inside a title) must not be reported as progress.
        percent_match = re.search(r'\[download\]\s+(\d+\.?\d*)%', line)
        if percent_match:
            speed_match = re.search(r'at\s+(\S+/s)', line)
            eta_match = re.search(r'ETA\s+(\S+)', line)
            return DownloadProgress(
                filename="",
                status="downloading",
                percent=float(percent_match.group(1)),
                speed=speed_match.group(1) if speed_match else "",
                eta=eta_match.group(1) if eta_match else ""
            )
        if 'Deleting original file' in line or '[ExtractAudio]' in line:
            return DownloadProgress(
                filename="",
                status="converting",
                percent=100.0
            )
        return None

    async def get_info(self, url: str) -> Optional[Dict]:
        """Fetch yt-dlp's JSON metadata for *url* without downloading.

        Returns:
            The decoded JSON object, or ``None`` when yt-dlp exits with a
            non-zero status, times out (30 seconds) or another error occurs.
        """
        cmd = [
            self._ytdlp_path,
            '--dump-json',
            '--no-download',
            '--no-warnings',
            # "--" keeps a URL starting with "-" from being read as an option.
            '--',
            url
        ]
        process = None
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, _ = await asyncio.wait_for(
                process.communicate(),
                timeout=30
            )
            if process.returncode == 0:
                return json.loads(stdout.decode(errors='replace'))
        except Exception as e:
            logger.error(f"Error getting info: {e}")
        finally:
            # After a timeout or cancellation the child is still running.
            self._stop_process(process, force=True)
        return None

    def cancel_download(self, url: str) -> bool:
        """Ask the download tracked under *url* to terminate.

        Returns:
            ``True`` if a download was tracked under *url*, else ``False``.
        """
        process = self._active_downloads.get(url)
        if process is None:
            return False
        self._stop_process(process)
        return True

    def cancel_all_downloads(self):
        """Ask every tracked download to terminate and forget all of them."""
        for process in list(self._active_downloads.values()):
            self._stop_process(process)
        self._active_downloads.clear()