Add docker support

This commit is contained in:
Mikhail Yevchenko
2026-04-01 20:41:52 +00:00
parent 27e97adbc8
commit a468a7a268
13 changed files with 417 additions and 609 deletions
+152 -144
View File
@@ -3,7 +3,9 @@ import os
import time
from typing import Optional
from urllib.parse import unquote
from urllib.parse import urlparse
import yt_dlp
from yt_dlp.networking import Request
logger = logging.getLogger(__name__)
@@ -52,6 +54,75 @@ def _set_cached_info(video_url: str, info: dict) -> None:
_cache_timestamps[key] = time.time()
# store segment mappings per video
# Outer key: the video_url passed to _rewrite_urls.
# Inner dict: segment id (from _get_segment_id) -> resolved upstream URL.
# NOTE(review): within the visible code an entry is only dropped by
# get_hls_segment_with_retry; confirm nothing else is expected to bound growth.
_segment_maps: dict[str, dict[str, str]] = {}
def _get_segment_id(full_url: str) -> str:
    """Build a stable segment id that survives signed query refreshes.

    The id is the hex MD5 digest of the URL's path component, so URLs that
    differ only outside the path (for example in a signed query string)
    produce the same id. When the URL has no path, everything before the
    first ``?`` is hashed instead.

    Args:
        full_url: URL of a playlist or media segment.

    Returns:
        A 32-character hexadecimal digest.
    """
    import hashlib

    parsed = urlparse(full_url)
    stable_key = parsed.path or full_url.split("?", 1)[0]
    # MD5 is only a compact fingerprint here, not a security primitive; saying
    # so explicitly keeps the call working on FIPS-enforcing Python builds.
    digest = hashlib.md5(stable_key.encode("utf-8"), usedforsecurity=False)
    return digest.hexdigest()
def _refresh_hls_url(video_url: str, attempts: int = 3) -> Optional[str]:
    """Re-extract until yt-dlp returns an HLS URL or we exhaust retries.

    Each attempt removes ``video_url`` from ``_session_cache`` and
    ``_cache_timestamps`` and then calls ``_get_video_info`` again
    (presumably forcing a fresh extraction; confirm against that helper).

    Args:
        video_url: Page URL of the video.
        attempts: Maximum number of extraction attempts.

    Returns:
        The first truthy ``"hls_url"`` found, or ``None`` if no attempt
        produced one.
    """
    latest = None
    for _attempt in range(attempts):
        for cache in (_session_cache, _cache_timestamps):
            cache.pop(video_url, None)
        latest = _get_video_info(video_url)
        hls_url = latest.get("hls_url")
        if hls_url:
            return hls_url

    # Leave a trace when only a direct (non-HLS) URL was offered.
    if latest and latest.get("direct_url"):
        logger.info("Extractor returned direct URL but no HLS URL")
    return None
def _get_request_headers(video_url: str) -> dict:
    """Return a copy of the HTTP headers recorded for ``video_url``.

    Reads ``raw_info["http_headers"]`` from the dict returned by
    ``_get_video_info``; a missing or empty value at either level yields an
    empty dict.
    """
    raw_info = _get_video_info(video_url).get("raw_info")
    if not raw_info:
        return {}
    headers = raw_info.get("http_headers")
    if not headers:
        return {}
    # Copy so callers cannot mutate the stored info dict.
    return dict(headers)
def _fetch_url(video_url: str, url: str) -> bytes:
    """Download ``url`` and return the response body.

    The request is issued through the object returned by ``_get_ydl()`` and
    carries the headers from ``_get_request_headers(video_url)``.

    Args:
        video_url: Page URL of the video whose headers should be sent.
        url: Resource to fetch.

    Returns:
        The full response body.
    """
    opener = _get_ydl()
    headers = _get_request_headers(video_url)
    with opener.urlopen(Request(url, headers=headers)) as resp:
        payload = resp.read()
    return payload
def _populate_nested_maps(video_url: str, content: str, base_url: str, video_id: str, visited: Optional[set[str]] = None, depth: int = 0) -> None:
    """Preload nested playlists so segment ids survive rebuilds after 410s.

    Walks the URI lines of ``content``; every not-yet-visited entry whose
    path ends in ``.m3u8`` is fetched, passed through ``_rewrite_urls`` for
    its mapping side effect, and then scanned recursively. Failures for a
    single nested playlist are logged and do not stop the walk.

    Args:
        video_url: Page URL of the video (key for headers and segment map).
        content: Text of the playlist to scan.
        base_url: URL ``content`` was fetched from; relative entries are
            resolved against it.
        video_id: Identifier forwarded to ``_rewrite_urls``.
        visited: URLs already processed; created on the outermost call.
        depth: Current nesting level; scanning stops at level 3.
    """
    from urllib.parse import urljoin, urlparse

    if visited is None:
        visited = set()
    if depth >= 3:
        return

    for raw in content.splitlines():
        entry = raw.strip()
        # Blank lines and tag/comment lines carry no URI.
        if not entry or entry.startswith("#"):
            continue

        if urlparse(entry).scheme:
            target = entry
        else:
            target = urljoin(base_url, entry)

        is_playlist = urlparse(target).path.endswith(".m3u8")
        if not is_playlist or target in visited:
            continue
        visited.add(target)

        try:
            nested = _fetch_url(video_url, target).decode("utf-8")
            # Return value is discarded; the call is made for the segment map.
            _rewrite_urls(nested, video_url, target, video_id)
            _populate_nested_maps(video_url, nested, target, video_id, visited, depth + 1)
        except Exception as exc:
            logger.info("Failed to preload nested playlist: %s", exc)
def _extract_hls_url(info: dict) -> Optional[str]:
"""Extract HLS URL from yt-dlp info dict."""
# First check top-level fields (these are set when there's only one format)
@@ -183,29 +254,27 @@ def get_stream_info(video_url: str) -> dict:
def get_hls_playlist(video_url: str) -> str:
"""Get HLS playlist content with rewritten URLs."""
import urllib.request
import urllib.error
# First call _get_video_info to ensure cache is populated (yt-dlp quirk)
info = _get_video_info(video_url)
hls_url = info.get("hls_url")
if not hls_url:
raise ValueError("No HLS stream available for this video")
hls_url = _refresh_hls_url(video_url)
if not hls_url:
raise ValueError("No HLS stream available for this video")
from utils import get_video_id
video_id = get_video_id(video_url)
# Try to get playlist, retry once if URL expired
for attempt in range(2):
try:
with urllib.request.urlopen(hls_url, timeout=SOCKET_TIMEOUT) as response:
playlist_content = response.read().decode("utf-8")
return _rewrite_urls(playlist_content, video_url, hls_url)
except urllib.error.HTTPError as e:
if e.code == 410 and attempt == 0:
# Clear cache and fetch fresh HLS URL
_session_cache.pop(video_url, None)
_cache_timestamps.pop(video_url, None)
playlist_content = _fetch_url(video_url, hls_url).decode("utf-8")
rewritten = _rewrite_urls(playlist_content, video_url, hls_url, video_id)
_populate_nested_maps(video_url, playlist_content, hls_url, video_id)
return rewritten
except Exception as e:
if "410" in str(e) and attempt == 0:
logger.info("HLS URL expired, fetching fresh HLS URL")
info = _get_video_info(video_url)
hls_url = info.get("hls_url")
hls_url = _refresh_hls_url(video_url)
if not hls_url:
raise ValueError("No HLS stream available for this video")
continue
@@ -220,162 +289,101 @@ def get_direct_video_url(video_url: str) -> str:
return info["direct_url"]
def _rewrite_urls(content: str, video_url: str, base_url: str, video_id: str) -> str:
    """Rewrite URI lines in an HLS playlist to point through the proxy.

    Every line that is neither blank nor a ``#`` tag/comment is treated as a
    URI: it is resolved against ``base_url``, stored in
    ``_segment_maps[video_url]`` under the id from ``_get_segment_id``, and
    replaced in the output by ``/hls/<video_id>/seg/<seg_id>``. All other
    lines are passed through unchanged.

    Args:
        content: Playlist text.
        video_url: Page URL of the video; key of the segment map to update.
        base_url: URL the playlist was fetched from.
        video_id: Identifier embedded in the generated proxy paths.

    Returns:
        The playlist text with URI lines replaced by proxy paths.
    """
    from urllib.parse import urljoin, urlparse

    # persist mapping across nested playlists
    segment_map = _segment_maps.setdefault(video_url, {})

    new_lines = []
    for line in content.split("\n"):
        # Playlists may use CRLF line endings; strip before inspecting so a
        # trailing "\r" never ends up inside a stored upstream URL and
        # whitespace-only lines are not mistaken for URIs.
        uri = line.strip()
        if not uri or uri.startswith("#"):
            new_lines.append(line)
            continue

        full_url = uri if urlparse(uri).scheme else urljoin(base_url, uri)
        # stable id must ignore expiring signatures in query strings
        seg_id = _get_segment_id(full_url)
        segment_map[seg_id] = full_url
        new_lines.append(f"/hls/{video_id}/seg/{seg_id}")

    return "\n".join(new_lines)
def get_hls_segment(video_url: str, segment_url: str) -> bytes:
    """Get HLS segment or sub-playlist content by its proxy segment id.

    The id is looked up in ``_segment_maps[video_url]``. If the map is
    missing or does not contain the id, the playlist is rebuilt once via
    ``get_hls_playlist`` and the lookup is repeated. The mapped upstream URL
    is then fetched with ``_fetch_url``. A body that looks like a playlist
    (``#EXTM3U`` near its start) is passed through ``_rewrite_urls`` before
    being returned.

    Args:
        video_url: Page URL of the video.
        segment_url: Segment id as generated by ``_rewrite_urls``.

    Returns:
        Raw segment bytes, or the rewritten playlist encoded as UTF-8.

    Raises:
        ValueError: ``"No segment map available"`` when no mapping exists
            after the rebuild, ``"Segment not found"`` when the id is
            unknown, or ``"HLS URL expired (410 Gone)"`` when the upstream
            fetch fails for any reason.
    """
    # Pure mapping-based resolution: segment_url is the segment id.
    seg_id = segment_url

    segment_map = _segment_maps.get(video_url) or {}
    if seg_id not in segment_map:
        # Map missing or stale (e.g., after expiry): rebuild it once.
        get_hls_playlist(video_url)
        segment_map = _segment_maps.get(video_url) or {}
    if not segment_map:
        raise ValueError("No segment map available")
    if seg_id not in segment_map:
        raise ValueError("Segment not found")
    full_url = segment_map[seg_id]

    try:
        data = _fetch_url(video_url, full_url)
    except Exception as e:
        # Every fetch failure is reported with the "410 Gone" message that
        # get_hls_segment_with_retry matches on to trigger a rebuild.
        raise ValueError("HLS URL expired (410 Gone)") from e

    # Detect playlist dynamically (covers sub-playlists too)
    text = data.decode("utf-8", errors="ignore")
    if "#EXTM3U" not in text.lstrip()[:200]:
        return data

    try:
        from utils import get_video_id

        video_id = get_video_id(video_url)
        return _rewrite_urls(text, video_url, full_url, video_id).encode("utf-8")
    except Exception:
        # Best effort: serve the playlist as fetched, but leave a trace
        # instead of hiding the failure.
        logger.warning(
            "Failed to rewrite nested playlist; returning it unmodified",
            exc_info=True,
        )
        return data
def get_hls_segment_with_retry(video_url: str, segment_url: str) -> bytes:
    """Get HLS segment with one rebuild after signed URL expiry.

    The segment is requested up to twice as-is. If both attempts fail with
    a ``ValueError`` whose message contains ``"410 Gone"``, the cached video
    info and the segment map for ``video_url`` are dropped, the playlist is
    rebuilt via ``get_hls_playlist``, and the segment is requested one last
    time.

    Args:
        video_url: Page URL of the video.
        segment_url: Segment id as generated by ``_rewrite_urls``.

    Returns:
        The bytes returned by ``get_hls_segment``.

    Raises:
        ValueError: Any ``ValueError`` that is not a "410 Gone" failure is
            re-raised immediately; a failure after the rebuild propagates
            unchanged.
    """
    for attempt in range(2):
        try:
            if video_url not in _segment_maps:
                get_hls_playlist(video_url)
            return get_hls_segment(video_url, segment_url)
        except ValueError as e:
            if "410 Gone" not in str(e):
                raise
            if attempt == 0:
                logger.info("Segment 410, retrying")
                continue
            logger.info("Segment still 410, rebuilding playlist and map")

    # Both plain attempts hit "410 Gone": discard every cached artefact for
    # this video so the rebuild starts from a fresh extraction.
    _session_cache.pop(video_url, None)
    _cache_timestamps.pop(video_url, None)
    _segment_maps.pop(video_url, None)
    get_hls_playlist(video_url)
    return get_hls_segment(video_url, segment_url)