Enhance HLS proxy functionality and improve caching mechanism

- Updated AGENTS.md to clarify dlp.py module usage and segment handling.
- Modified README.md to include ALLOW_LOCAL configuration for testing.
- Refactored app.py to streamline HLS proxy logic and improve error handling.
- Enhanced dlp.py to optimize caching and segment retrieval processes.
- Updated player.html to ensure proper JSON formatting for proxy URLs.
- Improved test_integration.py to validate HLS segment proxying and added test for Pornhub HLS extraction.
- Adjusted test_proxy.py to reflect changes in caching functions and data structure.
This commit is contained in:
Mikhail Yevchenko
2026-04-01 12:47:21 +00:00
parent 154f600fd2
commit 01a376ae21
7 changed files with 143 additions and 110 deletions
+86 -88
View File
@@ -1,22 +1,18 @@
import logging
import os
import time
import re
from typing import Optional
import yt_dlp
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Lifetime of a cache entry in seconds; overridable through the CACHE_TTL
# environment variable (default 31536000, i.e. 365 days).
CACHE_TTL = int(os.getenv("CACHE_TTL", 31536000))
# Timeout in seconds for outbound requests; overridable through the
# SOCKET_TIMEOUT environment variable (default 30).
SOCKET_TIMEOUT = int(os.getenv("SOCKET_TIMEOUT", 30))
# Cached data per cache key (see _get_cache_key).
_session_cache = {}
# time.time() value recorded when each cache key was last stored.
_cache_timestamps = {}
def _is_hls_url(url: str) -> bool:
return url.endswith(".m3u8") or "m3u8" in url
def _get_cache_key(video_url: str) -> str:
return video_url
@@ -28,110 +24,112 @@ def _is_cache_expired(video_url: str) -> bool:
return time.time() - _cache_timestamps[key] > CACHE_TTL
def _get_cached_session(video_url: str) -> Optional[dict]:
def _get_cached_info(video_url: str) -> Optional[dict]:
    """Return the cached entry for *video_url*, or None if absent or expired."""
    cache_key = _get_cache_key(video_url)
    if cache_key not in _session_cache:
        return None
    if _is_cache_expired(video_url):
        return None
    return _session_cache[cache_key]
def _set_cached_session(video_url: str, session_data: dict) -> None:
def _set_cached_info(video_url: str, info: dict) -> None:
    """Store *info* in the cache for *video_url* and record the store time.

    Args:
        video_url: URL the entry belongs to; mapped to a key by _get_cache_key.
        info: Data to cache for that URL.
    """
    key = _get_cache_key(video_url)
    # Only `info` is stored: the earlier assignment from `session_data`
    # referenced a name that is not defined in this function.
    _session_cache[key] = info
    # The timestamp is what _is_cache_expired compares against CACHE_TTL.
    _cache_timestamps[key] = time.time()
def clear_expired_cache() -> None:
    """Drop every cache entry, and its timestamp, that has expired."""
    # Collect first, delete afterwards: the cache must not change size
    # while it is being iterated.
    stale_keys = []
    for cache_key in _session_cache:
        if _is_cache_expired(cache_key):
            stale_keys.append(cache_key)

    for cache_key in stale_keys:
        del _session_cache[cache_key]
        del _cache_timestamps[cache_key]
def _extract_hls_url(info: dict) -> Optional[str]:
"""Extract HLS URL from yt-dlp info dict."""
if info.get("formats"):
for f in reversed(info["formats"]):
if f.get("protocol") == "m3u8_native":
url = f.get("manifest_url") or f.get("url")
if url and ".m3u8" in url:
return url
return None
# Return the HLS playlist text for video_url, downloading and caching it on a
# cache miss. Raises ValueError when yt-dlp reports no HLS stream.
# NOTE(review): a second get_hls_playlist is defined further down in this
# listing and would replace this one at import time; this looks like the
# pre-refactor version of the function - confirm before relying on it.
def get_hls_playlist(video_url: str) -> str:
# Serve the playlist straight from the cache when one is stored.
cached = _get_cached_session(video_url)
if cached and "hls_playlist" in cached:
return cached["hls_playlist"]
# A direct playlist URL needs no extraction step.
if _is_hls_url(video_url):
hls_url = video_url
else:
ydl_opts = {
"quiet": True,
"no_warnings": True,
"socket_timeout": int(os.getenv("SOCKET_TIMEOUT", 30)),
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(video_url, download=False)
# NOTE(review): relies on a top-level "hls" key in the info dict -
# verify that yt-dlp actually provides it.
if not info or "hls" not in info or not info["hls"]:
raise ValueError("No HLS stream available for this video")
hls_url = info["hls"]
import urllib.request
# Download the playlist body (fixed 30 second timeout) and decode it as UTF-8.
with urllib.request.urlopen(hls_url, timeout=30) as response:
playlist_content = response.read().decode("utf-8")
# Cache the text together with the URLs it came from; get_hls_segment reads
# "hls_url" from this entry.
session_data = {
"hls_playlist": playlist_content,
"hls_url": hls_url,
"video_url": video_url,
}
_set_cached_session(video_url, session_data)
return playlist_content
# Return the raw bytes of the segment named segment_name, fetched relative to
# the cached playlist URL of video_url.
# NOTE(review): a second get_hls_segment is defined further down in this
# listing and would replace this one at import time; this looks like the
# pre-refactor version of the function - confirm before relying on it.
def get_hls_segment(video_url: str, segment_name: str) -> bytes:
cached = _get_cached_session(video_url)
# No usable cache entry yet: fetching the playlist stores one, then re-read it.
if not cached or "hls_url" not in cached:
get_hls_playlist(video_url)
cached = _get_cached_session(video_url)
hls_url = cached["hls_url"]
# Everything before the last "/" of the playlist URL is the base for segments.
base_url = hls_url.rsplit("/", 1)[0]
# Drop one leading slash so the join below does not produce "//".
if segment_name.startswith("/"):
segment_name = segment_name[1:]
segment_url = f"{base_url}/{segment_name}"
import urllib.request
# Fetch the segment with a fixed 30 second timeout.
with urllib.request.urlopen(segment_url, timeout=30) as response:
return response.read()
def get_stream_info(video_url: str) -> dict:
cached = _get_cached_session(video_url)
def _get_video_info(video_url: str) -> dict:
    """Return title, thumbnail and HLS URL for *video_url*.

    A cached entry is returned when one is available. A direct playlist URL
    (see _is_hls_url) yields a placeholder entry without calling yt-dlp and
    without being cached. Otherwise the page is extracted with yt-dlp and
    the result is cached.

    Args:
        video_url: Page URL or direct HLS playlist URL.

    Returns:
        A dict with ``title``, ``thumbnail`` and ``hls_url`` (``hls_url`` may
        be None when no HLS format was found). Extracted entries also carry
        the full yt-dlp info dict under ``raw_info``.

    Raises:
        ValueError: If yt-dlp returns no info for the URL.
    """
    cached = _get_cached_info(video_url)
    if cached:
        return cached

    if _is_hls_url(video_url):
        # Already a playlist URL: there is nothing to extract.
        return {
            "title": "Test Video",
            "hls_url": video_url,
            "thumbnail": None,
        }

    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        # One key only: the module constant already carries the env override.
        "socket_timeout": SOCKET_TIMEOUT,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(video_url, download=False)

    if not info:
        raise ValueError("Could not extract video info")

    result = {
        "title": info.get("title"),
        "thumbnail": info.get("thumbnail"),
        "hls_url": _extract_hls_url(info),
        "raw_info": info,
    }
    _set_cached_info(video_url, result)
    return result
def get_stream_info(video_url: str) -> dict:
    """Return the public subset of the video info: title, hls_url, thumbnail."""
    details = _get_video_info(video_url)
    public_fields = ("title", "hls_url", "thumbnail")
    return {field: details[field] for field in public_fields}
def get_hls_playlist(video_url: str) -> str:
    """Return the HLS playlist for *video_url* with URIs routed via the proxy.

    Args:
        video_url: Page URL or direct HLS playlist URL.

    Returns:
        The playlist text after _rewrite_urls has been applied.

    Raises:
        ValueError: If no HLS URL is known for the video.
    """
    import urllib.request

    info = _get_video_info(video_url)
    hls_url = info["hls_url"]
    if not hls_url:
        raise ValueError("No HLS stream available for this video")

    with urllib.request.urlopen(hls_url, timeout=SOCKET_TIMEOUT) as response:
        playlist_content = response.read().decode("utf-8")
        final_url = getattr(response, "url", None)

    # Relative URIs must be resolved against the URL the playlist was actually
    # served from, which differs from hls_url after a redirect. Fall back to
    # the requested URL if the response does not expose a string URL.
    base_url = final_url if isinstance(final_url, str) and final_url else hls_url
    return _rewrite_urls(playlist_content, video_url, base_url)
def _rewrite_urls(content: str, video_url: str, base_url: str) -> str:
"""Rewrite relative URLs in HLS playlist to point through proxy."""
from urllib.parse import urljoin, quote
lines = content.split("\n")
new_lines = []
for line in lines:
if line and not line.startswith("#") and line.startswith("http"):
abs_url = line
elif line and not line.startswith("#"):
abs_url = urljoin(base_url, line)
proxy_url = f"/hls?url={quote(video_url, safe='')}&path={quote(abs_url, safe='')}"
new_lines.append(proxy_url)
continue
new_lines.append(line)
return "\n".join(new_lines)
def get_hls_segment(video_url: str, segment_url: str) -> bytes:
    """Fetch an HLS segment or sub-playlist and return its bytes.

    Sub-playlists (URL path ending in ``.m3u8``) are passed through
    _rewrite_urls so their entries keep going through the proxy; anything
    else is returned as downloaded.

    Args:
        video_url: Original video URL, forwarded to _rewrite_urls.
        segment_url: Absolute URL of the resource, possibly percent-encoded.

    Returns:
        Raw segment bytes, or the UTF-8 encoded rewritten playlist.

    Raises:
        ValueError: If the upstream server answers 410 Gone.
        urllib.error.HTTPError: For any other HTTP error status.
    """
    import urllib.error
    import urllib.request
    from urllib.parse import unquote, urlsplit

    # NOTE(review): if the web framework already percent-decodes the query
    # parameter, this second unquote alters URLs that contain literal %XX
    # sequences - confirm against the caller.
    decoded_url = unquote(segment_url)

    # NOTE(review): the URL is fetched as given and urlopen also accepts
    # non-HTTP schemes such as file: - make sure the caller validates it.
    try:
        with urllib.request.urlopen(decoded_url, timeout=SOCKET_TIMEOUT) as response:
            data = response.read()
            final_url = getattr(response, "url", None)
    except urllib.error.HTTPError as e:
        if e.code == 410:
            raise ValueError("HLS URL expired (410 Gone)") from e
        raise

    # Test the URL path only, so playlists with a query string
    # (e.g. "index.m3u8?token=...") are still recognised.
    if urlsplit(decoded_url).path.endswith(".m3u8"):
        # Resolve relative entries against the post-redirect location when
        # the response exposes it as a string.
        if not (isinstance(final_url, str) and final_url):
            final_url = decoded_url
        return _rewrite_urls(data.decode("utf-8"), video_url, final_url).encode("utf-8")
    return data