Compare commits
12 Commits
244a624bb9
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| a468a7a268 | |||
| 27e97adbc8 | |||
| 3ec080dbd3 | |||
| 9bbbbc5a65 | |||
| 198f85b67d | |||
| 15b9702956 | |||
| 34e49c0d9f | |||
| 01a376ae21 | |||
| 154f600fd2 | |||
| 9f107e388c | |||
| 4548f455a3 | |||
| 30ecd60601 |
@@ -3,10 +3,34 @@
|
||||
|
||||
"image": "mcr.microsoft.com/devcontainers/base:trixie",
|
||||
|
||||
"features": {
|
||||
"ghcr.io/devcontainers/features/python:1.8.0": {
|
||||
"toolsToInstall": "flake8,virtualenv,pytest,pylint"
|
||||
},
|
||||
"ghcr.io/devcontainers-extra/features/apt-get-packages:1.0.8": {
|
||||
"packages": "ffmpeg,nodejs"
|
||||
},
|
||||
"ghcr.io/devcontainers/features/docker-in-docker:2.16.1": {
|
||||
"moby": false
|
||||
}
|
||||
},
|
||||
|
||||
"runArgs": ["--add-host=ollama:host-gateway"],
|
||||
|
||||
"containerEnv": {
|
||||
"OLLAMA_HOST": "ollama:11434"
|
||||
}
|
||||
},
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-python.python"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"forwardPorts": [
|
||||
5000
|
||||
]
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
*
|
||||
|
||||
!*.py
|
||||
!templates/*
|
||||
!tests/*
|
||||
!requirements.txt
|
||||
@@ -4,4 +4,3 @@ __pycache__/
|
||||
.venv/
|
||||
venv/
|
||||
*.log
|
||||
.vscode/
|
||||
Vendored
+23
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Flask",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "flask",
|
||||
"env": {
|
||||
"FLASK_APP": "app.py",
|
||||
"FLASK_ENV": "development"
|
||||
},
|
||||
"args": ["run", "--host=0.0.0.0", "--port=5000"]
|
||||
},
|
||||
{
|
||||
"name": "Pytest",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "pytest",
|
||||
"args": ["tests/", "-v"]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -18,7 +18,7 @@ Obviously, we need to temporarily cache yt-dlp sessions for some period to avoid
|
||||
|
||||
## Implementation
|
||||
|
||||
To implement the yt-dlp proxy server, you can use Python and the Flask library to create a web server. You will also need the yt-dlp library to interact with YouTube and other platforms and get HLS streams.
|
||||
To implement the yt-dlp proxy server, you can use Python and the Flask library to create a web server. You will also need the yt-dlp library to interact with YouTube and other platforms and get HLS streams. Examine `yt_dlp/YoutubeDL.py` in the virtual environment where yt-dlp is installed to understand how to use yt-dlp to get HLS playlists and segments.
|
||||
|
||||
As an HTML templating engine, you can use Jinja2, which is built into Flask, for dynamically generating the page with the HLS player based on the video URL. Styles: `<link rel="stylesheet" href="https://unpkg.com/mvp.css">` for a simple and clean design.
|
||||
|
||||
@@ -34,17 +34,28 @@ As an HTML templating engine, you can use Jinja2, which is built into Flask, for
|
||||
8. Errors and logs — only practical minimum: understandable HTTP errors and basic structured logging.
|
||||
9. Configuration only through environment variables: port, cache TTL, log level and timeouts.
|
||||
10. HTTPS not in application: TLS terminates at external reverse proxy (Nginx/Caddy/Traefik), Flask runs behind it.
|
||||
11. Tests only on critical path: URL parsing, cache, playlist and segment proxying, error handling.
|
||||
11. TDD: Write a single integration test that downloads a few video URLs. It should query these videos through the proxy and check that it works properly (yt-dlp is a fully capable substitute for a browser and can be configured to output all necessary debug information, such as headers and cookies). Also write tests for critical functions like URL parsing, caching, playlist and segment proxying, and error handling. All tests should be in the `tests/` folder and use `pytest` as the testing framework. All tests should generate maximum debugging output to make it easy to understand what went wrong in case of failure.
|
||||
12. yt-dlp usage restriction (critical):
|
||||
- yt-dlp MUST be used strictly as a Python library (`import yt_dlp`).
|
||||
- DO NOT invoke yt-dlp via CLI (`yt-dlp` binary or `python -m yt_dlp`) anywhere in the application or tests.
|
||||
- Integration tests MUST simulate playback using the library or HTTP requests through the proxy, not by spawning yt-dlp subprocesses.
|
||||
- Any use of subprocess to call yt-dlp is considered a violation of architecture.
|
||||
13. Documentation and license: only `README.md`, `AGENTS.md` and MIT license.
|
||||
|
||||
### Common Pitfalls
|
||||
|
||||
1. Do not disable tests or skip critical paths. If something is not working, fix it instead of skipping tests.
|
||||
2. Do not create workarounds. They are not allowed. If something is not working, fix it instead of creating a workaround.
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
- app.py - main Flask application file that handles incoming HTTP requests and interacts with yt-dlp through functions from dlp.py.
|
||||
- dlp.py - module for interacting with yt-dlp, containing functions to get HLS playlists and segments.
|
||||
- dlp.py - module for interacting with yt-dlp, containing functions to get HLS playlists and segments. Examine yt_dlp/YoutubeDL.py in the venv to understand how to use yt-dlp to get HLS playlists and segments.
|
||||
functions:
|
||||
- get_hls_playlist(video_url): gets HLS playlist for the specified video as a string that can be returned to the client. The segment list should be filtered to only include those available for the given video and supported by yt-dlp.
|
||||
- get_hls_segment(video_url, segment_name): gets the specified video segment: downloads it using yt-dlp and returns its content as bytes that can be returned to the client. It should also use yt-dlp to download the segment since only yt-dlp can handle the necessary authentication and access control for the video content.
|
||||
It should also rewrite segment filenames in case they expire during or before download, so that they can be requested through the proxy using a predictable URL structure.
|
||||
- get_hls_segment(video_url, segment_filename): gets the specified video segment for rewritten filename: downloads it using yt-dlp and returns its content as bytes that can be returned to the client. It should also use yt-dlp to download the segment since only yt-dlp can handle the necessary authentication and access control for the video content.
|
||||
|
||||
caching:
|
||||
- Caching of yt-dlp sessions will be implemented using a simple in-memory dictionary that will store video parsing results for each VIDEO_ID. No complex in-memory solutions, just a dictionary with TTL for each key. TTL will be set to 365 days, which will effectively cache results and minimize repeated requests to yt-dlp.
|
||||
@@ -52,8 +63,8 @@ As an HTML templating engine, you can use Jinja2, which is built into Flask, for
|
||||
- tests/ - folder for tests that will check critical application paths such as URL parsing, caching, playlist and segment proxying, and error handling.
|
||||
1. functions tests
|
||||
2. integration tests for the main application flow:
|
||||
A single integration test that consists of a server serving a single test video (use ffmpeg to generate it). The test should query that server through the proxy and check that it works properly.
|
||||
yt-dlp expects the server to provide a JavaScript player that it can recognize. The server should also set a cookie on the video page and require that cookie for the HLS playlist and segment requests. This ensures that only requests coming from the video page can access the HLS content, providing a basic level of security and preventing unauthorized access to the video streams.
|
||||
- test that the proxy can successfully retrieve and return HLS playlists and segments for valid video URLs. HTTP logging should show which type of URL is parsed and served, and whether the cache is hit or missed. The test should also print out the headers and partial content of the playlist and segment responses (as hex) to verify that they are correct and contain the expected data.
|
||||
- test that the proxy correctly handles invalid URLs, unsupported platforms, and other error scenarios, returning appropriate HTTP error responses.
|
||||
- templates/index.html - simple HTML file with form for video URL input.
|
||||
- templates/player.html - HTML file with HLS player that will be used to play video obtained through proxy.
|
||||
- requirements.txt
|
||||
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
FROM python:3.14.3-alpine
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install ffmpeg for HLS handling and Node.js as the JavaScript runtime for yt-dlp
|
||||
RUN apk add --no-cache ffmpeg nodejs
|
||||
|
||||
# Copy application
|
||||
COPY . .
|
||||
|
||||
EXPOSE 5000
|
||||
|
||||
# Use production WSGI server
|
||||
CMD ["gunicorn", "-w", "1", "-b", "0.0.0.0:5000", "--timeout", "60", "--access-logfile", "-", "--error-logfile", "-", "--log-level", "info", "app:app"]
|
||||
@@ -9,17 +9,37 @@ A simple Flask proxy server that uses yt-dlp to fetch HLS streams and serves the
|
||||
- URL validation with allowed domains
|
||||
- HTML5 video player with hls.js
|
||||
- Configurable via environment variables
|
||||
- Full metadata display (title, description, views, likes, tags, etc.)
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Option 1: Direct Python
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
cp .env.example .env
|
||||
python app.py
|
||||
```
|
||||
|
||||
Visit http://localhost:5000 and enter a video URL.
|
||||
|
||||
### Option 2: Docker
|
||||
|
||||
```bash
|
||||
# Build and run
|
||||
docker-compose up -d
|
||||
|
||||
# Or pull from GitHub Container Registry (if available)
|
||||
docker pull ghcr.io/yourusername/yt-dlp-proxy:latest
|
||||
docker run -p 5000:5000 ghcr.io/yourusername/yt-dlp-proxy:latest
|
||||
```
|
||||
|
||||
### Option 3: Docker Build
|
||||
|
||||
```bash
|
||||
docker build -t yt-dlp-proxy .
|
||||
docker run -p 5000:5000 yt-dlp-proxy
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
| Variable | Default | Description |
|
||||
@@ -30,13 +50,97 @@ Visit http://localhost:5000 and enter a video URL.
|
||||
| SOCKET_TIMEOUT | 30 | Socket timeout for requests |
|
||||
| VALIDATION_ENABLED | true | Enable URL validation |
|
||||
| ALLOWED_DOMAINS | youtube.com,youtu.be,pornhub.com,xvideos.com | Allowed video domains |
|
||||
| ALLOW_LOCAL | true | Allow localhost/127.0.0.1 URLs (for testing) |
|
||||
|
||||
### Docker Environment Variables
|
||||
|
||||
```bash
|
||||
docker run -e PORT=5000 -e LOG_LEVEL=INFO -p 5000:5000 yt-dlp-proxy
|
||||
```
|
||||
|
||||
### Docker Compose Example
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
yt-dlp-proxy:
|
||||
image: yt-dlp-proxy
|
||||
ports:
|
||||
- "5000:5000"
|
||||
environment:
|
||||
- PORT=5000
|
||||
- LOG_LEVEL=INFO
|
||||
- CACHE_TTL=31536000
|
||||
- ALLOWED_DOMAINS=youtube.com,youtu.be,pornhub.com,xvideos.com
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
## Routes
|
||||
|
||||
- `/` - Home page with video URL input
|
||||
- `/player?url=VIDEO_URL` - Video player page
|
||||
- `/hls/<query>/index.m3u8` - HLS playlist proxy
|
||||
- `/hls/<query>/<segment>.ts` - HLS segment proxy
|
||||
- `/player?url=VIDEO_URL` - Video player page with full metadata
|
||||
- `/hls/<encoded_url>--index.m3u8` - HLS main playlist
|
||||
- `/hls/<encoded_url>--<segment>.ts` - HLS segment
|
||||
|
||||
## Connecting External Player
|
||||
|
||||
### URL Format
|
||||
|
||||
The proxy uses this format: `/hls/<encoded_video_url>--<filename>`
|
||||
|
||||
Example for PornHub:
|
||||
```
|
||||
/hls/https%3A%2F%2Frt.pornhub.com%2Fview_video.php%3Fviewkey%3D69c13273df690--index.m3u8
|
||||
```
|
||||
|
||||
### Using with yt-dlp (as browser substitute)
|
||||
|
||||
```bash
|
||||
# Get playlist URL via proxy
|
||||
yt-dlp --hls-use-mpegts --no-download --print url \
|
||||
"http://localhost:5000/hls/https%3A%2F%2Frt.pornhub.com%2Fview_video.php%3Fviewkey%3D69c13273df690--index.m3u8"
|
||||
```
|
||||
|
||||
### Using with VLC
|
||||
|
||||
1. Open Network Stream (Ctrl+N)
|
||||
2. Enter: `http://localhost:5000/hls/<encoded_url>--index.m3u8`
|
||||
|
||||
### Using with mpv
|
||||
|
||||
```bash
|
||||
mpv "http://localhost:5000/hls/https%3A%2F%2Frt.pornhub.com%2Fview_video.php%3Fviewkey%3D69c13273df690--index.m3u8"
|
||||
```
|
||||
|
||||
### Using with any HLS-compatible player
|
||||
|
||||
Most players accept the m3u8 URL directly:
|
||||
|
||||
```
|
||||
http://localhost:5000/hls/<encoded_video_url>--index.m3u8
|
||||
```
|
||||
|
||||
Where `<encoded_video_url>` is the URL-encoded original video URL.
|
||||
|
||||
Example - full URL:
|
||||
```
|
||||
http://localhost:5000/hls/https%3A%2F%2Frt.pornhub.com%2Fview_video.php%3Fviewkey%3D69c13273df690--index.m3u8
|
||||
```
|
||||
|
||||
### Python example (using requests)
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
video_url = "https://rt.pornhub.com/view_video.php?viewkey=69c13273df690"
|
||||
from urllib.parse import quote
|
||||
encoded = quote(video_url, safe="")
|
||||
hls_url = f"http://localhost:5000/hls/{encoded}--index.m3u8"
|
||||
|
||||
response = requests.get(hls_url)
|
||||
print(response.text) # Contains rewritten segment URLs
|
||||
```
|
||||
|
||||
## Running with Gunicorn
|
||||
|
||||
@@ -50,6 +154,12 @@ gunicorn -w 4 -b 0.0.0.0:5000 app:app
|
||||
pytest tests/test_proxy.py -v
|
||||
```
|
||||
|
||||
## Supported Sites
|
||||
|
||||
- PornHub (primary - HLS)
|
||||
- YouTube (direct URL fallback, no HLS)
|
||||
- Any site supported by yt-dlp
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
MIT
|
||||
@@ -4,7 +4,7 @@ from flask import Flask, render_template, request, Response, abort, jsonify
|
||||
from werkzeug.exceptions import HTTPException
|
||||
|
||||
import dlp
|
||||
from utils import is_valid_url, get_error_message
|
||||
from utils import is_valid_url, get_error_message, get_video_id, resolve_video_id
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@@ -34,50 +34,64 @@ def player():
|
||||
|
||||
try:
|
||||
stream_info = dlp.get_stream_info(video_url)
|
||||
from urllib.parse import quote
|
||||
encoded_url = quote(video_url, safe="")
|
||||
proxy_hls_url = f"/hls?url={encoded_url}&path=index.m3u8"
|
||||
video_id = get_video_id(video_url)
|
||||
hls_url = stream_info.get("hls_url")
|
||||
proxy_hls_url = f"/hls/{video_id}/index.m3u8" if hls_url else None
|
||||
|
||||
return render_template(
|
||||
"player.html",
|
||||
video_url=video_url,
|
||||
proxy_hls_url=proxy_hls_url,
|
||||
direct_url=stream_info.get("direct_url"),
|
||||
title=stream_info.get("title", "Video"),
|
||||
thumbnail=stream_info.get("thumbnail")
|
||||
thumbnail=stream_info.get("thumbnail"),
|
||||
# Pass all metadata to template
|
||||
description=stream_info.get("description"),
|
||||
uploader=stream_info.get("uploader"),
|
||||
uploader_url=stream_info.get("uploader_url"),
|
||||
duration=stream_info.get("duration"),
|
||||
duration_string=stream_info.get("duration_string"),
|
||||
upload_date=stream_info.get("upload_date"),
|
||||
view_count=stream_info.get("view_count"),
|
||||
like_count=stream_info.get("like_count"),
|
||||
dislike_count=stream_info.get("dislike_count"),
|
||||
comment_count=stream_info.get("comment_count"),
|
||||
age_limit=stream_info.get("age_limit"),
|
||||
categories=stream_info.get("categories"),
|
||||
tags=stream_info.get("tags"),
|
||||
language=stream_info.get("language"),
|
||||
license=stream_info.get("license"),
|
||||
channel=stream_info.get("channel"),
|
||||
channel_url=stream_info.get("channel_url"),
|
||||
channel_id=stream_info.get("channel_id"),
|
||||
extractor=stream_info.get("extractor"),
|
||||
extractor_key=stream_info.get("extractor_key"),
|
||||
display_id=stream_info.get("display_id"),
|
||||
url=stream_info.get("url"),
|
||||
fulltitle=stream_info.get("fulltitle"),
|
||||
resolution=stream_info.get("resolution"),
|
||||
format=stream_info.get("format"),
|
||||
format_note=stream_info.get("format_note"),
|
||||
filesize=stream_info.get("filesize"),
|
||||
filesize_approx=stream_info.get("filesize_approx"),
|
||||
hls_url=hls_url
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting stream info: {e}")
|
||||
abort(500, description=str(e))
|
||||
|
||||
|
||||
@app.route("/hls")
|
||||
def hls_proxy():
|
||||
@app.route("/hls/<video_id>/index.m3u8")
|
||||
def hls_index(video_id):
|
||||
try:
|
||||
url_param = request.args.get("url", "")
|
||||
if not url_param:
|
||||
abort(400, description="Missing url parameter")
|
||||
|
||||
from urllib.parse import urlparse, unquote
|
||||
video_url = resolve_video_id(video_id)
|
||||
if not video_url:
|
||||
abort(400, description="Unknown video id")
|
||||
|
||||
path = request.args.get("path", "")
|
||||
|
||||
if ".m3u8" in url_param and not path:
|
||||
video_url = url_param
|
||||
elif ".m3u8" in url_param and path:
|
||||
video_url = url_param
|
||||
else:
|
||||
video_url = url_param
|
||||
|
||||
video_url = unquote(video_url)
|
||||
|
||||
if not is_valid_url(video_url):
|
||||
abort(400, description="Invalid URL")
|
||||
|
||||
if path.endswith(".m3u8") or not path:
|
||||
playlist = dlp.get_hls_playlist(video_url)
|
||||
return Response(playlist, mimetype="application/vnd.apple.mpegurl")
|
||||
|
||||
segment_data = dlp.get_hls_segment(video_url, path)
|
||||
return Response(segment_data, mimetype="video/mp2t")
|
||||
playlist = dlp.get_hls_playlist(video_url)
|
||||
return Response(playlist, mimetype="application/vnd.apple.mpegurl", headers={"Cache-Control": "public, max-age=31536000"})
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
@@ -86,7 +100,33 @@ def hls_proxy():
|
||||
abort(400, description=str(e))
|
||||
except Exception as e:
|
||||
logger.error(f"HLS proxy error: {e}")
|
||||
abort(500, description="Error fetching stream")
|
||||
return Response(str(e), status=500, mimetype="text/plain")
|
||||
|
||||
|
||||
@app.route("/hls/<video_id>/seg/<seg_id>")
def hls_segment(video_id, seg_id):
    """Serve one proxied HLS segment for a previously registered video id.

    The video id is resolved back to its source URL, the URL is validated,
    and the segment bytes are fetched through ``dlp``. Responses are sent as
    ``video/mp2t`` with a one-year public cache lifetime.

    HTTP errors raised via ``abort`` propagate unchanged, ``ValueError``
    becomes a 400, and any other exception becomes a plain-text 500 whose
    body is the exception message.
    """
    cache_headers = {"Cache-Control": "public, max-age=31536000"}
    try:
        video_url = resolve_video_id(video_id)
        if not video_url:
            abort(400, description="Unknown video id")
        if not is_valid_url(video_url):
            abort(400, description="Invalid URL")

        data = dlp.get_hls_segment_with_retry(video_url, str(seg_id))
        if data is None:
            abort(500, description="Failed to fetch segment")

        return Response(data, mimetype="video/mp2t", headers=cache_headers)
    except HTTPException:
        # abort() raises HTTPException; let Flask render it as-is.
        raise
    except ValueError as e:
        logger.warning(f"Validation error: {e}")
        abort(400, description=str(e))
    except Exception as e:
        logger.error(f"HLS segment error: {e}")
        return Response(str(e), status=500, mimetype="text/plain")
|
||||
|
||||
|
||||
@app.errorhandler(Exception)
|
||||
|
||||
@@ -1,20 +1,33 @@
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from typing import Optional
|
||||
from urllib.parse import unquote
|
||||
from urllib.parse import urlparse
|
||||
import yt_dlp
|
||||
from yt_dlp.networking import Request
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CACHE_TTL = int(os.getenv("CACHE_TTL", 31536000))
|
||||
SOCKET_TIMEOUT = int(os.getenv("SOCKET_TIMEOUT", 30))
|
||||
|
||||
_session_cache = {}
|
||||
_cache_timestamps = {}
|
||||
|
||||
_ydl_instance = None
|
||||
|
||||
def _is_hls_url(url: str) -> bool:
|
||||
return url.endswith(".m3u8") or "m3u8" in url
|
||||
|
||||
def _get_ydl():
    """Return the module-level yt-dlp instance, creating it on first use.

    The instance is built quiet, with warnings suppressed and the socket
    timeout taken from ``SOCKET_TIMEOUT``; later calls reuse the same object.
    """
    global _ydl_instance
    if _ydl_instance is not None:
        return _ydl_instance

    options = {
        "quiet": True,
        "no_warnings": True,
        "socket_timeout": SOCKET_TIMEOUT,
    }
    _ydl_instance = yt_dlp.YoutubeDL(options)
    return _ydl_instance
|
||||
|
||||
|
||||
def _get_cache_key(video_url: str) -> str:
|
||||
@@ -28,110 +41,349 @@ def _is_cache_expired(video_url: str) -> bool:
|
||||
return time.time() - _cache_timestamps[key] > CACHE_TTL
|
||||
|
||||
|
||||
def _get_cached_session(video_url: str) -> Optional[dict]:
|
||||
def _get_cached_info(video_url: str) -> Optional[dict]:
|
||||
key = _get_cache_key(video_url)
|
||||
if key in _session_cache and not _is_cache_expired(video_url):
|
||||
return _session_cache[key]
|
||||
return None
|
||||
|
||||
|
||||
def _set_cached_session(video_url: str, session_data: dict) -> None:
|
||||
def _set_cached_info(video_url: str, info: dict) -> None:
|
||||
key = _get_cache_key(video_url)
|
||||
_session_cache[key] = session_data
|
||||
_session_cache[key] = info
|
||||
_cache_timestamps[key] = time.time()
|
||||
|
||||
|
||||
def clear_expired_cache() -> None:
|
||||
expired_keys = [
|
||||
key for key in _session_cache
|
||||
if _is_cache_expired(key)
|
||||
]
|
||||
for key in expired_keys:
|
||||
del _session_cache[key]
|
||||
del _cache_timestamps[key]
|
||||
# store segment mappings per video
|
||||
_segment_maps = {}
|
||||
|
||||
|
||||
def get_hls_playlist(video_url: str) -> str:
|
||||
cached = _get_cached_session(video_url)
|
||||
if cached and "hls_playlist" in cached:
|
||||
return cached["hls_playlist"]
|
||||
def _get_segment_id(full_url: str) -> str:
|
||||
"""Build a stable segment id that survives signed query refreshes."""
|
||||
import hashlib
|
||||
|
||||
if _is_hls_url(video_url):
|
||||
hls_url = video_url
|
||||
else:
|
||||
ydl_opts = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"socket_timeout": int(os.getenv("SOCKET_TIMEOUT", 30)),
|
||||
}
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(video_url, download=False)
|
||||
|
||||
if not info or "hls" not in info or not info["hls"]:
|
||||
raise ValueError("No HLS stream available for this video")
|
||||
|
||||
hls_url = info["hls"]
|
||||
|
||||
import urllib.request
|
||||
with urllib.request.urlopen(hls_url, timeout=30) as response:
|
||||
playlist_content = response.read().decode("utf-8")
|
||||
|
||||
session_data = {
|
||||
"hls_playlist": playlist_content,
|
||||
"hls_url": hls_url,
|
||||
"video_url": video_url,
|
||||
}
|
||||
_set_cached_session(video_url, session_data)
|
||||
|
||||
return playlist_content
|
||||
parsed = urlparse(full_url)
|
||||
stable_key = parsed.path or full_url.split("?", 1)[0]
|
||||
return hashlib.md5(stable_key.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def get_hls_segment(video_url: str, segment_name: str) -> bytes:
|
||||
cached = _get_cached_session(video_url)
|
||||
if not cached or "hls_url" not in cached:
|
||||
get_hls_playlist(video_url)
|
||||
cached = _get_cached_session(video_url)
|
||||
def _refresh_hls_url(video_url: str, attempts: int = 3) -> Optional[str]:
    """Re-extract until yt-dlp returns an HLS URL or retries are exhausted.

    Each attempt evicts the cached entry for *video_url* and calls
    ``_get_video_info`` again so that a fresh extraction is performed.

    Args:
        video_url: Source page URL of the video.
        attempts: Maximum number of extraction attempts.

    Returns:
        The HLS URL from the first attempt that yields one, otherwise None.
    """
    # Evict under the same key the cache getter/setter use; popping by the
    # raw URL would leave the entry in place whenever the key is derived.
    cache_key = _get_cache_key(video_url)
    last_info = None
    for _ in range(attempts):
        _session_cache.pop(cache_key, None)
        _cache_timestamps.pop(cache_key, None)
        info = _get_video_info(video_url)
        last_info = info
        if info.get("hls_url"):
            return info["hls_url"]

    if last_info and last_info.get("direct_url"):
        logger.info("Extractor returned direct URL but no HLS URL")
    return None
|
||||
|
||||
hls_url = cached["hls_url"]
|
||||
base_url = hls_url.rsplit("/", 1)[0]
|
||||
|
||||
if segment_name.startswith("/"):
|
||||
segment_name = segment_name[1:]
|
||||
def _get_request_headers(video_url: str) -> dict:
    """Return a copy of the ``http_headers`` stored in the video's raw info.

    An empty dict is returned when the video info carries no ``raw_info``
    or the raw info has no ``http_headers`` entry.
    """
    extractor_info = _get_video_info(video_url).get("raw_info") or {}
    recorded_headers = extractor_info.get("http_headers") or {}
    # Copy so callers cannot mutate the cached info dict.
    return dict(recorded_headers)
|
||||
|
||||
segment_url = f"{base_url}/{segment_name}"
|
||||
|
||||
import urllib.request
|
||||
with urllib.request.urlopen(segment_url, timeout=30) as response:
|
||||
def _fetch_url(video_url: str, url: str) -> bytes:
    """Fetch *url* through the shared yt-dlp instance and return its body.

    The request carries the headers returned by ``_get_request_headers``
    for *video_url*.
    """
    downloader = _get_ydl()
    headers = _get_request_headers(video_url)
    with downloader.urlopen(Request(url, headers=headers)) as body:
        return body.read()
|
||||
|
||||
|
||||
def get_stream_info(video_url: str) -> dict:
|
||||
cached = _get_cached_session(video_url)
|
||||
def _populate_nested_maps(video_url: str, content: str, base_url: str, video_id: str, visited: Optional[set[str]] = None, depth: int = 0) -> None:
    """Preload nested playlists so segment ids survive rebuilds after 410s.

    Every non-comment line of *content* whose resolved URL path ends in
    ``.m3u8`` is fetched, passed through ``_rewrite_urls`` (which records
    its entries in the segment map) and then scanned recursively.

    Args:
        video_url: Source page URL of the video.
        content: Text of the playlist to scan.
        base_url: URL that relative entries in *content* are resolved against.
        video_id: Identifier used when building proxy URLs.
        visited: Playlist URLs already processed; shared across recursion.
        depth: Current recursion depth; scanning stops at depth 3.
    """
    from urllib.parse import urljoin, urlparse

    if visited is None:
        visited = set()
    if depth >= 3:
        return

    for raw_line in content.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue

        if urlparse(entry).scheme:
            target = entry
        else:
            target = urljoin(base_url, entry)

        is_playlist = urlparse(target).path.endswith(".m3u8")
        if not is_playlist or target in visited:
            continue

        visited.add(target)
        try:
            nested = _fetch_url(video_url, target).decode("utf-8")
            _rewrite_urls(nested, video_url, target, video_id)
            _populate_nested_maps(video_url, nested, target, video_id, visited, depth + 1)
        except Exception as e:
            # Best effort: a failed preload is logged and the scan continues.
            logger.info("Failed to preload nested playlist: %s", e)
|
||||
|
||||
|
||||
def _extract_hls_url(info: dict) -> Optional[str]:
|
||||
"""Extract HLS URL from yt-dlp info dict."""
|
||||
# First check top-level fields (these are set when there's only one format)
|
||||
url = info.get("manifest_url") or info.get("url")
|
||||
if url and ".m3u8" in url:
|
||||
return url
|
||||
|
||||
# Check requested_formats (post-processed by yt-dlp)
|
||||
if info.get("requested_formats"):
|
||||
for f in info["requested_formats"]:
|
||||
url = f.get("url") or f.get("manifest_url")
|
||||
if url and ".m3u8" in url:
|
||||
return url
|
||||
|
||||
# Check formats for m3u8_native protocol
|
||||
if info.get("formats"):
|
||||
for f in reversed(info["formats"]):
|
||||
if f.get("protocol") == "m3u8_native":
|
||||
url = f.get("manifest_url") or f.get("url")
|
||||
if url and ".m3u8" in url:
|
||||
return url
|
||||
|
||||
# Try to find any m3u8 URL in formats
|
||||
if info.get("formats"):
|
||||
for f in info["formats"]:
|
||||
url = f.get("url", "")
|
||||
if ".m3u8" in url:
|
||||
return url
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _extract_direct_url(info: dict) -> Optional[str]:
|
||||
"""Extract direct video URL when HLS is not available."""
|
||||
# Check url field first
|
||||
url = info.get("url")
|
||||
if url:
|
||||
return url
|
||||
|
||||
# Check requested_formats
|
||||
if info.get("requested_formats"):
|
||||
for f in info["requested_formats"]:
|
||||
url = f.get("url")
|
||||
if url:
|
||||
return url
|
||||
|
||||
# Check formats for best quality https format
|
||||
if info.get("formats"):
|
||||
for f in reversed(info["formats"]):
|
||||
if f.get("protocol") in ("https", "http"):
|
||||
url = f.get("url")
|
||||
if url:
|
||||
return url
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _get_video_info(video_url: str) -> dict:
|
||||
"""Get video info using yt-dlp."""
|
||||
cached = _get_cached_info(video_url)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
if _is_hls_url(video_url):
|
||||
return {
|
||||
"title": "Test Video",
|
||||
"hls_url": video_url,
|
||||
"thumbnail": None,
|
||||
}
|
||||
import shutil
|
||||
if not shutil.which("node"):
|
||||
deno_path = os.path.expanduser("~/.deno/bin/deno")
|
||||
if not os.path.exists(deno_path):
|
||||
logger.warning("No JavaScript runtime (node/deno) found - YouTube may not work properly")
|
||||
|
||||
ydl_opts = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"socket_timeout": int(os.getenv("SOCKET_TIMEOUT", 30)),
|
||||
ydl = _get_ydl()
|
||||
info = ydl.extract_info(video_url, download=False)
|
||||
|
||||
hls_url = _extract_hls_url(info)
|
||||
direct_url = _extract_direct_url(info)
|
||||
result = {
|
||||
"title": info.get("title"),
|
||||
"thumbnail": info.get("thumbnail"),
|
||||
"hls_url": hls_url,
|
||||
"direct_url": direct_url,
|
||||
"raw_info": info,
|
||||
}
|
||||
_set_cached_info(video_url, result)
|
||||
return result
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(video_url, download=False)
|
||||
|
||||
if not info:
|
||||
raise ValueError("Could not extract video info")
|
||||
def get_stream_info(video_url: str) -> dict:
    """Return stream URLs plus descriptive metadata for *video_url*.

    ``title``, ``thumbnail``, ``hls_url`` and ``direct_url`` come from the
    video info; every other field is copied from its ``raw_info`` dict and
    is None when absent there.
    """
    info = _get_video_info(video_url)
    raw = info.get("raw_info", {})

    metadata = {
        "title": info["title"],
        "thumbnail": info["thumbnail"],
        "hls_url": info.get("hls_url"),
        "direct_url": info.get("direct_url"),
    }

    # Fields copied one-to-one from the raw extractor output.
    passthrough_fields = (
        "description",
        "uploader",
        "uploader_url",
        "duration",
        "upload_date",
        "view_count",
        "like_count",
        "dislike_count",
        "comment_count",
        "age_limit",
        "categories",
        "tags",
        "language",
        "license",
        "channel",
        "channel_url",
        "channel_id",
        "extractor",
        "extractor_key",
        "display_id",
        "url",
        "fulltitle",
        "duration_string",
        "resolution",
        "format",
        "format_note",
        "filesize",
        "filesize_approx",
    )
    for field in passthrough_fields:
        metadata[field] = raw.get(field)

    return metadata
|
||||
|
||||
return {
|
||||
"title": info.get("title", "Unknown"),
|
||||
"hls_url": info.get("hls"),
|
||||
"thumbnail": info.get("thumbnail"),
|
||||
}
|
||||
|
||||
def get_hls_playlist(video_url: str) -> str:
    """Return the video's HLS playlist with its URLs rewritten to the proxy.

    The playlist is fetched, rewritten, and its nested playlists are
    preloaded. If the fetch fails with an error whose text contains "410",
    the HLS URL is refreshed and the fetch is retried once.

    Raises:
        ValueError: No HLS URL could be obtained for the video.
    """
    hls_url = _get_video_info(video_url).get("hls_url") or _refresh_hls_url(video_url)
    if not hls_url:
        raise ValueError("No HLS stream available for this video")

    from utils import get_video_id
    video_id = get_video_id(video_url)

    already_retried = False
    while True:
        try:
            playlist_content = _fetch_url(video_url, hls_url).decode("utf-8")
            rewritten = _rewrite_urls(playlist_content, video_url, hls_url, video_id)
            _populate_nested_maps(video_url, playlist_content, hls_url, video_id)
            return rewritten
        except Exception as e:
            # Only the first failure is retried, and only when it looks
            # like an expired URL; everything else propagates.
            if already_retried or "410" not in str(e):
                raise
            already_retried = True
            logger.info("HLS URL expired, fetching fresh HLS URL")
            hls_url = _refresh_hls_url(video_url)
            if not hls_url:
                raise ValueError("No HLS stream available for this video")
|
||||
|
||||
|
||||
def get_direct_video_url(video_url: str) -> str:
    """Return the direct video URL recorded for *video_url*.

    Raises:
        ValueError: The video info holds no direct URL.
    """
    direct_url = _get_video_info(video_url).get("direct_url")
    if direct_url:
        return direct_url
    raise ValueError("No video URL available for this video")
|
||||
|
||||
|
||||
def _rewrite_urls(content: str, video_url: str, base_url: str, video_id: str) -> str:
    """Rewrite every URI line of an HLS playlist to point through the proxy.

    Both absolute and relative URIs are replaced by
    ``/hls/<video_id>/seg/<seg_id>``; the resolved upstream URL is stored in
    the per-video segment map under ``seg_id``. Tag/comment lines (starting
    with ``#``) and blank lines are passed through unchanged.

    Args:
        content: Playlist text.
        video_url: Source page URL; selects the segment map to update.
        base_url: URL that relative URIs are resolved against.
        video_id: Identifier embedded in the generated proxy paths.

    Returns:
        The playlist text with URI lines replaced by proxy paths.
    """
    from urllib.parse import urljoin, urlparse

    # The map is shared across nested playlists of the same video.
    segment_map = _segment_maps.setdefault(video_url, {})

    new_lines = []
    for line in content.split("\n"):
        # Strip before classifying: CRLF playlists leave a trailing "\r",
        # which must not end up in the stored URL, and whitespace-only or
        # indented tag lines must not be mistaken for URIs.
        uri = line.strip()
        if not uri or uri.startswith("#"):
            new_lines.append(line)
            continue

        full_url = uri if urlparse(uri).scheme else urljoin(base_url, uri)

        # The stable id must ignore expiring signatures in query strings.
        seg_id = _get_segment_id(full_url)
        segment_map[seg_id] = full_url
        new_lines.append(f"/hls/{video_id}/seg/{seg_id}")

    return "\n".join(new_lines)
|
||||
|
||||
|
||||
def get_hls_segment(video_url: str, segment_url: str) -> bytes:
    """Fetch one proxied HLS resource: a media segment or a nested playlist.

    Despite its name, *segment_url* is the segment id that appears in the
    proxy path; it is looked up in the per-video segment map to obtain the
    real upstream URL.  When the map is missing, or does not contain the id,
    ``get_hls_playlist`` is called to (re)build it.

    If the fetched payload looks like a playlist (``#EXTM3U`` near its
    start) its URIs are rewritten through ``_rewrite_urls`` before being
    returned; anything else is returned as fetched.

    Raises:
        ValueError: no segment map could be built ("No segment map
            available"), the id is unknown even after a rebuild ("Segment
            not found"), or the upstream fetch failed ("HLS URL expired
            (410 Gone)").
    """
    # Pure mapping-based resolution (no yt-dlp dependency here)
    seg_id = segment_url

    segment_map = _segment_maps.get(video_url)
    if not segment_map:
        # build mapping on-demand to avoid state coupling
        get_hls_playlist(video_url)
        segment_map = _segment_maps.get(video_url)
        if not segment_map:
            raise ValueError("No segment map available")

    if seg_id not in segment_map:
        # try rebuild once to refresh mappings (e.g., after expiry)
        get_hls_playlist(video_url)
        segment_map = _segment_maps.get(video_url) or {}
        if seg_id not in segment_map:
            raise ValueError("Segment not found")

    full_url = segment_map[seg_id]

    try:
        data = _fetch_url(video_url, full_url)
    except Exception as e:
        # NOTE(review): every fetch failure is reported as "410 Gone", and the
        # retry wrapper keys on that text. Presumably deliberate so that any
        # failure triggers a rebuild; confirm before narrowing.
        raise ValueError("HLS URL expired (410 Gone)") from e

    # Detect playlists dynamically (covers sub-playlists too).  Only a short
    # prefix is decoded for the check, so large binary segments are not
    # decoded in full just to be returned untouched.
    try:
        head = data[:1024].decode("utf-8", errors="ignore").lstrip()[:200]
        if "#EXTM3U" in head:
            from utils import get_video_id

            text = data.decode("utf-8", errors="ignore")
            rewritten = _rewrite_urls(text, video_url, full_url, get_video_id(video_url))
            return rewritten.encode("utf-8")
    except Exception as exc:
        # Best effort: fall back to the raw payload, but leave a trace instead
        # of silently serving an un-rewritten playlist.
        logger.warning("Playlist rewrite failed, serving raw data: %s", exc)

    return data
|
||||
|
||||
|
||||
def get_hls_segment_with_retry(video_url: str, segment_url: str) -> bytes:
    """Fetch a segment, retrying and finally rebuilding state on "410 Gone".

    Sequence:
      1. Try the fetch (building the segment map first if the video has none).
      2. On a ``ValueError`` mentioning "410 Gone", try the same fetch again.
      3. If that fails the same way, drop the cached session, its timestamp
         and the segment map for the video, rebuild the playlist and fetch
         one last time; an error from this final fetch propagates.

    A ``ValueError`` without "410 Gone" in its text is re-raised immediately.
    """

    def _fetch_once() -> bytes:
        if video_url not in _segment_maps:
            get_hls_playlist(video_url)
        return get_hls_segment(video_url, segment_url)

    # First attempt.
    try:
        return _fetch_once()
    except ValueError as exc:
        if "410 Gone" not in str(exc):
            raise
        logger.info("Segment 410, retrying")

    # Second attempt; on a repeated 410 start over from a clean state.
    try:
        return _fetch_once()
    except ValueError as exc:
        if "410 Gone" not in str(exc):
            raise
        logger.info("Segment still 410, rebuilding playlist and map")
        _session_cache.pop(video_url, None)
        _cache_timestamps.pop(video_url, None)
        _segment_maps.pop(video_url, None)
        get_hls_playlist(video_url)
        return get_hls_segment(video_url, segment_url)
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
services:
|
||||
yt-dlp-proxy:
|
||||
build: .
|
||||
ports:
|
||||
- "5000:5000"
|
||||
environment:
|
||||
- PORT=5000
|
||||
- LOG_LEVEL=INFO
|
||||
- CACHE_TTL=31536000
|
||||
- SOCKET_TIMEOUT=30
|
||||
- ALLOWED_DOMAINS=youtube.com,youtu.be,pornhub.com,xvideos.com
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:5000/"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# Optional: nginx reverse proxy configuration
|
||||
# Uncomment to enable
|
||||
#
|
||||
# nginx:
|
||||
# image: nginx:latest
|
||||
# ports:
|
||||
# - "80:80"
|
||||
# - "443:443"
|
||||
# volumes:
|
||||
# - ./nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
# depends_on:
|
||||
# - yt-dlp-proxy
|
||||
@@ -1,3 +1,4 @@
|
||||
flask>=2.0.0
|
||||
yt-dlp
|
||||
gunicorn
|
||||
requests
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
<form action="/player" method="get">
|
||||
<div class="form-group">
|
||||
<label for="url">Video URL:</label>
|
||||
<input type="text" id="url" name="url" placeholder="https://www.youtube.com/watch?v=..." required>
|
||||
<input type="text" id="url" name="url" placeholder="https://rt.pornhub.com/view_video.php?viewkey=..." required>
|
||||
</div>
|
||||
<button type="submit">Watch</button>
|
||||
</form>
|
||||
|
||||
+205
-13
@@ -7,17 +7,18 @@
|
||||
<link rel="stylesheet" href="https://unpkg.com/mvp.css">
|
||||
<style>
|
||||
body {
|
||||
max-width: 900px;
|
||||
max-width: 1100px;
|
||||
margin: 0 auto;
|
||||
padding: 1rem;
|
||||
}
|
||||
h1 {
|
||||
margin-bottom: 1rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
.video-container {
|
||||
width: 100%;
|
||||
background: #000;
|
||||
aspect-ratio: 16 / 9;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
video {
|
||||
width: 100%;
|
||||
@@ -27,28 +28,219 @@
|
||||
display: inline-block;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
.metadata {
|
||||
background: #f5f5f5;
|
||||
padding: 1rem;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
.metadata h2 {
|
||||
margin-top: 0;
|
||||
font-size: 1.2rem;
|
||||
}
|
||||
.metadata-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||
gap: 0.75rem;
|
||||
}
|
||||
.metadata-item {
|
||||
word-break: break-word;
|
||||
}
|
||||
.metadata-label {
|
||||
font-weight: bold;
|
||||
color: #666;
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
.metadata-value {
|
||||
color: #333;
|
||||
}
|
||||
.thumbnail {
|
||||
max-width: 100%;
|
||||
max-height: 200px;
|
||||
margin: 1rem 0;
|
||||
border-radius: 4px;
|
||||
}
|
||||
.description {
|
||||
background: #f9f9f9;
|
||||
padding: 1rem;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 1rem;
|
||||
white-space: pre-wrap;
|
||||
max-height: 200px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<a href="/" class="back-link">← Back</a>
|
||||
|
||||
{% if thumbnail %}
|
||||
<img src="{{ thumbnail }}" alt="{{ title }}" class="thumbnail">
|
||||
{% endif %}
|
||||
|
||||
<h1>{{ title }}</h1>
|
||||
|
||||
<div class="video-container">
|
||||
<video controls>
|
||||
Your browser does not support HLS.
|
||||
<video controls id="video">
|
||||
Your browser does not support video playback.
|
||||
</video>
|
||||
</div>
|
||||
|
||||
{% if description %}
|
||||
<div class="description">
|
||||
<h3>Description</h3>
|
||||
{{ description }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="metadata">
|
||||
<h2>Video Information</h2>
|
||||
<div class="metadata-grid">
|
||||
{% if uploader %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Uploader</div>
|
||||
<div class="metadata-value">{{ uploader }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if channel %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Channel</div>
|
||||
<div class="metadata-value">{{ channel }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if duration_string %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Duration</div>
|
||||
<div class="metadata-value">{{ duration_string }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if upload_date %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Upload Date</div>
|
||||
<div class="metadata-value">{{ upload_date }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if view_count %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Views</div>
|
||||
<div class="metadata-value">{{ "{:,}".format(view_count) }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if like_count %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Likes</div>
|
||||
<div class="metadata-value">{{ "{:,}".format(like_count) }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if comment_count %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Comments</div>
|
||||
<div class="metadata-value">{{ "{:,}".format(comment_count) }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if categories %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Categories</div>
|
||||
<div class="metadata-value">{% for cat in categories %}{{ cat }}{% if not loop.last %}, {% endif %}{% endfor %}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if language %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Language</div>
|
||||
<div class="metadata-value">{{ language }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if extractor %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Source</div>
|
||||
<div class="metadata-value">{{ extractor }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if resolution %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Resolution</div>
|
||||
<div class="metadata-value">{{ resolution }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if format %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Format</div>
|
||||
<div class="metadata-value">{{ format }}{% if format_note %} ({{ format_note }}){% endif %}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if filesize_approx %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Size (approx)</div>
|
||||
<div class="metadata-value">{{ filesize_approx }}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if tags %}
|
||||
<div class="metadata">
|
||||
<h2>Tags</h2>
|
||||
<div class="metadata-value">
|
||||
{% for tag in tags %}
|
||||
<span style="display: inline-block; background: #e0e0e0; padding: 2px 8px; border-radius: 4px; margin: 2px;">{{ tag }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if hls_url %}
|
||||
<div class="metadata">
|
||||
<h2>Stream URLs</h2>
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">HLS URL</div>
|
||||
<div class="metadata-value" style="word-break: break-all; font-size: 0.85rem;">{{ hls_url[:200] }}{% if hls_url|length > 200 %}...{% endif %}</div>
|
||||
</div>
|
||||
{% if direct_url %}
|
||||
<div class="metadata-item">
|
||||
<div class="metadata-label">Direct URL</div>
|
||||
<div class="metadata-value" style="word-break: break-all; font-size: 0.85rem;">{{ direct_url[:200] }}{% if direct_url|length > 200 %}...{% endif %}</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/hls.js@latest"></script>
|
||||
<script>
|
||||
const video = document.querySelector('video');
|
||||
const hlsUrl = '{{ proxy_hls_url }}';
|
||||
const video = document.getElementById('video');
|
||||
const hlsUrl = {{ proxy_hls_url | tojson }};
|
||||
const directUrl = {{ direct_url | tojson }};
|
||||
|
||||
if (Hls.isSupported()) {
|
||||
const hls = new Hls();
|
||||
hls.loadSource(hlsUrl);
|
||||
hls.attachMedia(video);
|
||||
} else if (video.canPlayType('application/vnd.apple.mpegurl')) {
|
||||
video.src = hlsUrl;
|
||||
if (hlsUrl && hlsUrl !== 'null') {
|
||||
if (Hls.isSupported()) {
|
||||
const hls = new Hls();
|
||||
hls.loadSource(hlsUrl);
|
||||
hls.attachMedia(video);
|
||||
} else if (video.canPlayType('application/vnd.apple.mpegurl')) {
|
||||
video.src = hlsUrl;
|
||||
} else {
|
||||
loadDirectUrl();
|
||||
}
|
||||
} else if (directUrl && directUrl !== 'null') {
|
||||
loadDirectUrl();
|
||||
}
|
||||
|
||||
function loadDirectUrl() {
|
||||
if (directUrl && directUrl !== 'null') {
|
||||
video.src = directUrl;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
</html>
|
||||
+73
-116
@@ -1,139 +1,96 @@
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import threading
|
||||
import requests
|
||||
import pytest
|
||||
import sys
|
||||
import urllib.parse
|
||||
import http.server
|
||||
import socketserver
|
||||
import time
|
||||
import urllib.request
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
SERVER_PORT = 5005
|
||||
|
||||
|
||||
TEST_VIDEO_DIR = "/tmp/yt-dlp-test-video"
|
||||
TEST_VIDEO_M3U8 = f"{TEST_VIDEO_DIR}/index.m3u8"
|
||||
SERVER_PORT = 5002
|
||||
TEST_HTTP_PORT = 8898
|
||||
|
||||
|
||||
def generate_test_video():
|
||||
os.makedirs(TEST_VIDEO_DIR, exist_ok=True)
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y", "-f", "lavfi", "-i", "testsrc=duration=5:size=320x240:rate=24",
|
||||
"-f", "lavfi", "-i", "sine=frequency=440:duration=5",
|
||||
"-c:v", "libx264", "-c:a", "aac", "-strict", "experimental",
|
||||
"-hls_time", "1", "-hls_list_size", "0",
|
||||
"-hls_segment_filename", f"{TEST_VIDEO_DIR}/segment%03d.ts",
|
||||
TEST_VIDEO_M3U8
|
||||
]
|
||||
subprocess.run(cmd, capture_output=True, timeout=60)
|
||||
|
||||
assert os.path.exists(TEST_VIDEO_M3U8), "HLS manifest not generated"
|
||||
segments = [f for f in os.listdir(TEST_VIDEO_DIR) if f.endswith(".ts")]
|
||||
assert len(segments) > 0, "No segments generated"
|
||||
|
||||
|
||||
class QuietHTTPHandler(http.server.SimpleHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
|
||||
class ReusableTCPServer(socketserver.TCPServer):
|
||||
allow_reuse_address = True
|
||||
|
||||
|
||||
def serve_test_video():
|
||||
os.chdir(TEST_VIDEO_DIR)
|
||||
with ReusableTCPServer(("127.0.0.1", TEST_HTTP_PORT), QuietHTTPHandler) as httpd:
|
||||
httpd.serve_forever()
|
||||
|
||||
|
||||
def start_flask_app():
|
||||
import app as flask_app
|
||||
flask_app.app.run(host="127.0.0.1", port=SERVER_PORT, debug=False, use_reloader=False)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def test_servers():
|
||||
print("\nGenerating test video...")
|
||||
generate_test_video()
|
||||
|
||||
print(f"Starting HTTP server for test video on port {TEST_HTTP_PORT}...")
|
||||
http_thread = threading.Thread(target=serve_test_video, daemon=True)
|
||||
http_thread.start()
|
||||
time.sleep(1)
|
||||
|
||||
for _ in range(10):
|
||||
def wait_server():
|
||||
for _ in range(20):
|
||||
try:
|
||||
requests.get(f"http://127.0.0.1:{TEST_HTTP_PORT}/", timeout=1)
|
||||
break
|
||||
except:
|
||||
urllib.request.urlopen(f"http://127.0.0.1:{SERVER_PORT}/", timeout=1)
|
||||
return
|
||||
except Exception:
|
||||
time.sleep(0.5)
|
||||
print("HTTP server ready")
|
||||
|
||||
print(f"Starting Flask proxy server on port {SERVER_PORT}...")
|
||||
flask_thread = threading.Thread(target=start_flask_app, daemon=True)
|
||||
flask_thread.start()
|
||||
time.sleep(2)
|
||||
print("Flask server ready")
|
||||
|
||||
yield
|
||||
|
||||
print("\nCleaning up...")
|
||||
raise RuntimeError("Server not ready")
|
||||
|
||||
|
||||
def test_direct_hls_access(test_servers):
|
||||
"""Test that we can access the test HLS video directly"""
|
||||
response = requests.get(f"http://127.0.0.1:{TEST_HTTP_PORT}/index.m3u8", timeout=5)
|
||||
assert response.status_code == 200
|
||||
assert "#EXTM3U" in response.text
|
||||
print("Direct HLS access: OK")
|
||||
def test_full_proxy_flow():
|
||||
"""
|
||||
AGENTS.md compliant integration test:
|
||||
- real video URL
|
||||
- goes through proxy
|
||||
- yt-dlp consumes stream (like browser)
|
||||
"""
|
||||
|
||||
import threading
|
||||
# ensure project root is on PYTHONPATH
|
||||
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if ROOT not in sys.path:
|
||||
sys.path.insert(0, ROOT)
|
||||
import app
|
||||
|
||||
def test_hls_playlist_proxy(test_servers):
|
||||
"""Test proxying HLS playlist"""
|
||||
video_url = f"http://127.0.0.1:{TEST_HTTP_PORT}/index.m3u8"
|
||||
proxy_url = f"http://127.0.0.1:{SERVER_PORT}/hls?url={urllib.parse.quote(video_url, safe='')}"
|
||||
# start server
|
||||
t = threading.Thread(
|
||||
target=lambda: app.app.run(host="127.0.0.1", port=SERVER_PORT, debug=False, use_reloader=False),
|
||||
daemon=True,
|
||||
)
|
||||
t.start()
|
||||
|
||||
response = requests.get(proxy_url, timeout=10)
|
||||
assert response.status_code == 200
|
||||
assert "#EXTM3U" in response.text
|
||||
assert ".ts" in response.text
|
||||
print("HLS playlist proxy: OK")
|
||||
wait_server()
|
||||
|
||||
video_urls = [
|
||||
"https://rt.pornhub.com/view_video.php?viewkey=ph5e7df37a9faf5",
|
||||
"https://rt.pornhub.com/view_video.php?viewkey=69c13273df690",
|
||||
]
|
||||
|
||||
def test_hls_segment_proxy(test_servers):
|
||||
"""Test proxying HLS segment"""
|
||||
video_url = f"http://127.0.0.1:{TEST_HTTP_PORT}/index.m3u8"
|
||||
proxy_url = f"http://127.0.0.1:{SERVER_PORT}/hls?url={urllib.parse.quote(video_url, safe='')}&path=segment000.ts"
|
||||
from utils import get_video_id
|
||||
|
||||
response = requests.get(proxy_url, timeout=10)
|
||||
assert response.status_code == 200
|
||||
assert len(response.content) > 0
|
||||
print("HLS segment proxy: OK")
|
||||
def fetch(url):
|
||||
with urllib.request.urlopen(url, timeout=10) as r:
|
||||
status = r.status
|
||||
data = r.read().decode("utf-8", errors="ignore")
|
||||
print(f"[HTTP] {url} -> {status}")
|
||||
assert status == 200, f"Request failed: {url}"
|
||||
return data
|
||||
|
||||
def parse_playlist(text):
|
||||
return [l.strip() for l in text.split("\n") if l.strip() and not l.startswith("#")]
|
||||
|
||||
def test_player_page(test_servers):
|
||||
"""Test player page renders"""
|
||||
video_url = f"http://127.0.0.1:{TEST_HTTP_PORT}/index.m3u8"
|
||||
player_url = f"http://127.0.0.1:{SERVER_PORT}/player?url={urllib.parse.quote(video_url, safe='')}"
|
||||
def is_media_playlist(text):
|
||||
return "#EXTINF" in text
|
||||
|
||||
response = requests.get(player_url, timeout=10)
|
||||
assert response.status_code == 200
|
||||
assert "video" in response.text.lower()
|
||||
print("Player page: OK")
|
||||
def descend_to_media(url):
|
||||
text = fetch(url)
|
||||
depth = 0
|
||||
while not is_media_playlist(text):
|
||||
depth += 1
|
||||
assert depth <= 5, "Playlist nesting too deep"
|
||||
entries = parse_playlist(text)
|
||||
assert entries, "Empty playlist while descending"
|
||||
next_url = entries[0] if entries[0].startswith("http") else base + entries[0]
|
||||
text = fetch(next_url)
|
||||
return text
|
||||
|
||||
for video_url in video_urls:
|
||||
video_id = get_video_id(video_url)
|
||||
base = f"http://127.0.0.1:{SERVER_PORT}"
|
||||
index_url = f"{base}/hls/{video_id}/index.m3u8"
|
||||
|
||||
def test_index_page(test_servers):
|
||||
"""Test index page renders"""
|
||||
response = requests.get(f"http://127.0.0.1:{SERVER_PORT}/", timeout=10)
|
||||
assert response.status_code == 200
|
||||
assert "video" in response.text.lower()
|
||||
print("Index page: OK")
|
||||
print(f"\n[TEST] Simulated player: {video_url}")
|
||||
|
||||
media = descend_to_media(index_url)
|
||||
segs = parse_playlist(media)
|
||||
assert segs, "Empty media playlist"
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s"])
|
||||
for i, seg in enumerate(segs[:3], start=1):
|
||||
seg_url = base + seg
|
||||
with urllib.request.urlopen(seg_url, timeout=10) as r:
|
||||
status = r.status
|
||||
data = r.read()
|
||||
print(f"[SEG {i}] {seg_url} -> {status}, {len(data)} bytes")
|
||||
assert status == 200, f"Segment failed: {seg_url}"
|
||||
assert len(data) > 0, "Empty segment"
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
import pytest
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from utils import is_valid_url, extract_video_id, sanitize_path, get_error_message
|
||||
import dlp
|
||||
|
||||
|
||||
class TestURLValidation:
|
||||
def test_valid_youtube_url(self):
|
||||
assert is_valid_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
|
||||
assert is_valid_url("https://youtu.be/dQw4w9WgXcQ")
|
||||
|
||||
def test_valid_youtu_be(self):
|
||||
assert is_valid_url("https://youtu.be/abc123")
|
||||
|
||||
def test_valid_pornhub_url(self):
|
||||
assert is_valid_url("https://www.pornhub.com/view_video.php?viewkey=abc123")
|
||||
|
||||
def test_invalid_url(self):
|
||||
assert not is_valid_url("")
|
||||
assert not is_valid_url("not-a-url")
|
||||
|
||||
def test_disallowed_domain(self):
|
||||
os.environ["VALIDATION_ENABLED"] = "true"
|
||||
assert not is_valid_url("https://evil.com/video")
|
||||
|
||||
|
||||
class TestVideoIDExtraction:
|
||||
def test_extract_youtube_id(self):
|
||||
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == "dQw4w9WgXcQ"
|
||||
assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
|
||||
|
||||
def test_extract_pornhub_id(self):
|
||||
result = extract_video_id("https://www.pornhub.com/view_video.php?viewkey=ph123456")
|
||||
assert result == "ph123456"
|
||||
|
||||
def test_extract_invalid(self):
|
||||
assert extract_video_id("https://example.com/video") == ""
|
||||
|
||||
|
||||
class TestPathSanitization:
|
||||
def test_sanitize_normal_path(self):
|
||||
assert sanitize_path("path/to/file") == "path/to/file"
|
||||
|
||||
def test_sanitize_prevents_traversal(self):
|
||||
assert sanitize_path("../etc/passwd") == "etc/passwd"
|
||||
assert sanitize_path("path/../etc/passwd") == "path/etc/passwd"
|
||||
|
||||
|
||||
class TestCacheMechanics:
|
||||
def test_cache_basic(self):
|
||||
dlp._session_cache.clear()
|
||||
dlp._cache_timestamps.clear()
|
||||
|
||||
test_data = {"test": "data"}
|
||||
dlp._set_cached_session("http://test.com/video", test_data)
|
||||
|
||||
cached = dlp._get_cached_session("http://test.com/video")
|
||||
assert cached == test_data
|
||||
|
||||
def test_cache_expiry(self):
|
||||
dlp.CACHE_TTL = 1
|
||||
dlp._session_cache.clear()
|
||||
dlp._cache_timestamps.clear()
|
||||
|
||||
dlp._set_cached_session("http://test.com/video", {"data": "test"})
|
||||
import time
|
||||
time.sleep(1.1)
|
||||
|
||||
assert dlp._is_cache_expired("http://test.com/video") is True
|
||||
|
||||
dlp.CACHE_TTL = 31536000
|
||||
|
||||
|
||||
class TestErrorMessages:
|
||||
def test_get_error_message(self):
|
||||
assert "Bad Request" in get_error_message(400)
|
||||
assert "Forbidden" in get_error_message(403)
|
||||
assert "Not Found" in get_error_message(404)
|
||||
assert "Internal Server Error" in get_error_message(500)
|
||||
|
||||
|
||||
class TestFlaskApp:
|
||||
def test_index_route(self):
|
||||
from app import app
|
||||
with app.test_client() as client:
|
||||
response = client.get("/")
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_player_route_missing_url(self):
|
||||
from app import app
|
||||
with app.test_client() as client:
|
||||
response = client.get("/player")
|
||||
assert response.status_code == 400
|
||||
|
||||
def test_player_route_invalid_url(self):
|
||||
from app import app
|
||||
with app.test_client() as client:
|
||||
response = client.get("/player?url=https://evil.com/video")
|
||||
assert response.status_code == 400
|
||||
|
||||
def test_hls_proxy_invalid_path(self):
|
||||
from app import app
|
||||
with app.test_client() as client:
|
||||
response = client.get("/hls")
|
||||
assert response.status_code == 400
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
@@ -69,3 +69,17 @@ def get_error_message(status_code: int) -> str:
|
||||
503: "Service Unavailable",
|
||||
}
|
||||
return errors.get(status_code, "Unknown error")
|
||||
import hashlib
|
||||
|
||||
# simple in-memory mapping: video_id -> original URL
|
||||
_video_map = {}
|
||||
|
||||
|
||||
def get_video_id(url: str) -> str:
|
||||
vid = hashlib.md5(url.encode()).hexdigest()
|
||||
_video_map[vid] = url
|
||||
return vid
|
||||
|
||||
|
||||
def resolve_video_id(vid: str) -> str | None:
|
||||
return _video_map.get(vid)
|
||||
|
||||
Reference in New Issue
Block a user