Pytorch implementation of video upscaling using Real-ESRGAN.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -10,6 +10,7 @@ __pycache__/
|
|||||||
|
|
||||||
# Virtual environments
|
# Virtual environments
|
||||||
.venv/
|
.venv/
|
||||||
|
.venv*/
|
||||||
venv/
|
venv/
|
||||||
env/
|
env/
|
||||||
ENV/
|
ENV/
|
||||||
@@ -37,7 +38,6 @@ dist/
|
|||||||
frames_in/
|
frames_in/
|
||||||
frames_out/
|
frames_out/
|
||||||
/tmp/
|
/tmp/
|
||||||
|
|
||||||
# Video and media artifacts
|
# Video and media artifacts
|
||||||
*.mp4
|
*.mp4
|
||||||
*.mkv
|
*.mkv
|
||||||
|
|||||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
3.11.14
|
||||||
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"python-envs.defaultEnvManager": "ms-python.python:pyenv"
|
"python-envs.defaultEnvManager": "ms-python.python:venv"
|
||||||
}
|
}
|
||||||
56
README.md
56
README.md
@@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
This project uses:
|
This project uses:
|
||||||
- `ffmpeg` / `ffprobe` for demux/remux and frame handling
|
- `ffmpeg` / `ffprobe` for demux/remux and frame handling
|
||||||
- `realesrgan-ncnn-vulkan` for GPU upscaling (Vulkan backend, works well on NVIDIA)
|
- `Real-ESRGAN (PyTorch + CUDA)` as default upscaler backend on NVIDIA GPUs
|
||||||
|
- optional legacy `realesrgan-ncnn-vulkan` backend
|
||||||
- `upscale_video.py` as Python controller/orchestrator
|
- `upscale_video.py` as Python controller/orchestrator
|
||||||
|
|
||||||
## 1) System setup (Ubuntu)
|
## 1) System setup (Ubuntu)
|
||||||
@@ -25,7 +26,19 @@ sudo ubuntu-drivers autoinstall
|
|||||||
sudo reboot
|
sudo reboot
|
||||||
```
|
```
|
||||||
|
|
||||||
## 2) Install Real-ESRGAN binary and models
|
## 2) Install Real-ESRGAN backend
|
||||||
|
|
||||||
|
### Default (recommended): PyTorch + CUDA
|
||||||
|
|
||||||
|
Inside project venv:
|
||||||
|
```bash
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
|
python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
|
||||||
|
```
|
||||||
|
|
||||||
|
The script automatically downloads model weights on first run into `~/.cache/realesrgan`.
|
||||||
|
|
||||||
|
### Optional legacy: ncnn-vulkan binary
|
||||||
|
|
||||||
### a) Download and install binary
|
### a) Download and install binary
|
||||||
|
|
||||||
@@ -79,16 +92,22 @@ wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrga
|
|||||||
unzip -j realesrgan-ncnn-vulkan-20220424-ubuntu.zip "realesrgan-ncnn-vulkan/models/*" -d models/
|
unzip -j realesrgan-ncnn-vulkan-20220424-ubuntu.zip "realesrgan-ncnn-vulkan/models/*" -d models/
|
||||||
```
|
```
|
||||||
|
|
||||||
## 3) Install Python dependencies (optional but recommended)
|
## 3) Create and use the Python environment
|
||||||
|
|
||||||
For a cleaner progress bar during upscaling:
|
Recommended (pyenv + venv, avoids PEP668/system-pip issues):
|
||||||
```bash
|
```bash
|
||||||
pip install tqdm
|
cd /home/admin_n/python/video-upscaling
|
||||||
# or
|
pyenv install -s 3.11.14
|
||||||
pip install -r requirements.txt
|
pyenv shell 3.11.14
|
||||||
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
The script works without tqdm but will show a nicer single-line progress bar if it's installed.
|
The script works without `tqdm`, but with dependencies installed you get a clean single-line progress bar.
|
||||||
|
|
||||||
|
Note: Python `3.14` currently fails with `basicsr/realesrgan` build errors. Use Python `3.11.x`.
|
||||||
|
|
||||||
## 4) Run the Python controller
|
## 4) Run the Python controller
|
||||||
|
|
||||||
@@ -97,13 +116,13 @@ From this project directory:
|
|||||||
python3 upscale_video.py \
|
python3 upscale_video.py \
|
||||||
-i input.mp4 \
|
-i input.mp4 \
|
||||||
-o output_upscaled.mp4 \
|
-o output_upscaled.mp4 \
|
||||||
--model realesr-animevideov3 \
|
--backend pytorch \
|
||||||
--model-path ~/tools/realesrgan/models \
|
--model realesrgan-x4plus \
|
||||||
--scale 2
|
--scale 2
|
||||||
```
|
```
|
||||||
|
|
||||||
**Important**: If you get "find_blob_index_by_name" errors, the model files are missing.
|
The PyTorch backend uses `.pth` model weights. You can pass a custom weight file via `--model-path /path/model.pth`.
|
||||||
Use `--model-path` to point to your models directory (see section 2b above).
|
For the legacy ncnn backend, pass `--backend ncnn` plus your existing ncnn binary/model setup.
|
||||||
|
|
||||||
By default, temporary working files are created on `/mnt/winsteam`.
|
By default, temporary working files are created on `/mnt/winsteam`.
|
||||||
Override if needed with `--temp-root /some/other/path`.
|
Override if needed with `--temp-root /some/other/path`.
|
||||||
@@ -114,14 +133,17 @@ To force a specific Vulkan GPU, pass e.g. `--gpu-id 0`.
|
|||||||
### Useful options
|
### Useful options
|
||||||
- `--model realesr-animevideov3` for animation/anime-like sources
|
- `--model realesr-animevideov3` for animation/anime-like sources
|
||||||
- `--model realesrgan-x4plus` for natural/live-action footage
|
- `--model realesrgan-x4plus` for natural/live-action footage
|
||||||
- `--model-path ~/tools/realesrgan/models` to specify model directory
|
- `--backend pytorch|ncnn` choose upscaler backend (default `pytorch`)
|
||||||
|
- `--model-path /path/to/model.pth` for custom PyTorch weight file
|
||||||
|
- `--weights-dir ~/.cache/realesrgan` where auto-downloaded PyTorch weights are stored
|
||||||
- `--scale 2|3|4`
|
- `--scale 2|3|4`
|
||||||
- `--tile-size 128` (or 256) if you hit VRAM limits
|
- `--tile-size 128` (or 256) if you hit VRAM limits
|
||||||
- `--jobs 2:2:2` to tune throughput
|
- `--jobs 2:2:2` to tune throughput (ncnn backend only)
|
||||||
- `--crf 14` for higher output quality (bigger file)
|
- `--crf 14` for higher output quality (bigger file)
|
||||||
- `--keep-temp` to keep extracted and processed frame directories
|
- `--keep-temp` to keep extracted and processed frame directories
|
||||||
- `--temp-root /mnt/winsteam` for temp workspace location
|
- `--temp-root /mnt/winsteam` for temp workspace location
|
||||||
- `--gpu-id auto` (or `--gpu-id 0`, `--gpu-id 1`, etc.)
|
- `--gpu-id auto` (or `--gpu-id 0`, `--gpu-id 1`, etc.)
|
||||||
|
- `--fp32` for PyTorch FP32 inference (default is FP16 on CUDA)
|
||||||
- `--test-seconds 60` to process only first N seconds for validation
|
- `--test-seconds 60` to process only first N seconds for validation
|
||||||
- `--pre-vf "hqdn3d=1.5:1.5:6:6"` to denoise/deblock before upscaling
|
- `--pre-vf "hqdn3d=1.5:1.5:6:6"` to denoise/deblock before upscaling
|
||||||
|
|
||||||
@@ -148,9 +170,9 @@ Start with:
|
|||||||
|
|
||||||
If you see memory errors, lower memory pressure using:
|
If you see memory errors, lower memory pressure using:
|
||||||
- `--tile-size 128`
|
- `--tile-size 128`
|
||||||
- `6-jobs 1:2:2`
|
- `--jobs 1:2:2`
|
||||||
|
|
||||||
## 5) Optional quality upgrades
|
## 6) Optional quality upgrades
|
||||||
|
|
||||||
For best final quality, you can output with HEVC:
|
For best final quality, you can output with HEVC:
|
||||||
```bash
|
```bash
|
||||||
@@ -159,7 +181,7 @@ python3 upscale_video.py -i input.mp4 -o output_hevc.mp4 --codec libx265 --crf 1
|
|||||||
|
|
||||||
## 7) GPU ID mapping (`nvidia-smi` vs Vulkan `-g`)
|
## 7) GPU ID mapping (`nvidia-smi` vs Vulkan `-g`)
|
||||||
|
|
||||||
Real-ESRGAN uses Vulkan GPU IDs (`-g`), which may not match `nvidia-smi` index order.
|
This section is mainly relevant for the legacy `ncnn` backend. The PyTorch backend usually follows CUDA GPU indexing.
|
||||||
|
|
||||||
Check NVIDIA GPUs:
|
Check NVIDIA GPUs:
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -1 +1,5 @@
|
|||||||
tqdm>=4.66.0
|
tqdm>=4.66.0
|
||||||
|
numpy>=1.24
|
||||||
|
opencv-python>=4.8
|
||||||
|
realesrgan>=0.3.0
|
||||||
|
basicsr>=1.4.2
|
||||||
|
|||||||
369
upscale_video.py
369
upscale_video.py
@@ -5,6 +5,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
import urllib.request
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -14,6 +15,46 @@ except ImportError:
|
|||||||
HAS_TQDM = False
|
HAS_TQDM = False
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of the models the PyTorch backend knows about, keyed by the value
# accepted on the command line as --model. Every entry carries:
#   arch     - network family tag used to pick the model class
#   netscale - native upscale factor of the network
#   filename - weight file name inside the weights/model directory; it doubles
#              as the download-cache key, so it must be unique per entry
#   url      - where the weights are fetched from when they are not cached
MODEL_SPECS = {
    "realesrgan-x4plus": {
        "arch": "rrdb",
        "netscale": 4,
        "filename": "RealESRGAN_x4plus.pth",
        "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
    },
    "realesrnet-x4plus": {
        "arch": "rrdb",
        "netscale": 4,
        "filename": "RealESRNet_x4plus.pth",
        "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRNet_x4plus.pth",
    },
    "realesr-general-x4v3": {
        # NOTE(review): upstream Real-ESRGAN publishes realesr-general-x4v3 as an
        # SRVGG-compact network; confirm that the "rrdb" tag matches the weights
        # actually served by this URL.
        "arch": "rrdb",
        "netscale": 4,
        # Stored under its own name. Reusing "RealESRGAN_x4plus.pth" here made
        # this model share a cache file with "realesrgan-x4plus", so whichever
        # of the two was downloaded first was silently used for both.
        "filename": "realesr-general-x4v3.pth",
        "url": "https://huggingface.co/qualcomm/Real-ESRGAN-General-x4v3/resolve/main/RealESRGAN_x4plus.pth",
    },
    "flashvsr-x4": {
        # NOTE(review): verify that this release asset exists before relying on
        # the automatic download.
        "arch": "rrdb",
        "netscale": 4,
        "filename": "flashvsr_x4.pth",
        "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.3.0/flashvsr_x4.pth",
    },
    "real-cugan-x4": {
        "arch": "cugan",
        "netscale": 4,
        "filename": "real_cugan_x4.pth",
        "url": "https://huggingface.co/Hacksider/Real-CUGAN/resolve/main/models/real_cugan_x4.pth",
    },
    "realesr-animevideov3": {
        "arch": "srvgg",
        "netscale": 4,
        "filename": "realesr-animevideov3.pth",
        "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth",
    },
}
|
||||||
|
|
||||||
|
|
||||||
def run(cmd: list[str]) -> None:
|
def run(cmd: list[str]) -> None:
|
||||||
print("\n$", " ".join(cmd))
|
print("\n$", " ".join(cmd))
|
||||||
completed = subprocess.run(cmd)
|
completed = subprocess.run(cmd)
|
||||||
@@ -25,16 +66,17 @@ def command_exists(name: str) -> bool:
|
|||||||
return shutil.which(name) is not None
|
return shutil.which(name) is not None
|
||||||
|
|
||||||
|
|
||||||
def assert_prerequisites(realesrgan_bin: str) -> None:
|
def assert_prerequisites(backend: str, realesrgan_bin: str) -> None:
|
||||||
missing = []
|
missing = []
|
||||||
if not command_exists("ffmpeg"):
|
if not command_exists("ffmpeg"):
|
||||||
missing.append("ffmpeg")
|
missing.append("ffmpeg")
|
||||||
if not command_exists("ffprobe"):
|
if not command_exists("ffprobe"):
|
||||||
missing.append("ffprobe")
|
missing.append("ffprobe")
|
||||||
|
|
||||||
real_esrgan_ok = Path(realesrgan_bin).exists() or command_exists(realesrgan_bin)
|
if backend == "ncnn":
|
||||||
if not real_esrgan_ok:
|
real_esrgan_ok = Path(realesrgan_bin).exists() or command_exists(realesrgan_bin)
|
||||||
missing.append(realesrgan_bin)
|
if not real_esrgan_ok:
|
||||||
|
missing.append(realesrgan_bin)
|
||||||
|
|
||||||
if missing:
|
if missing:
|
||||||
items = ", ".join(missing)
|
items = ", ".join(missing)
|
||||||
@@ -44,6 +86,31 @@ def assert_prerequisites(realesrgan_bin: str) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_pytorch_deps() -> tuple:
    """Import the PyTorch backend's third-party modules on demand.

    The imports happen here rather than at module level so the script can
    still start when these packages are not installed.

    Returns:
        The tuple ``(cv2, torch, RRDBNet, RealESRGANer, SRVGGNetCompact)``.

    Raises:
        RuntimeError: If any of the imports fails; the message contains the
            pip commands needed to install the dependencies.
    """
    legacy_name = "torchvision.transforms.functional_tensor"
    private_name = "torchvision.transforms._functional_tensor"
    install_hint = (
        "PyTorch backend dependencies are missing. Install with:\n"
        "python -m pip install -r requirements.txt\n"
        "python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128"
    )

    try:
        import cv2
        import importlib
        import sys as _sys
        import torch

        # When the old module path cannot be imported, register the private
        # module under the old name so later imports of that name succeed
        # (presumably needed by basicsr/realesrgan - confirm).
        try:
            importlib.import_module(legacy_name)
        except ModuleNotFoundError:
            _sys.modules[legacy_name] = importlib.import_module(private_name)

        from basicsr.archs.rrdbnet_arch import RRDBNet
        from realesrgan import RealESRGANer
        from realesrgan.archs.srvgg_arch import SRVGGNetCompact
    except ImportError as exc:
        raise RuntimeError(install_hint) from exc

    return cv2, torch, RRDBNet, RealESRGANer, SRVGGNetCompact
|
||||||
|
|
||||||
|
|
||||||
def has_audio_stream(input_video: Path) -> bool:
|
def has_audio_stream(input_video: Path) -> bool:
|
||||||
cmd = [
|
cmd = [
|
||||||
@@ -66,18 +133,47 @@ def count_png_frames(folder: Path) -> int:
|
|||||||
return sum(1 for _ in folder.glob("*.png"))
|
return sum(1 for _ in folder.glob("*.png"))
|
||||||
|
|
||||||
|
|
||||||
def run_upscale_with_progress(cmd: list[str], input_frames: Path, output_frames: Path) -> None:
|
def resolve_model_weights(model: str, model_path: str | None, weights_dir: Path) -> tuple[Path, dict]:
    """Locate the weight file for *model*, downloading it when necessary.

    Resolution order:
      1. ``model_path`` pointing at a file: that file is used as-is.
      2. ``model_path`` pointing at a directory: the model's registered
         filename must exist inside it.
      3. No ``model_path``: the file is looked up in ``weights_dir`` and
         downloaded from the registered URL if it is not there yet.

    Args:
        model: Key into ``MODEL_SPECS``.
        model_path: Optional user-supplied weight file or directory.
        weights_dir: Cache directory for downloaded weights (created if missing).

    Returns:
        ``(path_to_weights, spec)`` where ``spec`` is the ``MODEL_SPECS`` entry.

    Raises:
        RuntimeError: If the model is unknown, or ``model_path`` was given but
            does not lead to an existing weight file.
    """
    if model not in MODEL_SPECS:
        supported = ", ".join(MODEL_SPECS.keys())
        raise RuntimeError(f"Unsupported model '{model}'. Supported: {supported}")

    spec = MODEL_SPECS[model]

    if model_path:
        candidate = Path(model_path).expanduser()
        if candidate.is_file():
            return candidate, spec
        if candidate.is_dir():
            resolved = candidate / spec["filename"]
            if resolved.exists():
                return resolved, spec
            raise RuntimeError(f"Model file not found: {resolved}")
        # An explicit path that does not exist is a user error; silently
        # falling back to a download would hide the typo.
        raise RuntimeError(f"Model path does not exist: {candidate}")

    weights_dir.mkdir(parents=True, exist_ok=True)
    resolved = weights_dir / spec["filename"]
    if not resolved.exists():
        print(f"Downloading model weights to: {resolved}")
        # Download next to the target and rename afterwards, so an interrupted
        # transfer never leaves a truncated file under the final name (which
        # would be picked up as a valid cache hit on the next run).
        partial = resolved.with_name(resolved.name + ".part")
        try:
            urllib.request.urlretrieve(spec["url"], partial)
            partial.replace(resolved)
        finally:
            partial.unlink(missing_ok=True)

    return resolved, spec
|
||||||
|
|
||||||
|
|
||||||
|
def run_ncnn_upscale_with_progress(cmd: list[str], input_frames: Path, output_frames: Path) -> None:
|
||||||
total_frames = count_png_frames(input_frames)
|
total_frames = count_png_frames(input_frames)
|
||||||
if total_frames == 0:
|
if total_frames == 0:
|
||||||
raise RuntimeError("No extracted frames found before upscaling.")
|
raise RuntimeError("No extracted frames found before upscaling.")
|
||||||
|
|
||||||
started = time.time()
|
started = time.time()
|
||||||
# Suppress Real-ESRGAN's verbose output by redirecting stdout/stderr
|
|
||||||
process = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
process = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||||
|
|
||||||
if HAS_TQDM:
|
if HAS_TQDM:
|
||||||
pbar = tqdm(total=total_frames, unit="frames", desc="Upscaling",
|
pbar = tqdm(
|
||||||
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]")
|
total=total_frames,
|
||||||
|
unit="frames",
|
||||||
|
desc="Upscaling",
|
||||||
|
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
|
||||||
|
)
|
||||||
last_count = 0
|
last_count = 0
|
||||||
else:
|
else:
|
||||||
print(f"Upscaling: 0/{total_frames} frames (0.0%) | ETA --:--:--")
|
print(f"Upscaling: 0/{total_frames} frames (0.0%) | ETA --:--:--")
|
||||||
@@ -95,9 +191,9 @@ def run_upscale_with_progress(cmd: list[str], input_frames: Path, output_frames:
|
|||||||
last_count = done_frames
|
last_count = done_frames
|
||||||
else:
|
else:
|
||||||
if now - last_print >= 2.0:
|
if now - last_print >= 2.0:
|
||||||
progress = min(100.0, (done_frames / total_frames) * 100)
|
|
||||||
elapsed = max(now - started, 1e-6)
|
elapsed = max(now - started, 1e-6)
|
||||||
fps = done_frames / elapsed
|
fps = done_frames / elapsed
|
||||||
|
progress = min(100.0, (done_frames / total_frames) * 100)
|
||||||
if done_frames > 0 and fps > 0:
|
if done_frames > 0 and fps > 0:
|
||||||
remaining_frames = max(total_frames - done_frames, 0)
|
remaining_frames = max(total_frames - done_frames, 0)
|
||||||
eta_seconds = int(remaining_frames / fps)
|
eta_seconds = int(remaining_frames / fps)
|
||||||
@@ -106,10 +202,7 @@ def run_upscale_with_progress(cmd: list[str], input_frames: Path, output_frames:
|
|||||||
eta_str = f"{eta_h:02d}:{eta_m:02d}:{eta_s:02d}"
|
eta_str = f"{eta_h:02d}:{eta_m:02d}:{eta_s:02d}"
|
||||||
else:
|
else:
|
||||||
eta_str = "--:--:--"
|
eta_str = "--:--:--"
|
||||||
print(
|
print(f"Upscaling: {done_frames}/{total_frames} ({progress:.1f}%) | {fps:.2f} fps | ETA {eta_str}")
|
||||||
f"Upscaling: {done_frames}/{total_frames} "
|
|
||||||
f"({progress:.1f}%) | {fps:.2f} fps | ETA {eta_str}"
|
|
||||||
)
|
|
||||||
last_print = now
|
last_print = now
|
||||||
|
|
||||||
if return_code is not None:
|
if return_code is not None:
|
||||||
@@ -121,10 +214,7 @@ def run_upscale_with_progress(cmd: list[str], input_frames: Path, output_frames:
|
|||||||
pbar.close()
|
pbar.close()
|
||||||
elapsed = max(time.time() - started, 1e-6)
|
elapsed = max(time.time() - started, 1e-6)
|
||||||
fps = done_frames / elapsed
|
fps = done_frames / elapsed
|
||||||
print(
|
print(f"Upscaling complete: {done_frames}/{total_frames} frames | avg {fps:.2f} fps | total time {elapsed:.1f}s")
|
||||||
f"Upscaling complete: {done_frames}/{total_frames} frames | "
|
|
||||||
f"avg {fps:.2f} fps | total time {elapsed:.1f}s"
|
|
||||||
)
|
|
||||||
if return_code != 0:
|
if return_code != 0:
|
||||||
raise RuntimeError(f"Command failed ({return_code}): {' '.join(cmd)}")
|
raise RuntimeError(f"Command failed ({return_code}): {' '.join(cmd)}")
|
||||||
break
|
break
|
||||||
@@ -132,13 +222,89 @@ def run_upscale_with_progress(cmd: list[str], input_frames: Path, output_frames:
|
|||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
|
|
||||||
|
def run_pytorch_upscale_with_progress(
    input_frames: Path,
    output_frames: Path,
    model_name: str,
    model_path: str | None,
    weights_dir: Path,
    scale: int,
    tile_size: int,
    gpu_id: str,
    fp32: bool,
) -> None:
    """Upscale every PNG in *input_frames* with the PyTorch Real-ESRGAN backend.

    Each frame is read with OpenCV, enhanced, and written to *output_frames*
    under the same file name. Progress is shown through tqdm when available,
    otherwise a status line is printed every 20 frames.

    Args:
        input_frames: Directory holding the extracted ``*.png`` frames.
        output_frames: Directory receiving the upscaled frames.
        model_name: Key into ``MODEL_SPECS``.
        model_path: Optional weight file or directory, see ``resolve_model_weights``.
        weights_dir: Cache directory for downloaded weights.
        scale: Output scale factor passed to ``enhance`` as ``outscale``.
        tile_size: Tile size handed to ``RealESRGANer`` as ``tile``.
        gpu_id: ``"auto"`` or a single integer device index given as a string.
        fp32: Force FP32 inference; otherwise half precision is used when CUDA
            is available.

    Raises:
        RuntimeError: If there are no input frames, the model architecture is
            not supported, ``gpu_id`` is malformed, or a frame cannot be read
            or written.
    """
    # Cheap sanity check first, so nothing is imported or downloaded for an
    # empty frame directory.
    frame_files = sorted(input_frames.glob("*.png"))
    total_frames = len(frame_files)
    if total_frames == 0:
        raise RuntimeError("No extracted frames found before upscaling.")

    cv2, torch, RRDBNet, RealESRGANer, SRVGGNetCompact = ensure_pytorch_deps()

    weights_file, spec = resolve_model_weights(model_name, model_path, weights_dir)

    arch = spec["arch"]
    if arch == "rrdb":
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
    elif arch == "srvgg":
        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type="prelu")
    else:
        # No network class is available for this architecture (e.g. "cugan").
        # Fail with a clear message instead of handing model=None to the
        # upsampler.
        raise RuntimeError(
            f"Model '{model_name}' uses the '{arch}' architecture, "
            "which the PyTorch backend cannot load."
        )

    cuda_available = torch.cuda.is_available()
    if gpu_id == "auto":
        selected_gpu = 0 if cuda_available else None
    else:
        try:
            selected_gpu = int(gpu_id)
        except ValueError as exc:
            raise RuntimeError(
                f"Invalid GPU id '{gpu_id}' for the PyTorch backend: "
                "expected 'auto' or a single integer device index."
            ) from exc

    upsampler = RealESRGANer(
        scale=spec["netscale"],
        model_path=str(weights_file),
        model=model,
        tile=tile_size,
        tile_pad=10,
        pre_pad=0,
        half=(not fp32 and cuda_available),
        gpu_id=selected_gpu,
    )

    output_frames.mkdir(parents=True, exist_ok=True)

    started = time.time()
    if HAS_TQDM:
        progress_iter = tqdm(frame_files, total=total_frames, unit="frames", desc="Upscaling")
    else:
        progress_iter = frame_files

    for done, frame_file in enumerate(progress_iter, start=1):
        image = cv2.imread(str(frame_file), cv2.IMREAD_COLOR)
        if image is None:
            raise RuntimeError(f"Failed to read input frame: {frame_file}")

        output, _ = upsampler.enhance(image, outscale=scale)
        target = output_frames / frame_file.name
        if not cv2.imwrite(str(target), output):
            raise RuntimeError(f"Failed to write output frame: {target}")

        # Plain-text progress fallback when tqdm is not installed.
        if not HAS_TQDM and done % 20 == 0:
            elapsed = max(time.time() - started, 1e-6)
            fps = done / elapsed
            progress = min(100.0, (done / total_frames) * 100)
            print(f"Upscaling: {done}/{total_frames} ({progress:.1f}%) | {fps:.2f} fps")

    elapsed = max(time.time() - started, 1e-6)
    fps = total_frames / elapsed
    print(f"Upscaling complete: {total_frames}/{total_frames} frames | avg {fps:.2f} fps | total time {elapsed:.1f}s")
|
||||||
|
|
||||||
|
|
||||||
def upscale_video(
|
def upscale_video(
|
||||||
input_video: Path,
|
input_video: Path,
|
||||||
output_video: Path,
|
output_video: Path,
|
||||||
|
backend: str,
|
||||||
realesrgan_bin: str,
|
realesrgan_bin: str,
|
||||||
model: str,
|
model: str,
|
||||||
model_path: str | None,
|
model_path: str | None,
|
||||||
|
weights_dir: Path,
|
||||||
scale: int,
|
scale: int,
|
||||||
tile_size: int,
|
tile_size: int,
|
||||||
jobs: str,
|
jobs: str,
|
||||||
@@ -150,7 +316,11 @@ def upscale_video(
|
|||||||
temp_root: Path,
|
temp_root: Path,
|
||||||
gpu_id: str,
|
gpu_id: str,
|
||||||
test_seconds: float | None,
|
test_seconds: float | None,
|
||||||
|
start_time: float | None,
|
||||||
|
skip_sar_correction: bool,
|
||||||
pre_vf: str | None,
|
pre_vf: str | None,
|
||||||
|
final_res: str | None,
|
||||||
|
fp32: bool,
|
||||||
) -> None:
|
) -> None:
|
||||||
if not input_video.exists():
|
if not input_video.exists():
|
||||||
raise FileNotFoundError(f"Input video does not exist: {input_video}")
|
raise FileNotFoundError(f"Input video does not exist: {input_video}")
|
||||||
@@ -167,29 +337,38 @@ def upscale_video(
|
|||||||
frames_out.mkdir(parents=True, exist_ok=True)
|
frames_out.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
print(f"Working directory: {tmp_dir}")
|
print(f"Working directory: {tmp_dir}")
|
||||||
if test_seconds is not None:
|
if start_time is not None or test_seconds is not None:
|
||||||
print(f"Test mode: processing first {test_seconds:.2f} seconds")
|
msg = "Test mode:"
|
||||||
|
if start_time is not None:
|
||||||
|
msg += f" starting at {start_time:.2f}s"
|
||||||
|
if test_seconds is not None:
|
||||||
|
msg += f" processing {test_seconds:.2f}s"
|
||||||
|
print(msg)
|
||||||
|
|
||||||
|
seek_args = ["-ss", str(start_time)] if start_time is not None else []
|
||||||
test_duration_args = ["-t", str(test_seconds)] if test_seconds is not None else []
|
test_duration_args = ["-t", str(test_seconds)] if test_seconds is not None else []
|
||||||
|
|
||||||
filter_chain = []
|
filter_chain = []
|
||||||
if pre_vf:
|
if pre_vf:
|
||||||
filter_chain.append(pre_vf)
|
filter_chain.append(pre_vf)
|
||||||
filter_chain.extend([
|
if not skip_sar_correction:
|
||||||
"scale=ceil(iw*sar/2)*2:ih",
|
filter_chain.extend([
|
||||||
"setsar=1",
|
"scale=ceil(iw*sar/2)*2:ih",
|
||||||
])
|
"setsar=1",
|
||||||
|
])
|
||||||
|
|
||||||
|
# Build extract command with optional video filter
|
||||||
extract_cmd = [
|
extract_cmd = [
|
||||||
"ffmpeg",
|
"ffmpeg",
|
||||||
"-y",
|
"-y",
|
||||||
"-i",
|
"-i",
|
||||||
str(input_video),
|
str(input_video),
|
||||||
|
*seek_args,
|
||||||
*test_duration_args,
|
*test_duration_args,
|
||||||
"-vf",
|
|
||||||
",".join(filter_chain),
|
|
||||||
str(frames_in / "%08d.png"),
|
|
||||||
]
|
]
|
||||||
|
if filter_chain:
|
||||||
|
extract_cmd.extend(["-vf", ",".join(filter_chain)])
|
||||||
|
extract_cmd.append(str(frames_in / "%08d.png"))
|
||||||
run(extract_cmd)
|
run(extract_cmd)
|
||||||
|
|
||||||
audio_present = has_audio_stream(input_video)
|
audio_present = has_audio_stream(input_video)
|
||||||
@@ -199,6 +378,7 @@ def upscale_video(
|
|||||||
"-y",
|
"-y",
|
||||||
"-i",
|
"-i",
|
||||||
str(input_video),
|
str(input_video),
|
||||||
|
*seek_args,
|
||||||
*test_duration_args,
|
*test_duration_args,
|
||||||
"-vn",
|
"-vn",
|
||||||
"-c:a",
|
"-c:a",
|
||||||
@@ -209,31 +389,43 @@ def upscale_video(
|
|||||||
]
|
]
|
||||||
run(extract_audio_cmd)
|
run(extract_audio_cmd)
|
||||||
|
|
||||||
upscale_cmd = [
|
if backend == "pytorch":
|
||||||
realesrgan_bin,
|
run_pytorch_upscale_with_progress(
|
||||||
"-i",
|
input_frames=frames_in,
|
||||||
str(frames_in),
|
output_frames=frames_out,
|
||||||
"-o",
|
model_name=model,
|
||||||
str(frames_out),
|
model_path=model_path,
|
||||||
"-n",
|
weights_dir=weights_dir,
|
||||||
model,
|
scale=scale,
|
||||||
"-s",
|
tile_size=tile_size,
|
||||||
str(scale),
|
gpu_id=gpu_id,
|
||||||
"-f",
|
fp32=fp32,
|
||||||
"png",
|
)
|
||||||
"-t",
|
else:
|
||||||
str(tile_size),
|
upscale_cmd = [
|
||||||
"-j",
|
realesrgan_bin,
|
||||||
jobs,
|
"-i",
|
||||||
"-g",
|
str(frames_in),
|
||||||
gpu_id,
|
"-o",
|
||||||
]
|
str(frames_out),
|
||||||
if model_path:
|
"-n",
|
||||||
upscale_cmd.extend(["-m", model_path])
|
model,
|
||||||
run_upscale_with_progress(upscale_cmd, frames_in, frames_out)
|
"-s",
|
||||||
|
str(scale),
|
||||||
|
"-f",
|
||||||
|
"png",
|
||||||
|
"-t",
|
||||||
|
str(tile_size),
|
||||||
|
"-j",
|
||||||
|
jobs,
|
||||||
|
"-g",
|
||||||
|
gpu_id,
|
||||||
|
]
|
||||||
|
if model_path:
|
||||||
|
upscale_cmd.extend(["-m", model_path])
|
||||||
|
run_ncnn_upscale_with_progress(upscale_cmd, frames_in, frames_out)
|
||||||
|
|
||||||
fps_args = ["-r", fps] if fps else []
|
fps_args = ["-r", fps] if fps else []
|
||||||
|
|
||||||
encode_cmd = [
|
encode_cmd = [
|
||||||
"ffmpeg",
|
"ffmpeg",
|
||||||
"-y",
|
"-y",
|
||||||
@@ -245,20 +437,21 @@ def upscale_video(
|
|||||||
if audio_present and audio_file.exists():
|
if audio_present and audio_file.exists():
|
||||||
encode_cmd.extend(["-i", str(audio_file), "-c:a", "copy"])
|
encode_cmd.extend(["-i", str(audio_file), "-c:a", "copy"])
|
||||||
|
|
||||||
encode_cmd.extend(
|
# Add scaling filter if final resolution specified
|
||||||
[
|
if final_res:
|
||||||
"-c:v",
|
encode_cmd.extend(["-vf", f"scale={final_res}:flags=lanczos"])
|
||||||
codec,
|
|
||||||
"-crf",
|
|
||||||
str(crf),
|
|
||||||
"-preset",
|
|
||||||
preset,
|
|
||||||
"-pix_fmt",
|
|
||||||
"yuv420p",
|
|
||||||
str(output_video),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
encode_cmd.extend([
|
||||||
|
"-c:v",
|
||||||
|
codec,
|
||||||
|
"-crf",
|
||||||
|
str(crf),
|
||||||
|
"-preset",
|
||||||
|
preset,
|
||||||
|
"-pix_fmt",
|
||||||
|
"yuv420p",
|
||||||
|
str(output_video),
|
||||||
|
])
|
||||||
run(encode_cmd)
|
run(encode_cmd)
|
||||||
|
|
||||||
if keep_temp:
|
if keep_temp:
|
||||||
@@ -269,39 +462,45 @@ def upscale_video(
|
|||||||
print(f"Temporary files copied to: {kept}")
|
print(f"Temporary files copied to: {kept}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
def parse_args() -> argparse.Namespace:
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Upscale a video locally with Real-ESRGAN (RTX GPU via Vulkan)."
|
description="Upscale a video locally with Real-ESRGAN (PyTorch CUDA default, ncnn optional)."
|
||||||
)
|
)
|
||||||
parser.add_argument("-i", "--input", required=True, help="Input video path")
|
parser.add_argument("-i", "--input", required=True, help="Input video path")
|
||||||
parser.add_argument("-o", "--output", required=True, help="Output video path")
|
parser.add_argument("-o", "--output", required=True, help="Output video path")
|
||||||
|
parser.add_argument("--backend", choices=["pytorch", "ncnn"], default="pytorch")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--realesrgan-bin",
|
"--realesrgan-bin",
|
||||||
default="realesrgan-ncnn-vulkan",
|
default="realesrgan-ncnn-vulkan",
|
||||||
help="Path or command name of realesrgan-ncnn-vulkan",
|
help="Path or command name of realesrgan-ncnn-vulkan (ncnn backend only)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--model",
|
"--model",
|
||||||
default="realesr-animevideov3",
|
default="realesrgan-x4plus",
|
||||||
help="Model name (e.g. realesr-animevideov3, realesrgan-x4plus)",
|
choices=["realesrgan-x4plus", "realesrnet-x4plus", "realesr-general-x4v3", "flashvsr-x4", "real-cugan-x4", "realesr-animevideov3"],
|
||||||
|
help="Model name",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--model-path",
|
"--model-path",
|
||||||
default=None,
|
default=None,
|
||||||
help="Path to models directory (required if models not in default location)",
|
help="Model file or model directory. For pytorch: .pth file or folder containing model .pth",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--weights-dir",
|
||||||
|
default=str(Path.home() / ".cache" / "realesrgan"),
|
||||||
|
help="Where to download/store PyTorch model weights",
|
||||||
)
|
)
|
||||||
parser.add_argument("--scale", type=int, default=2, choices=[2, 3, 4])
|
parser.add_argument("--scale", type=int, default=2, choices=[2, 3, 4])
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--tile-size",
|
"--tile-size",
|
||||||
type=int,
|
type=int,
|
||||||
default=0,
|
default=0,
|
||||||
help="Tile size for VRAM-limited cases (0 = auto)",
|
help="Tile size (0 = auto/no tile for backend defaults)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--jobs",
|
"--jobs",
|
||||||
default="2:2:2",
|
default="2:2:2",
|
||||||
help="NCNN worker threads as load:proc:save",
|
help="NCNN worker threads as load:proc:save (ncnn backend only)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--fps",
|
"--fps",
|
||||||
@@ -311,6 +510,12 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser.add_argument("--codec", default="libx264", help="Output video codec")
|
parser.add_argument("--codec", default="libx264", help="Output video codec")
|
||||||
parser.add_argument("--crf", type=int, default=16, help="Quality (lower = better)")
|
parser.add_argument("--crf", type=int, default=16, help="Quality (lower = better)")
|
||||||
parser.add_argument("--preset", default="medium", help="Encoder preset")
|
parser.add_argument("--preset", default="medium", help="Encoder preset")
|
||||||
|
parser.add_argument("--fp32", action="store_true", help="Use FP32 inference for PyTorch backend")
|
||||||
|
parser.add_argument(
|
||||||
|
"--skip-sar-correction",
|
||||||
|
action="store_true",
|
||||||
|
help="Skip SAR (aspect ratio) correction before upscaling (for testing native resolution)",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--keep-temp",
|
"--keep-temp",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -324,34 +529,46 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--gpu-id",
|
"--gpu-id",
|
||||||
default="auto",
|
default="auto",
|
||||||
help="Vulkan GPU id for Real-ESRGAN (e.g. 0, 1, 0,1). Use 'auto' by default",
|
help="GPU id (e.g. 0,1) or 'auto'. For ncnn this maps to Vulkan id",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--test-seconds",
|
"--test-seconds",
|
||||||
type=float,
|
type=float,
|
||||||
default=None,
|
default=None,
|
||||||
help="Only process first N seconds (for quick test runs)",
|
help="Only process N seconds (for quick test runs)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--start-time",
|
||||||
|
type=float,
|
||||||
|
default=None,
|
||||||
|
help="Start at specific time in video (seconds, for testing specific frames)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--pre-vf",
|
"--pre-vf",
|
||||||
default=None,
|
default=None,
|
||||||
help="Optional ffmpeg video filter(s) applied before upscaling (e.g. hqdn3d=1.5:1.5:6:6)",
|
help="Optional ffmpeg video filter(s) applied before upscaling (e.g. hqdn3d=1.5:1.5:6:6)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--final-res",
|
||||||
|
default=None,
|
||||||
|
help="Final output resolution (e.g. 1920x1080 for Full HD) - scales downsampled frames before encoding",
|
||||||
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
assert_prerequisites(args.realesrgan_bin)
|
assert_prerequisites(args.backend, args.realesrgan_bin)
|
||||||
upscale_video(
|
upscale_video(
|
||||||
input_video=Path(args.input),
|
input_video=Path(args.input),
|
||||||
output_video=Path(args.output),
|
output_video=Path(args.output),
|
||||||
|
backend=args.backend,
|
||||||
realesrgan_bin=args.realesrgan_bin,
|
realesrgan_bin=args.realesrgan_bin,
|
||||||
model=args.model,
|
model=args.model,
|
||||||
model_path=args.model_path,
|
model_path=args.model_path,
|
||||||
|
weights_dir=Path(args.weights_dir),
|
||||||
scale=args.scale,
|
scale=args.scale,
|
||||||
tile_size=args.tile_size,
|
tile_size=args.tile_size,
|
||||||
jobs=args.jobs,
|
jobs=args.jobs,
|
||||||
@@ -363,7 +580,11 @@ def main() -> int:
|
|||||||
temp_root=Path(args.temp_root),
|
temp_root=Path(args.temp_root),
|
||||||
gpu_id=args.gpu_id,
|
gpu_id=args.gpu_id,
|
||||||
test_seconds=args.test_seconds,
|
test_seconds=args.test_seconds,
|
||||||
|
start_time=args.start_time,
|
||||||
|
skip_sar_correction=args.skip_sar_correction,
|
||||||
pre_vf=args.pre_vf,
|
pre_vf=args.pre_vf,
|
||||||
|
final_res=args.final_res,
|
||||||
|
fp32=args.fp32,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
print(f"Error: {exc}", file=sys.stderr)
|
print(f"Error: {exc}", file=sys.stderr)
|
||||||
|
|||||||
Reference in New Issue
Block a user