# (extraction metadata: 1109 lines, 40 KiB, Python)
from pathlib import Path
|
|
import os
|
|
import requests
|
|
import re
|
|
import sys
|
|
import threading
|
|
import time
|
|
import ipaddress
|
|
import shutil
|
|
from datetime import datetime
|
|
from functools import wraps
|
|
from flask import Flask, jsonify, render_template, request, abort, Response
|
|
|
|
# Load config from environment variables
|
|
API_URL = os.getenv("API_URL", "http://localhost:8457/api")
|
|
VIDEO_URL = os.getenv("VIDEO_URL", "http://localhost:8457/video/")
|
|
API_TOKEN = os.getenv("API_TOKEN", "")
|
|
SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", 60)) # Default 60 minutes
|
|
ALLOWED_IPS = [ip.strip() for ip in os.getenv("ALLOWED_IPS", "127.0.0.1").split(",")]
|
|
UI_USERNAME = os.getenv("UI_USERNAME", "admin")
|
|
UI_PASSWORD = os.getenv("UI_PASSWORD", "password")
|
|
SOURCE_DIR = Path("/app/source")
|
|
TARGET_DIR = Path("/app/target")
|
|
IMPORT_DIR = Path("/app/import")
|
|
HEADERS = {"Authorization": f"Token {API_TOKEN}"}
|
|
|
|
app = Flask(__name__)
|
|
|
|
# Database setup
|
|
import sqlite3
|
|
from contextlib import contextmanager
|
|
|
|
DB_PATH = Path("/app/data/videos.db")
|
|
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
@contextmanager
def get_db():
    """Yield a SQLite connection (Row access, 30s busy timeout); always closes it."""
    connection = sqlite3.connect(DB_PATH, timeout=30)
    connection.row_factory = sqlite3.Row
    try:
        yield connection
    finally:
        connection.close()
|
|
|
|
def init_db():
    """Create the videos and lost_media tables if they do not already exist."""
    schema = """
            CREATE TABLE IF NOT EXISTS videos (
                video_id TEXT PRIMARY KEY,
                title TEXT,
                channel TEXT,
                published TEXT,
                symlink TEXT,
                status TEXT,
                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS lost_media (
                video_id TEXT PRIMARY KEY,
                filepath TEXT,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );
        """
    with get_db() as conn:
        conn.executescript(schema)
        conn.commit()
|
|
|
|
# Retry loop for DB initialization to prevent crash on SMB lock
# (the SQLite file may live on a network share that is briefly locked).
# Retries forever: the service is useless without its database.
while True:
    try:
        init_db()
        print("Database initialized successfully.", flush=True)
        break
    except Exception as e:
        print(f"Database initialization failed (retrying in 10s): {e}", flush=True)
        # `time` is already imported at module level; the original re-imported
        # it here redundantly on every failed attempt.
        time.sleep(10)
|
|
|
|
# Global State
# Result list of the most recent scan; rebuilt from scratch by process_videos().
processed_videos = []
# Bounded in-memory log ring (max 1000 lines, see log()); served via /api/logs.
log_buffer = []
log_lock = threading.Lock()
# Separate bounded buffer (max 500 lines, see tlog()) for ffmpeg/transcode
# output; served via /api/transcode/logs.
transcode_log_buffer = []
transcode_log_lock = threading.Lock()
|
|
|
|
# Utility functions
|
|
def log(msg):
    """Print *msg* to stdout and append it to the bounded in-memory buffer."""
    print(msg, flush=True)
    with log_lock:
        log_buffer.append(msg)
        # Keep the buffer capped at 1000 entries, dropping the oldest.
        while len(log_buffer) > 1000:
            log_buffer.pop(0)
|
|
|
|
def tlog(msg):
    """Print *msg* with a [TRANSCODE] prefix and buffer it for the transcode UI."""
    print(f"[TRANSCODE] {msg}", flush=True)
    with transcode_log_lock:
        transcode_log_buffer.append(msg)
        # Cap the transcode buffer at 500 entries, dropping the oldest.
        while len(transcode_log_buffer) > 500:
            transcode_log_buffer.pop(0)
|
|
|
|
def detect_encoder():
    """Detect the best available hardware H.264 encoder.

    Runs `ffmpeg -encoders` and checks for NVENC, VAAPI, then VideoToolbox,
    in that preference order. Falls back to software 'libx264' when no
    hardware encoder is listed or ffmpeg cannot be run at all.

    Returns:
        str: one of 'h264_nvenc', 'h264_vaapi', 'h264_videotoolbox', 'libx264'.
    """
    import subprocess
    try:
        result = subprocess.run(['ffmpeg', '-hide_banner', '-encoders'],
                                capture_output=True, text=True)
        encoders = result.stdout

        for candidate in ('h264_nvenc', 'h264_vaapi', 'h264_videotoolbox'):
            if candidate in encoders:
                return candidate
        return 'libx264'
    except Exception:
        # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Any failure to run/parse ffmpeg means CPU fallback.
        return 'libx264'
|
|
|
|
def probe_codecs(filepath):
    """Return (video_codec, audio_codec) for *filepath* via ffprobe.

    Each codec is the codec_name of the first video/audio stream (empty
    string when the stream is absent). Returns (None, None) on any error.
    """
    import subprocess

    def _first_stream_codec(selector):
        # One ffprobe invocation per stream type, CSV output, errors only.
        proc = subprocess.run([
            'ffprobe', '-v', 'error', '-select_streams', selector,
            '-show_entries', 'stream=codec_name', '-of', 'csv=p=0', filepath
        ], capture_output=True, text=True)
        return proc.stdout.strip()

    try:
        return _first_stream_codec('v:0'), _first_stream_codec('a:0')
    except Exception as e:
        tlog(f"Error probing {filepath}: {e}")
        return None, None
|
|
|
|
def _transcode_cmd(filepath, temp_file, encoder, video_codec):
    """Build the ffmpeg command for one transcode attempt.

    If the video stream is already H.264 only the audio is re-encoded
    (video stream copy); otherwise video is encoded with *encoder*.
    """
    tail = ['-c:a', 'aac', '-b:a', '192k', '-movflags', '+faststart', '-y', temp_file]
    if video_codec == 'h264':
        return ['ffmpeg', '-v', 'error', '-stats', '-i', filepath,
                '-c:v', 'copy'] + tail
    if encoder == 'h264_nvenc':
        return ['ffmpeg', '-v', 'error', '-stats', '-i', filepath,
                '-c:v', 'h264_nvenc', '-preset', 'fast', '-cq', '23'] + tail
    if encoder == 'h264_vaapi':
        # VAAPI needs the hw device/format set up before -i and an upload filter.
        return ['ffmpeg', '-v', 'error', '-stats',
                '-hwaccel', 'vaapi', '-hwaccel_output_format', 'vaapi',
                '-i', filepath,
                '-vf', 'format=nv12,hwupload',
                '-c:v', 'h264_vaapi', '-b:v', '5M'] + tail
    # Default / CPU path (libx264).
    return ['ffmpeg', '-v', 'error', '-stats', '-i', filepath,
            '-c:v', 'libx264', '-crf', '23', '-preset', 'medium'] + tail


def _discard_temp(temp_file):
    """Remove a leftover temp file if present (best effort cleanup)."""
    if Path(temp_file).exists():
        Path(temp_file).unlink()


def transcode_video(filepath, encoder='libx264'):
    """Transcode a video file to H.264/AAC in place.

    Follows symlinks (translating host paths to container paths), probes the
    current codecs and skips files already in H.264/AAC. On a GPU encoder
    failure it retries once with libx264.

    Args:
        filepath: path to the video file or symlink to it.
        encoder:  ffmpeg video encoder name (default 'libx264').

    Returns:
        bool: True on success (or nothing to do), False on any failure.
    """
    import subprocess

    original_path = Path(filepath)

    # Try to resolve symlink first. Note: a broken symlink still reports
    # is_symlink() == True even though exists() is False, so check the link
    # before the existence test.
    if original_path.is_symlink():
        try:
            actual_file = Path(os.readlink(original_path)).resolve()
            tlog(f"Following symlink: {filepath} -> {actual_file}")

            # Translate host path to container path
            # Host: /mnt/user/tubearchives/bp/... → Container: /app/source/...
            actual_file_str = str(actual_file)
            if actual_file_str.startswith("/mnt/user/tubearchives/bp"):
                container_path = actual_file_str.replace("/mnt/user/tubearchives/bp", "/app/source", 1)
                tlog(f"Translated path: {actual_file} -> {container_path}")
                filepath = container_path
            else:
                filepath = str(actual_file)
        except Exception as e:
            tlog(f"Error resolving symlink: {e}")
            return False
    elif not original_path.exists():
        tlog(f"File not found: {filepath}")
        return False

    # Now check that the (possibly translated) actual file exists.
    if not Path(filepath).exists():
        tlog(f"Source file not found: {filepath}")
        return False

    video_codec, audio_codec = probe_codecs(filepath)

    if video_codec == 'h264' and audio_codec == 'aac':
        tlog(f"Already H.264/AAC: {filepath}")
        return True

    temp_file = f"{filepath}.temp.mp4"

    try:
        if video_codec == 'h264':
            tlog(f"Audio-only transcode: {filepath}")
        else:
            tlog(f"Full transcode using {encoder}: {filepath}")

        cmd = _transcode_cmd(filepath, temp_file, encoder, video_codec)
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0:
            # Replace original: drop the source, promote the temp file.
            Path(filepath).unlink()
            Path(temp_file).rename(filepath)
            tlog(f"✅ Success: {filepath}")
            return True

        # BUGFIX: the original test was
        #   encoder in [...] and 'libcuda' in stderr or 'Cannot load' in stderr
        # which, due to `and` binding tighter than `or`, triggered the CPU
        # retry for *any* encoder whenever stderr contained 'Cannot load'.
        gpu_encoders = ('h264_nvenc', 'h264_vaapi', 'h264_videotoolbox')
        gpu_failure = encoder in gpu_encoders and (
            'libcuda' in result.stderr or 'Cannot load' in result.stderr
        )

        if gpu_failure:
            tlog(f"⚠️ GPU encoding failed, retrying with CPU (libx264)...")

            # Retry with libx264 (or plain audio re-encode if video is h264).
            cpu_cmd = _transcode_cmd(filepath, temp_file, 'libx264', video_codec)
            cpu_result = subprocess.run(cpu_cmd, capture_output=True, text=True)

            if cpu_result.returncode == 0:
                Path(filepath).unlink()
                Path(temp_file).rename(filepath)
                tlog(f"✅ Success (CPU): {filepath}")
                return True

            tlog(f"❌ Failed (CPU): {filepath}")
            tlog(f"Error: {cpu_result.stderr}")
            _discard_temp(temp_file)
            return False

        tlog(f"❌ Failed: {filepath}")
        tlog(f"Error: {result.stderr}")
        _discard_temp(temp_file)
        return False

    except Exception as e:
        tlog(f"❌ Exception: {e}")
        _discard_temp(temp_file)
        return False
|
|
|
|
def sanitize(text):
    """Make *text* safe for use as a filesystem name.

    Drops non-ASCII characters, replaces filesystem-hostile characters
    with underscores, and strips surrounding whitespace.
    """
    ascii_only = text.encode("ascii", "ignore").decode()
    replaced = re.sub(r'[\/:*?"<>|]', "_", ascii_only)
    return replaced.strip()
|
|
|
|
def fetch_all_metadata():
    """Fetch metadata for every video from the TubeArchivist API.

    Walks the paginated /video/ endpoint and collects a dict keyed by
    video ID with title, channel_name and published date (YYYY-MM-DD).
    Stops on the first request error, returning what was collected so far.

    Returns:
        dict: {video_id: {"title", "channel_name", "published"}}
    """
    log("📥 Fetching all video metadata...")
    video_map = {}
    page = 1
    while True:
        url = f"{API_URL}/video/?page={page}"
        try:
            # BUGFIX: added timeout — without one a hung TA instance would
            # block the scheduler thread forever.
            response = requests.get(url, headers=HEADERS, timeout=30)
            response.raise_for_status()
            data = response.json()

            if 'data' not in data or not data['data']:
                break

            for video in data['data']:
                # The ID field differs between TA versions: 'youtube_id' or '_id'.
                vid_id = video.get("youtube_id") or video.get("_id")
                if not vid_id:
                    continue

                title = video.get("title", "unknown_title")
                channel_info = video.get("channel", {})
                channel_name = channel_info.get("channel_name") or channel_info.get("channel_title") or "Unknown Channel"
                # Fix date format: take only first 10 chars (YYYY-MM-DD).
                # `or` guards against an explicit null 'published' value,
                # which would crash len() — not just a missing key.
                raw_date = video.get("published") or "unknown_date"
                published = raw_date[:10] if len(raw_date) >= 10 else raw_date.replace("/", "-")

                video_map[vid_id] = {
                    "title": title,
                    "channel_name": channel_name,
                    "published": published
                }

            # Check pagination to see if we are done.
            if 'paginate' in data:
                current = data['paginate'].get('current_page')
                last = data['paginate'].get('last_page')
                if current is not None and last is not None and current >= last:
                    break

            log(f" - Page {page} fetched. Total videos so far: {len(video_map)}")
            page += 1

        except Exception as e:
            log(f"❌ Error fetching page {page}: {e}")
            break

    log(f"✅ Metadata fetch complete. Found {len(video_map)} videos.")
    return video_map
|
|
|
|
def cleanup_old_folders():
    """
    Scans TARGET_DIR for folders containing '+00:00'.
    Safely deletes them ONLY if they contain no real files (only symlinks or empty).

    NOTE(review): '+00:00' folders are presumably leftovers from an older
    naming scheme that embedded the full ISO timestamp — confirm.
    """
    log("🧹 Starting cleanup. Scanning ONLY for folders containing '+00:00'...")
    cleaned_count = 0
    skipped_count = 0

    if not TARGET_DIR.exists():
        return

    # Walk top-down: TARGET_DIR/<channel>/<video folder>
    for channel_dir in TARGET_DIR.iterdir():
        if not channel_dir.is_dir():
            continue

        for video_dir in channel_dir.iterdir():
            if not video_dir.is_dir():
                continue

            if "+00:00" in video_dir.name:
                # Safety check: refuse deletion if any entry is a real file.
                safe_to_delete = True
                reason = ""

                for item in video_dir.iterdir():
                    if not item.is_symlink():
                        # Found a real file! Unsafe!
                        safe_to_delete = False
                        reason = "Contains real files"
                        break

                if safe_to_delete:
                    try:
                        # Remove all symlinks first
                        for item in video_dir.iterdir():
                            item.unlink()
                        # Remove directory
                        video_dir.rmdir()
                        log(f" [DELETED] {video_dir.name}")
                        cleaned_count += 1
                    except Exception as e:
                        log(f" ❌ Failed to delete {video_dir.name}: {e}")
                else:
                    log(f" ⚠️ SKIPPING {video_dir.name} - {reason}")
                    skipped_count += 1

    log(f"🧹 Cleanup complete. Removed: {cleaned_count}, Skipped: {skipped_count}")
|
|
|
|
def check_orphaned_links():
    """
    Scans TARGET_DIR for video.* symlinks and checks if they point to valid files.
    For orphaned links, parses the folder structure to extract metadata.
    Stores results in the database with status 'missing'.

    Returns:
        list[dict]: one entry per broken symlink with path/target/metadata.
    """
    log("🔍 Checking for orphaned symlinks...")
    orphaned = []
    total_checked = 0

    if not TARGET_DIR.exists():
        log("⚠️ Target directory does not exist")
        return orphaned

    with get_db() as conn:
        for channel_dir in TARGET_DIR.iterdir():
            if not channel_dir.is_dir():
                continue

            channel_name = channel_dir.name

            for video_dir in channel_dir.iterdir():
                if not video_dir.is_dir():
                    continue

                folder_name = video_dir.name

                # Look for video files
                for video_file in video_dir.glob("video.*"):
                    total_checked += 1

                    if video_file.is_symlink():
                        try:
                            # Check if the symlink target exists.
                            # NOTE(review): os.readlink may return a relative
                            # path, in which case target.exists() is resolved
                            # against the process CWD, not the link's dir — verify.
                            target = Path(os.readlink(video_file))

                            if not target.exists():
                                # Parse folder name: "YYYY-MM-DD - Title"
                                parts = folder_name.split(" - ", 1)
                                published = parts[0] if len(parts) > 0 else "unknown"
                                title = parts[1] if len(parts) > 1 else folder_name

                                # Try to extract video ID from symlink target path
                                video_id = target.stem if target.stem else "unknown"

                                orphaned.append({
                                    "video_id": video_id,
                                    "path": str(video_file),
                                    "target": str(target),
                                    "folder": folder_name,
                                    "channel": channel_name,
                                    "title": title,
                                    "published": published
                                })

                                # Store in DB
                                conn.execute("""
                                    INSERT OR REPLACE INTO videos
                                    (video_id, title, channel, published, symlink, status)
                                    VALUES (?, ?, ?, ?, ?, 'missing')
                                """, (video_id, title, channel_name, published, str(video_file)))

                                log(f" ⚠️ BROKEN: {folder_name} -> {target}")
                        except Exception as e:
                            log(f" ❌ ERROR: {folder_name}: {e}")

        conn.commit()

    log(f"✅ Check complete. Scanned {total_checked} files, found {len(orphaned)} orphaned symlinks.")
    return orphaned
|
|
|
|
def extract_id_from_filename(filename):
    """
    Extracts YouTube ID from filename.
    Expects format: 'Title [VIDEO_ID].ext' or just 'VIDEO_ID.ext'.
    Returns None when no 11-character ID can be found.
    """
    stem = Path(filename).stem

    # Primary form: an [VIDEO_ID] suffix at the end of the stem.
    bracketed = re.search(r'\[([a-zA-Z0-9_-]{11})\]$', stem)
    if bracketed:
        return bracketed.group(1)

    # Fallback: the entire stem might itself be a bare 11-char ID.
    if re.match(r'^[a-zA-Z0-9_-]{11}$', stem):
        return stem

    return None
|
|
|
|
def scan_for_unindexed_videos():
    """
    Scans both SOURCE_DIR and TARGET_DIR for files.
    Classifies them as:
    - unindexed: Not in TA DB (Needs Import)
    - redundant: In TA DB AND Source exists (Safe Duplicate)
    - rescue: In TA DB BUT Source missing (Needs Rescue/Import)
    - lost: Unindexed AND already flagged in the lost_media table

    Returns:
        dict: {"unindexed": [...], "redundant": [...], "rescue": [...], "lost": [...]}
    """
    log("🔍 Scanning for unindexed and legacy files...")

    # 1. Fetch current known IDs (metadata keyed by video ID).
    video_map = fetch_all_metadata()
    known_ids = set(video_map.keys())

    # Fetch Lost Media IDs
    with get_db() as conn:
        lost_rows = conn.execute("SELECT video_id FROM lost_media").fetchall()
        lost_ids = {row["video_id"] for row in lost_rows}

    results = {
        "unindexed": [],
        "redundant": [],
        "rescue": [],
        "lost": []
    }

    # Helper to check if file is video
    def is_video(f):
        return f.suffix.lower() in ['.mp4', '.mkv', '.webm', '.mov']

    def size_mb(f):
        return round(f.stat().st_size / (1024 * 1024), 2)

    # --- Scan SOURCE_DIR (Standard Orphan Check) ---
    if SOURCE_DIR.exists():
        for channel_path in SOURCE_DIR.iterdir():
            if not channel_path.is_dir():
                continue
            for video_file in channel_path.glob("*.*"):
                if not is_video(video_file):
                    continue

                vid_id = extract_id_from_filename(video_file.name)
                if vid_id and vid_id not in known_ids:
                    file_info = {
                        "path": str(video_file),
                        "filename": video_file.name,
                        "video_id": vid_id,
                        "size_mb": size_mb(video_file),
                        "ta_source": "Source Dir"
                    }
                    # Known LOST media goes in its own bucket.
                    if vid_id in lost_ids:
                        results["lost"].append(file_info)
                    else:
                        results["unindexed"].append(file_info)

    # --- Scan TARGET_DIR (Legacy "Pinchflat" Check) ---
    if TARGET_DIR.exists():
        for channel_path in TARGET_DIR.iterdir():
            if not channel_path.is_dir():
                continue
            for video_file in channel_path.glob("*.*"):
                if not is_video(video_file):
                    continue
                # We only care about REAL files, not symlinks.
                if video_file.is_symlink():
                    continue

                vid_id = extract_id_from_filename(video_file.name)
                if not vid_id:
                    continue

                # Case 1: ID NOT in TA -> Recoverable
                if vid_id not in known_ids:
                    results["unindexed"].append({
                        "path": str(video_file),
                        "filename": video_file.name,
                        "video_id": vid_id,
                        "type": "target_realfile",
                        "size_mb": size_mb(video_file)
                    })
                    continue

                # Case 2: ID IS in TA.
                # BUGFIX: fetch_all_metadata() only stores title/channel_name/
                # published, so indexing ['filesystem_path'] raised KeyError at
                # runtime. Use .get() and treat an unknown source path as
                # missing (i.e. a rescue candidate) instead of crashing.
                ta_source = video_map[vid_id].get('filesystem_path')
                entry = {
                    "path": str(video_file),
                    "filename": video_file.name,
                    "video_id": vid_id,
                    "ta_source": str(ta_source),
                    "size_mb": size_mb(video_file)
                }
                if ta_source and Path(ta_source).exists():
                    # TA has it, Source exists. This file is REDUNDANT.
                    results["redundant"].append(entry)
                else:
                    # TA has it, BUT source missing/unknown: RESCUE candidate.
                    results["rescue"].append(entry)

    log(f"✅ Scan complete. Unindexed: {len(results['unindexed'])}, Redundant: {len(results['redundant'])}, Rescue: {len(results['rescue'])}")
    return results
|
|
|
|
def recover_video_metadata(filepath):
    """
    Uses yt-dlp to fetch metadata for a video file and prepares it for import.

    On yt-dlp failure with no metadata file produced, the video is recorded
    in the lost_media table. On success the video file is hardlinked (or
    copied) into IMPORT_DIR next to its .info.json.

    Returns:
        tuple[bool, str]: (success, human-readable message)
    """
    import subprocess
    import shutil
    import json  # NOTE(review): imported but unused in this function

    src_path = Path(filepath)
    if not src_path.exists():
        return False, "File not found"

    vid_id = extract_id_from_filename(src_path.name)
    if not vid_id:
        return False, "Could not extract Video ID from filename"

    # Ensure import dir exists
    IMPORT_DIR.mkdir(parents=True, exist_ok=True)

    # Target paths
    dest_video = IMPORT_DIR / src_path.name
    dest_json = IMPORT_DIR / f"{src_path.stem}.info.json"

    log(f"🚑 Recovering: {vid_id} ...")

    # 1. Fetch Metadata using yt-dlp
    # NOTE(review): '--id' was a youtube-dl option that yt-dlp removed; the
    # explicit '-o' template below already controls naming — verify yt-dlp
    # accepts this flag, otherwise every invocation fails.
    cmd = [
        "yt-dlp",
        "--write-info-json",
        "--skip-download",
        "--id",
        f"https://www.youtube.com/watch?v={vid_id}",
        "-o", f"{IMPORT_DIR}/{src_path.stem}"
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)

        # Success is judged by whether the metadata file was actually
        # created, not by the exit code (yt-dlp may warn yet still write it).
        if dest_json.exists() and dest_json.stat().st_size > 0:
            log(f" ✅ Metadata fetched successfully (ignoring exit code {result.returncode}).")
        elif result.returncode != 0:
            error_msg = result.stderr.strip() or "Unknown Error"
            log(f" ⚠️ yt-dlp failed (Exit Code {result.returncode}). Error: {error_msg}")

            # Record as lost media so the user can 'Force' import or delete
            # it from the Lost Media view later. (Network errors currently
            # land here too — no distinction from "video unavailable".)
            with get_db() as conn:
                conn.execute("INSERT OR REPLACE INTO lost_media (video_id, filepath) VALUES (?, ?)", (vid_id, str(src_path)))
                conn.commit()
            return False, f"yt-dlp failed: {error_msg} (Added to Lost Media)"

        # 2. Copy/Symlink Video File
        try:
            if dest_video.exists():
                dest_video.unlink()

            # Hardlink when possible (saves space/time); TA's import consumes
            # files, so a symlink would be unsafe. Fall back to a real copy.
            try:
                os.link(src_path, dest_video)
                log(" 🔗 Hardlinked video file.")
            except OSError:
                shutil.copy2(src_path, dest_video)
                log(" 📂 Copied video file.")

        except Exception as e:
            return False, f"Failed to move video: {e}"

        return True, "Ready for import"

    except Exception as e:
        log(f" ❌ Recovery failed: {e}")
        return False, str(e)
|
|
|
|
# Main logic
|
|
|
|
def process_videos():
    """Main scan: mirror SOURCE_DIR into TARGET_DIR as symlinked folders.

    For every source file whose stem matches a TA video ID, creates (or
    repairs) TARGET_DIR/<channel>/<date - title>/video.<ext> as a symlink
    pointing at the *host* path of the source file, then records the video
    as 'linked' in the database.

    Returns:
        str | None: an error message on failure, None on success.
    """
    global processed_videos
    processed_videos = []

    # 1. Fetch all metadata first
    video_map = fetch_all_metadata()

    # 2. Run cleanup
    cleanup_old_folders()

    # Statistics
    new_links = 0
    verified_links = 0

    with get_db() as conn:
        # Clear existing "linked" videos (we'll repopulate)
        conn.execute("DELETE FROM videos WHERE status = 'linked'")

        try:
            for channel_path in SOURCE_DIR.iterdir():
                if not channel_path.is_dir():
                    continue
                for video_file in channel_path.glob("*.*"):
                    # Source files are named <video_id>.<ext>
                    video_id = video_file.stem

                    # Lookup in local map
                    meta = video_map.get(video_id)
                    if not meta:
                        continue
                    sanitized_channel_name = sanitize(meta["channel_name"])
                    channel_dir = TARGET_DIR / sanitized_channel_name
                    channel_dir.mkdir(parents=True, exist_ok=True)
                    sanitized_title = sanitize(meta["title"])
                    folder_name = f"{meta['published']} - {sanitized_title}"
                    video_dir = channel_dir / folder_name
                    video_dir.mkdir(parents=True, exist_ok=True)
                    actual_file = next(channel_path.glob(f"{video_id}.*"), None)
                    if not actual_file:
                        continue
                    # Symlinks must use the host-side path so they resolve
                    # outside this container (SOURCE_DIR is the mount of
                    # /mnt/user/tubearchives/bp).
                    host_path_root = Path("/mnt/user/tubearchives/bp")
                    host_source_path = host_path_root / actual_file.relative_to(SOURCE_DIR)
                    dest_file = video_dir / f"video{actual_file.suffix}"
                    try:
                        if dest_file.exists():
                            if dest_file.is_symlink():
                                # Repair the link if it points elsewhere.
                                current_target = Path(os.readlink(dest_file))
                                if current_target.resolve() != host_source_path.resolve():
                                    dest_file.unlink()
                                    os.symlink(host_source_path, dest_file)
                                    log(f" [FIX] Relinked: {folder_name}")
                                    new_links += 1
                                else:
                                    verified_links += 1
                            # A real (non-symlink) dest_file is left untouched.
                        else:
                            # NOTE(review): a *broken* symlink reports
                            # exists() == False, so this os.symlink raises
                            # FileExistsError, silently swallowed below.
                            os.symlink(host_source_path, dest_file)
                            log(f" [NEW] Linked: {folder_name}")
                            new_links += 1
                    except Exception:
                        # Best-effort: link errors must not abort the scan.
                        pass

                    # Store in database
                    conn.execute("""
                        INSERT OR REPLACE INTO videos
                        (video_id, title, channel, published, symlink, status)
                        VALUES (?, ?, ?, ?, ?, 'linked')
                    """, (video_id, meta["title"], meta["channel_name"],
                          meta["published"], str(dest_file)))

                    processed_videos.append({
                        "video_id": video_id,
                        "title": meta["title"],
                        "channel": meta["channel_name"],
                        "published": meta["published"],
                        "symlink": str(dest_file)
                    })
        except Exception as e:
            # Roll back the 'linked' purge + partial inserts on any failure.
            conn.rollback()
            return str(e)

        conn.commit()

    log(f"✅ Scan complete. Processed {len(processed_videos)} videos.")
    log(f" - New/Fixed Links: {new_links}")
    log(f" - Verified Links: {verified_links}")
    return None
|
|
|
|
def scheduler():
    """Background loop: run a full library scan every SCAN_INTERVAL minutes."""
    log(f"🕒 Background scheduler started. Scanning every {SCAN_INTERVAL} minutes.")
    while True:
        log("🔄 Running scheduled scan...")
        process_videos()
        # Sleep between scans; interval is configured in minutes.
        time.sleep(SCAN_INTERVAL * 60)
|
|
|
|
# Flask routes
|
|
|
|
@app.before_request
def limit_remote_addr():
    """Reject (403) any request whose client IP is not in ALLOWED_IPS.

    Entries containing '/' are treated as CIDR networks, others as exact
    addresses. Malformed client or allow-list entries also yield 403.
    """
    client_ip = request.remote_addr
    try:
        ip_obj = ipaddress.ip_address(client_ip)

        def _entry_matches(entry):
            # CIDR entry -> network membership; plain entry -> exact match.
            if "/" in entry:
                return ip_obj in ipaddress.ip_network(entry, strict=False)
            return ip_obj == ipaddress.ip_address(entry)

        if not any(_entry_matches(entry) for entry in ALLOWED_IPS if entry):
            log(f"⛔ Access denied for IP: {client_ip}")
            abort(403)
    except ValueError as e:
        log(f"⛔ Invalid IP format: {client_ip}, Error: {e}")
        abort(403)
|
|
|
|
def check_auth(username, password):
    """Checks whether a username/password combination is valid.

    Uses constant-time comparison (hmac.compare_digest) so response timing
    does not leak how much of a guessed credential was correct; comparison
    is done on UTF-8 bytes so non-ASCII credentials are supported.
    """
    import hmac
    supplied_user = (username or "").encode("utf-8")
    supplied_pass = (password or "").encode("utf-8")
    user_ok = hmac.compare_digest(supplied_user, UI_USERNAME.encode("utf-8"))
    pass_ok = hmac.compare_digest(supplied_pass, UI_PASSWORD.encode("utf-8"))
    return user_ok and pass_ok
|
|
|
|
def authenticate():
    """Sends a 401 response that enables basic auth"""
    message = ('Could not verify your access level for that URL.\n'
               'You have to login with proper credentials')
    headers = {'WWW-Authenticate': 'Basic realm="Login Required"'}
    return Response(message, 401, headers)
|
|
|
|
def requires_auth(f):
    """View decorator enforcing HTTP Basic authentication."""
    @wraps(f)
    def decorated(*args, **kwargs):
        creds = request.authorization
        # Valid credentials -> run the view; anything else -> 401 challenge.
        if creds and check_auth(creds.username, creds.password):
            return f(*args, **kwargs)
        return authenticate()
    return decorated
|
|
|
|
@app.route("/")
@requires_auth
def index():
    """Serve the main dashboard page."""
    page = render_template('dashboard.html')
    return page
|
|
|
|
@app.route("/api/status")
@requires_auth
def api_status():
    """Return every tracked video plus aggregate link statistics."""
    fields = ("video_id", "title", "channel", "published", "symlink", "status")

    with get_db() as conn:
        rows = conn.execute("SELECT * FROM videos ORDER BY channel, published DESC").fetchall()

    videos = [{name: row[name] for name in fields} for row in rows]

    # Aggregate counters for the dashboard header.
    linked = sum(1 for v in videos if v["status"] == "linked")
    missing = sum(1 for v in videos if v["status"] == "missing")

    return jsonify({
        "total_videos": len(videos),
        "verified_links": linked,
        "missing_count": missing,
        "videos": videos
    })
|
|
|
|
@app.route("/api/logs")
@requires_auth
def api_logs():
    """Return buffered log lines starting at the requested index."""
    start = request.args.get('start', 0, type=int)
    with log_lock:
        payload = {
            "logs": log_buffer[start:],
            "next_index": len(log_buffer)
        }
    return jsonify(payload)
|
|
|
|
@app.route("/api/scan", methods=["POST"])
@requires_auth
def api_scan():
    """Kick off a full library scan without blocking the request."""
    worker = threading.Thread(target=process_videos)
    worker.start()
    return jsonify({"status": "started"})
|
|
|
|
@app.route("/api/cleanup", methods=["POST"])
@requires_auth
def api_cleanup():
    """Run the '+00:00' folder cleanup in a background thread."""
    worker = threading.Thread(target=cleanup_old_folders)
    worker.start()
    return jsonify({"status": "started"})
|
|
|
|
@app.route("/api/check-orphans", methods=["POST"])
@requires_auth
def api_check_orphans():
    """Synchronously scan for broken symlinks and return the findings."""
    broken = check_orphaned_links()
    return jsonify({"status": "complete", "orphaned": broken, "count": len(broken)})
|
|
|
|
@app.route("/transcode")
@requires_auth
def transcode_page():
    """Serve the transcoding management page."""
    page = render_template('transcoding.html')
    return page
|
|
|
|
@app.route("/api/transcode/videos")
@requires_auth
def api_transcode_videos():
    """Get all videos that need transcoding (paginated, status='missing')."""
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 100, type=int)
    offset = (page - 1) * per_page

    fields = ("video_id", "title", "channel", "published", "symlink")

    with get_db() as conn:
        total = conn.execute("SELECT COUNT(*) as count FROM videos WHERE status = 'missing'").fetchone()['count']
        rows = conn.execute(
            "SELECT * FROM videos WHERE status = 'missing' LIMIT ? OFFSET ?",
            (per_page, offset)
        ).fetchall()

    videos = [{name: row[name] for name in fields} for row in rows]

    return jsonify({
        "videos": videos,
        "total": total,
        "page": page,
        "per_page": per_page,
        # Ceiling division for the total page count.
        "pages": (total + per_page - 1) // per_page
    })
|
|
|
|
@app.route("/api/transcode/start", methods=["POST"])
@requires_auth
def api_transcode_start():
    """Start transcoding a video in the background.

    Expects JSON body {"filepath": "..."}; responds immediately with the
    encoder that will be used.
    """
    # BUGFIX: get_json() returns None for a missing/non-JSON body, which
    # crashed on .get with AttributeError; silent=True + fallback dict
    # turns that into the intended 400 response instead.
    data = request.get_json(silent=True) or {}
    filepath = data.get('filepath')

    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400

    encoder = detect_encoder()
    tlog(f"🖥️ Selected encoder: {encoder}")

    # Run in background so the HTTP request returns immediately.
    def run_transcode():
        transcode_video(filepath, encoder)

    threading.Thread(target=run_transcode).start()
    return jsonify({"message": "Transcode started", "encoder": encoder})
|
|
|
|
@app.route("/api/transcode/logs")
@requires_auth
def api_transcode_logs():
    """Get transcode log lines starting at the requested index."""
    start = request.args.get('start', 0, type=int)
    with transcode_log_lock:
        payload = {
            "logs": transcode_log_buffer[start:],
            "next_index": len(transcode_log_buffer)
        }
    return jsonify(payload)
|
|
|
|
# Global Scan State
# Shared between the /api/recovery/scan trigger and /api/recovery/poll.
SCAN_CACHE = {
    "status": "idle",  # idle, scanning, done ("error" after a failed run)
    "results": None,   # scan result dict, or the error string on failure
    "last_run": None   # ISO timestamp of the last successfully completed scan
}
# Handle of the most recent background scan thread (kept for reference only).
SCAN_THREAD = None
|
|
|
|
@app.route("/api/recovery/scan", methods=["POST"])
@requires_auth
def api_recovery_scan():
    """Launch the unindexed-media scan in a background thread (non-blocking)."""
    global SCAN_THREAD

    if SCAN_CACHE["status"] == "scanning":
        return jsonify({"status": "running", "message": "Scan already in progress"}), 202

    def run_scan_async():
        global SCAN_CACHE
        # Status was already flipped to "scanning" before this thread started.
        SCAN_CACHE["results"] = None
        try:
            SCAN_CACHE["results"] = scan_for_unindexed_videos()
            SCAN_CACHE["status"] = "done"
            SCAN_CACHE["last_run"] = datetime.now().isoformat()
        except Exception as e:
            SCAN_CACHE["status"] = "error"
            SCAN_CACHE["results"] = str(e)
            log(f"❌ Async scan failed: {e}")

    # Flip the status before starting the thread so a poll can never observe
    # "idle" in the window between trigger and thread start-up.
    SCAN_CACHE["status"] = "scanning"
    SCAN_THREAD = threading.Thread(target=run_scan_async)
    SCAN_THREAD.start()

    return jsonify({"status": "started", "message": "Background scan started"}), 202
|
|
|
|
@app.route("/api/recovery/poll", methods=["GET"])
@requires_auth
def api_recovery_poll():
    """Report the current background-scan state (and results once done)."""
    return jsonify(SCAN_CACHE)
|
|
|
|
@app.route("/api/recovery/start", methods=["POST"])
@requires_auth
def api_recovery_start():
    """Recover metadata for one file and stage it for TubeArchivist import.

    Expects JSON body {"filepath": "..."}. Runs synchronously so the UI
    gets immediate per-file feedback.
    """
    # BUGFIX: get_json() returns None for a missing/non-JSON body, which
    # crashed on .get with AttributeError; silent=True + fallback dict
    # turns that into the intended 400 response instead.
    data = request.get_json(silent=True) or {}
    filepath = data.get('filepath')

    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400

    # Run synchronously to give user immediate feedback per file
    success, msg = recover_video_metadata(filepath)
    log(f"Recovery Result for {filepath}: {msg}")

    return jsonify({
        "message": msg,
        "success": success,
        "status": "completed" if success else "failed"
    })
|
|
|
|
@app.route("/api/recovery/delete", methods=["POST"])
@requires_auth
def api_recovery_delete():
    """Delete a single media file (Redundant / Lost Media / Unindexed tabs).

    Expects JSON body {"filepath": "..."}. When the filename yields a video
    ID, the matching lost_media row is removed after a successful delete.
    """
    data = request.get_json()
    filepath = data.get('filepath')

    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400

    p = Path(filepath)
    if not p.exists() or not p.is_file():
        return jsonify({"error": "File not found"}), 404

    # Deletion is intentionally permitted for SOURCE_DIR files as well
    # (Lost Media / Unindexed entries live there); every delete is logged.
    vid_id = extract_id_from_filename(p.name)

    # Check if this ID is in lost_media.
    # NOTE(review): is_lost is computed but never consulted afterwards —
    # looks like dead state from an abandoned safety check.
    is_lost = False
    if vid_id:
        with get_db() as conn:
            row = conn.execute("SELECT 1 FROM lost_media WHERE video_id = ?", (vid_id,)).fetchone()
            if row: is_lost = True

    try:
        p.unlink()

        # Cleanup Lost Media Table
        if vid_id:
            with get_db() as conn:
                conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,))
                conn.commit()

        log(f"🗑️ Deleted file: {filepath}")
        return jsonify({"success": True, "message": "File deleted"})
    except Exception as e:
        log(f"❌ Delete failed: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
@app.route('/api/recovery/force', methods=['POST'])
@requires_auth
def api_recovery_force():
    """Force-import a file whose metadata can no longer be fetched (lost media).

    Writes a minimal offline .info.json next to the video in IMPORT_DIR,
    hardlinks (or copies) the video there for TubeArchivist to consume,
    and clears the file's lost_media entry.
    """
    # NOTE(review): request.json raises for a non-JSON body (unlike
    # get_json(silent=True)), so Flask may answer 400/415 before our check.
    data = request.json
    filepath = data.get('filepath')
    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400

    log(f"💪 Force Importing (Lost Media): {Path(filepath).name}")

    src_path = Path(filepath).resolve()
    if not src_path.exists():
        return jsonify({"error": "File not found"}), 404

    vid_id = extract_id_from_filename(src_path.name)
    if not vid_id:
        return jsonify({"error": "Could not extract ID"}), 400

    # 1. Generate Offline Metadata
    IMPORT_DIR.mkdir(parents=True, exist_ok=True)
    json_path = IMPORT_DIR / f"{src_path.stem}.info.json"

    # Minimal stand-in metadata: the real data is unavailable, so only the
    # ID and a synthetic title/URL are meaningful to TubeArchivist.
    offline_meta = {
        "id": vid_id,
        "title": f"Offline Import - {src_path.stem}",
        "uploader": "Unknown (Lost Media)",
        "upload_date": datetime.now().strftime("%Y%m%d"),
        "description": "Imported via TA Organizerr Force Import (Lost Media)",
        "webpage_url": f"https://www.youtube.com/watch?v={vid_id}",
        "view_count": 0,
        "like_count": 0,
        "duration": 0
    }

    import json
    try:
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(offline_meta, f, indent=4)
        log(" 📝 Generated offline metadata.")

        # 2. Link/Copy Video — hardlink saves space; copy is the fallback
        # when source and IMPORT_DIR are on different filesystems.
        dest_video = IMPORT_DIR / src_path.name
        if dest_video.exists(): dest_video.unlink()
        try:
            os.link(src_path, dest_video)
            log(" 🔗 Hardlinked video.")
        except OSError:
            shutil.copy2(src_path, dest_video)
            log(" ©️ Copied video.")

        # 3. Clean up lost_media table
        with get_db() as conn:
            conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,))
            conn.commit()

        return jsonify({"success": True, "message": "Force import successful"})

    except Exception as e:
        log(f" ❌ Force import failed: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
if __name__ == "__main__":
    # Start scheduler in background thread; daemon=True so it dies with
    # the web server instead of keeping the process alive on shutdown.
    thread = threading.Thread(target=scheduler, daemon=True)
    thread.start()

    # NOTE(review): Flask development server bound on all interfaces —
    # access control relies entirely on the before_request IP allow-list
    # and Basic auth; confirm a production WSGI server fronts this.
    app.run(host="0.0.0.0", port=5000)
|