# ta-organizerr/ta_symlink.py

from pathlib import Path
import os
import requests
import re
import sys
import threading
import time
import ipaddress
from flask import Flask, jsonify, render_template, request, abort

# Load config from environment variables
# Base URL of the metadata API; fetch_all_metadata() appends "/video/?page=N" to it.
API_URL = os.getenv("API_URL", "http://localhost:8457/api")
# NOTE(review): VIDEO_URL is not referenced anywhere in the visible part of this file.
VIDEO_URL = os.getenv("VIDEO_URL", "http://localhost:8457/video/")
# Token sent in the Authorization header (see HEADERS); defaults to an empty string.
API_TOKEN = os.getenv("API_TOKEN", "")
SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", 60)) # Default 60 minutes
# Comma-separated addresses or CIDR networks; limit_remote_addr() rejects every other client.
ALLOWED_IPS = [ip.strip() for ip in os.getenv("ALLOWED_IPS", "127.0.0.1").split(",")]
# Directory scanned for per-channel folders of media files.
SOURCE_DIR = Path("/app/source")
# Directory in which the "<channel>/<date> - <title>/" symlink tree is built.
TARGET_DIR = Path("/app/target")
# Headers sent with every metadata API request.
HEADERS = {"Authorization": f"Token {API_TOKEN}"}

# Flask application that serves the dashboard pages and the JSON API routes.
app = Flask(__name__)

# Database setup
import sqlite3
from contextlib import contextmanager

# SQLite file that holds one row per video (schema is created by init_db()).
DB_PATH = Path("/app/data/videos.db")
# Import-time side effect: make sure the database directory exists before the first connect.
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

@contextmanager
def get_db():
    """Yield a SQLite connection to DB_PATH and close it when the block exits.

    Rows come back as ``sqlite3.Row`` so columns can be read by name.
    Nothing is committed automatically; callers commit explicitly.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    try:
        yield connection
    finally:
        # Runs on normal exit and on exceptions raised inside the ``with`` body.
        connection.close()

def init_db():
    """Create the ``videos`` table when it does not exist yet, then commit."""
    create_videos_table = """
            CREATE TABLE IF NOT EXISTS videos (
                video_id TEXT PRIMARY KEY,
                title TEXT,
                channel TEXT,
                published TEXT,
                symlink TEXT,
                status TEXT,
                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """
    with get_db() as db:
        db.execute(create_videos_table)
        db.commit()

# Import-time side effect: make sure the schema exists before any route or scan uses the DB.
init_db()

# Global State
# Videos handled by the most recent process_videos() run (reset at the start of each run).
processed_videos = []
# In-memory copy of log() output; log() trims it to at most 1000 entries.
log_buffer = []
# Held by log() and api_logs() while they touch log_buffer.
log_lock = threading.Lock()
# In-memory copy of tlog() output; tlog() trims it to at most 500 entries.
transcode_log_buffer = []
# Held by tlog() and api_transcode_logs() while they touch transcode_log_buffer.
transcode_log_lock = threading.Lock()

# Utility functions
def log(msg):
    """Print *msg* and append it to the shared in-memory log buffer.

    The buffer keeps at most 1000 entries; once the cap is exceeded the
    oldest entry is discarded.
    """
    print(msg, flush=True)
    with log_lock:
        log_buffer.append(msg)
        over_capacity = len(log_buffer) > 1000
        if over_capacity:
            del log_buffer[0]

def tlog(msg):
    """Print *msg* with a ``[TRANSCODE]`` prefix and buffer the raw message.

    The transcode buffer keeps at most 500 entries; once the cap is exceeded
    the oldest entry is discarded. The prefix is only added to the printed
    line, not to the buffered entry.
    """
    print(f"[TRANSCODE] {msg}", flush=True)
    with transcode_log_lock:
        transcode_log_buffer.append(msg)
        over_capacity = len(transcode_log_buffer) > 500
        if over_capacity:
            del transcode_log_buffer[0]

def detect_encoder():
    """Return the preferred H.264 encoder name reported by ``ffmpeg -encoders``.

    Hardware encoders are preferred in the order NVENC, VAAPI, VideoToolbox.

    Returns:
        One of ``'h264_nvenc'``, ``'h264_vaapi'``, ``'h264_videotoolbox'`` or
        ``'libx264'``. ``'libx264'`` is also returned when ffmpeg cannot be
        started or its output cannot be read.
    """
    import subprocess

    # Ordered by preference; the first name found in ffmpeg's listing wins.
    preferred = ('h264_nvenc', 'h264_vaapi', 'h264_videotoolbox')

    try:
        result = subprocess.run(['ffmpeg', '-hide_banner', '-encoders'],
                                capture_output=True, text=True)
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffmpeg missing / not executable / undecodable output: use the software
        # encoder. KeyboardInterrupt and SystemExit are deliberately not caught.
        return 'libx264'

    encoders = result.stdout or ''
    for name in preferred:
        if name in encoders:
            return name
    return 'libx264'

def probe_codecs(filepath):
    """Ask ffprobe for the codec names of the first video and audio stream.

    Returns:
        ``(video_codec, audio_codec)`` as the stripped text ffprobe printed
        for streams ``v:0`` and ``a:0`` (an empty string when it printed
        nothing), or ``(None, None)`` when running ffprobe raised.
    """
    import subprocess

    def codec_of(stream_selector):
        # One ffprobe call per stream; csv=p=0 prints just the codec name.
        completed = subprocess.run([
            'ffprobe', '-v', 'error', '-select_streams', stream_selector,
            '-show_entries', 'stream=codec_name', '-of', 'csv=p=0', filepath
        ], capture_output=True, text=True)
        return completed.stdout.strip()

    try:
        video_codec = codec_of('v:0')
        audio_codec = codec_of('a:0')
    except Exception as e:
        tlog(f"Error probing {filepath}: {e}")
        return None, None
    return video_codec, audio_codec

def _build_transcode_cmd(filepath, temp_file, encoder, copy_video):
    """Return the ffmpeg argument list that writes an H.264/AAC copy to temp_file."""
    pre_input = []
    if copy_video:
        # Video is already H.264: only the audio track is re-encoded.
        video_args = ['-c:v', 'copy']
    elif encoder == 'h264_nvenc':
        video_args = ['-c:v', 'h264_nvenc', '-preset', 'fast', '-cq', '23']
    elif encoder == 'h264_vaapi':
        pre_input = ['-hwaccel', 'vaapi', '-hwaccel_output_format', 'vaapi']
        video_args = ['-vf', 'format=nv12,hwupload',
                      '-c:v', 'h264_vaapi', '-b:v', '5M']
    else:  # libx264 (also used for any encoder without a dedicated command)
        video_args = ['-c:v', 'libx264', '-crf', '23', '-preset', 'medium']

    return [
        'ffmpeg', '-v', 'error', '-stats',
        *pre_input,
        '-i', filepath,
        *video_args,
        '-c:a', 'aac', '-b:a', '192k',
        '-movflags', '+faststart',
        '-y', temp_file
    ]


def _discard_temp_file(temp_file):
    """Delete the partial ffmpeg output if one was left behind."""
    if Path(temp_file).exists():
        Path(temp_file).unlink()


def transcode_video(filepath, encoder='libx264'):
    """Transcode a video file to H.264/AAC, replacing the file in place.

    If *filepath* is a symlink, its target is used instead; targets under the
    host root ``/mnt/user/tubearchives/bp`` are rewritten to ``/app/source``.
    Files that ffprobe already reports as h264 + aac are left untouched.

    Args:
        filepath: Path of the video file, or of a symlink pointing to it.
        encoder: Video encoder for a full transcode. When a GPU encoder fails
            with a driver/library loading error, the job is retried once with
            ``libx264``.

    Returns:
        True when the file is (now) H.264/AAC, False on any failure. Progress
        and errors are reported through tlog(); no exception is propagated.
    """
    import subprocess

    gpu_encoders = ('h264_nvenc', 'h264_vaapi', 'h264_videotoolbox')
    host_root = Path("/mnt/user/tubearchives/bp")
    container_root = Path("/app/source")

    original_path = Path(filepath)

    # Try to resolve symlink first (don't check if it exists, broken symlinks still exist as links)
    if original_path.is_symlink():
        try:
            link_target = Path(os.readlink(original_path))
            if not link_target.is_absolute():
                # A relative link target is relative to the link's directory,
                # not to the process working directory.
                link_target = original_path.parent / link_target
            actual_file = link_target.resolve()
            tlog(f"Following symlink: {filepath} -> {actual_file}")

            # Translate host path to container path
            # Host: /mnt/user/tubearchives/bp/... → Container: /app/source/...
            try:
                # relative_to() compares whole path components, so a sibling
                # such as ".../bpx" is not mistaken for the host root.
                relative = actual_file.relative_to(host_root)
            except ValueError:
                filepath = str(actual_file)
            else:
                container_path = str(container_root / relative)
                tlog(f"Translated path: {actual_file} -> {container_path}")
                filepath = container_path
        except Exception as e:
            tlog(f"Error resolving symlink: {e}")
            return False
    elif not original_path.exists():
        tlog(f"File not found: {filepath}")
        return False

    # Now check if the actual file exists
    if not Path(filepath).exists():
        tlog(f"Source file not found: {filepath}")
        return False

    video_codec, audio_codec = probe_codecs(filepath)

    if video_codec == 'h264' and audio_codec == 'aac':
        tlog(f"Already H.264/AAC: {filepath}")
        return True

    temp_file = f"{filepath}.temp.mp4"
    copy_video = video_codec == 'h264'

    try:
        if copy_video:
            tlog(f"Audio-only transcode: {filepath}")
        else:
            tlog(f"Full transcode using {encoder}: {filepath}")

        cmd = _build_transcode_cmd(filepath, temp_file, encoder, copy_video)
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0:
            # Atomic swap: the original stays in place until the new file is ready.
            os.replace(temp_file, filepath)
            tlog(f"✅ Success: {filepath}")
            return True

        # Only a full transcode on a GPU encoder can fail for GPU reasons.
        # The parentheses matter: without them "Cannot load" alone would
        # trigger the retry for every encoder.
        gpu_failure = (
            not copy_video
            and encoder in gpu_encoders
            and ('libcuda' in result.stderr or 'Cannot load' in result.stderr)
        )
        if not gpu_failure:
            tlog(f"❌ Failed: {filepath}")
            tlog(f"Error: {result.stderr}")
            _discard_temp_file(temp_file)
            return False

        tlog("⚠️ GPU encoding failed, retrying with CPU (libx264)...")
        cpu_cmd = _build_transcode_cmd(filepath, temp_file, 'libx264', False)
        cpu_result = subprocess.run(cpu_cmd, capture_output=True, text=True)

        if cpu_result.returncode == 0:
            os.replace(temp_file, filepath)
            tlog(f"✅ Success (CPU): {filepath}")
            return True

        tlog(f"❌ Failed (CPU): {filepath}")
        tlog(f"Error: {cpu_result.stderr}")
        _discard_temp_file(temp_file)
        return False

    except Exception as e:
        tlog(f"❌ Exception: {e}")
        _discard_temp_file(temp_file)
        return False

def sanitize(text):
    """Return *text* as an ASCII string usable as a folder name.

    Non-ASCII characters are dropped, each of ``/ : * ? " < > |`` is replaced
    by an underscore, and leading/trailing whitespace is stripped.
    """
    ascii_only = text.encode("ascii", "ignore").decode()
    to_underscore = str.maketrans({char: "_" for char in '/:*?"<>|'})
    return ascii_only.translate(to_underscore).strip()

def fetch_all_metadata(timeout=30):
    """Fetch metadata for every video from the API, one page at a time.

    Pages are requested from ``{API_URL}/video/?page=N`` until a page has no
    ``data``, or the ``paginate`` block reports that the last page was reached.

    Args:
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled server would block the scan indefinitely.

    Returns:
        Dict mapping video id to ``{"title", "channel_name", "published"}``.
        When a request or a page fails, the error is logged and the entries
        collected so far are returned.
    """
    log("📥 Fetching all video metadata...")
    video_map = {}
    page = 1
    while True:
        url = f"{API_URL}/video/?page={page}"
        try:
            response = requests.get(url, headers=HEADERS, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            videos = data.get('data') if isinstance(data, dict) else None
            if not videos:
                break

            for video in videos:
                # Try to find the ID. It might be 'youtube_id' or '_id'
                vid_id = video.get("youtube_id") or video.get("_id")
                if not vid_id:
                    continue

                # "or" instead of a .get() default: the key may be present
                # with a null value, which a default would not replace.
                title = video.get("title") or "unknown_title"
                channel_info = video.get("channel") or {}
                channel_name = (
                    channel_info.get("channel_name")
                    or channel_info.get("channel_title")
                    or "Unknown Channel"
                )
                # Fix date format: take only first 10 chars (YYYY-MM-DD)
                raw_date = str(video.get("published") or "unknown_date")
                published = raw_date[:10] if len(raw_date) >= 10 else raw_date.replace("/", "-")

                video_map[vid_id] = {
                    "title": title,
                    "channel_name": channel_name,
                    "published": published
                }

            # Check pagination to see if we are done
            paginate = data.get('paginate') or {}
            current = paginate.get('current_page')
            last = paginate.get('last_page')
            if current is not None and last is not None and current >= last:
                break

            log(f"   - Page {page} fetched. Total videos so far: {len(video_map)}")
            page += 1

        except Exception as e:
            log(f"❌ Error fetching page {page}: {e}")
            break

    log(f"✅ Metadata fetch complete. Found {len(video_map)} videos.")
    return video_map

def cleanup_old_folders():
    """Remove legacy video folders whose name contains '+00:00'.

    Only ``TARGET_DIR/<channel>/<video>`` folders are considered. A folder is
    deleted only when every entry in it is a symlink (or it is empty);
    folders holding anything else are reported and left alone. Returns
    without doing anything when TARGET_DIR does not exist.
    """
    log("🧹 Starting cleanup. Scanning ONLY for folders containing '+00:00'...")
    removed = 0
    skipped = 0

    if not TARGET_DIR.exists():
        return

    for channel_dir in TARGET_DIR.iterdir():
        if not channel_dir.is_dir():
            continue

        for video_dir in channel_dir.iterdir():
            if not (video_dir.is_dir() and "+00:00" in video_dir.name):
                continue

            # Safety check: any non-symlink entry means real data lives here.
            only_symlinks = all(entry.is_symlink() for entry in video_dir.iterdir())
            if not only_symlinks:
                log(f"   ⚠️ SKIPPING {video_dir.name} - Contains real files")
                skipped += 1
                continue

            try:
                # rmdir() needs an empty directory, so drop the links first.
                for entry in video_dir.iterdir():
                    entry.unlink()
                video_dir.rmdir()
                log(f"   [DELETED] {video_dir.name}")
                removed += 1
            except Exception as e:
                log(f"   ❌ Failed to delete {video_dir.name}: {e}")

    log(f"🧹 Cleanup complete. Removed: {removed}, Skipped: {skipped}")

def check_orphaned_links():
    """Find ``video.*`` symlinks under TARGET_DIR whose target does not exist.

    For every broken link the channel is taken from the channel folder name,
    the date and title from the ``"<date> - <title>"`` video folder name, and
    the video id from the stem of the link target. Each hit is stored in the
    ``videos`` table with status 'missing' and also returned.

    NOTE(review): the link target is tested exactly as stored. transcode_video()
    rewrites /mnt/user/tubearchives/bp to /app/source before touching a target;
    confirm that the stored targets are reachable from where this runs.

    Returns:
        List of dicts with keys video_id, path, target, folder, channel,
        title and published (empty when TARGET_DIR does not exist).
    """
    log("🔍 Checking for orphaned symlinks...")
    orphaned = []
    total_checked = 0

    if not TARGET_DIR.exists():
        log("⚠️ Target directory does not exist")
        return orphaned

    insert_missing = """
                                    INSERT OR REPLACE INTO videos
                                    (video_id, title, channel, published, symlink, status)
                                    VALUES (?, ?, ?, ?, ?, 'missing')
                                """

    with get_db() as conn:
        for channel_dir in TARGET_DIR.iterdir():
            if not channel_dir.is_dir():
                continue
            channel_name = channel_dir.name

            video_dirs = (entry for entry in channel_dir.iterdir() if entry.is_dir())
            for video_dir in video_dirs:
                folder_name = video_dir.name

                for video_file in video_dir.glob("video.*"):
                    total_checked += 1
                    if not video_file.is_symlink():
                        continue

                    try:
                        target = Path(os.readlink(video_file))
                        if target.exists():
                            continue

                        # Folder name layout: "YYYY-MM-DD - Title"
                        published, separator, remainder = folder_name.partition(" - ")
                        title = remainder if separator else folder_name
                        video_id = target.stem or "unknown"

                        record = {
                            "video_id": video_id,
                            "path": str(video_file),
                            "target": str(target),
                            "folder": folder_name,
                            "channel": channel_name,
                            "title": title,
                            "published": published
                        }
                        orphaned.append(record)

                        conn.execute(
                            insert_missing,
                            (video_id, title, channel_name, published, str(video_file)),
                        )

                        log(f"   ⚠️ BROKEN: {folder_name} -> {target}")
                    except Exception as e:
                        log(f"   ❌ ERROR: {folder_name}: {e}")

        conn.commit()

    log(f"✅ Check complete. Scanned {total_checked} files, found {len(orphaned)} orphaned symlinks.")
    return orphaned

# Main logic

def process_videos():
    """Rebuild the symlink tree in TARGET_DIR and refresh the 'linked' rows.

    Steps: fetch metadata for all videos, run cleanup_old_folders(), then for
    every file ``SOURCE_DIR/<channel>/<video_id>.<ext>`` with known metadata
    create or repair ``TARGET_DIR/<channel name>/<date> - <title>/video.<ext>``
    as a symlink to the file's host path under /mnt/user/tubearchives/bp.

    Also replaces the module-level ``processed_videos`` list.

    Returns:
        None on success, or the error message (str) when the scan was aborted;
        in that case the database changes are rolled back.
    """
    global processed_videos
    processed_videos = []

    # 1. Fetch all metadata first
    video_map = fetch_all_metadata()

    # 2. Run cleanup
    cleanup_old_folders()

    # Statistics
    new_links = 0
    verified_links = 0

    host_path_root = Path("/mnt/user/tubearchives/bp")

    with get_db() as conn:
        # Clear existing "linked" videos (we'll repopulate)
        conn.execute("DELETE FROM videos WHERE status = 'linked'")

        try:
            for channel_path in SOURCE_DIR.iterdir():
                if not channel_path.is_dir():
                    continue
                for actual_file in channel_path.glob("*.*"):
                    # The file's own stem is the id, so this file is the one to
                    # link. Re-globbing "<id>.*" could return a sidecar such as
                    # "<id>.en.vtt" instead.
                    video_id = actual_file.stem

                    # Lookup in local map
                    meta = video_map.get(video_id)
                    if not meta:
                        continue

                    sanitized_channel_name = sanitize(meta["channel_name"])
                    channel_dir = TARGET_DIR / sanitized_channel_name
                    channel_dir.mkdir(parents=True, exist_ok=True)
                    sanitized_title = sanitize(meta["title"])
                    folder_name = f"{meta['published']} - {sanitized_title}"
                    video_dir = channel_dir / folder_name
                    video_dir.mkdir(parents=True, exist_ok=True)

                    host_source_path = host_path_root / actual_file.relative_to(SOURCE_DIR)
                    dest_file = video_dir / f"video{actual_file.suffix}"
                    try:
                        # is_symlink() must be tested before exists(): exists()
                        # follows the link and reports False for a dangling one,
                        # which would send us into os.symlink() on an existing
                        # name and fail with FileExistsError.
                        if dest_file.is_symlink():
                            current_target = Path(os.readlink(dest_file))
                            if current_target.resolve() != host_source_path.resolve():
                                dest_file.unlink()
                                os.symlink(host_source_path, dest_file)
                                log(f"   [FIX] Relinked: {folder_name}")
                                new_links += 1
                            else:
                                verified_links += 1
                        elif not dest_file.exists():
                            os.symlink(host_source_path, dest_file)
                            log(f"   [NEW] Linked: {folder_name}")
                            new_links += 1
                        # else: a real file occupies the name; leave it untouched.
                    except Exception as e:
                        # Best effort per video: report it, don't record it as
                        # linked, and carry on with the next file.
                        log(f"   ❌ Failed to link {folder_name}: {e}")
                        continue

                    # Store in database
                    conn.execute("""
                        INSERT OR REPLACE INTO videos
                        (video_id, title, channel, published, symlink, status)
                        VALUES (?, ?, ?, ?, ?, 'linked')
                    """, (video_id, meta["title"], meta["channel_name"],
                          meta["published"], str(dest_file)))

                    processed_videos.append({
                        "video_id": video_id,
                        "title": meta["title"],
                        "channel": meta["channel_name"],
                        "published": meta["published"],
                        "symlink": str(dest_file)
                    })
        except Exception as e:
            conn.rollback()
            log(f"❌ Scan aborted: {e}")
            return str(e)

        conn.commit()

    log(f"✅ Scan complete. Processed {len(processed_videos)} videos.")
    log(f"   - New/Fixed Links: {new_links}")
    log(f"   - Verified Links:  {verified_links}")
    return None

def scheduler():
    """Run process_videos() forever, sleeping SCAN_INTERVAL minutes between runs.

    Meant to be the target of a background thread; it never returns. An
    exception escaping a scan is logged and the loop carries on, so one failed
    scan does not stop all future scans.
    """
    log(f"🕒 Background scheduler started. Scanning every {SCAN_INTERVAL} minutes.")
    while True:
        log("🔄 Running scheduled scan...")
        try:
            process_videos()
        except Exception as e:
            log(f"❌ Scheduled scan failed: {e}")
        time.sleep(SCAN_INTERVAL * 60)

# Flask routes

@app.before_request
def limit_remote_addr():
    """Reject the request with 403 unless the client address is in ALLOWED_IPS.

    Entries containing "/" are treated as networks, all others as single
    addresses; empty entries are ignored. A client address or allow-list entry
    that cannot be parsed also results in 403.
    """
    client_ip = request.remote_addr

    def entry_matches(entry, address):
        if "/" in entry:
            return address in ipaddress.ip_network(entry, strict=False)
        return address == ipaddress.ip_address(entry)

    try:
        ip_obj = ipaddress.ip_address(client_ip)
        # any() stops at the first match, so later entries are not parsed.
        allowed = any(entry_matches(entry, ip_obj) for entry in ALLOWED_IPS if entry)
    except ValueError as e:
        log(f"⛔ Invalid IP format: {client_ip}, Error: {e}")
        abort(403)

    if not allowed:
        log(f"⛔ Access denied for IP: {client_ip}")
        abort(403)

@app.route("/")
def index():
    """Serve the dashboard page."""
    page = render_template('dashboard.html')
    return page

@app.route("/api/status")
def api_status():
    """Return every row of the ``videos`` table plus summary counts as JSON.

    The response has the keys total_videos, verified_links (rows with status
    'linked'), missing_count (rows with status 'missing') and videos.
    """
    columns = ("video_id", "title", "channel", "published", "symlink", "status")

    with get_db() as conn:
        cursor = conn.execute("SELECT * FROM videos ORDER BY channel, published DESC")
        videos = [{column: row[column] for column in columns} for row in cursor]

        statuses = [video["status"] for video in videos]
        summary = {
            "total_videos": len(videos),
            "verified_links": statuses.count("linked"),
            "missing_count": statuses.count("missing"),
            "videos": videos
        }
        return jsonify(summary)

@app.route("/api/logs")
def api_logs():
    """Return buffered log lines from index ``start`` (query arg, default 0).

    ``next_index`` is the current buffer length.

    NOTE(review): log() drops the oldest entry once the buffer holds more than
    1000 lines, so indices shift after that point - confirm how the client
    uses next_index.
    """
    start = request.args.get('start', 0, type=int)
    with log_lock:
        tail = log_buffer[start:]
        buffered = len(log_buffer)
        return jsonify({
            "logs": tail,
            "next_index": buffered
        })

@app.route("/api/scan", methods=["POST"])
def api_scan():
    """Start process_videos() in a new thread and answer immediately."""
    # The scan runs in its own thread so the request is not held open.
    worker = threading.Thread(target=process_videos)
    worker.start()
    return jsonify({"status": "started"})

@app.route("/api/cleanup", methods=["POST"])
def api_cleanup():
    """Start cleanup_old_folders() in a new thread and answer immediately."""
    worker = threading.Thread(target=cleanup_old_folders)
    worker.start()
    return jsonify({"status": "started"})

@app.route("/api/check-orphans", methods=["POST"])
def api_check_orphans():
    """Run check_orphaned_links() within the request and return its findings."""
    orphaned = check_orphaned_links()
    payload = {"status": "complete", "orphaned": orphaned, "count": len(orphaned)}
    return jsonify(payload)

@app.route("/transcode")
def transcode_page():
    """Serve the transcoding page."""
    page = render_template('transcoding.html')
    return page

@app.route("/api/transcode/videos")
def api_transcode_videos():
    """Return one page of the videos whose status is 'missing'.

    Query args:
        page: 1-based page number (default 1; values below 1 are treated as 1).
        per_page: Page size (default 100; values below 1 are treated as 1,
            which also avoids a division by zero in the page count).

    The response carries videos, total, page, per_page and pages.
    """
    page = max(1, request.args.get('page', 1, type=int))
    per_page = max(1, request.args.get('per_page', 100, type=int))
    offset = (page - 1) * per_page

    with get_db() as conn:
        # Get total count
        total = conn.execute("SELECT COUNT(*) as count FROM videos WHERE status = 'missing'").fetchone()['count']

        # LIMIT/OFFSET only yields stable pages with a total order; video_id is
        # the primary key and breaks ties.
        rows = conn.execute(
            "SELECT * FROM videos WHERE status = 'missing' "
            "ORDER BY channel, published DESC, video_id LIMIT ? OFFSET ?",
            (per_page, offset)
        )
        videos = [
            {
                "video_id": row["video_id"],
                "title": row["title"],
                "channel": row["channel"],
                "published": row["published"],
                "symlink": row["symlink"]
            }
            for row in rows
        ]

        return jsonify({
            "videos": videos,
            "total": total,
            "page": page,
            "per_page": per_page,
            "pages": (total + per_page - 1) // per_page
        })

@app.route("/api/transcode/start", methods=["POST"])
def api_transcode_start():
    """Start transcoding the file named in the JSON body, in a background thread.

    Expects a JSON object with a string ``filepath``. Answers 400 when the body
    is missing, is not a JSON object, or carries no usable ``filepath``;
    otherwise answers immediately with the selected encoder.
    """
    # silent=True: a missing or malformed body yields None instead of an error
    # page, so every bad request gets the same JSON 400 below.
    data = request.get_json(silent=True)
    filepath = data.get('filepath') if isinstance(data, dict) else None

    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400
    if not isinstance(filepath, str):
        return jsonify({"error": "filepath must be a string"}), 400

    # NOTE(review): filepath is client-supplied and is not restricted to the
    # source/target directories here; access relies on the IP allow-list.

    encoder = detect_encoder()
    tlog(f"🖥️  Selected encoder: {encoder}")

    # Run in background
    worker = threading.Thread(target=transcode_video, args=(filepath, encoder))
    worker.start()
    return jsonify({"message": "Transcode started", "encoder": encoder})

@app.route("/api/transcode/logs")
def api_transcode_logs():
    """Return buffered transcode log lines from index ``start`` (default 0).

    ``next_index`` is the current length of the transcode log buffer.
    """
    start = request.args.get('start', 0, type=int)
    with transcode_log_lock:
        tail = transcode_log_buffer[start:]
        buffered = len(transcode_log_buffer)
        return jsonify({
            "logs": tail,
            "next_index": buffered
        })

if __name__ == "__main__":
    # Start scheduler in background thread
    # daemon=True: the scheduler loop never returns, so it must not keep the
    # process alive once the web server exits.
    thread = threading.Thread(target=scheduler, daemon=True)
    thread.start()

    # Listens on all interfaces; limit_remote_addr() filters clients by address.
    app.run(host="0.0.0.0", port=5000)