diff --git a/docker-compose.yml b/docker-compose.yml index 2c64d0b..4594ddd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,10 +1,10 @@ services: ta-organizer: - build: . + build: /mnt/user/appdata/dockerbuildings container_name: ta-organizer volumes: - - ./source:/app/source:ro - - ./target:/app/target + - /mnt/user/appdata/dockerbuildings/source:/app/source:ro + - /mnt/user/appdata/dockerbuildings/target:/app/target environment: - - API_TOKEN=${API_TOKEN} - env_file: .env + - SCAN_INTERVAL=${SCAN_INTERVAL:-60} + env_file: /mnt/user/appdata/dockerbuildings/.env diff --git a/ta-organizerr.tar.gz b/ta-organizerr.tar.gz new file mode 100644 index 0000000..59e03c1 Binary files /dev/null and b/ta-organizerr.tar.gz differ diff --git a/ta_symlink.py b/ta_symlink.py index 0497085..fd53f1a 100644 --- a/ta_symlink.py +++ b/ta_symlink.py @@ -4,12 +4,15 @@ import os import requests import re import sys +import threading +import time from flask import Flask, jsonify, render_template_string, request # Load config from environment variables API_URL = os.getenv("API_URL", "http://localhost:8457/api") VIDEO_URL = os.getenv("VIDEO_URL", "http://localhost:8457/video/") API_TOKEN = os.getenv("API_TOKEN", "") +SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", 60)) # Default 60 minutes SOURCE_DIR = Path("/app/source") TARGET_DIR = Path("/app/target") HEADERS = {"Authorization": f"Token {API_TOKEN}"} @@ -24,41 +27,137 @@ def sanitize(text): text = re.sub(r'[\/:*?"<>|]', "_", text) return text.strip() -def fetch_video_metadata(video_id): - url = f"{API_URL}/video/{video_id}/" - try: - response = requests.get(url, headers=HEADERS) - response.raise_for_status() - data = response.json() +def fetch_all_metadata(): + print("๐Ÿ“ฅ Fetching all video metadata...", flush=True) + video_map = {} + page = 1 + while True: + url = f"{API_URL}/video/?page={page}" + try: + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + data = response.json() + + if 'data' not in data or not data['data']: + break + + for video in data['data']: + # Try to find the ID. It might be 'youtube_id' or '_id' + vid_id = video.get("youtube_id") or video.get("_id") + if not vid_id: + continue + + title = video.get("title", "unknown_title") + channel_info = video.get("channel", {}) + channel_name = channel_info.get("channel_name") or channel_info.get("channel_title") or "Unknown Channel" + # Fix date format: take only first 10 chars (YYYY-MM-DD) + raw_date = video.get("published", "unknown_date") + published = raw_date[:10] if len(raw_date) >= 10 else raw_date.replace("/", "-") + + video_map[vid_id] = { + "title": title, + "channel_name": channel_name, + "published": published + } + + # Check pagination to see if we are done + if 'paginate' in data: + current = data['paginate'].get('current_page') + last = data['paginate'].get('last_page') + if current is not None and last is not None and current >= last: + break + else: + # Fallback if no pagination info, just stop if empty data (handled above) or arbitrary limit? + # If we got data but no pagination, maybe it's a single page result? + # But we loop until no data. + pass - title = data.get("title", "unknown_title") - channel_info = data.get("channel", {}) - channel_id = channel_info.get("channel_id", "unknown_channel") - channel_name = channel_info.get("channel_name") or channel_info.get("channel_title") or "Unknown Channel" - published = data.get("published", "unknown_date").replace("/", "-") + print(f" - Page {page} fetched. Total videos so far: {len(video_map)}", flush=True) + page += 1 + + except Exception as e: + print(f"โŒ Error fetching page {page}: {e}", flush=True) + # If a page fails, maybe we should stop or retry? For now, let's stop to avoid infinite loops on auth error + break + + print(f"โœ… Metadata fetch complete. Found {len(video_map)} videos.", flush=True) + return video_map - return { - "title": title, - "channel_id": channel_id, - "channel_name": channel_name, - "published": published - } - except Exception as e: - print(f"โŒ Error fetching metadata for {video_id}: {e}", flush=True) - return None +def cleanup_old_folders(): + """ + Scans TARGET_DIR for folders containing '+00:00'. + Safely deletes them ONLY if they contain no real files (only symlinks or empty). + """ + print("๐Ÿงน Starting cleanup. Scanning ONLY for folders containing '+00:00'...", flush=True) + cleaned_count = 0 + skipped_count = 0 + + if not TARGET_DIR.exists(): + return + + # Walk top-down + for channel_dir in TARGET_DIR.iterdir(): + if not channel_dir.is_dir(): + continue + + for video_dir in channel_dir.iterdir(): + if not video_dir.is_dir(): + continue + + if "+00:00" in video_dir.name: + # Check safety + safe_to_delete = True + reason = "" + + for item in video_dir.iterdir(): + if not item.is_symlink(): + # Found a real file! Unsafe! + safe_to_delete = False + reason = "Contains real files" + break + + if safe_to_delete: + try: + # Remove all symlinks first + for item in video_dir.iterdir(): + item.unlink() + # Remove directory + video_dir.rmdir() + print(f" [DELETED] {video_dir.name}", flush=True) + cleaned_count += 1 + except Exception as e: + print(f" โŒ Failed to delete {video_dir.name}: {e}", flush=True) + else: + print(f" โš ๏ธ SKIPPING {video_dir.name} - {reason}", flush=True) + skipped_count += 1 + + print(f"๐Ÿงน Cleanup complete. Removed: {cleaned_count}, Skipped: {skipped_count}", flush=True) # Main logic def process_videos(): global processed_videos processed_videos = [] + + # 1. Fetch all metadata first + video_map = fetch_all_metadata() + + # 2. Run cleanup + cleanup_old_folders() + + # Statistics + new_links = 0 + verified_links = 0 + try: for channel_path in SOURCE_DIR.iterdir(): if not channel_path.is_dir(): continue for video_file in channel_path.glob("*.*"): video_id = video_file.stem - meta = fetch_video_metadata(video_id) + + # 2. Lookup in local map + meta = video_map.get(video_id) if not meta: continue sanitized_channel_name = sanitize(meta["channel_name"]) @@ -81,8 +180,14 @@ def process_videos(): if current_target.resolve() != host_source_path.resolve(): dest_file.unlink() os.symlink(host_source_path, dest_file) + print(f" [FIX] Relinked: {folder_name}", flush=True) + new_links += 1 + else: + verified_links += 1 else: os.symlink(host_source_path, dest_file) + print(f" [NEW] Linked: {folder_name}", flush=True) + new_links += 1 except Exception: pass processed_videos.append({ @@ -94,8 +199,18 @@ def process_videos(): }) except Exception as e: return str(e) + + print(f"โœ… Scan complete. Processed {len(processed_videos)} videos.", flush=True) + print(f" - New/Fixed Links: {new_links}", flush=True) + print(f" - Verified Links: {verified_links}", flush=True) return None +def scheduler(): + print(f"๐Ÿ•’ Background scheduler started. Scanning every {SCAN_INTERVAL} minutes.", flush=True) + while True: + print("๐Ÿ”„ Running scheduled scan...", flush=True) + process_videos() + time.sleep(SCAN_INTERVAL * 60) # Flask routes @app.route("/") @@ -146,4 +261,8 @@ def api_videos(): return jsonify(processed_videos) if __name__ == "__main__": + # Start scheduler in background thread + thread = threading.Thread(target=scheduler, daemon=True) + thread.start() + app.run(host="0.0.0.0", port=5000) diff --git a/test_api.py b/test_api.py new file mode 100644 index 0000000..7d51e6a --- /dev/null +++ b/test_api.py @@ -0,0 +1,62 @@ +import requests +import os +import json + +# Manually load .env +try: + with open('.env', 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + key, value = line.split('=', 1) + os.environ[key] = value +except FileNotFoundError: + print("Warning: .env file not found") + +API_URL = os.getenv("API_URL") +API_TOKEN = os.getenv("API_TOKEN") + +headers = {"Authorization": f"Token {API_TOKEN}"} + +print(f"Testing API at: {API_URL}") + +def test_endpoint(path): + url = f"{API_URL}{path}" + print(f"\n--- Testing {url} ---") + try: + response = requests.get(url, headers=headers, timeout=5) + print(f"Status Code: {response.status_code}") + try: + data = response.json() + print("Response JSON (truncated):") + print(json.dumps(data, indent=2)[:500] + "..." if len(str(data)) > 500 else json.dumps(data, indent=2)) + return data + except json.JSONDecodeError: + print("Response is not JSON") + print(response.text[:200]) + return None + except Exception as e: + print(f"Error: {e}") + return None + +# Test Root API +test_endpoint("") + +# Test Search Parameters +target_id = "K1Uw_YVgCBsww" +print(f"\n--- Testing Search Params for {target_id} ---") + +# Test Page Size +print(f"\n--- Testing Page Size ---") + +sizes = [12, 50, 100] + +for size in sizes: + url = f"/video/?page_size={size}" + print(f"Testing {url}...") + data = test_endpoint(url) + if data and isinstance(data, dict) and 'data' in data: + count = len(data['data']) + print(f"Requested {size}, got {count} items.") + if 'paginate' in data: + print(f"Pagination meta: {data['paginate']}")