feat: Introduce scheduled background scanning, old folder cleanup, and optimize metadata fetching with updated Docker configuration and API tests.
This commit is contained in:
parent
1ecb31ae12
commit
94f077944b
4 changed files with 207 additions and 26 deletions
|
|
@ -1,10 +1,10 @@
|
||||||
services:
|
services:
|
||||||
ta-organizer:
|
ta-organizer:
|
||||||
build: .
|
build: /mnt/user/appdata/dockerbuildings
|
||||||
container_name: ta-organizer
|
container_name: ta-organizer
|
||||||
volumes:
|
volumes:
|
||||||
- ./source:/app/source:ro
|
- /mnt/user/appdata/dockerbuildings/source:/app/source:ro
|
||||||
- ./target:/app/target
|
- /mnt/user/appdata/dockerbuildings/target:/app/target
|
||||||
environment:
|
environment:
|
||||||
- API_TOKEN=${API_TOKEN}
|
- SCAN_INTERVAL=${SCAN_INTERVAL:-60}
|
||||||
env_file: .env
|
env_file: /mnt/user/appdata/dockerbuildings/.env
|
||||||
|
|
|
||||||
BIN
ta-organizerr.tar.gz
Normal file
BIN
ta-organizerr.tar.gz
Normal file
Binary file not shown.
161
ta_symlink.py
161
ta_symlink.py
|
|
@ -4,12 +4,15 @@ import os
|
||||||
import requests
|
import requests
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
from flask import Flask, jsonify, render_template_string, request
|
from flask import Flask, jsonify, render_template_string, request
|
||||||
|
|
||||||
# Load config from environment variables
|
# Load config from environment variables
|
||||||
API_URL = os.getenv("API_URL", "http://localhost:8457/api")
|
API_URL = os.getenv("API_URL", "http://localhost:8457/api")
|
||||||
VIDEO_URL = os.getenv("VIDEO_URL", "http://localhost:8457/video/")
|
VIDEO_URL = os.getenv("VIDEO_URL", "http://localhost:8457/video/")
|
||||||
API_TOKEN = os.getenv("API_TOKEN", "")
|
API_TOKEN = os.getenv("API_TOKEN", "")
|
||||||
|
SCAN_INTERVAL = int(os.getenv("SCAN_INTERVAL", 60)) # Default 60 minutes
|
||||||
SOURCE_DIR = Path("/app/source")
|
SOURCE_DIR = Path("/app/source")
|
||||||
TARGET_DIR = Path("/app/target")
|
TARGET_DIR = Path("/app/target")
|
||||||
HEADERS = {"Authorization": f"Token {API_TOKEN}"}
|
HEADERS = {"Authorization": f"Token {API_TOKEN}"}
|
||||||
|
|
@ -24,41 +27,137 @@ def sanitize(text):
|
||||||
text = re.sub(r'[\/:*?"<>|]', "_", text)
|
text = re.sub(r'[\/:*?"<>|]', "_", text)
|
||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
def fetch_video_metadata(video_id):
|
def fetch_all_metadata():
|
||||||
url = f"{API_URL}/video/{video_id}/"
|
print("📥 Fetching all video metadata...", flush=True)
|
||||||
try:
|
video_map = {}
|
||||||
response = requests.get(url, headers=HEADERS)
|
page = 1
|
||||||
response.raise_for_status()
|
while True:
|
||||||
data = response.json()
|
url = f"{API_URL}/video/?page={page}"
|
||||||
|
try:
|
||||||
|
response = requests.get(url, headers=HEADERS)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
title = data.get("title", "unknown_title")
|
if 'data' not in data or not data['data']:
|
||||||
channel_info = data.get("channel", {})
|
break
|
||||||
channel_id = channel_info.get("channel_id", "unknown_channel")
|
|
||||||
channel_name = channel_info.get("channel_name") or channel_info.get("channel_title") or "Unknown Channel"
|
|
||||||
published = data.get("published", "unknown_date").replace("/", "-")
|
|
||||||
|
|
||||||
return {
|
for video in data['data']:
|
||||||
"title": title,
|
# Try to find the ID. It might be 'youtube_id' or '_id'
|
||||||
"channel_id": channel_id,
|
vid_id = video.get("youtube_id") or video.get("_id")
|
||||||
"channel_name": channel_name,
|
if not vid_id:
|
||||||
"published": published
|
continue
|
||||||
}
|
|
||||||
except Exception as e:
|
title = video.get("title", "unknown_title")
|
||||||
print(f"❌ Error fetching metadata for {video_id}: {e}", flush=True)
|
channel_info = video.get("channel", {})
|
||||||
return None
|
channel_name = channel_info.get("channel_name") or channel_info.get("channel_title") or "Unknown Channel"
|
||||||
|
# Fix date format: take only first 10 chars (YYYY-MM-DD)
|
||||||
|
raw_date = video.get("published", "unknown_date")
|
||||||
|
published = raw_date[:10] if len(raw_date) >= 10 else raw_date.replace("/", "-")
|
||||||
|
|
||||||
|
video_map[vid_id] = {
|
||||||
|
"title": title,
|
||||||
|
"channel_name": channel_name,
|
||||||
|
"published": published
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check pagination to see if we are done
|
||||||
|
if 'paginate' in data:
|
||||||
|
current = data['paginate'].get('current_page')
|
||||||
|
last = data['paginate'].get('last_page')
|
||||||
|
if current is not None and last is not None and current >= last:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Fallback if no pagination info, just stop if empty data (handled above) or arbitrary limit?
|
||||||
|
# If we got data but no pagination, maybe it's a single page result?
|
||||||
|
# But we loop until no data.
|
||||||
|
pass
|
||||||
|
|
||||||
|
print(f" - Page {page} fetched. Total videos so far: {len(video_map)}", flush=True)
|
||||||
|
page += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Error fetching page {page}: {e}", flush=True)
|
||||||
|
# If a page fails, maybe we should stop or retry? For now, let's stop to avoid infinite loops on auth error
|
||||||
|
break
|
||||||
|
|
||||||
|
print(f"✅ Metadata fetch complete. Found {len(video_map)} videos.", flush=True)
|
||||||
|
return video_map
|
||||||
|
|
||||||
|
def cleanup_old_folders(target_dir=None, marker="+00:00"):
    """Remove stale per-video folders whose name contains ``marker``.

    Scans two levels below the root (``<root>/<channel>/<video>``). A matching
    video folder is deleted only when every entry inside it is a symlink (an
    empty folder qualifies too); a folder holding any real file or
    subdirectory is reported and left untouched.

    Args:
        target_dir: Root directory to scan. Defaults to the module-level
            ``TARGET_DIR``.
        marker: Substring that identifies a stale folder name.

    Returns:
        None. Progress and a final summary are printed to stdout. When the
        root does not exist the function returns right after the start banner.
    """
    root = TARGET_DIR if target_dir is None else Path(target_dir)
    print(f"🧹 Starting cleanup. Scanning ONLY for folders containing '{marker}'...", flush=True)

    if not root.exists():
        return

    cleaned_count = 0
    skipped_count = 0
    failed_count = 0

    for channel_dir in root.iterdir():
        if not channel_dir.is_dir():
            continue

        # Snapshot the listing up front: folders are removed while we walk it,
        # and an unreadable channel must not abort the whole cleanup.
        try:
            video_dirs = list(channel_dir.iterdir())
        except OSError as e:
            print(f" ❌ Failed to scan {channel_dir.name}: {e}", flush=True)
            failed_count += 1
            continue

        for video_dir in video_dirs:
            if marker not in video_dir.name:
                continue
            # is_dir() follows symlinks. A symlinked folder is skipped so that
            # the contents of its target are never emptied through the link.
            if video_dir.is_symlink() or not video_dir.is_dir():
                continue

            try:
                entries = list(video_dir.iterdir())
                if all(entry.is_symlink() for entry in entries):
                    for entry in entries:
                        entry.unlink()
                    video_dir.rmdir()
                    deleted = True
                else:
                    deleted = False
            except OSError as e:
                print(f" ❌ Failed to delete {video_dir.name}: {e}", flush=True)
                failed_count += 1
                continue

            if deleted:
                print(f" [DELETED] {video_dir.name}", flush=True)
                cleaned_count += 1
            else:
                print(f" ⚠️ SKIPPING {video_dir.name} - Contains real files", flush=True)
                skipped_count += 1

    summary = f"🧹 Cleanup complete. Removed: {cleaned_count}, Skipped: {skipped_count}"
    if failed_count:
        summary += f", Failed: {failed_count}"
    print(summary, flush=True)
|
||||||
|
|
||||||
# Main logic
|
# Main logic
|
||||||
|
|
||||||
def process_videos():
|
def process_videos():
|
||||||
global processed_videos
|
global processed_videos
|
||||||
processed_videos = []
|
processed_videos = []
|
||||||
|
|
||||||
|
# 1. Fetch all metadata first
|
||||||
|
video_map = fetch_all_metadata()
|
||||||
|
|
||||||
|
# 2. Run cleanup
|
||||||
|
cleanup_old_folders()
|
||||||
|
|
||||||
|
# Statistics
|
||||||
|
new_links = 0
|
||||||
|
verified_links = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for channel_path in SOURCE_DIR.iterdir():
|
for channel_path in SOURCE_DIR.iterdir():
|
||||||
if not channel_path.is_dir():
|
if not channel_path.is_dir():
|
||||||
continue
|
continue
|
||||||
for video_file in channel_path.glob("*.*"):
|
for video_file in channel_path.glob("*.*"):
|
||||||
video_id = video_file.stem
|
video_id = video_file.stem
|
||||||
meta = fetch_video_metadata(video_id)
|
|
||||||
|
# 2. Lookup in local map
|
||||||
|
meta = video_map.get(video_id)
|
||||||
if not meta:
|
if not meta:
|
||||||
continue
|
continue
|
||||||
sanitized_channel_name = sanitize(meta["channel_name"])
|
sanitized_channel_name = sanitize(meta["channel_name"])
|
||||||
|
|
@ -81,8 +180,14 @@ def process_videos():
|
||||||
if current_target.resolve() != host_source_path.resolve():
|
if current_target.resolve() != host_source_path.resolve():
|
||||||
dest_file.unlink()
|
dest_file.unlink()
|
||||||
os.symlink(host_source_path, dest_file)
|
os.symlink(host_source_path, dest_file)
|
||||||
|
print(f" [FIX] Relinked: {folder_name}", flush=True)
|
||||||
|
new_links += 1
|
||||||
|
else:
|
||||||
|
verified_links += 1
|
||||||
else:
|
else:
|
||||||
os.symlink(host_source_path, dest_file)
|
os.symlink(host_source_path, dest_file)
|
||||||
|
print(f" [NEW] Linked: {folder_name}", flush=True)
|
||||||
|
new_links += 1
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
processed_videos.append({
|
processed_videos.append({
|
||||||
|
|
@ -94,8 +199,18 @@ def process_videos():
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return str(e)
|
return str(e)
|
||||||
|
|
||||||
|
print(f"✅ Scan complete. Processed {len(processed_videos)} videos.", flush=True)
|
||||||
|
print(f" - New/Fixed Links: {new_links}", flush=True)
|
||||||
|
print(f" - Verified Links: {verified_links}", flush=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def scheduler():
    """Run ``process_videos`` in an endless loop, sleeping between runs.

    Used as the target of the background daemon thread started under
    ``__main__``. The pause between scans is ``SCAN_INTERVAL`` minutes.
    This function never returns.
    """
    # A zero interval would busy-loop and a negative one makes time.sleep()
    # raise, so never wait less than one minute between scans.
    interval_minutes = max(SCAN_INTERVAL, 1)
    print(f"🕒 Background scheduler started. Scanning every {interval_minutes} minutes.", flush=True)
    while True:
        print("🔄 Running scheduled scan...", flush=True)
        try:
            # process_videos() returns an error string on failure, None on success.
            error = process_videos()
        except Exception as e:
            # Top-level boundary of the worker thread: an uncaught exception
            # here would end the thread and silently stop all future scans.
            print(f"❌ Scheduled scan crashed: {e}", flush=True)
        else:
            if error:
                print(f"❌ Scheduled scan failed: {error}", flush=True)
        time.sleep(interval_minutes * 60)
|
||||||
|
|
||||||
# Flask routes
|
# Flask routes
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
|
|
@ -146,4 +261,8 @@ def api_videos():
|
||||||
return jsonify(processed_videos)
|
return jsonify(processed_videos)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
# Start scheduler in background thread
|
||||||
|
thread = threading.Thread(target=scheduler, daemon=True)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
app.run(host="0.0.0.0", port=5000)
|
app.run(host="0.0.0.0", port=5000)
|
||||||
|
|
|
||||||
62
test_api.py
Normal file
62
test_api.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Manually load .env
def load_env_file(path=".env"):
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines and lines starting with ``#`` are ignored. Lines without an
    ``=`` or with an empty key are skipped instead of aborting the load.
    Whitespace around the key and the value is stripped; everything after the
    first ``=`` belongs to the value. Existing variables are overwritten.

    Args:
        path: Location of the file to read.

    Returns:
        True when the file was read, False when it does not exist (a warning
        is printed in that case).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                key, sep, value = line.partition("=")
                key = key.strip()
                if not sep or not key:
                    # Malformed entry: nothing sensible to export.
                    continue
                os.environ[key] = value.strip()
    except FileNotFoundError:
        print("Warning: .env file not found")
        return False
    return True


load_env_file()
|
||||||
|
|
||||||
|
API_URL = os.getenv("API_URL")
|
||||||
|
API_TOKEN = os.getenv("API_TOKEN")
|
||||||
|
|
||||||
|
headers = {"Authorization": f"Token {API_TOKEN}"}
|
||||||
|
|
||||||
|
print(f"Testing API at: {API_URL}")
|
||||||
|
|
||||||
|
def test_endpoint(path):
    """GET ``API_URL + path`` and print a short diagnostic report.

    Prints the requested URL, the HTTP status code and up to 500 characters
    of the pretty-printed JSON body. For a non-JSON body it prints the first
    200 characters of the raw text instead.

    Args:
        path: Suffix appended verbatim to ``API_URL``; may be empty or carry
            a query string.

    Returns:
        The decoded JSON body, or None when the request fails or the body is
        not valid JSON.
    """
    url = f"{API_URL}{path}"
    print(f"\n--- Testing {url} ---")

    # Only the network call is guarded here so that a failed request and a
    # non-JSON body are reported separately.
    try:
        response = requests.get(url, headers=headers, timeout=5)
    except requests.RequestException as e:
        print(f"Error: {e}")
        return None
    print(f"Status Code: {response.status_code}")

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of the stdlib and simplejson decode
        # errors that requests may raise from Response.json().
        print("Response is not JSON")
        print(response.text[:200])
        return None

    # Decide on truncation from the text that is actually printed.
    rendered = json.dumps(data, indent=2)
    print("Response JSON (truncated):")
    print(rendered[:500] + "..." if len(rendered) > 500 else rendered)
    return data
|
||||||
|
|
||||||
|
# Test Root API
|
||||||
|
test_endpoint("")
|
||||||
|
|
||||||
|
# Test Search Parameters
|
||||||
|
target_id = "K1Uw_YVgCBsww"
|
||||||
|
print(f"\n--- Testing Search Params for {target_id} ---")
|
||||||
|
|
||||||
|
# Test Page Size
|
||||||
|
print(f"\n--- Testing Page Size ---")
|
||||||
|
|
||||||
|
sizes = [12, 50, 100]
|
||||||
|
|
||||||
|
for size in sizes:
|
||||||
|
url = f"/video/?page_size={size}"
|
||||||
|
print(f"Testing {url}...")
|
||||||
|
data = test_endpoint(url)
|
||||||
|
if data and isinstance(data, dict) and 'data' in data:
|
||||||
|
count = len(data['data'])
|
||||||
|
print(f"Requested {size}, got {count} items.")
|
||||||
|
if 'paginate' in data:
|
||||||
|
print(f"Pagination meta: {data['paginate']}")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue