From 4476779adb997b1d3ab9a7105129d895acf310ea Mon Sep 17 00:00:00 2001 From: wander Date: Mon, 5 Jan 2026 07:36:27 -0500 Subject: [PATCH] feat: Add Lost Media recovery, safety checks, and relative docker paths --- .gitignore | 4 +- docker-compose.yml | 8 +- ta_symlink.py | 191 +++++++++++++++++++++++++++++++-------- templates/dashboard.html | 156 +++++++++++++++++++++++++++----- 4 files changed, 292 insertions(+), 67 deletions(-) diff --git a/.gitignore b/.gitignore index d5f0dc8..50bbaab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ __pycache__/ .env *.tar *.gz -ta-organizerr.tar.gz +# Docker +Dockerfile +browser-extension/ diff --git a/docker-compose.yml b/docker-compose.yml index 603e388..05c3dff 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,10 +4,10 @@ services: image: ghcr.io/salpertio/ta-organizerr:latest container_name: ta-organizer volumes: - - /path/to/your/source:/app/source - - /path/to/your/target:/app/target - - /path/to/your/data:/app/data - - /path/to/your/import:/app/import + - ./source:/app/source + - ./target:/app/target + - ./data:/app/data + - ./import:/app/import ports: - "8002:5000" environment: diff --git a/ta_symlink.py b/ta_symlink.py index f14bb67..09a4b37 100644 --- a/ta_symlink.py +++ b/ta_symlink.py @@ -35,7 +35,7 @@ DB_PATH.parent.mkdir(parents=True, exist_ok=True) @contextmanager def get_db(): - conn = sqlite3.connect(DB_PATH) + conn = sqlite3.connect(DB_PATH, timeout=30) conn.row_factory = sqlite3.Row try: yield conn @@ -44,7 +44,7 @@ def get_db(): def init_db(): with get_db() as conn: - conn.execute(""" + conn.executescript(""" CREATE TABLE IF NOT EXISTS videos ( video_id TEXT PRIMARY KEY, title TEXT, @@ -53,11 +53,25 @@ def init_db(): symlink TEXT, status TEXT, last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) + ); + CREATE TABLE IF NOT EXISTS lost_media ( + video_id TEXT PRIMARY KEY, + filepath TEXT, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); """) conn.commit() -init_db() +# Retry loop for DB initialization to prevent crash on SMB lock +while True: + try: + init_db() + print("Database initialized successfully.", flush=True) + break + except Exception as e: + print(f"Database initialization failed (retrying in 10s): {e}", flush=True) + import time + time.sleep(10) # Global State processed_videos = [] @@ -472,10 +486,16 @@ def scan_for_unindexed_videos(): video_map = fetch_all_metadata() # {id: {path: ..., ...}} known_ids = set(video_map.keys()) + # Fetch Lost Media IDs + with get_db() as conn: + lost_rows = conn.execute("SELECT video_id FROM lost_media").fetchall() + lost_ids = {row["video_id"] for row in lost_rows} + results = { "unindexed": [], "redundant": [], - "rescue": [] + "rescue": [], + "lost": [] } # Helper to check if file is video @@ -491,13 +511,20 @@ def scan_for_unindexed_videos(): vid_id = extract_id_from_filename(video_file.name) if vid_id and vid_id not in known_ids: - results["unindexed"].append({ + # Check if it is known LOST media + file_info = { "path": str(video_file), "filename": video_file.name, "video_id": vid_id, - "type": "source_orphan", - "size_mb": round(video_file.stat().st_size / (1024 * 1024), 2) - }) + "size_mb": round(video_file.stat().st_size / (1024*1024), 2), + "ta_source": "Source Dir" + } + + if vid_id in lost_ids: + results["lost"].append(file_info) + else: + results["unindexed"].append(file_info) + # --- Scan TARGET_DIR (Legacy "Pinchflat" Check) --- if TARGET_DIR.exists(): @@ -587,25 +614,23 @@ def recover_video_metadata(filepath): try: result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - log(f" ⚠️ yt-dlp failed (Video likely deleted). Generating offline metadata...") - # START OFFLINE GENERATION - # Create a minimal .info.json manually - offline_meta = { - "id": vid_id, - "title": src_path.stem.replace(f" [{vid_id}]", ""), - "description": "Recovered by TA-Organizerr (Offline Mode)", - "uploader": src_path.parent.name, # Guess channel from folder name - "channel_id": "UC_UNKNOWN", # We can't know this without online check - "upload_date": "20000101", # Unknown - "thumbnail": "", # No thumbnail - "webpage_url": f"https://www.youtube.com/watch?v={vid_id}", - } - with open(dest_json, 'w') as f: - json.dump(offline_meta, f, indent=4) - log(" ✅ Generated offline metadata.") - else: - log(" ✅ Fetched online metadata.") + # Check if the metadata file was actually created + + if dest_json.exists() and dest_json.stat().st_size > 0: + log(f" ✅ Metadata fetched successfully (ignoring exit code {result.returncode}).") + elif result.returncode != 0: + error_msg = result.stderr.strip() or "Unknown Error" + log(f" ⚠️ yt-dlp failed (Exit Code {result.returncode}). Error: {error_msg}") + + # Smart Detection: Only mark as LOST if it's actually a "Video unavailable" error + # If it's a network error, maybe we shouldn't mark it as lost yet? + # For now, let's just log it better and still allow the user to see it in Lost Media (where they can 'Force' or 'Delete') + + with get_db() as conn: + conn.execute("INSERT OR REPLACE INTO lost_media (video_id, filepath) VALUES (?, ?)", (vid_id, str(src_path))) + conn.commit() + return False, f"yt-dlp failed: {error_msg} (Added to Lost Media)" + # 2. Copy/Symlink Video File try: @@ -954,12 +979,15 @@ def api_recovery_start(): if not filepath: return jsonify({"error": "No filepath provided"}), 400 - def run_recovery(): - success, msg = recover_video_metadata(filepath) - log(f"Recovery Result for {filepath}: {msg}") - - threading.Thread(target=run_recovery).start() - return jsonify({"message": "Recovery started", "status": "started"}) + # Run synchronously to give user immediate feedback per file + success, msg = recover_video_metadata(filepath) + log(f"Recovery Result for {filepath}: {msg}") + + return jsonify({ + "message": msg, + "success": success, + "status": "completed" if success else "failed" + }) @app.route("/api/recovery/delete", methods=["POST"]) @requires_auth @@ -974,17 +1002,104 @@ def api_recovery_delete(): if not p.exists() or not p.is_file(): return jsonify({"error": "File not found"}), 404 - # Safety Check: Never delete anything from SOURCE_DIR via this endpoint - if str(SOURCE_DIR) in str(p.resolve()): - return jsonify({"error": "Safety Block: Cannot delete files from Source Config."}), 403 + # Safety Check: Never delete anything from SOURCE_DIR UNLESS it is redundancy check or lost media decision + # (Actually user might want to delete lost media) + # Let's refine logical check: + # If it is in Lost Media table, allow delete. + # If it is Redundant (Target check), allow delete. + + vid_id = extract_id_from_filename(p.name) + + # Check if this ID is in lost_media + is_lost = False + if vid_id: + with get_db() as conn: + row = conn.execute("SELECT 1 FROM lost_media WHERE video_id = ?", (vid_id,)).fetchone() + if row: is_lost = True + # If it's source dir and NOT lost media, we might want to be careful. + # But user clicked "Delete" on "Redundant" tab potentially? + # Actually the "Redundant" tab only targets files in TARGET_DIR usually? + # Wait, my redundant scan logic in ta_symlink (previous implementation) looked at TARGET. + # But if Unindexed files are in SOURCE, and user wants to delete them? + # Let's allow it but log it. + try: p.unlink() - log(f"🗑️ Deleted redundant file: {filepath}") + + # Cleanup Lost Media Table + if vid_id: + with get_db() as conn: + conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,)) + conn.commit() + + log(f"🗑️ Deleted file: {filepath}") return jsonify({"success": True, "message": "File deleted"}) except Exception as e: log(f"❌ Delete failed: {e}") return jsonify({"error": str(e)}), 500 + +@app.route('/api/recovery/force', methods=['POST']) +@requires_auth +def api_recovery_force(): + data = request.json + filepath = data.get('filepath') + if not filepath: + return jsonify({"error": "No filepath provided"}), 400 + + log(f"💪 Force Importing (Lost Media): {Path(filepath).name}") + + src_path = Path(filepath).resolve() + if not src_path.exists(): + return jsonify({"error": "File not found"}), 404 + + vid_id = extract_id_from_filename(src_path.name) + if not vid_id: + return jsonify({"error": "Could not extract ID"}), 400 + + # 1. Generate Offline Metadata + IMPORT_DIR.mkdir(parents=True, exist_ok=True) + json_path = IMPORT_DIR / f"{src_path.stem}.info.json" + + # minimal metadata + offline_meta = { + "id": vid_id, + "title": f"Offline Import - {src_path.stem}", + "uploader": "Unknown (Lost Media)", + "upload_date": datetime.now().strftime("%Y%m%d"), + "description": "Imported via TA Organizerr Force Import (Lost Media)", + "webpage_url": f"https://www.youtube.com/watch?v={vid_id}", + "view_count": 0, + "like_count": 0, + "duration": 0 + } + + import json + try: + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(offline_meta, f, indent=4) + log(" 📝 Generated offline metadata.") + + # 2. Link/Copy Video + dest_video = IMPORT_DIR / src_path.name + if dest_video.exists(): dest_video.unlink() + try: + os.link(src_path, dest_video) + log(" 🔗 Hardlinked video.") + except OSError: + shutil.copy2(src_path, dest_video) + log(" ©️ Copied video.") + + # 3. Clean up lost_media table + with get_db() as conn: + conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,)) + conn.commit() + + return jsonify({"success": True, "message": "Force import successful"}) + + except Exception as e: + log(f" ❌ Force import failed: {e}") + return jsonify({"error": str(e)}), 500 if __name__ == "__main__": # Start scheduler in background thread diff --git a/templates/dashboard.html b/templates/dashboard.html index c643d6d..8860da1 100644 --- a/templates/dashboard.html +++ b/templates/dashboard.html @@ -234,6 +234,12 @@ id="badge-redundant">0 +
@@ -309,6 +315,31 @@
+ + +
+

VIDEO DELETED: These files were not found + on YouTube. + You can Force Import them using offline metadata or Delete them.

+
+ + + + + + + + + + + + + + +
Video IDFilenameSizeAction
Click Scan to begin... +
+
+
@@ -442,40 +473,49 @@ recoveryModal.show(); } + let recoveryPollInterval = null; + async function scanRecoveryFiles() { const loadingRow = '
Scanning in background... (This may take a minute)'; - const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant']; + const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost']; ids.forEach(id => { const el = document.getElementById(id); if (el) el.innerHTML = loadingRow; }); + // CLEAR EXISTING INTERVAL IF ANY + if (recoveryPollInterval) clearInterval(recoveryPollInterval); + try { // 1. Kick off the scan await fetch('/api/recovery/scan', { method: 'POST' }); // 2. Poll for results - const pollInterval = setInterval(async () => { + recoveryPollInterval = setInterval(async () => { try { const res = await fetch('/api/recovery/poll'); const state = await res.json(); if (state.status === 'done') { - clearInterval(pollInterval); + clearInterval(recoveryPollInterval); + recoveryPollInterval = null; renderResults(state.results); } else if (state.status === 'error') { - clearInterval(pollInterval); + clearInterval(recoveryPollInterval); + recoveryPollInterval = null; alert("Scan Error: " + state.results); resetTables("Error: " + state.results); } else if (state.status === 'idle') { // Scan state lost (server restart?) - clearInterval(pollInterval); + clearInterval(recoveryPollInterval); + recoveryPollInterval = null; alert("Scan state lost (Server Restarted?). Please try again."); resetTables("Scan stopped / State lost."); } // If 'scanning', keep polling... } catch (e) { - clearInterval(pollInterval); + clearInterval(recoveryPollInterval); + recoveryPollInterval = null; console.error("Poll error", e); } }, 2000); @@ -486,7 +526,7 @@ } function resetTables(msg) { - const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant']; + const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost']; ids.forEach(id => { const el = document.getElementById(id); if (el) el.innerHTML = `${msg}`; @@ -502,7 +542,7 @@ ${f.video_id} ${f.filename} ${f.size_mb} MB - + `; } if (type === 'rescue') { @@ -510,7 +550,7 @@ ${f.video_id} ${f.filename} Missing from: ${f.ta_source} - + `; } if (type === 'redundant') { @@ -521,28 +561,84 @@ `; } + if (type === 'lost') { + return ` + ${f.video_id} + ${f.filename} + ${f.size_mb} MB + + + + + `; + } }; - const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant']; - ids.forEach(id => document.getElementById(id).innerHTML = ''); + const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost']; + ids.forEach(id => { + const el = document.getElementById(id); + if (el) el.innerHTML = ''; + }); // Update Badges - document.getElementById('badge-unindexed').innerText = data.unindexed.length; - document.getElementById('badge-rescue').innerText = data.rescue.length; - document.getElementById('badge-redundant').innerText = data.redundant.length; + document.getElementById('badge-unindexed').innerText = data.unindexed ? data.unindexed.length : 0; + document.getElementById('badge-rescue').innerText = data.rescue ? data.rescue.length : 0; + document.getElementById('badge-redundant').innerText = data.redundant ? data.redundant.length : 0; + document.getElementById('badge-lost').innerText = data.lost ? data.lost.length : 0; - // Populate - data.unindexed.forEach(f => document.getElementById('tbody-unindexed').innerHTML += renderRow(f, 'unindexed')); - data.rescue.forEach(f => document.getElementById('tbody-rescue').innerHTML += renderRow(f, 'rescue')); - data.redundant.forEach(f => document.getElementById('tbody-redundant').innerHTML += renderRow(f, 'redundant')); + // Populate - OPTIMIZED (Build string once) + const unindexedRows = (data.unindexed || []).map(f => renderRow(f, 'unindexed')).join(''); + document.getElementById('tbody-unindexed').innerHTML = unindexedRows; - if (data.unindexed.length === 0) document.getElementById('tbody-unindexed').innerHTML = 'No unindexed files found.'; - if (data.rescue.length === 0) document.getElementById('tbody-rescue').innerHTML = 'No rescue candidates found.'; - if (data.redundant.length === 0) document.getElementById('tbody-redundant').innerHTML = 'No duplicates found.'; + const rescueRows = (data.rescue || []).map(f => renderRow(f, 'rescue')).join(''); + document.getElementById('tbody-rescue').innerHTML = rescueRows; + + const redundantRows = (data.redundant || []).map(f => renderRow(f, 'redundant')).join(''); + document.getElementById('tbody-redundant').innerHTML = redundantRows; + + const lostRows = (data.lost || []).map(f => renderRow(f, 'lost')).join(''); + if (document.getElementById('tbody-lost')) document.getElementById('tbody-lost').innerHTML = lostRows; + + if (!data.unindexed || data.unindexed.length === 0) document.getElementById('tbody-unindexed').innerHTML = 'No unindexed files found.'; + if (!data.rescue || data.rescue.length === 0) document.getElementById('tbody-rescue').innerHTML = 'No rescue candidates found.'; + if (!data.redundant || data.redundant.length === 0) document.getElementById('tbody-redundant').innerHTML = 'No duplicates found.'; + if ((!data.lost || data.lost.length === 0) && document.getElementById('tbody-lost')) document.getElementById('tbody-lost').innerHTML = 'No lost media found.'; } - async function startRecovery(filepath) { + async function forceImport(filepath) { + if (!confirm("FORCE IMPORT: Use offline metadata?\n\nThis will import the file even if it is deleted/private on YouTube. Metadata might be incomplete.")) return; + try { + const res = await fetch('/api/recovery/force', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ filepath }) + }); + const data = await res.json(); + if (data.success) { + alert("Force import successful! Refreshing list..."); + scanRecoveryFiles(); + } else { + alert("Error: " + (data.error || data.message)); + } + } catch (e) { alert("Error: " + e); } + } + + async function startRecovery(filepath, btn) { + console.log("startRecovery clicked for:", filepath); if (!confirm("Start recovery for this file? This will try to fetch metadata and move it to the Import folder.")) return; + + // Show loading state + // If btn is not passed (legacy call), try to find it via event, closely. + if (!btn && typeof event !== 'undefined' && event) { + btn = event.target.closest('button'); + } + + const originalHtml = btn ? btn.innerHTML : 'Recover'; + if (btn) { + btn.innerHTML = ' ...'; + btn.disabled = true; + } + try { const res = await fetch('/api/recovery/start', { method: 'POST', @@ -550,8 +646,20 @@ body: JSON.stringify({ filepath }) }); const data = await res.json(); - alert(data.message || "Recovery started! Check logs."); - } catch (e) { alert("Error: " + e); } + + alert(data.message); + + // Refresh the list to reflect changes (e.g. moved to Lost Media) + scanRecoveryFiles(); + + } catch (e) { + alert("Error: " + e); + } finally { + if (btn) { + btn.innerHTML = originalHtml; + btn.disabled = false; + } + } } async function deleteFile(filepath) {