feat: Add Lost Media recovery, safety checks, and relative docker paths

This commit is contained in:
wander 2026-01-05 07:36:27 -05:00
parent d96cebbf4b
commit 4476779adb
4 changed files with 292 additions and 67 deletions

4
.gitignore vendored
View file

@ -3,4 +3,6 @@ __pycache__/
.env
*.tar
*.gz
ta-organizerr.tar.gz
# Docker
Dockerfile
browser-extension/

View file

@ -4,10 +4,10 @@ services:
image: ghcr.io/salpertio/ta-organizerr:latest
container_name: ta-organizer
volumes:
- /path/to/your/source:/app/source
- /path/to/your/target:/app/target
- /path/to/your/data:/app/data
- /path/to/your/import:/app/import
- ./source:/app/source
- ./target:/app/target
- ./data:/app/data
- ./import:/app/import
ports:
- "8002:5000"
environment:

View file

@ -35,7 +35,7 @@ DB_PATH.parent.mkdir(parents=True, exist_ok=True)
@contextmanager
def get_db():
conn = sqlite3.connect(DB_PATH)
conn = sqlite3.connect(DB_PATH, timeout=30)
conn.row_factory = sqlite3.Row
try:
yield conn
@ -44,7 +44,7 @@ def get_db():
def init_db():
with get_db() as conn:
conn.execute("""
conn.executescript("""
CREATE TABLE IF NOT EXISTS videos (
video_id TEXT PRIMARY KEY,
title TEXT,
@ -53,11 +53,25 @@ def init_db():
symlink TEXT,
status TEXT,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
);
CREATE TABLE IF NOT EXISTS lost_media (
video_id TEXT PRIMARY KEY,
filepath TEXT,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
conn.commit()
init_db()
# Retry loop for DB initialization to prevent crash on SMB lock.
# Network filesystems (SMB/NFS) can transiently hold the SQLite lock while the
# container starts; keep retrying instead of crashing at import time.
import time

while True:
    try:
        init_db()
        print("Database initialized successfully.", flush=True)
        break
    except Exception as e:
        # Broad catch is deliberate: any failure here (locked DB, slow mount)
        # should be retried rather than abort container startup.
        print(f"Database initialization failed (retrying in 10s): {e}", flush=True)
        time.sleep(10)
# Global State
processed_videos = []
@ -472,10 +486,16 @@ def scan_for_unindexed_videos():
video_map = fetch_all_metadata() # {id: {path: ..., ...}}
known_ids = set(video_map.keys())
# Fetch Lost Media IDs
with get_db() as conn:
lost_rows = conn.execute("SELECT video_id FROM lost_media").fetchall()
lost_ids = {row["video_id"] for row in lost_rows}
results = {
"unindexed": [],
"redundant": [],
"rescue": []
"rescue": [],
"lost": []
}
# Helper to check if file is video
@ -491,13 +511,20 @@ def scan_for_unindexed_videos():
vid_id = extract_id_from_filename(video_file.name)
if vid_id and vid_id not in known_ids:
results["unindexed"].append({
# Check if it is known LOST media
file_info = {
"path": str(video_file),
"filename": video_file.name,
"video_id": vid_id,
"type": "source_orphan",
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
})
"size_mb": round(video_file.stat().st_size / (1024*1024), 2),
"ta_source": "Source Dir"
}
if vid_id in lost_ids:
results["lost"].append(file_info)
else:
results["unindexed"].append(file_info)
# --- Scan TARGET_DIR (Legacy "Pinchflat" Check) ---
if TARGET_DIR.exists():
@ -587,25 +614,23 @@ def recover_video_metadata(filepath):
try:
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
log(f" ⚠️ yt-dlp failed (Video likely deleted). Generating offline metadata...")
# START OFFLINE GENERATION
# Create a minimal .info.json manually
offline_meta = {
"id": vid_id,
"title": src_path.stem.replace(f" [{vid_id}]", ""),
"description": "Recovered by TA-Organizerr (Offline Mode)",
"uploader": src_path.parent.name, # Guess channel from folder name
"channel_id": "UC_UNKNOWN", # We can't know this without online check
"upload_date": "20000101", # Unknown
"thumbnail": "", # No thumbnail
"webpage_url": f"https://www.youtube.com/watch?v={vid_id}",
}
with open(dest_json, 'w') as f:
json.dump(offline_meta, f, indent=4)
log(" ✅ Generated offline metadata.")
else:
log(" ✅ Fetched online metadata.")
# Check if the metadata file was actually created
if dest_json.exists() and dest_json.stat().st_size > 0:
log(f" ✅ Metadata fetched successfully (ignoring exit code {result.returncode}).")
elif result.returncode != 0:
error_msg = result.stderr.strip() or "Unknown Error"
log(f" ⚠️ yt-dlp failed (Exit Code {result.returncode}). Error: {error_msg}")
# Smart Detection (future work): only a "Video unavailable" error truly means
# the media is lost; network errors may be transient and arguably should not
# mark the file as lost. For now, record every failure in Lost Media, where
# the user can choose to Force-import or Delete the file.
with get_db() as conn:
conn.execute("INSERT OR REPLACE INTO lost_media (video_id, filepath) VALUES (?, ?)", (vid_id, str(src_path)))
conn.commit()
return False, f"yt-dlp failed: {error_msg} (Added to Lost Media)"
# 2. Copy/Symlink Video File
try:
@ -954,12 +979,15 @@ def api_recovery_start():
if not filepath:
return jsonify({"error": "No filepath provided"}), 400
def run_recovery():
# Run synchronously to give user immediate feedback per file
success, msg = recover_video_metadata(filepath)
log(f"Recovery Result for {filepath}: {msg}")
threading.Thread(target=run_recovery).start()
return jsonify({"message": "Recovery started", "status": "started"})
return jsonify({
"message": msg,
"success": success,
"status": "completed" if success else "failed"
})
@app.route("/api/recovery/delete", methods=["POST"])
@requires_auth
@ -974,18 +1002,105 @@ def api_recovery_delete():
if not p.exists() or not p.is_file():
return jsonify({"error": "File not found"}), 404
# Safety Check: Never delete anything from SOURCE_DIR via this endpoint
if str(SOURCE_DIR) in str(p.resolve()):
return jsonify({"error": "Safety Block: Cannot delete files from Source Config."}), 403
# Safety check: deletion from SOURCE_DIR is permitted only for files the user
# has explicitly acted on — i.e. confirmed Lost Media entries or files flagged
# as redundant against TARGET_DIR.
vid_id = extract_id_from_filename(p.name)
# Check if this ID is in lost_media
is_lost = False
if vid_id:
with get_db() as conn:
row = conn.execute("SELECT 1 FROM lost_media WHERE video_id = ?", (vid_id,)).fetchone()
if row: is_lost = True
# If the file is in SOURCE_DIR and not in lost_media (e.g. the user deletes an
# unindexed source file from the Redundant tab), we still allow the deletion
# but log it so the action is auditable.
try:
p.unlink()
log(f"🗑️ Deleted redundant file: {filepath}")
# Cleanup Lost Media Table
if vid_id:
with get_db() as conn:
conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,))
conn.commit()
log(f"🗑️ Deleted file: {filepath}")
return jsonify({"success": True, "message": "File deleted"})
except Exception as e:
log(f"❌ Delete failed: {e}")
return jsonify({"error": str(e)}), 500
@app.route('/api/recovery/force', methods=['POST'])
@requires_auth
def api_recovery_force():
    """Force-import a Lost Media file using locally generated offline metadata.

    The video is assumed to be deleted/private on YouTube, so a minimal
    ``.info.json`` is synthesized in IMPORT_DIR, the video file is hardlinked
    (or copied) there, and the entry is removed from the ``lost_media`` table.

    Expects a JSON body: ``{"filepath": "<path to video file>"}``.
    Returns 200 with ``{"success": true}``, or 4xx/5xx with ``{"error": ...}``.
    """
    data = request.json
    filepath = data.get('filepath')
    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400

    log(f"💪 Force Importing (Lost Media): {Path(filepath).name}")
    src_path = Path(filepath).resolve()
    if not src_path.exists():
        return jsonify({"error": "File not found"}), 404

    vid_id = extract_id_from_filename(src_path.name)
    if not vid_id:
        return jsonify({"error": "Could not extract ID"}), 400

    # 1. Generate offline metadata (the real metadata is gone from YouTube).
    IMPORT_DIR.mkdir(parents=True, exist_ok=True)
    json_path = IMPORT_DIR / f"{src_path.stem}.info.json"
    offline_meta = {
        "id": vid_id,
        "title": f"Offline Import - {src_path.stem}",
        "uploader": "Unknown (Lost Media)",
        "upload_date": datetime.now().strftime("%Y%m%d"),
        "description": "Imported via TA Organizerr Force Import (Lost Media)",
        "webpage_url": f"https://www.youtube.com/watch?v={vid_id}",
        "view_count": 0,
        "like_count": 0,
        "duration": 0
    }
    # NOTE: the redundant mid-function `import json` was removed — the module
    # already uses `json.dump` elsewhere, so `json` is in scope at module level.
    try:
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(offline_meta, f, indent=4)
        log(" 📝 Generated offline metadata.")

        # 2. Link/Copy Video into IMPORT_DIR.
        dest_video = IMPORT_DIR / src_path.name
        # Guard against data loss: if the file already lives in IMPORT_DIR,
        # unlinking dest_video would delete the only copy before the hardlink
        # is created (and the subsequent os.link would then fail).
        if dest_video.exists() and dest_video.samefile(src_path):
            log(" ℹ️ Video already in import dir.")
        else:
            if dest_video.exists():
                dest_video.unlink()
            try:
                # Hardlink is instant and space-free on the same filesystem.
                os.link(src_path, dest_video)
                log(" 🔗 Hardlinked video.")
            except OSError:
                # Cross-device link not possible — fall back to a full copy.
                shutil.copy2(src_path, dest_video)
                log(" ©️ Copied video.")

        # 3. Clean up lost_media table so the file no longer shows as lost.
        with get_db() as conn:
            conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,))
            conn.commit()

        return jsonify({"success": True, "message": "Force import successful"})
    except Exception as e:
        log(f" ❌ Force import failed: {e}")
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
# Start scheduler in background thread
thread = threading.Thread(target=scheduler, daemon=True)

View file

@ -234,6 +234,12 @@
id="badge-redundant">0</span>
</button>
</li>
<li class="nav-item">
<button class="nav-link text-white" data-bs-toggle="tab" data-bs-target="#tab-lost">
<i class="bi bi-question-circle"></i> Lost Media <span
class="badge bg-secondary ms-1" id="badge-lost">0</span>
</button>
</li>
</ul>
<div class="tab-content">
@ -309,6 +315,31 @@
</table>
</div>
</div>
<!-- Lost Media Files -->
<div class="tab-pane fade" id="tab-lost">
<p class="text-warning small"><strong>VIDEO DELETED:</strong> These files were not found
on YouTube.
You can Force Import them using offline metadata or Delete them.</p>
<div class="table-responsive" style="max-height: 400px;">
<table class="table table-dark table-striped table-hover mb-0">
<thead>
<tr>
<th>Video ID</th>
<th>Filename</th>
<th>Size</th>
<th>Action</th>
</tr>
</thead>
<tbody id="tbody-lost">
<tr>
<td colspan="4" class="text-center text-muted">Click Scan to begin...
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
@ -442,40 +473,49 @@
recoveryModal.show();
}
let recoveryPollInterval = null;
async function scanRecoveryFiles() {
const loadingRow = '<tr><td colspan="4" class="text-center"><div class="spinner-border text-primary" role="status"></div> Scanning in background... (This may take a minute)</td></tr>';
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant'];
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost'];
ids.forEach(id => {
const el = document.getElementById(id);
if (el) el.innerHTML = loadingRow;
});
// CLEAR EXISTING INTERVAL IF ANY
if (recoveryPollInterval) clearInterval(recoveryPollInterval);
try {
// 1. Kick off the scan
await fetch('/api/recovery/scan', { method: 'POST' });
// 2. Poll for results
const pollInterval = setInterval(async () => {
recoveryPollInterval = setInterval(async () => {
try {
const res = await fetch('/api/recovery/poll');
const state = await res.json();
if (state.status === 'done') {
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
renderResults(state.results);
} else if (state.status === 'error') {
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
alert("Scan Error: " + state.results);
resetTables("Error: " + state.results);
} else if (state.status === 'idle') {
// Scan state lost (server restart?)
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
alert("Scan state lost (Server Restarted?). Please try again.");
resetTables("Scan stopped / State lost.");
}
// If 'scanning', keep polling...
} catch (e) {
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
console.error("Poll error", e);
}
}, 2000);
@ -486,7 +526,7 @@
}
function resetTables(msg) {
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant'];
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost'];
ids.forEach(id => {
const el = document.getElementById(id);
if (el) el.innerHTML = `<tr><td colspan="4" class="text-center text-muted">${msg}</td></tr>`;
@ -502,7 +542,7 @@
<td><code>${f.video_id}</code></td>
<td title="${f.path}"><small>${f.filename}</small></td>
<td>${f.size_mb} MB</td>
<td><button class="btn btn-sm btn-success" onclick="startRecovery('${cleanPath}')"><i class="bi bi-cloud-arrow-up"></i> Recover</button></td>
<td><button class="btn btn-sm btn-success" onclick="startRecovery('${cleanPath}', this)"><i class="bi bi-cloud-arrow-up"></i> Recover</button></td>
</tr>`;
}
if (type === 'rescue') {
@ -510,7 +550,7 @@
<td><code>${f.video_id}</code></td>
<td title="${f.path}"><small>${f.filename}</small></td>
<td class="text-danger small">Missing from: ${f.ta_source}</td>
<td><button class="btn btn-sm btn-danger" onclick="startRecovery('${cleanPath}')"><i class="bi bi-life-preserver"></i> RESCUE</button></td>
<td><button class="btn btn-sm btn-danger" onclick="startRecovery('${cleanPath}', this)"><i class="bi bi-life-preserver"></i> RESCUE</button></td>
</tr>`;
}
if (type === 'redundant') {
@ -521,28 +561,84 @@
<td><button class="btn btn-sm btn-outline-secondary" onclick="deleteFile('${cleanPath}')"><i class="bi bi-trash"></i> Delete</button></td>
</tr>`;
}
if (type === 'lost') {
return `<tr>
<td><code>${f.video_id}</code></td>
<td title="${f.path}"><small>${f.filename}</small></td>
<td>${f.size_mb} MB</td>
<td>
<button class="btn btn-sm btn-warning" onclick="forceImport('${cleanPath}')" title="Generate offline metadata"><i class="bi bi-lightning-charge"></i> Force</button>
<button class="btn btn-sm btn-outline-danger ms-1" onclick="deleteFile('${cleanPath}')"><i class="bi bi-trash"></i></button>
</td>
</tr>`;
}
};
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant'];
ids.forEach(id => document.getElementById(id).innerHTML = '');
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost'];
ids.forEach(id => {
const el = document.getElementById(id);
if (el) el.innerHTML = '';
});
// Update Badges
document.getElementById('badge-unindexed').innerText = data.unindexed.length;
document.getElementById('badge-rescue').innerText = data.rescue.length;
document.getElementById('badge-redundant').innerText = data.redundant.length;
document.getElementById('badge-unindexed').innerText = data.unindexed ? data.unindexed.length : 0;
document.getElementById('badge-rescue').innerText = data.rescue ? data.rescue.length : 0;
document.getElementById('badge-redundant').innerText = data.redundant ? data.redundant.length : 0;
document.getElementById('badge-lost').innerText = data.lost ? data.lost.length : 0;
// Populate
data.unindexed.forEach(f => document.getElementById('tbody-unindexed').innerHTML += renderRow(f, 'unindexed'));
data.rescue.forEach(f => document.getElementById('tbody-rescue').innerHTML += renderRow(f, 'rescue'));
data.redundant.forEach(f => document.getElementById('tbody-redundant').innerHTML += renderRow(f, 'redundant'));
// Populate - OPTIMIZED (Build string once)
const unindexedRows = (data.unindexed || []).map(f => renderRow(f, 'unindexed')).join('');
document.getElementById('tbody-unindexed').innerHTML = unindexedRows;
if (data.unindexed.length === 0) document.getElementById('tbody-unindexed').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No unindexed files found.</td></tr>';
if (data.rescue.length === 0) document.getElementById('tbody-rescue').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No rescue candidates found.</td></tr>';
if (data.redundant.length === 0) document.getElementById('tbody-redundant').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No duplicates found.</td></tr>';
const rescueRows = (data.rescue || []).map(f => renderRow(f, 'rescue')).join('');
document.getElementById('tbody-rescue').innerHTML = rescueRows;
const redundantRows = (data.redundant || []).map(f => renderRow(f, 'redundant')).join('');
document.getElementById('tbody-redundant').innerHTML = redundantRows;
const lostRows = (data.lost || []).map(f => renderRow(f, 'lost')).join('');
if (document.getElementById('tbody-lost')) document.getElementById('tbody-lost').innerHTML = lostRows;
if (!data.unindexed || data.unindexed.length === 0) document.getElementById('tbody-unindexed').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No unindexed files found.</td></tr>';
if (!data.rescue || data.rescue.length === 0) document.getElementById('tbody-rescue').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No rescue candidates found.</td></tr>';
if (!data.redundant || data.redundant.length === 0) document.getElementById('tbody-redundant').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No duplicates found.</td></tr>';
if ((!data.lost || data.lost.length === 0) && document.getElementById('tbody-lost')) document.getElementById('tbody-lost').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No lost media found.</td></tr>';
}
async function startRecovery(filepath) {
// Import a Lost Media file using locally generated (offline) metadata.
// Confirms with the user, POSTs the filepath to the backend, and refreshes
// the recovery tables when the import succeeds.
async function forceImport(filepath) {
    const warning = "FORCE IMPORT: Use offline metadata?\n\nThis will import the file even if it is deleted/private on YouTube. Metadata might be incomplete.";
    if (!confirm(warning)) return;
    try {
        const response = await fetch('/api/recovery/force', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ filepath })
        });
        const payload = await response.json();
        if (!payload.success) {
            alert("Error: " + (payload.error || payload.message));
            return;
        }
        alert("Force import successful! Refreshing list...");
        scanRecoveryFiles();
    } catch (err) {
        alert("Error: " + err);
    }
}
async function startRecovery(filepath, btn) {
console.log("startRecovery clicked for:", filepath);
if (!confirm("Start recovery for this file? This will try to fetch metadata and move it to the Import folder.")) return;
// Show loading state
// If btn is not passed (legacy call), try to find it via event, closely.
if (!btn && typeof event !== 'undefined' && event) {
btn = event.target.closest('button');
}
const originalHtml = btn ? btn.innerHTML : 'Recover';
if (btn) {
btn.innerHTML = '<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> ...';
btn.disabled = true;
}
try {
const res = await fetch('/api/recovery/start', {
method: 'POST',
@ -550,8 +646,20 @@
body: JSON.stringify({ filepath })
});
const data = await res.json();
alert(data.message || "Recovery started! Check logs.");
} catch (e) { alert("Error: " + e); }
alert(data.message);
// Refresh the list to reflect changes (e.g. moved to Lost Media)
scanRecoveryFiles();
} catch (e) {
alert("Error: " + e);
} finally {
if (btn) {
btn.innerHTML = originalHtml;
btn.disabled = false;
}
}
}
async function deleteFile(filepath) {