feat: Add Lost Media recovery, safety checks, and relative docker paths

This commit is contained in:
wander 2026-01-05 07:36:27 -05:00
parent d96cebbf4b
commit 4476779adb
4 changed files with 292 additions and 67 deletions

4
.gitignore vendored
View file

@ -3,4 +3,6 @@ __pycache__/
.env
*.tar
*.gz
ta-organizerr.tar.gz
# Docker
Dockerfile
browser-extension/

View file

@ -4,10 +4,10 @@ services:
image: ghcr.io/salpertio/ta-organizerr:latest
container_name: ta-organizer
volumes:
- /path/to/your/source:/app/source
- /path/to/your/target:/app/target
- /path/to/your/data:/app/data
- /path/to/your/import:/app/import
- ./source:/app/source
- ./target:/app/target
- ./data:/app/data
- ./import:/app/import
ports:
- "8002:5000"
environment:

View file

@ -35,7 +35,7 @@ DB_PATH.parent.mkdir(parents=True, exist_ok=True)
@contextmanager
def get_db():
conn = sqlite3.connect(DB_PATH)
conn = sqlite3.connect(DB_PATH, timeout=30)
conn.row_factory = sqlite3.Row
try:
yield conn
@ -44,7 +44,7 @@ def get_db():
def init_db():
with get_db() as conn:
conn.execute("""
conn.executescript("""
CREATE TABLE IF NOT EXISTS videos (
video_id TEXT PRIMARY KEY,
title TEXT,
@ -53,11 +53,25 @@ def init_db():
symlink TEXT,
status TEXT,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
);
CREATE TABLE IF NOT EXISTS lost_media (
video_id TEXT PRIMARY KEY,
filepath TEXT,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
conn.commit()
init_db()
# Retry loop for DB initialization to prevent crash on SMB lock.
# Network filesystems (SMB/NFS) can transiently hold the SQLite lock while the
# container starts; keep retrying instead of crashing at import time.
import time

while True:
    try:
        init_db()
        print("Database initialized successfully.", flush=True)
        break
    except Exception as e:
        # Broad catch is deliberate: any failure here (locked DB, slow mount)
        # should be retried rather than abort container startup.
        print(f"Database initialization failed (retrying in 10s): {e}", flush=True)
        time.sleep(10)
# Global State
processed_videos = []
@ -472,10 +486,16 @@ def scan_for_unindexed_videos():
video_map = fetch_all_metadata() # {id: {path: ..., ...}}
known_ids = set(video_map.keys())
# Fetch Lost Media IDs
with get_db() as conn:
lost_rows = conn.execute("SELECT video_id FROM lost_media").fetchall()
lost_ids = {row["video_id"] for row in lost_rows}
results = {
"unindexed": [],
"redundant": [],
"rescue": []
"rescue": [],
"lost": []
}
# Helper to check if file is video
@ -491,13 +511,20 @@ def scan_for_unindexed_videos():
vid_id = extract_id_from_filename(video_file.name)
if vid_id and vid_id not in known_ids:
results["unindexed"].append({
# Check if it is known LOST media
file_info = {
"path": str(video_file),
"filename": video_file.name,
"video_id": vid_id,
"type": "source_orphan",
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
})
"size_mb": round(video_file.stat().st_size / (1024*1024), 2),
"ta_source": "Source Dir"
}
if vid_id in lost_ids:
results["lost"].append(file_info)
else:
results["unindexed"].append(file_info)
# --- Scan TARGET_DIR (Legacy "Pinchflat" Check) ---
if TARGET_DIR.exists():
@ -587,25 +614,23 @@ def recover_video_metadata(filepath):
try:
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
log(f" ⚠️ yt-dlp failed (Video likely deleted). Generating offline metadata...")
# START OFFLINE GENERATION
# Create a minimal .info.json manually
offline_meta = {
"id": vid_id,
"title": src_path.stem.replace(f" [{vid_id}]", ""),
"description": "Recovered by TA-Organizerr (Offline Mode)",
"uploader": src_path.parent.name, # Guess channel from folder name
"channel_id": "UC_UNKNOWN", # We can't know this without online check
"upload_date": "20000101", # Unknown
"thumbnail": "", # No thumbnail
"webpage_url": f"https://www.youtube.com/watch?v={vid_id}",
}
with open(dest_json, 'w') as f:
json.dump(offline_meta, f, indent=4)
log(" ✅ Generated offline metadata.")
else:
log(" ✅ Fetched online metadata.")
# Check if the metadata file was actually created
if dest_json.exists() and dest_json.stat().st_size > 0:
log(f" ✅ Metadata fetched successfully (ignoring exit code {result.returncode}).")
elif result.returncode != 0:
error_msg = result.stderr.strip() or "Unknown Error"
log(f" ⚠️ yt-dlp failed (Exit Code {result.returncode}). Error: {error_msg}")
# Smart Detection (future work): only a "Video unavailable" error truly means
# the media is lost; network errors may be transient and arguably should not
# mark the file as lost. For now, record every failure in Lost Media, where
# the user can choose to Force-import or Delete the file.
with get_db() as conn:
conn.execute("INSERT OR REPLACE INTO lost_media (video_id, filepath) VALUES (?, ?)", (vid_id, str(src_path)))
conn.commit()
return False, f"yt-dlp failed: {error_msg} (Added to Lost Media)"
# 2. Copy/Symlink Video File
try:
@ -954,12 +979,15 @@ def api_recovery_start():
if not filepath:
return jsonify({"error": "No filepath provided"}), 400
def run_recovery():
# Run synchronously to give user immediate feedback per file
success, msg = recover_video_metadata(filepath)
log(f"Recovery Result for {filepath}: {msg}")
threading.Thread(target=run_recovery).start()
return jsonify({"message": "Recovery started", "status": "started"})
return jsonify({
"message": msg,
"success": success,
"status": "completed" if success else "failed"
})
@app.route("/api/recovery/delete", methods=["POST"])
@requires_auth
@ -974,18 +1002,105 @@ def api_recovery_delete():
if not p.exists() or not p.is_file():
return jsonify({"error": "File not found"}), 404
# Safety Check: Never delete anything from SOURCE_DIR via this endpoint
if str(SOURCE_DIR) in str(p.resolve()):
return jsonify({"error": "Safety Block: Cannot delete files from Source Config."}), 403
# Safety check: deletion from SOURCE_DIR is permitted only for files the user
# has explicitly acted on — i.e. confirmed Lost Media entries or files flagged
# as redundant against TARGET_DIR.
vid_id = extract_id_from_filename(p.name)
# Check if this ID is in lost_media
is_lost = False
if vid_id:
with get_db() as conn:
row = conn.execute("SELECT 1 FROM lost_media WHERE video_id = ?", (vid_id,)).fetchone()
if row: is_lost = True
# If the file is in SOURCE_DIR and not in lost_media (e.g. the user deletes an
# unindexed source file from the Redundant tab), we still allow the deletion
# but log it so the action is auditable.
try:
p.unlink()
log(f"🗑️ Deleted redundant file: {filepath}")
# Cleanup Lost Media Table
if vid_id:
with get_db() as conn:
conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,))
conn.commit()
log(f"🗑️ Deleted file: {filepath}")
return jsonify({"success": True, "message": "File deleted"})
except Exception as e:
log(f"❌ Delete failed: {e}")
return jsonify({"error": str(e)}), 500
@app.route('/api/recovery/force', methods=['POST'])
@requires_auth
def api_recovery_force():
    """Force-import a Lost Media file using locally generated offline metadata.

    The video is assumed to be deleted/private on YouTube, so a minimal
    ``.info.json`` is synthesized in IMPORT_DIR, the video file is hardlinked
    (or copied) there, and the entry is removed from the ``lost_media`` table.

    Expects a JSON body: ``{"filepath": "<path to video file>"}``.
    Returns 200 with ``{"success": true}``, or 4xx/5xx with ``{"error": ...}``.
    """
    data = request.json
    filepath = data.get('filepath')
    if not filepath:
        return jsonify({"error": "No filepath provided"}), 400

    log(f"💪 Force Importing (Lost Media): {Path(filepath).name}")
    src_path = Path(filepath).resolve()
    if not src_path.exists():
        return jsonify({"error": "File not found"}), 404

    vid_id = extract_id_from_filename(src_path.name)
    if not vid_id:
        return jsonify({"error": "Could not extract ID"}), 400

    # 1. Generate offline metadata (the real metadata is gone from YouTube).
    IMPORT_DIR.mkdir(parents=True, exist_ok=True)
    json_path = IMPORT_DIR / f"{src_path.stem}.info.json"
    offline_meta = {
        "id": vid_id,
        "title": f"Offline Import - {src_path.stem}",
        "uploader": "Unknown (Lost Media)",
        "upload_date": datetime.now().strftime("%Y%m%d"),
        "description": "Imported via TA Organizerr Force Import (Lost Media)",
        "webpage_url": f"https://www.youtube.com/watch?v={vid_id}",
        "view_count": 0,
        "like_count": 0,
        "duration": 0
    }
    # NOTE: the redundant mid-function `import json` was removed — the module
    # already uses `json.dump` elsewhere, so `json` is in scope at module level.
    try:
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(offline_meta, f, indent=4)
        log(" 📝 Generated offline metadata.")

        # 2. Link/Copy Video into IMPORT_DIR.
        dest_video = IMPORT_DIR / src_path.name
        # Guard against data loss: if the file already lives in IMPORT_DIR,
        # unlinking dest_video would delete the only copy before the hardlink
        # is created (and the subsequent os.link would then fail).
        if dest_video.exists() and dest_video.samefile(src_path):
            log(" ℹ️ Video already in import dir.")
        else:
            if dest_video.exists():
                dest_video.unlink()
            try:
                # Hardlink is instant and space-free on the same filesystem.
                os.link(src_path, dest_video)
                log(" 🔗 Hardlinked video.")
            except OSError:
                # Cross-device link not possible — fall back to a full copy.
                shutil.copy2(src_path, dest_video)
                log(" ©️ Copied video.")

        # 3. Clean up lost_media table so the file no longer shows as lost.
        with get_db() as conn:
            conn.execute("DELETE FROM lost_media WHERE video_id = ?", (vid_id,))
            conn.commit()

        return jsonify({"success": True, "message": "Force import successful"})
    except Exception as e:
        log(f" ❌ Force import failed: {e}")
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
# Start scheduler in background thread
thread = threading.Thread(target=scheduler, daemon=True)

View file

@ -234,6 +234,12 @@
id="badge-redundant">0</span>
</button>
</li>
<li class="nav-item">
<button class="nav-link text-white" data-bs-toggle="tab" data-bs-target="#tab-lost">
<i class="bi bi-question-circle"></i> Lost Media <span
class="badge bg-secondary ms-1" id="badge-lost">0</span>
</button>
</li>
</ul>
<div class="tab-content">
@ -309,6 +315,31 @@
</table>
</div>
</div>
<!-- Lost Media Files -->
<div class="tab-pane fade" id="tab-lost">
<p class="text-warning small"><strong>VIDEO DELETED:</strong> These files were not found
on YouTube.
You can Force Import them using offline metadata or Delete them.</p>
<div class="table-responsive" style="max-height: 400px;">
<table class="table table-dark table-striped table-hover mb-0">
<thead>
<tr>
<th>Video ID</th>
<th>Filename</th>
<th>Size</th>
<th>Action</th>
</tr>
</thead>
<tbody id="tbody-lost">
<tr>
<td colspan="4" class="text-center text-muted">Click Scan to begin...
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
@ -442,40 +473,49 @@
recoveryModal.show();
}
let recoveryPollInterval = null;
async function scanRecoveryFiles() {
const loadingRow = '<tr><td colspan="4" class="text-center"><div class="spinner-border text-primary" role="status"></div> Scanning in background... (This may take a minute)</td></tr>';
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant'];
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost'];
ids.forEach(id => {
const el = document.getElementById(id);
if (el) el.innerHTML = loadingRow;
});
// CLEAR EXISTING INTERVAL IF ANY
if (recoveryPollInterval) clearInterval(recoveryPollInterval);
try {
// 1. Kick off the scan
await fetch('/api/recovery/scan', { method: 'POST' });
// 2. Poll for results
const pollInterval = setInterval(async () => {
recoveryPollInterval = setInterval(async () => {
try {
const res = await fetch('/api/recovery/poll');
const state = await res.json();
if (state.status === 'done') {
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
renderResults(state.results);
} else if (state.status === 'error') {
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
alert("Scan Error: " + state.results);
resetTables("Error: " + state.results);
} else if (state.status === 'idle') {
// Scan state lost (server restart?)
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
alert("Scan state lost (Server Restarted?). Please try again.");
resetTables("Scan stopped / State lost.");
}
// If 'scanning', keep polling...
} catch (e) {
clearInterval(pollInterval);
clearInterval(recoveryPollInterval);
recoveryPollInterval = null;
console.error("Poll error", e);
}
}, 2000);
@ -486,7 +526,7 @@
}
function resetTables(msg) {
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant'];
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost'];
ids.forEach(id => {
const el = document.getElementById(id);
if (el) el.innerHTML = `<tr><td colspan="4" class="text-center text-muted">${msg}</td></tr>`;
@ -502,7 +542,7 @@
<td><code>${f.video_id}</code></td>
<td title="${f.path}"><small>${f.filename}</small></td>
<td>${f.size_mb} MB</td>
<td><button class="btn btn-sm btn-success" onclick="startRecovery('${cleanPath}')"><i class="bi bi-cloud-arrow-up"></i> Recover</button></td>
<td><button class="btn btn-sm btn-success" onclick="startRecovery('${cleanPath}', this)"><i class="bi bi-cloud-arrow-up"></i> Recover</button></td>
</tr>`;
}
if (type === 'rescue') {
@ -510,7 +550,7 @@
<td><code>${f.video_id}</code></td>
<td title="${f.path}"><small>${f.filename}</small></td>
<td class="text-danger small">Missing from: ${f.ta_source}</td>
<td><button class="btn btn-sm btn-danger" onclick="startRecovery('${cleanPath}')"><i class="bi bi-life-preserver"></i> RESCUE</button></td>
<td><button class="btn btn-sm btn-danger" onclick="startRecovery('${cleanPath}', this)"><i class="bi bi-life-preserver"></i> RESCUE</button></td>
</tr>`;
}
if (type === 'redundant') {
@ -521,28 +561,84 @@
<td><button class="btn btn-sm btn-outline-secondary" onclick="deleteFile('${cleanPath}')"><i class="bi bi-trash"></i> Delete</button></td>
</tr>`;
}
if (type === 'lost') {
return `<tr>
<td><code>${f.video_id}</code></td>
<td title="${f.path}"><small>${f.filename}</small></td>
<td>${f.size_mb} MB</td>
<td>
<button class="btn btn-sm btn-warning" onclick="forceImport('${cleanPath}')" title="Generate offline metadata"><i class="bi bi-lightning-charge"></i> Force</button>
<button class="btn btn-sm btn-outline-danger ms-1" onclick="deleteFile('${cleanPath}')"><i class="bi bi-trash"></i></button>
</td>
</tr>`;
}
};
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant'];
ids.forEach(id => document.getElementById(id).innerHTML = '');
const ids = ['tbody-unindexed', 'tbody-rescue', 'tbody-redundant', 'tbody-lost'];
ids.forEach(id => {
const el = document.getElementById(id);
if (el) el.innerHTML = '';
});
// Update Badges
document.getElementById('badge-unindexed').innerText = data.unindexed.length;
document.getElementById('badge-rescue').innerText = data.rescue.length;
document.getElementById('badge-redundant').innerText = data.redundant.length;
document.getElementById('badge-unindexed').innerText = data.unindexed ? data.unindexed.length : 0;
document.getElementById('badge-rescue').innerText = data.rescue ? data.rescue.length : 0;
document.getElementById('badge-redundant').innerText = data.redundant ? data.redundant.length : 0;
document.getElementById('badge-lost').innerText = data.lost ? data.lost.length : 0;
// Populate
data.unindexed.forEach(f => document.getElementById('tbody-unindexed').innerHTML += renderRow(f, 'unindexed'));
data.rescue.forEach(f => document.getElementById('tbody-rescue').innerHTML += renderRow(f, 'rescue'));
data.redundant.forEach(f => document.getElementById('tbody-redundant').innerHTML += renderRow(f, 'redundant'));
// Populate - OPTIMIZED (Build string once)
const unindexedRows = (data.unindexed || []).map(f => renderRow(f, 'unindexed')).join('');
document.getElementById('tbody-unindexed').innerHTML = unindexedRows;
if (data.unindexed.length === 0) document.getElementById('tbody-unindexed').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No unindexed files found.</td></tr>';
if (data.rescue.length === 0) document.getElementById('tbody-rescue').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No rescue candidates found.</td></tr>';
if (data.redundant.length === 0) document.getElementById('tbody-redundant').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No duplicates found.</td></tr>';
const rescueRows = (data.rescue || []).map(f => renderRow(f, 'rescue')).join('');
document.getElementById('tbody-rescue').innerHTML = rescueRows;
const redundantRows = (data.redundant || []).map(f => renderRow(f, 'redundant')).join('');
document.getElementById('tbody-redundant').innerHTML = redundantRows;
const lostRows = (data.lost || []).map(f => renderRow(f, 'lost')).join('');
if (document.getElementById('tbody-lost')) document.getElementById('tbody-lost').innerHTML = lostRows;
if (!data.unindexed || data.unindexed.length === 0) document.getElementById('tbody-unindexed').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No unindexed files found.</td></tr>';
if (!data.rescue || data.rescue.length === 0) document.getElementById('tbody-rescue').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No rescue candidates found.</td></tr>';
if (!data.redundant || data.redundant.length === 0) document.getElementById('tbody-redundant').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No duplicates found.</td></tr>';
if ((!data.lost || data.lost.length === 0) && document.getElementById('tbody-lost')) document.getElementById('tbody-lost').innerHTML = '<tr><td colspan="4" class="text-center text-muted">No lost media found.</td></tr>';
}
async function startRecovery(filepath) {
// Import a Lost Media file using locally generated (offline) metadata.
// Confirms with the user, POSTs the filepath to the backend, and refreshes
// the recovery tables when the import succeeds.
async function forceImport(filepath) {
    const warning = "FORCE IMPORT: Use offline metadata?\n\nThis will import the file even if it is deleted/private on YouTube. Metadata might be incomplete.";
    if (!confirm(warning)) return;
    try {
        const response = await fetch('/api/recovery/force', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ filepath })
        });
        const payload = await response.json();
        if (!payload.success) {
            alert("Error: " + (payload.error || payload.message));
            return;
        }
        alert("Force import successful! Refreshing list...");
        scanRecoveryFiles();
    } catch (err) {
        alert("Error: " + err);
    }
}
async function startRecovery(filepath, btn) {
console.log("startRecovery clicked for:", filepath);
if (!confirm("Start recovery for this file? This will try to fetch metadata and move it to the Import folder.")) return;
// Show loading state
// If btn is not passed (legacy call), try to find it via event, closely.
if (!btn && typeof event !== 'undefined' && event) {
btn = event.target.closest('button');
}
const originalHtml = btn ? btn.innerHTML : 'Recover';
if (btn) {
btn.innerHTML = '<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> ...';
btn.disabled = true;
}
try {
const res = await fetch('/api/recovery/start', {
method: 'POST',
@ -550,8 +646,20 @@
body: JSON.stringify({ filepath })
});
const data = await res.json();
alert(data.message || "Recovery started! Check logs.");
} catch (e) { alert("Error: " + e); }
alert(data.message);
// Refresh the list to reflect changes (e.g. moved to Lost Media)
scanRecoveryFiles();
} catch (e) {
alert("Error: " + e);
} finally {
if (btn) {
btn.innerHTML = originalHtml;
btn.disabled = false;
}
}
}
async function deleteFile(filepath) {