Implement Advanced Recovery with Safety Checks (Redundant vs Rescue detection)
This commit is contained in:
parent
07a7dd3c07
commit
ee7d07618d
1 changed files with 107 additions and 35 deletions
134
ta_symlink.py
134
ta_symlink.py
|
|
@ -459,47 +459,94 @@ def extract_id_from_filename(filename):
|
|||
|
||||
def scan_for_unindexed_videos():
|
||||
"""
|
||||
Scans SOURCE_DIR for files that are NOT in the TubeArchivist database/metadata.
|
||||
Returns a list of candidate files for recovery.
|
||||
Scans both SOURCE_DIR and TARGET_DIR for files.
|
||||
Classifies them as:
|
||||
- unindexed: Not in TA DB (Needs Import)
|
||||
- redundant: In TA DB AND Source exists (Safe Duplicate)
|
||||
- rescue: In TA DB BUT Source missing (Needs Rescue/Import)
|
||||
"""
|
||||
log("🔍 Scanning for unindexed files...")
|
||||
log("🔍 Scanning for unindexed and legacy files...")
|
||||
|
||||
# 1. Fetch current known IDs
|
||||
video_map = fetch_all_metadata()
|
||||
# 1. Fetch current known IDs and their source paths
|
||||
video_map = fetch_all_metadata() # {id: {path: ..., ...}}
|
||||
known_ids = set(video_map.keys())
|
||||
|
||||
unindexed = []
|
||||
results = {
|
||||
"unindexed": [],
|
||||
"redundant": [],
|
||||
"rescue": []
|
||||
}
|
||||
|
||||
if not SOURCE_DIR.exists():
|
||||
return []
|
||||
# Helper to check if file is video
|
||||
def is_video(f):
|
||||
return f.suffix.lower() in ['.mp4', '.mkv', '.webm', '.mov']
|
||||
|
||||
for channel_path in SOURCE_DIR.iterdir():
|
||||
if not channel_path.is_dir():
|
||||
continue
|
||||
# --- Scan SOURCE_DIR (Standard Orphan Check) ---
|
||||
if SOURCE_DIR.exists():
|
||||
for channel_path in SOURCE_DIR.iterdir():
|
||||
if not channel_path.is_dir(): continue
|
||||
for video_file in channel_path.glob("*.*"):
|
||||
if not is_video(video_file): continue
|
||||
|
||||
for video_file in channel_path.glob("*.*"):
|
||||
# Skip non-video files broadly (adjust extensions if needed)
|
||||
if video_file.suffix.lower() not in ['.mp4', '.mkv', '.webm', '.mov']:
|
||||
continue
|
||||
vid_id = extract_id_from_filename(video_file.name)
|
||||
if vid_id and vid_id not in known_ids:
|
||||
results["unindexed"].append({
|
||||
"path": str(video_file),
|
||||
"filename": video_file.name,
|
||||
"video_id": vid_id,
|
||||
"type": "source_orphan",
|
||||
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
|
||||
})
|
||||
|
||||
# Try to identify
|
||||
vid_id = extract_id_from_filename(video_file.name)
|
||||
# --- Scan TARGET_DIR (Legacy "Pinchflat" Check) ---
|
||||
if TARGET_DIR.exists():
|
||||
for channel_path in TARGET_DIR.iterdir():
|
||||
if not channel_path.is_dir(): continue
|
||||
for video_file in channel_path.glob("*.*"):
|
||||
if not is_video(video_file): continue
|
||||
|
||||
# If we found an ID and it's NOT in known_ids
|
||||
if vid_id and vid_id not in known_ids:
|
||||
unindexed.append({
|
||||
"path": str(video_file),
|
||||
"filename": video_file.name,
|
||||
"video_id": vid_id,
|
||||
"channel_folder": channel_path.name,
|
||||
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
|
||||
})
|
||||
elif not vid_id:
|
||||
# File without ID? Maybe worth listing too
|
||||
pass
|
||||
# We only care about REAL files, not symlinks
|
||||
if video_file.is_symlink():
|
||||
continue
|
||||
|
||||
log(f"✅ Found {len(unindexed)} unindexed video files.")
|
||||
return unindexed
|
||||
vid_id = extract_id_from_filename(video_file.name)
|
||||
|
||||
# Case 1: ID NOT in TA -> Recoverable
|
||||
if vid_id and vid_id not in known_ids:
|
||||
results["unindexed"].append({
|
||||
"path": str(video_file),
|
||||
"filename": video_file.name,
|
||||
"video_id": vid_id,
|
||||
"type": "target_realfile",
|
||||
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
|
||||
})
|
||||
|
||||
# Case 2: ID IS in TA
|
||||
elif vid_id:
|
||||
# Check if TA's source file actually exists
|
||||
ta_source_path = Path(video_map[vid_id]['filesystem_path'])
|
||||
|
||||
if ta_source_path.exists():
|
||||
# TA has it, Source exists. This file is REDUNDANT.
|
||||
results["redundant"].append({
|
||||
"path": str(video_file),
|
||||
"filename": video_file.name,
|
||||
"video_id": vid_id,
|
||||
"ta_source": str(ta_source_path),
|
||||
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
|
||||
})
|
||||
else:
|
||||
# TA has it, BUT source is MISSING. This file is a RESCUE candidate.
|
||||
results["rescue"].append({
|
||||
"path": str(video_file),
|
||||
"filename": video_file.name,
|
||||
"video_id": vid_id,
|
||||
"ta_source": str(ta_source_path), # Missing path
|
||||
"size_mb": round(video_file.stat().st_size / (1024 * 1024), 2)
|
||||
})
|
||||
|
||||
log(f"✅ Scan complete. Unindexed: {len(results['unindexed'])}, Redundant: {len(results['redundant'])}, Rescue: {len(results['rescue'])}")
|
||||
return results
|
||||
|
||||
def recover_video_metadata(filepath):
|
||||
"""
|
||||
|
|
@ -877,6 +924,31 @@ def api_recovery_start():
|
|||
threading.Thread(target=run_recovery).start()
|
||||
return jsonify({"message": "Recovery started", "status": "started"})
|
||||
|
||||
@app.route("/api/recovery/delete", methods=["POST"])
|
||||
@requires_auth
|
||||
def api_recovery_delete():
|
||||
data = request.get_json()
|
||||
filepath = data.get('filepath')
|
||||
|
||||
if not filepath:
|
||||
return jsonify({"error": "No filepath provided"}), 400
|
||||
|
||||
p = Path(filepath)
|
||||
if not p.exists() or not p.is_file():
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
# Safety Check: Never delete anything from SOURCE_DIR via this endpoint
|
||||
if str(SOURCE_DIR) in str(p.resolve()):
|
||||
return jsonify({"error": "Safety Block: Cannot delete files from Source Config."}), 403
|
||||
|
||||
try:
|
||||
p.unlink()
|
||||
log(f"🗑️ Deleted redundant file: {filepath}")
|
||||
return jsonify({"success": True, "message": "File deleted"})
|
||||
except Exception as e:
|
||||
log(f"❌ Delete failed: {e}")
|
||||
return jsonify({"error": str(e)}), 500
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Start scheduler in background thread
|
||||
thread = threading.Thread(target=scheduler, daemon=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue