"""Scan the data roots for ``.h5`` recordings and write a per-node JSON index.

The index groups files by node id and by calendar date (both taken from the
file name) and attaches the node position read from a positions CSV.
"""
import csv
import json
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Optional

try:
    from tqdm import tqdm
except ImportError:
    # tqdm is not required by the code in this file; keep the name available
    # without making the package a hard dependency of the scan.
    def tqdm(iterable=None, *args, **kwargs):
        """Return *iterable* unchanged (stand-in used when tqdm is absent)."""
        return iterable

# Pattern for extracting the node id and timestamp from a file name.
# NOTE(review): scan_all() does not use this pattern; it applies the looser,
# case-sensitive _SCAN_PATTERN below. Confirm which one is intended.
FILENAME_PATTERN = re.compile(r'auto_.*?_b(\d+)_.*?_(\d{10})\.h5$', re.IGNORECASE)

# Pattern actually applied by scan_all(): group 1 is the node id, group 2 the
# 10-digit timestamp. Compiled once instead of once per scanned file.
_SCAN_PATTERN = re.compile(r'_b(\d+)_.*?(\d{10})\.h5$')

DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")


def _split_csv_line(line: str) -> List[str]:
    """Split one physical CSV line into cells, honouring quoted fields."""
    try:
        return next(csv.reader([line.strip()]), [])
    except csv.Error:
        # Unparseable line: an empty row makes the caller skip it.
        return []


def load_node_positions(csv_path) -> Dict[str, Dict[str, float]]:
    """Read node positions from the positions CSV.

    The header row is the fourth line of the file and data rows follow it.
    The 'Aslaid Easting' / 'Aslaid Northing' columns are preferred, with
    'Preplot Easting' / 'Preplot Northing' as fallback. Depth comes from
    'Aslaid Depth' when that column exists, otherwise it is 0.0.

    Args:
        csv_path: Path (or string path) of the CSV file.

    Returns:
        A dict mapping the 'NodeCode' cell to a dict with 'easting',
        'northing' and 'depth' floats. It is empty when the file is missing,
        has fewer than five lines, or lacks a required column. Rows that are
        too short or hold non-numeric values are skipped; when a node code
        appears more than once the last row wins.
    """
    positions: Dict[str, Dict[str, float]] = {}
    csv_path = Path(csv_path)
    if not csv_path.exists():
        return positions

    with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()
    if len(lines) < 5:
        return positions

    headers = _split_csv_line(lines[3])
    try:
        node_code_idx = headers.index('NodeCode')
        easting_idx = headers.index(
            'Aslaid Easting' if 'Aslaid Easting' in headers else 'Preplot Easting'
        )
        northing_idx = headers.index(
            'Aslaid Northing' if 'Aslaid Northing' in headers else 'Preplot Northing'
        )
    except ValueError:
        # A required column is missing from the header row.
        return positions
    # Resolved once here rather than once per data row.
    depth_idx = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None

    for line in lines[4:]:
        parts = _split_csv_line(line)
        try:
            nid = parts[node_code_idx].strip()
            positions[nid] = {
                'easting': float(parts[easting_idx]),
                'northing': float(parts[northing_idx]),
                'depth': float(parts[depth_idx]) if depth_idx is not None else 0.0,
            }
        except (IndexError, ValueError):
            # Short row or non-numeric cell: skip this row only.
            continue
    return positions


def scan_all(
    data_roots: Optional[Iterable] = None,
    positions_csv=None,
    output_index=None,
) -> None:
    """Scan the data roots for ``.h5`` files and write the JSON index.

    Every file whose name matches ``_b<node>_..<10 digits>.h5`` is recorded
    under its node id and under the date of its timestamp. Nodes that have a
    position but no file are added with ``hasDates`` set to False.

    Args:
        data_roots: Directories to scan recursively; defaults to DATA_ROOTS.
        positions_csv: Positions CSV path; defaults to POSITIONS_CSV.
        output_index: Destination JSON path; defaults to OUTPUT_INDEX. Its
            parent directory is created when missing.
    """
    roots = DATA_ROOTS if data_roots is None else data_roots
    csv_path = POSITIONS_CSV if positions_csv is None else Path(positions_csv)
    out_path = OUTPUT_INDEX if output_index is None else Path(output_index)

    index: Dict[str, dict] = {}
    pos = load_node_positions(csv_path)
    print(f"Positions chargées: {len(pos)}")

    file_count = 0
    for root in roots:
        root = Path(root)
        print(f"Scan de {root}...")
        for h5_file in root.rglob("*.h5"):
            # Extract the node id and timestamp from the file name.
            match = _SCAN_PATTERN.search(h5_file.name)
            if not match:
                continue
            node_id = match.group(1)
            ts = int(match.group(2))
            # fromtimestamp() converts using the machine's local timezone, so
            # the date a file is filed under depends on where the scan runs.
            date_str = datetime.fromtimestamp(ts).strftime('%Y-%m-%d')

            node = index.setdefault(
                node_id,
                {'id': node_id, 'position': pos.get(node_id), 'dates': {}, 'hasDates': True},
            )
            node['dates'].setdefault(date_str, []).append({
                'path': str(h5_file),
                'timestamp': ts,
                # Channel names are fixed here; they are not read from the file.
                'channels': ['ch0', 'ch1', 'ch2', 'ch3'],
                'size_bytes': h5_file.stat().st_size,
            })
            file_count += 1

    # Add the nodes that have a position but no files.
    for nid, p in pos.items():
        if nid not in index:
            index[nid] = {'id': nid, 'position': p, 'dates': {}, 'hasDates': False}

    full_index = {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': 200,
        'nodes': index,
        'dates': sorted({d for n in index.values() for d in n['dates']}),
    }

    # Writing must not fail just because the output directory does not exist yet.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(full_index, f, indent=2)

    print(f"Index total généré: {file_count} fichiers, {len(index)} nodes.")


if __name__ == '__main__':
    scan_all()