"""Scan HDF5 recordings under DATA_ROOTS and write a per-node JSON index.

The index groups every usable ``*.h5`` file by the node ID embedded in its
file name and attaches the node position read from POSITIONS_CSV when one is
available.
"""
import csv
import json
import os
import re
from datetime import datetime
from pathlib import Path

import h5py
from tqdm import tqdm

DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")
SAMPLE_RATE = 200  # written to the index as 'sample_rate_hz'

# 0-based row numbers inside the positions CSV: the column names sit on the
# fourth row and the node records start on the fifth.
_HEADER_ROW = 3
_FIRST_DATA_ROW = 4

# The node ID is the run of digits between "_b" and the next "_" in the name.
# NOTE(review): the captured digits are compared verbatim with the CSV
# 'NodeCode' values (no zero-padding normalisation) - confirm both sides use
# the same formatting.
_NODE_ID_RE = re.compile(r'_b(\d+)_')


def _first_present(headers, *names):
    """Return the column index of the first of *names* found in *headers*.

    Raises:
        ValueError: if none of *names* is a column of *headers*.
    """
    for name in names:
        if name in headers:
            return headers.index(name)
    raise ValueError(f"none of {names!r} found in CSV header")


def load_pos():
    """Load node positions from POSITIONS_CSV.

    Returns:
        A dict mapping node code (str) to
        ``{'easting': float, 'northing': float, 'depth': float}``.
        As-laid coordinates are preferred and the pre-plot ones are used when
        the as-laid columns are absent; depth defaults to 0.0 when there is no
        'Aslaid Depth' column. An empty dict is returned when the file is
        missing, too short, unparsable, or lacks the required columns.
    """
    if not POSITIONS_CSV.exists():
        return {}

    try:
        # newline='' lets the csv module handle quoted fields and line
        # endings itself, as its documentation requires.
        with open(POSITIONS_CSV, 'r', encoding='utf-8', errors='replace',
                  newline='') as f:
            rows = list(csv.reader(f))
    except csv.Error:
        return {}

    if len(rows) <= _FIRST_DATA_ROW:
        return {}

    headers = [cell.strip() for cell in rows[_HEADER_ROW]]
    try:
        node_col = headers.index('NodeCode')
        east_col = _first_present(headers, 'Aslaid Easting', 'Preplot Easting')
        north_col = _first_present(headers, 'Aslaid Northing', 'Preplot Northing')
    except ValueError:
        return {}
    depth_col = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None

    positions = {}
    for row in rows[_FIRST_DATA_ROW:]:
        try:
            nid = row[node_col].strip()
            entry = {
                'easting': float(row[east_col]),
                'northing': float(row[north_col]),
                'depth': float(row[depth_col]) if depth_col is not None else 0.0,
            }
        except (IndexError, ValueError):
            # Short row or non-numeric cell: skip this record only.
            continue
        if nid:
            positions[nid] = entry
    return positions


def _recording_span(h5_path):
    """Return ``(start, end)`` for the recording in *h5_path*, or None.

    None is returned when the file has no 'adc_values' dataset or when that
    dataset has no non-zero 'timestamp' attribute. ``end`` is ``start`` plus
    the dataset's first-axis length divided by SAMPLE_RATE.
    NOTE(review): this presumes 'timestamp' is expressed in seconds - confirm.
    """
    with h5py.File(h5_path, 'r') as f:
        if 'adc_values' not in f:
            return None
        ds = f['adc_values']
        start_ts = int(ds.attrs.get('timestamp', 0))
        if start_ts == 0:
            return None
        return start_ts, start_ts + ds.shape[0] / SAMPLE_RATE


def scan():
    """Index every ``*.h5`` file under DATA_ROOTS and write OUTPUT_INDEX.

    Files whose name carries no ``_b<digits>_`` node ID, or that hold no
    usable 'adc_values' dataset, are ignored. Files that cannot be read are
    reported and skipped so that one corrupt file does not abort the scan.
    """
    pos = load_pos()
    index = {}
    file_count = 0
    print(f"Scanning H5 files... \nPositions loaded: {len(pos)}")

    all_files = []
    for root in DATA_ROOTS:
        all_files.extend(root.rglob("*.h5"))

    for h5_path in tqdm(all_files):
        match = _NODE_ID_RE.search(h5_path.name)
        if not match:
            continue
        nid = match.group(1)

        try:
            span = _recording_span(h5_path)
        except Exception as exc:
            # Best-effort per file, but visible; unlike a bare except this
            # lets KeyboardInterrupt / SystemExit stop the scan.
            tqdm.write(f"Skipping unreadable file {h5_path}: {exc!r}")
            continue
        if span is None:
            continue
        start_ts, end_ts = span

        if nid not in index:
            index[nid] = {
                'id': nid,
                'position': pos.get(nid),
                'files': [],
            }
        index[nid]['files'].append({
            'path': str(h5_path),
            'start': start_ts,
            'end': end_ts,
            'channels': ['ch0', 'ch1', 'ch2', 'ch3'],
        })
        file_count += 1

    # Filesystem traversal order is arbitrary; keep each node chronological.
    for node in index.values():
        node['files'].sort(key=lambda entry: entry['start'])

    # Save the index (create the target directory on first run).
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w', encoding='utf-8') as f:
        json.dump({
            'generated_at': datetime.now().isoformat(),
            'sample_rate_hz': SAMPLE_RATE,
            'nodes': index,
        }, f)

    with_position = sum(1 for node in index.values() if node['position'] is not None)
    print(f"Index généré: {file_count} fichiers, {len(index)} nodes "
          f"({with_position} avec positions).")


if __name__ == '__main__':
    scan()