"""Build the JSON index of seismic node recordings.

The script walks the data roots for ``*.h5`` files, groups them by node id
and recording date, attaches node positions read from a CSV export, and
writes the result as a single JSON document.
"""
import os
import re
import json
import csv
from pathlib import Path
from datetime import datetime

try:
    from tqdm import tqdm
except ImportError:
    # tqdm is not used by anything in this script; keep the name available
    # but do not make the indexer fail when the package is not installed.
    tqdm = None

# File names look like "..._b<node id>_..._<10-digit value>.h5"; the 10-digit
# value is interpreted as a Unix timestamp in seconds.
FILENAME_PATTERN = re.compile(r'_b(\d+)_.*?_(\d{10})\.h5$', re.IGNORECASE)
# Parent folders whose name starts with a September 2020 date override the
# date derived from the file timestamp.
DATE_DIR_PATTERN = re.compile(r'2020-09-\d{2}')
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")


def _first_column(headers, *names):
    """Return the index of the first of *names* found in *headers*, else None."""
    for name in names:
        if name in headers:
            return headers.index(name)
    return None


def load_pos(csv_path=None):
    """Read node positions from the positions CSV.

    The header row is expected on the 4th line of the file and data rows
    start on the 5th. As-laid coordinates are preferred; pre-plot
    coordinates are used when the as-laid columns are absent. Depth defaults
    to ``0.0`` when there is no ``Aslaid Depth`` column.

    Args:
        csv_path: Path of the CSV to read. Defaults to ``POSITIONS_CSV``.

    Returns:
        A dict mapping node id (str) to
        ``{'easting': float, 'northing': float, 'depth': float}``. The dict
        is empty when the file is missing, too short, unparsable, or lacks
        the required columns. Rows that cannot be parsed are skipped.
    """
    csv_path = POSITIONS_CSV if csv_path is None else Path(csv_path)
    if not csv_path.exists():
        return {}

    # The csv module handles quoted fields that contain commas, which a
    # plain split(',') would cut in the wrong place.
    with open(csv_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
        try:
            rows = list(csv.reader(f))
        except csv.Error as exc:
            print(f"Cannot parse {csv_path}: {exc}")
            return {}

    if len(rows) < 5:
        return {}

    headers = [cell.strip() for cell in rows[3]]
    node_col = _first_column(headers, 'NodeCode')
    east_col = _first_column(headers, 'Aslaid Easting', 'Preplot Easting')
    north_col = _first_column(headers, 'Aslaid Northing', 'Preplot Northing')
    depth_col = _first_column(headers, 'Aslaid Depth')
    if node_col is None or east_col is None or north_col is None:
        return {}

    positions = {}
    for row in rows[4:]:
        try:
            node_id = row[node_col].strip()
            easting = float(row[east_col])
            northing = float(row[north_col])
            depth = float(row[depth_col]) if depth_col is not None else 0.0
        except (IndexError, ValueError):
            # Short row or non-numeric coordinate: ignore this row only.
            continue
        if not node_id:
            # A row without a node code cannot be matched to any file.
            continue
        positions[node_id] = {'easting': easting, 'northing': northing, 'depth': depth}
    return positions


def _recording_date(h5_file, ts):
    """Return the date string under which *h5_file* is indexed.

    The nearest parent folder whose name starts with a date matching
    ``DATE_DIR_PATTERN`` wins, and only the date part of the folder name is
    used. Otherwise the date is derived from the timestamp *ts*.
    """
    for parent in h5_file.parents:
        found = DATE_DIR_PATTERN.match(parent.name)
        if found:
            return found.group(0)
    # NOTE(review): fromtimestamp() uses the machine's local timezone, so the
    # fallback date can differ between hosts - confirm this is intended.
    return datetime.fromtimestamp(ts).strftime('%Y-%m-%d')


def scan(roots=None, positions_csv=None, output_index=None):
    """Scan the data roots and write the recordings index as JSON.

    Every ``*.h5`` file whose name matches ``FILENAME_PATTERN`` is recorded
    under its node id and date. Nodes that appear only in the positions CSV
    are added with no dates and ``hasDates`` set to ``False``.

    Args:
        roots: Iterable of directories to scan recursively. Defaults to
            ``DATA_ROOTS``.
        positions_csv: Positions CSV passed to ``load_pos``. Defaults to
            ``POSITIONS_CSV``.
        output_index: Path of the JSON file to write. Defaults to
            ``OUTPUT_INDEX``. Missing parent directories are created.
    """
    roots = DATA_ROOTS if roots is None else [Path(r) for r in roots]
    output_index = OUTPUT_INDEX if output_index is None else Path(output_index)

    pos = load_pos(positions_csv)
    index = {}
    file_count = 0

    for root in roots:
        print(f"Scanning {root}...")
        for h5_file in root.rglob("*.h5"):
            match = FILENAME_PATTERN.search(h5_file.name)
            if not match:
                continue

            nid, ts = match.group(1), int(match.group(2))
            date_str = _recording_date(h5_file, ts)

            if nid not in index:
                index[nid] = {
                    'id': nid,
                    'position': pos.get(nid),
                    'dates': {},
                    'hasDates': True,
                }
            index[nid]['dates'].setdefault(date_str, []).append({
                'path': str(h5_file),
                'timestamp': ts,
                'channels': ['ch0', 'ch1', 'ch2', 'ch3'],
            })
            file_count += 1

    # Nodes known from the positions file but without any recording found.
    for nid, p in pos.items():
        if nid not in index:
            index[nid] = {'id': nid, 'position': p, 'dates': {}, 'hasDates': False}

    full = {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': 200,
        'nodes': index,
        'dates': sorted({d for node in index.values() for d in node['dates']}),
    }

    # Create the output folder on first run instead of failing in open().
    output_index.parent.mkdir(parents=True, exist_ok=True)
    with open(output_index, 'w', encoding='utf-8') as f:
        json.dump(full, f, indent=2)

    print(f"Index: {file_count} files, {len(index)} nodes, {len(full['dates'])} dates.")


if __name__ == '__main__':
    scan()