# Files
# seisee/index_time_ranges_v2.py
#
# 88 lines
# 3.0 KiB
# Python

import csv
import json
import os
import re
from datetime import datetime
from pathlib import Path

import h5py
from tqdm import tqdm
# Root directories that scan() searches recursively for "*.h5" files.
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
# CSV file read by load_pos(); its header row is on the 4th line, data rows follow.
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
# Destination of the JSON index written by scan().
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")
# Samples per second: scan() divides a dataset's length by this value to get
# its duration, and emits it in the index as 'sample_rate_hz'.
SAMPLE_RATE = 200
def _find_column(headers, *candidates):
    """Return the index of the first candidate name present in headers, else None."""
    for name in candidates:
        if name in headers:
            return headers.index(name)
    return None


def load_pos(csv_path=None):
    """Load node positions from the positions CSV.

    The file is expected to carry its header row on the 4th line; every
    following line is a data row. Easting/northing are taken from the
    'Aslaid ...' columns when those columns exist, otherwise from the
    'Preplot ...' columns. Depth comes from 'Aslaid Depth' when that column
    exists, otherwise it is 0.0.

    Args:
        csv_path: Optional path of the CSV file to read. Defaults to the
            module-level ``POSITIONS_CSV``.

    Returns:
        A dict mapping the node code (str) to
        ``{'easting': float, 'northing': float, 'depth': float}``.
        An empty dict is returned when the file does not exist, has fewer
        than 5 lines, or lacks a required column. Rows that are too short or
        contain non-numeric coordinates are skipped.
    """
    path = POSITIONS_CSV if csv_path is None else Path(csv_path)
    if not path.exists():
        return {}

    # csv.reader (rather than str.split) so quoted fields are parsed correctly;
    # newline='' is what the csv module requires for file objects.
    with open(path, 'r', encoding='utf-8', errors='replace', newline='') as f:
        rows = list(csv.reader(f))
    if len(rows) < 5:
        return {}

    # Strip each header cell so stray spaces around a column name do not
    # prevent it from being recognised.
    headers = [cell.strip() for cell in rows[3]]
    node_col = _find_column(headers, 'NodeCode')
    east_col = _find_column(headers, 'Aslaid Easting', 'Preplot Easting')
    north_col = _find_column(headers, 'Aslaid Northing', 'Preplot Northing')
    depth_col = _find_column(headers, 'Aslaid Depth')
    if node_col is None or east_col is None or north_col is None:
        return {}

    positions = {}
    for row in rows[4:]:
        try:
            nid = row[node_col].strip()
            positions[nid] = {
                'easting': float(row[east_col]),
                'northing': float(row[north_col]),
                'depth': float(row[depth_col]) if depth_col is not None else 0.0,
            }
        except (IndexError, ValueError):
            # Short/blank row or non-numeric value: skip this row only.
            continue
    return positions
def scan():
    """Index every HDF5 recording found under DATA_ROOTS and write OUTPUT_INDEX.

    For each "*.h5" file whose name contains ``_b<digits>_``, the digits are
    used as the node id. The file's 'adc_values' dataset supplies the start
    time (its 'timestamp' attribute) and the number of samples (first
    dimension); the end time is ``start + samples / SAMPLE_RATE``, i.e. the
    timestamp is treated as seconds. Files without a node id in their name,
    without an 'adc_values' dataset, or with a missing/zero timestamp are
    ignored. Files that cannot be read are reported and skipped.

    The resulting JSON document has the keys 'generated_at',
    'sample_rate_hz' and 'nodes'; each node entry holds its 'id', its
    'position' (from load_pos(), or null when unknown) and its 'files',
    sorted by start time.

    Returns:
        None. The index is written to OUTPUT_INDEX and a summary is printed.
    """
    pos = load_pos()
    index = {}
    file_count = 0
    skipped = 0
    print(f"Scanning H5 files... Positions loaded: {len(pos)}")

    all_files = []
    for root in DATA_ROOTS:
        all_files.extend(root.rglob("*.h5"))

    # Compiled once: the same pattern is applied to every file name.
    node_pattern = re.compile(r'_b(\d+)_')

    for h5_path in tqdm(all_files):
        match = node_pattern.search(h5_path.name)
        if not match:
            continue
        nid = match.group(1)

        try:
            with h5py.File(h5_path, 'r') as f:
                if 'adc_values' not in f:
                    continue
                ds = f['adc_values']
                start_ts = int(ds.attrs.get('timestamp', 0))
                n_samples = ds.shape[0]
        except Exception as exc:
            # Best-effort scan: one unreadable/corrupt file must not abort the
            # run, but it is reported instead of being silently dropped.
            # KeyboardInterrupt is deliberately not caught here.
            skipped += 1
            tqdm.write(f"Skipping {h5_path}: {exc!r}")
            continue

        if start_ts == 0:
            continue
        end_ts = start_ts + n_samples / SAMPLE_RATE

        if nid not in index:
            index[nid] = {
                'id': nid,
                'position': pos.get(nid),
                'files': []
            }
        index[nid]['files'].append({
            'path': str(h5_path),
            'start': start_ts,
            'end': end_ts,
            'channels': ['ch0', 'ch1', 'ch2', 'ch3']
        })
        file_count += 1

    # rglob order depends on the filesystem; sort so the index is deterministic
    # and each node's files are in chronological order.
    for node in index.values():
        node['files'].sort(key=lambda entry: (entry['start'], entry['path']))

    # Save the index (create the destination directory if it does not exist).
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w', encoding='utf-8') as f:
        json.dump({
            'generated_at': datetime.now().isoformat(),
            'sample_rate_hz': SAMPLE_RATE,
            'nodes': index
        }, f)

    with_pos = sum(1 for node in index.values() if node['position'] is not None)
    print(f"Index généré: {file_count} fichiers, {len(index)} nodes ({with_pos} avec positions).")
    if skipped:
        print(f"{skipped} fichiers illisibles ignorés.")
# Build the index only when this file is executed as a script, not on import.
if __name__ == '__main__':
    scan()