# Listing metadata from the file viewer this script was copied from:
# 97 lines, 3.4 KiB, Python.
import csv
import json
import os
import re
from datetime import datetime
from pathlib import Path

import h5py
from tqdm import tqdm
# Directories that scan() searches recursively for "*.h5" files.
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]

# CSV file read by load_pos() to obtain per-node coordinates.
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")

# JSON file written by scan().
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")

# Divisor used by scan() to turn a dataset length into a duration in seconds;
# also exported in the index under the 'sample_rate_hz' key.
SAMPLE_RATE = 200
def load_pos(csv_path=None):
    """Load node positions from the positions CSV.

    The fourth row of the file (index 3) is treated as the header row and
    every following row as data. The node identifier comes from the
    ``NodeCode`` column. Coordinates come from ``Aslaid Easting`` /
    ``Aslaid Northing`` when those columns exist, otherwise from
    ``Preplot Easting`` / ``Preplot Northing``. Depth comes from
    ``Aslaid Depth`` when that column exists, otherwise it is ``0.0``.

    Args:
        csv_path: Optional path of the CSV to read. Defaults to
            ``POSITIONS_CSV`` when omitted or ``None``.

    Returns:
        A dict mapping the node code (string) to a dict with the float
        values ``'easting'``, ``'northing'`` and ``'depth'``. An empty dict
        is returned when the file is missing, has fewer than five rows, or
        lacks a required column. Rows with missing or non-numeric values are
        skipped.
    """
    if csv_path is None:
        csv_path = POSITIONS_CSV

    try:
        # newline='' is what the csv module expects so that quoted fields
        # are parsed correctly; errors='replace' tolerates stray bytes.
        with open(Path(csv_path), 'r', encoding='utf-8', errors='replace',
                  newline='') as f:
            rows = list(csv.reader(f))
    except FileNotFoundError:
        return {}
    except csv.Error:
        # A file the parser cannot read yields no positions rather than
        # aborting the caller.
        return {}

    # Three preamble rows + one header row + at least one data row.
    if len(rows) < 5:
        return {}

    headers = [cell.strip() for cell in rows[3]]
    try:
        ni = headers.index('NodeCode')
        ei = headers.index(
            'Aslaid Easting' if 'Aslaid Easting' in headers else 'Preplot Easting'
        )
        oi = headers.index(
            'Aslaid Northing' if 'Aslaid Northing' in headers else 'Preplot Northing'
        )
    except ValueError:
        # A required column is absent.
        return {}
    di = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None

    positions = {}
    for parts in rows[4:]:
        try:
            nid = parts[ni].strip()
            if not nid:
                continue
            positions[nid] = {
                'easting': float(parts[ei]),
                'northing': float(parts[oi]),
                'depth': float(parts[di]) if di is not None else 0.0,
            }
        except (IndexError, ValueError):
            # Short row or non-numeric cell: skip this row only.
            continue
    return positions
def scan():
    """Index every H5 recording under DATA_ROOTS and write OUTPUT_INDEX.

    Each ``*.h5`` file whose name contains ``_b<digits>_`` is opened
    read-only. Files without an ``adc_values`` dataset, or whose dataset has
    a missing or zero ``timestamp`` attribute, are ignored. For the rest the
    start time is the ``timestamp`` attribute and the end time is the start
    plus ``shape[0] / SAMPLE_RATE`` seconds.

    The resulting JSON document has the keys ``generated_at``,
    ``sample_rate_hz``, ``nodes`` (node id -> ``id``, ``position`` from
    load_pos() or ``None``, and ``files``) and ``dates`` (sorted
    ``YYYY-MM-DD`` strings of all start times).

    Files that cannot be read are reported and skipped; they do not abort
    the scan. A summary is printed at the end.

    Returns:
        None. The index is written to ``OUTPUT_INDEX``.
    """
    pos = load_pos()
    nodes = {}
    all_dates = set()
    file_count = 0
    # Compiled once: the pattern is applied to every file name.
    node_id_re = re.compile(r'_b(\d+)_')

    print("🔍 Scanning all H5 files...")
    all_h5_files = []
    for root in DATA_ROOTS:
        all_h5_files.extend(root.rglob("*.h5"))

    for h5_path in tqdm(all_h5_files):
        match = node_id_re.search(h5_path.name)
        if not match:
            continue
        nid = match.group(1)

        try:
            with h5py.File(h5_path, 'r') as f:
                if 'adc_values' not in f:
                    continue
                ds = f['adc_values']
                start_ts = int(ds.attrs.get('timestamp', 0))
                if start_ts == 0:
                    continue
                # NOTE(review): assumes the first axis of 'adc_values' is the
                # sample axis — confirm against the recorder format.
                duration = ds.shape[0] / SAMPLE_RATE
            # NOTE(review): fromtimestamp() without tz uses the machine's
            # local time zone, so the calendar date depends on where the
            # scan runs — confirm that is intended.
            date_str = datetime.fromtimestamp(start_ts).strftime('%Y-%m-%d')
        except Exception as exc:
            # Best effort per file: an unreadable or malformed file is
            # reported and skipped. Unlike a bare ``except``, this lets
            # KeyboardInterrupt/SystemExit stop a long scan.
            tqdm.write(f"⚠️ Skipping {h5_path}: {exc!r}")
            continue

        end_ts = start_ts + duration

        # Add the date to the global set.
        all_dates.add(date_str)

        node = nodes.setdefault(nid, {
            'id': nid,
            'position': pos.get(nid),
            'files': [],
        })
        node['files'].append({
            'path': str(h5_path),
            'start': start_ts,
            'end': end_ts,
            'channels': ['ch0', 'ch1', 'ch2', 'ch3'],
        })
        file_count += 1

    # Save the complete index.
    dates = sorted(all_dates)
    result = {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': SAMPLE_RATE,
        'nodes': nodes,
        'dates': dates,
    }

    # Make sure the destination directory exists before writing.
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)

    print(f"✅ Indexing complete: {file_count} files, {len(nodes)} nodes, {len(all_dates)} dates.")
    print(f"📅 Dates covered: {dates}")
# Run the indexer only when the file is executed as a script.
if __name__ == '__main__':
    scan()