Files
seisee/scripts/index_h5_2026.py

65 lines
2.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Indexation des fichiers H5 format 2026 avec métadonnées complètes.
Génère un index JSON pour le viewer web.
"""
import json
import re
from datetime import datetime
from pathlib import Path

import h5py
# Directory whose '*.h5' files (top level only, no recursion) are indexed
# by index_h5_files().
H5_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5')
# JSON index file written by index_h5_files(); rewritten in full on each run.
OUTPUT = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5_index.json')
# Filename patterns, compiled once instead of being looked up for every file.
_NODE_ID_RE = re.compile(r'rsn(\d+)')
_DATE_RE = re.compile(r'_(\d{6})_')


def _parse_filename(name):
    """Return ``(node_id, date)`` extracted from an H5 file name.

    ``node_id`` is the run of digits following ``rsn`` (``'unknown'`` when the
    name has none). ``date`` is the first six-digit run enclosed by
    underscores (``''`` when the name has none).
    """
    node_match = _NODE_ID_RE.search(name)
    date_match = _DATE_RE.search(name)
    node_id = node_match.group(1) if node_match else 'unknown'
    date_str = date_match.group(1) if date_match else ''
    return node_id, date_str


def _describe_file(h5_file):
    """Build the index entry (a JSON-serialisable dict) for one H5 file.

    Reads the attributes of the file's ``metadata`` object and combines them
    with information derived from the file name and its size on disk.

    Raises:
        Any exception raised while opening the file or reading its
        attributes (e.g. a missing ``metadata`` object or attribute); the
        caller decides how to handle it.
    """
    node_id, date_str = _parse_filename(h5_file.name)
    with h5py.File(h5_file, 'r') as h5:
        attrs = h5['metadata'].attrs
        return {
            'filename': h5_file.name,
            'path': str(h5_file),
            'node_id': node_id,
            'date': date_str,
            'duration_sec': float(attrs['duration_sec']),
            'sample_rate': int(attrs['sample_rate_hz']),
            'channels': int(attrs['n_channels']),
            'samples': int(attrs['n_samples']),
            'size_mb': round(h5_file.stat().st_size / (1024 * 1024), 2),
            # NOTE(review): the channel layout is hard-coded (3 geophones +
            # 1 hydrophone) and is not derived from 'n_channels' -- confirm
            # this holds for every recording.
            'channel_info': [
                {'id': 1, 'type': 'geophone', 'unit': 'm/s', 'name': 'Geophone 1'},
                {'id': 2, 'type': 'geophone', 'unit': 'm/s', 'name': 'Geophone 2'},
                {'id': 3, 'type': 'geophone', 'unit': 'm/s', 'name': 'Geophone 3'},
                {'id': 4, 'type': 'hydrophone', 'unit': 'Pa', 'name': 'Hydrophone'},
            ],
        }


def index_h5_files(h5_dir=None, output=None):
    """Index every ``*.h5`` file of a directory into a single JSON file.

    Files are processed in sorted path order. A file that cannot be read, or
    that lacks the expected metadata, is reported on stdout and skipped so
    that one bad recording does not abort the whole run.

    Args:
        h5_dir: Directory to scan (top level only). Defaults to ``H5_DIR``.
        output: Path of the JSON index to write. Defaults to ``OUTPUT``.

    The written JSON object has the keys ``generated`` (local ISO timestamp),
    ``total_files``, ``total_duration_hours`` and ``files`` (one entry per
    successfully indexed file). A short summary is printed at the end.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for the default locations.
    h5_dir = H5_DIR if h5_dir is None else Path(h5_dir)
    output = OUTPUT if output is None else Path(output)

    files = []
    for h5_file in sorted(h5_dir.glob('*.h5')):
        try:
            files.append(_describe_file(h5_file))
        except Exception as e:
            # Deliberately broad: indexing is best effort per file.
            print(f'Error indexing {h5_file.name}: {e}')

    index = {
        'generated': datetime.now().isoformat(),
        'total_files': len(files),
        'total_duration_hours': sum(entry['duration_sec'] for entry in files) / 3600,
        'files': files,
    }
    output.write_text(json.dumps(index, indent=2), encoding='utf-8')
    print(f'✅ Indexed {len(files)} files → {output}')
    print(f'📊 Total duration: {index["total_duration_hours"]:.1f} hours')
# Script entry point: build the index only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    index_h5_files()