Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage
This commit is contained in:
231
scripts/index_h5_files.py
Executable file
231
scripts/index_h5_files.py
Executable file
@@ -0,0 +1,231 @@
|
||||
"""
|
||||
Script d'indexation des fichiers HDF5 sismiques.
|
||||
Parcourt les dossiers de données, extrait les métadonnées (node_id, date, canaux)
|
||||
et génère un index JSON utilisé par l'API backend.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# Pattern used to pull metadata out of an H5 file name.
# Example: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
#      or: auto_255_125334_b4_rsn13696_seq1_1599045513.h5
# scan_h5_files reads group 3 (digits after "_b") as the node id and
# group 4 (the trailing ten digits) as a Unix timestamp.
FILENAME_PATTERN = re.compile(
    r'auto_(\d+)_(\d{6})_b(\d+).*?_(\d{10})\.h5$',
    re.IGNORECASE,
)

# Root folders that hold the H5 data (one folder per acquisition day).
DATA_ROOTS = [
    Path(folder)
    for folder in (
        r"F:\2020-09-12",
        r"F:\2020-09-13",
        r"F:\2020-09-14",
        r"F:\2020-09-15",
        r"F:\2020-09-16",
        r"F:\2020-09-17",
        r"F:\2020-09-18",
        r"F:\2020-09-19",
        r"F:\2020-09-21",
        r"F:\2020-09-22",
        r"F:\2020-09-23",
    )
]

# CSV file with the node positions.
POSITIONS_CSV = Path(r"F:\Copie de SETE_AUV_DARFV4-Copier(1).csv")

# Output location of the generated JSON index.
OUTPUT_INDEX = Path(r"F:\seismic_webapp\data\index.json")
|
||||
|
||||
|
||||
def load_node_positions(csv_path: Path) -> Dict[str, Dict[str, Any]]:
    """
    Load node positions from the positions CSV file.

    The first three lines of the file are skipped; the fourth line is read as
    the column header and data rows start on the fifth line. "Aslaid"
    coordinates (measured positions) are used when the header has an
    ``Aslaid Easting`` column, otherwise the "Preplot" (planned) ones. When a
    required column name is missing, fixed column indices are used instead.

    A row is skipped when it is too short to hold the node code and both
    coordinates, when its node code is empty, when a value is not numeric, or
    when easting or northing is empty or zero.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A dict mapping node code to
        ``{'easting': float, 'northing': float, 'depth': float}``. ``depth``
        is 0.0 when there is no depth column or the cell is empty. The dict
        is empty when the file has fewer than five lines.
    """
    positions: Dict[str, Dict[str, Any]] = {}

    with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()

    # Line 4 (index 3) holds the real headers, so at least one data line
    # after it is needed for the file to be useful.
    if len(lines) < 5:
        return positions

    # Parse with the csv module so quoted fields that contain commas do not
    # shift the columns (a plain split(',') would).
    rows = csv.reader(lines[3:])
    headers = [name.strip() for name in next(rows, [])]

    # Locate the relevant columns, preferring Aslaid (measured) positions
    # over Preplot (planned) ones.
    try:
        node_code_idx = headers.index('NodeCode')
        if 'Aslaid Easting' in headers:
            easting_idx = headers.index('Aslaid Easting')
            northing_idx = headers.index('Aslaid Northing')
            depth_idx = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None
            print("Utilisation des coordonnées Aslaid (positions réelles)")
        else:
            easting_idx = headers.index('Preplot Easting')
            northing_idx = headers.index('Preplot Northing')
            depth_idx = headers.index('Preplot Depth') if 'Preplot Depth' in headers else None
            print("Utilisation des coordonnées Preplot (positions planifiées)")
    except ValueError as e:
        print(f"Colonne manquante dans le CSV: {e}")
        # Fall back on the known column layout (Aslaid).
        node_code_idx = 3
        easting_idx = 9    # Aslaid Easting
        northing_idx = 10  # Aslaid Northing
        depth_idx = 11     # Aslaid Depth

    last_required_idx = max(node_code_idx, easting_idx, northing_idx)

    for parts in rows:
        if len(parts) <= last_required_idx:
            continue

        node_code = parts[node_code_idx].strip()
        if not node_code:
            continue

        # Depth is optional: compare against None (index 0 is a valid
        # column) and tolerate rows that stop before the depth column.
        if depth_idx is not None and depth_idx < len(parts):
            depth_cell = parts[depth_idx].strip()
        else:
            depth_cell = ''

        try:
            easting = float(parts[easting_idx]) if parts[easting_idx].strip() else None
            northing = float(parts[northing_idx]) if parts[northing_idx].strip() else None
            depth = float(depth_cell) if depth_cell else 0.0
        except ValueError:
            continue

        # Both an empty cell (None) and a zero coordinate are treated as
        # "no position", as this filter has always done.
        if not easting or not northing:
            continue

        positions[node_code] = {
            'easting': easting,
            'northing': northing,
            'depth': depth,
        }

    print(f"Chargé {len(positions)} positions de nodes")
    return positions
|
||||
|
||||
|
||||
def scan_h5_files(data_roots: List[Path]) -> Dict[str, Any]:
    """
    Walk the data folders and index every H5 file found.

    Each ``*.h5`` file under a root (searched recursively) is matched against
    ``FILENAME_PATTERN``; when that fails, a looser pattern that only needs a
    ``_b<digits>_`` part and a trailing ten-digit number is tried. Files that
    match neither are ignored. Roots that do not exist are reported and
    skipped.

    Args:
        data_roots: Folders to scan.

    Returns:
        A dict ``node_id -> date ('YYYY-MM-DD') -> list of file entries``.
        Each entry has the keys ``path``, ``timestamp``, ``channels`` and
        ``size_bytes``; entries of a given date are ordered by timestamp,
        then path, so the result does not depend on traversal order.
    """
    # Looser fallback pattern, compiled once rather than for every file.
    fallback_pattern = re.compile(r'_b(\d+)_.*?(\d{10})\.h5$', re.IGNORECASE)

    index: Dict[str, Any] = {}
    file_count = 0

    for root in data_roots:
        if not root.exists():
            print(f"Dossier non trouvé: {root}")
            continue

        print(f"Scan de {root}...")

        for h5_file in root.rglob("*.h5"):
            match = FILENAME_PATTERN.search(h5_file.name)
            if match:
                node_id = match.group(3)
                timestamp = int(match.group(4))
            else:
                simple_match = fallback_pattern.search(h5_file.name)
                if not simple_match:
                    continue
                node_id = simple_match.group(1)
                timestamp = int(simple_match.group(2))

            # Convert the timestamp to a calendar date.
            # NOTE(review): fromtimestamp() uses the machine's local time
            # zone, so the date can differ between hosts - confirm intended.
            date_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')

            # Channel detection is not implemented yet: ch0-ch3 is assumed
            # for every file.
            channels = ['ch0', 'ch1', 'ch2', 'ch3']

            # A file can vanish or be unreadable between listing and stat();
            # record a size of 0 instead of aborting the whole scan.
            try:
                size_bytes = h5_file.stat().st_size
            except OSError:
                size_bytes = 0

            # Structure: node_id -> date -> list of files.
            index.setdefault(node_id, {}).setdefault(date_str, []).append({
                'path': str(h5_file),
                'timestamp': timestamp,
                'channels': channels,
                'size_bytes': size_bytes,
            })

            file_count += 1

    # Directory traversal order is not guaranteed; sort for a stable index.
    for dates in index.values():
        for entries in dates.values():
            entries.sort(key=lambda entry: (entry['timestamp'], entry['path']))

    print(f"Indexé {file_count} fichiers H5")
    return index
|
||||
|
||||
|
||||
def build_full_index(positions: Dict, files_index: Dict) -> Dict[str, Any]:
    """
    Combine node positions and the per-node file index into one structure.

    Every node id present in either input gets an entry. Nodes are inserted
    in a deterministic order (numeric ids by value, then other ids
    alphabetically) so the serialized index is stable from run to run.

    Args:
        positions: Mapping ``node_id -> position dict``.
        files_index: Mapping ``node_id -> date -> list of file entries``.

    Returns:
        A dict with the keys:
        ``generated_at`` (ISO timestamp of the call), ``sample_rate_hz``
        (fixed value 200), ``nodes`` (``node_id -> {'id', 'position',
        'dates'}``, where ``position`` is ``None`` for nodes without a known
        position and ``dates`` is empty for nodes without files) and
        ``dates`` (sorted list of every date that has at least one node).
    """
    def _node_sort_key(node_id):
        # Numeric ids sort by value ("2" before "10"); anything else follows.
        text = str(node_id)
        if text.isdecimal():
            return (0, int(text), text)
        return (1, 0, text)

    nodes: Dict[str, Any] = {}
    all_dates = set()

    # Iterating the raw set would give an order that changes between runs
    # (string hashing is randomized), so sort the ids first.
    for node_id in sorted(set(files_index) | set(positions), key=_node_sort_key):
        node_dates = files_index.get(node_id, {})
        nodes[node_id] = {
            'id': node_id,
            'position': positions.get(node_id),
            'dates': node_dates,
        }
        all_dates.update(node_dates)

    return {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': 200,
        'nodes': nodes,
        'dates': sorted(all_dates),
    }
|
||||
|
||||
|
||||
def main():
    """
    Run the whole indexing pipeline.

    Loads the node positions, scans the data folders for H5 files, merges
    both into the full index and writes it as JSON to ``OUTPUT_INDEX``
    (creating the parent folder when needed). Progress is printed to stdout.
    """
    print("=== Indexation des fichiers HDF5 sismiques ===\n")

    # Step 1: node positions from the CSV.
    print("1. Chargement des positions des nodes...")
    node_positions = load_node_positions(POSITIONS_CSV)

    # Step 2: H5 files found under the data roots.
    print("\n2. Scan des fichiers H5...")
    h5_index = scan_h5_files(DATA_ROOTS)

    # Step 3: merge both into the final structure.
    print("\n3. Construction de l'index...")
    full_index = build_full_index(node_positions, h5_index)

    # Step 4: write the JSON file.
    print(f"\n4. Sauvegarde vers {OUTPUT_INDEX}...")
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_INDEX.open('w', encoding='utf-8') as out_file:
        json.dump(full_index, out_file, indent=2, ensure_ascii=False)

    print(f"\nTerminé! Index généré avec {len(full_index['nodes'])} nodes et {len(full_index['dates'])} dates.")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user