Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage

2026-02-19 14:53:10 +01:00
parent 61b25ab734
commit bbd6a22b57
80 changed files with 27884 additions and 1 deletions
--- a/scripts/index_h5_files.py
+++ b/scripts/index_h5_files.py
@@ -0,0 +1,231 @@
+"""
+Script d'indexation des fichiers HDF5 sismiques.
+Parcourt les dossiers de données, extrait les métadonnées (node_id, date, canaux)
+et génère un index JSON utilisé par l'API backend.
+"""
+
+import os
+import re
+import json
+import csv
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List, Any
+
+# Pattern used to pull metadata out of an H5 file name.
+# Examples of names it matches:
+#   auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
+#   auto_255_125334_b4_rsn13696_seq1_1599045513.h5
+# Capture groups:
+#   1 - digits right after "auto_"
+#   2 - the following six-digit field
+#   3 - digits after "_b" (scan_h5_files uses this as the node id)
+#   4 - the ten digits just before ".h5" (scan_h5_files reads this as a
+#       Unix timestamp in seconds)
+# NOTE(review): the meaning of groups 1 and 2 is not shown in this file.
+FILENAME_PATTERN = re.compile(
+    r'auto_(\d+)_(\d{6})_b(\d+).*?_(\d{10})\.h5$',
+    re.IGNORECASE
+)
+
+# Root folders that contain the H5 data, one per acquisition day.
+# NOTE(review): there is no entry for 2020-09-20 - confirm that the gap is
+# intentional and not an omission.
+DATA_ROOTS = [
+    Path(r"F:\2020-09-12"),
+    Path(r"F:\2020-09-13"),
+    Path(r"F:\2020-09-14"),
+    Path(r"F:\2020-09-15"),
+    Path(r"F:\2020-09-16"),
+    Path(r"F:\2020-09-17"),
+    Path(r"F:\2020-09-18"),
+    Path(r"F:\2020-09-19"),
+    Path(r"F:\2020-09-21"),
+    Path(r"F:\2020-09-22"),
+    Path(r"F:\2020-09-23"),
+]
+
+# CSV file holding the node positions (read by load_node_positions).
+POSITIONS_CSV = Path(r"F:\Copie de SETE_AUV_DARFV4-Copier(1).csv")
+
+# Output location of the generated JSON index.
+OUTPUT_INDEX = Path(r"F:\seismic_webapp\data\index.json")
+
+
+def load_node_positions(csv_path: Path) -> Dict[str, Dict[str, Any]]:
+    """
+    Load node positions from the survey CSV.
+
+    The first three lines of the file are ignored, the fourth line holds the
+    column headers and data rows start on the fifth line. "Aslaid"
+    coordinates (real, measured positions) are preferred; "Preplot"
+    coordinates (planned positions) are used when the header has no
+    'Aslaid Easting' column. If a required column name is missing, the
+    known Aslaid column indices (3, 9, 10, 11) are used instead.
+
+    Args:
+        csv_path: Path of the positions CSV file.
+
+    Returns:
+        Mapping node code -> {'easting': float, 'northing': float,
+        'depth': float}. Rows with an empty node code, or with a missing or
+        non-numeric easting/northing, or a non-numeric depth, are skipped.
+        Depth defaults to 0.0 when the column or the value is absent.
+        An empty dict is returned when the file has fewer than five lines.
+
+    Raises:
+        OSError: If the file cannot be opened.
+    """
+    positions: Dict[str, Dict[str, Any]] = {}
+
+    # The csv module copes with quoted fields that contain commas, which a
+    # plain split(',') would cut in the wrong place. newline='' is what the
+    # csv documentation requires for files handed to csv.reader.
+    with open(csv_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
+        rows = list(csv.reader(f))
+
+    # Three preamble lines + one header line + at least one data line.
+    if len(rows) < 5:
+        return positions
+
+    headers = [name.strip() for name in rows[3]]
+
+    try:
+        node_code_idx = headers.index('NodeCode')
+        # Measured (Aslaid) positions take priority over planned (Preplot).
+        if 'Aslaid Easting' in headers:
+            easting_idx = headers.index('Aslaid Easting')
+            northing_idx = headers.index('Aslaid Northing')
+            depth_idx = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None
+            print("Utilisation des coordonnées Aslaid (positions réelles)")
+        else:
+            easting_idx = headers.index('Preplot Easting')
+            northing_idx = headers.index('Preplot Northing')
+            depth_idx = headers.index('Preplot Depth') if 'Preplot Depth' in headers else None
+            print("Utilisation des coordonnées Preplot (positions planifiées)")
+    except ValueError as e:
+        print(f"Colonne manquante dans le CSV: {e}")
+        # Fall back on the known Aslaid column layout.
+        node_code_idx, easting_idx, northing_idx, depth_idx = 3, 9, 10, 11
+
+    min_fields = max(node_code_idx, easting_idx, northing_idx) + 1
+
+    for row in rows[4:]:
+        if len(row) < min_fields:
+            continue
+
+        node_code = row[node_code_idx].strip()
+        if not node_code:
+            continue
+
+        easting_raw = row[easting_idx].strip()
+        northing_raw = row[northing_idx].strip()
+        if not easting_raw or not northing_raw:
+            continue
+
+        # Depth is optional: the column may be absent from the header, or
+        # the row may be too short to contain it. `is not None` matters
+        # because column index 0 is a valid index.
+        depth_raw = ''
+        if depth_idx is not None and depth_idx < len(row):
+            depth_raw = row[depth_idx].strip()
+
+        try:
+            easting = float(easting_raw)
+            northing = float(northing_raw)
+            depth = float(depth_raw) if depth_raw else 0.0
+        except ValueError:
+            continue
+
+        # Presence was checked on the raw text above, so a coordinate of
+        # exactly 0.0 is kept instead of being mistaken for "missing".
+        positions[node_code] = {
+            'easting': easting,
+            'northing': northing,
+            'depth': depth,
+        }
+
+    print(f"Chargé {len(positions)} positions de nodes")
+    return positions
+
+
+def scan_h5_files(data_roots: List[Path], *, tz=None) -> Dict[str, Any]:
+    """
+    Walk the data folders and index every H5 file whose name can be parsed.
+
+    File names are matched against FILENAME_PATTERN first; names it rejects
+    get a second chance with a looser "_b<node>_...<10 digits>.h5" pattern.
+    Files matching neither are ignored. The files themselves are not
+    opened: the channel list is assumed to be ch0-ch3 for every file.
+
+    Args:
+        data_roots: Folders scanned recursively for "*.h5" files. Folders
+            that do not exist are reported and skipped.
+        tz: Optional datetime.tzinfo used to convert the Unix timestamp of
+            the file name into a calendar date. The default (None) uses the
+            local time zone of the machine running the script, so the date
+            a file is filed under can differ between machines; pass e.g.
+            datetime.timezone.utc for a reproducible index.
+
+    Returns:
+        Nested mapping node_id -> 'YYYY-MM-DD' -> list of file records.
+        Each record has the keys 'path', 'timestamp', 'channels' and
+        'size_bytes' (0 when the size cannot be read).
+    """
+    # Looser pattern for names FILENAME_PATTERN rejects; compiled once here
+    # instead of being rebuilt through re.search() for every file.
+    fallback_pattern = re.compile(r'_b(\d+)_.*?(\d{10})\.h5$', re.IGNORECASE)
+
+    index: Dict[str, Any] = {}
+    file_count = 0
+
+    for root in data_roots:
+        if not root.exists():
+            print(f"Dossier non trouvé: {root}")
+            continue
+
+        print(f"Scan de {root}...")
+
+        for h5_file in root.rglob("*.h5"):
+            match = FILENAME_PATTERN.search(h5_file.name)
+            if match:
+                node_id = match.group(3)
+                timestamp = int(match.group(4))
+            else:
+                match = fallback_pattern.search(h5_file.name)
+                if not match:
+                    continue
+                node_id = match.group(1)
+                timestamp = int(match.group(2))
+
+            # With tz=None this is the local-time conversion.
+            date_str = datetime.fromtimestamp(timestamp, tz).strftime('%Y-%m-%d')
+
+            # A single stat() call; a file that vanished or is unreadable
+            # is still indexed, with size 0, instead of aborting the scan.
+            try:
+                size_bytes = h5_file.stat().st_size
+            except OSError:
+                size_bytes = 0
+
+            # Structure: node_id -> date -> list of files.
+            index.setdefault(node_id, {}).setdefault(date_str, []).append({
+                'path': str(h5_file),
+                'timestamp': timestamp,
+                # Channels are not detected yet; ch0-ch3 assumed by default.
+                'channels': ['ch0', 'ch1', 'ch2', 'ch3'],
+                'size_bytes': size_bytes,
+            })
+
+            file_count += 1
+
+    print(f"Indexé {file_count} fichiers H5")
+    return index
+
+
+def build_full_index(positions: Dict, files_index: Dict, sample_rate_hz: int = 200) -> Dict[str, Any]:
+    """
+    Merge the node positions and the per-node file index into one index.
+
+    Args:
+        positions: Mapping node_id -> position dict.
+        files_index: Mapping node_id -> date string -> list of file records.
+        sample_rate_hz: Value stored under 'sample_rate_hz' (default 200).
+
+    Returns:
+        Dict with the keys:
+          'generated_at'   - datetime.now().isoformat() at call time,
+          'sample_rate_hz' - the sample_rate_hz argument,
+          'nodes'          - node_id -> {'id', 'position', 'dates'} for every
+                             id found in either input; 'position' is None
+                             for nodes without a position and 'dates' is {}
+                             for nodes without files,
+          'dates'          - sorted list of every date string in files_index.
+    """
+    nodes: Dict[Any, Dict[str, Any]] = {}
+    all_dates = set()
+
+    # Iterating a bare set of strings yields a different order on every run
+    # (string hashing is randomised), which would reshuffle the generated
+    # JSON each time. Sorting keeps the output reproducible; key=str avoids
+    # a TypeError should the two inputs use differently typed ids.
+    for node_id in sorted(set(files_index) | set(positions), key=str):
+        node_dates = files_index.get(node_id, {})
+        all_dates.update(node_dates)
+        nodes[node_id] = {
+            'id': node_id,
+            'position': positions.get(node_id),
+            'dates': node_dates,
+        }
+
+    return {
+        'generated_at': datetime.now().isoformat(),
+        'sample_rate_hz': sample_rate_hz,
+        'nodes': nodes,
+        'dates': sorted(all_dates),
+    }
+
+
+def main():
+    """Run the whole pipeline: positions, file scan, merge, JSON export."""
+    print("=== Indexation des fichiers HDF5 sismiques ===\n")
+
+    # Step 1: node positions from the CSV.
+    print("1. Chargement des positions des nodes...")
+    node_positions = load_node_positions(POSITIONS_CSV)
+
+    # Step 2: H5 files found under the data roots.
+    print("\n2. Scan des fichiers H5...")
+    scanned_files = scan_h5_files(DATA_ROOTS)
+
+    # Step 3: merge both into the final structure.
+    print("\n3. Construction de l'index...")
+    merged = build_full_index(node_positions, scanned_files)
+
+    # Step 4: write the JSON file, creating its folder when needed.
+    print(f"\n4. Sauvegarde vers {OUTPUT_INDEX}...")
+    target_dir = OUTPUT_INDEX.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    with OUTPUT_INDEX.open('w', encoding='utf-8') as out:
+        json.dump(merged, out, indent=2, ensure_ascii=False)
+
+    node_total = len(merged['nodes'])
+    date_total = len(merged['dates'])
+    print(f"\nTerminé! Index généré avec {node_total} nodes et {date_total} dates.")
+
+
+if __name__ == '__main__':
+    main()