Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage

2026-02-19 14:53:10 +01:00
parent 61b25ab734
commit bbd6a22b57
80 changed files with 27884 additions and 1 deletions
--- a/scripts/inventory_h5.py
+++ b/scripts/inventory_h5.py
@@ -0,0 +1,158 @@
+"""
+Inventory script for HDF5 files.
+
+Scans DATA_ROOTS recursively, extracts the node id and the Unix timestamp
+from each file name, and prints a report (per folder, global, per node,
+per day) to stdout.
+"""
+
+import os
+import re
+from pathlib import Path
+from datetime import datetime
+from collections import defaultdict
+
+# Root folders to scan (note: 2020-09-20 is not in the list).
+DATA_ROOTS = [
+    Path(r"F:\2020-09-12"),
+    Path(r"F:\2020-09-13"),
+    Path(r"F:\2020-09-14"),
+    Path(r"F:\2020-09-15"),
+    Path(r"F:\2020-09-16"),
+    Path(r"F:\2020-09-17"),
+    Path(r"F:\2020-09-18"),
+    Path(r"F:\2020-09-19"),
+    Path(r"F:\2020-09-21"),
+    Path(r"F:\2020-09-22"),
+    Path(r"F:\2020-09-23"),
+]
+
+# Extracts node_id (digits after "_b") and the trailing 10-digit timestamp.
+# Example: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
+PATTERN = re.compile(r'_b(\d+)_.*?(\d{10})\.h5$', re.IGNORECASE)
+# Optional channel number ("_ch0_"); compiled once instead of once per file.
+CHANNEL_PATTERN = re.compile(r'_ch(\d+)_')
+# Number of nodes listed in the per-node detail table.
+TOP_NODES = 30
+# NOTE(review): every date below is rendered with naive
+# datetime.fromtimestamp(), i.e. in this machine's local timezone -- confirm
+# that this is the intended reference for the day boundaries.
+MINUTE_FORMAT = '%Y-%m-%d %H:%M'
+
+
+def _banner(title, lead=""):
+    """Print a section title framed by two 70-character rules."""
+    print(lead + "=" * 70)
+    print(title)
+    print("=" * 70)
+
+
+def _scan(roots):
+    """Return {folder: {node_id: [file records]}} for the matching .h5 files."""
+    inventory = defaultdict(lambda: defaultdict(list))
+    for root in roots:
+        if not root.exists():
+            # Say so: an unmounted drive would otherwise yield a silent empty report.
+            print(f"ATTENTION: dossier introuvable, ignoré: {root}")
+            continue
+        for h5_file in root.rglob("*.h5"):
+            name = h5_file.name
+            match = PATTERN.search(name)
+            if not match:
+                continue
+            try:
+                file_size = h5_file.stat().st_size
+            except OSError as exc:
+                # Unreadable or vanished file: skip it instead of aborting the run.
+                print(f"ATTENTION: fichier illisible, ignoré: {h5_file} ({exc})")
+                continue
+            timestamp = int(match.group(2))
+            ch_match = CHANNEL_PATTERN.search(name)
+            inventory[root.name][match.group(1)].append({
+                'timestamp': timestamp,
+                'datetime': datetime.fromtimestamp(timestamp),
+                # "data" or "aux" depending on the marker found in the name
+                'type': "data" if "_data_" in name else "aux" if "_aux_" in name else "unknown",
+                'channel': f"ch{ch_match.group(1)}" if ch_match else "?",
+                'filename': name,
+                'size': file_size,
+            })
+    return inventory
+
+
+def _date_range(timestamps):
+    """Format the earliest/latest timestamp of a set as 'start -> end'."""
+    if not timestamps:
+        return "N/A"
+    first = datetime.fromtimestamp(min(timestamps))
+    last = datetime.fromtimestamp(max(timestamps))
+    # Keep the date on the end bound when the range spans several days: a bare
+    # HH:MM would make a multi-day range look like a same-day one.
+    end_format = '%H:%M' if first.date() == last.date() else MINUTE_FORMAT
+    return f"{first.strftime(MINUTE_FORMAT)} -> {last.strftime(end_format)}"
+
+
+def main():
+    """Scan DATA_ROOTS and print the inventory report to stdout."""
+    _banner("INVENTAIRE DES FICHIERS HDF5")
+    inventory = _scan(DATA_ROOTS)
+
+    # Per-node aggregates across all folders
+    node_stats = defaultdict(lambda: {'files': 0, 'size': 0, 'timestamps': set(), 'folders': set()})
+    for folder, folder_data in inventory.items():
+        for node_id, files in folder_data.items():
+            stats = node_stats[node_id]
+            stats['files'] += len(files)
+            stats['size'] += sum(f['size'] for f in files)
+            stats['timestamps'].update(f['timestamp'] for f in files)
+            stats['folders'].add(folder)
+    timestamps_set = set().union(*(s['timestamps'] for s in node_stats.values()))
+    total_files = sum(s['files'] for s in node_stats.values())
+    total_size = sum(s['size'] for s in node_stats.values())
+
+    # Per-folder report
+    print(f"\n{'DOSSIER':<15} {'NODES':<10} {'FICHIERS':<10} {'TAILLE':<15}")
+    print("-" * 50)
+    for folder in sorted(inventory):
+        folder_data = inventory[folder]
+        n_files = sum(len(files) for files in folder_data.values())
+        folder_size = sum(f['size'] for files in folder_data.values() for f in files)
+        print(f"{folder:<15} {len(folder_data):<10} {n_files:<10} {folder_size / 1e9:.2f} GB")
+
+    # Global statistics
+    _banner("STATISTIQUES GLOBALES", lead="\n")
+    print(f"Fichiers H5 totaux: {total_files}")
+    print(f"Taille totale: {total_size / 1e9:.2f} GB")
+    print(f"Nodes uniques: {len(node_stats)}")
+
+    # Time span covered by the data
+    if timestamps_set:
+        min_ts = min(timestamps_set)
+        max_ts = max(timestamps_set)
+        print("\nPlage temporelle des données:")
+        print(f"  Début: {datetime.fromtimestamp(min_ts)} (timestamp: {min_ts})")
+        print(f"  Fin:   {datetime.fromtimestamp(max_ts)} (timestamp: {max_ts})")
+
+    # Per-node detail, limited to the TOP_NODES nodes with the most files
+    _banner("DETAIL PAR NODE (nodes avec le plus de fichiers)", lead="\n")
+    sorted_nodes = sorted(node_stats.items(), key=lambda x: x[1]['files'], reverse=True)
+    # DATES is 37 wide so that a full multi-day 'start -> end' range fits.
+    print(f"\n{'NODE':<8} {'FICHIERS':<10} {'TAILLE':<12} {'DATES':<37} {'DOSSIERS'}")
+    print("-" * 102)
+    for node_id, stats in sorted_nodes[:TOP_NODES]:
+        size_mb = f"{stats['size'] / 1e6:.1f} MB"
+        folders = ", ".join(sorted(stats['folders']))
+        print(f"b{node_id:<7} {stats['files']:<10} {size_mb:<12} "
+              f"{_date_range(stats['timestamps']):<37} {folders}")
+
+    # Unique days, counting each distinct timestamp once (single pass)
+    _banner("JOURS DE DONNEES DISPONIBLES (basé sur timestamps)", lead="\n")
+    day_counts = defaultdict(int)
+    for ts in timestamps_set:
+        day_counts[datetime.fromtimestamp(ts).strftime('%Y-%m-%d')] += 1
+    for day in sorted(day_counts):
+        print(f"  {day}: ~{day_counts[day]} timestamps uniques")
+
+
+if __name__ == '__main__':
+    main()