Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage

This commit is contained in:
Floppyrj45
2026-02-19 14:53:10 +01:00
parent 61b25ab734
commit bbd6a22b57
80 changed files with 27884 additions and 1 deletions

158
scripts/inventory_h5.py Executable file
View File

@@ -0,0 +1,158 @@
"""
Inventory script for HDF5 files.
Extracts the timestamps from the file names and generates a report.
"""
import os
import re
from pathlib import Path
from datetime import datetime
from collections import defaultdict
# Root folders to scan: date-named directories on the F: drive.
# NOTE: the list has no entry for 2020-09-20.
DATA_ROOTS = [
    Path(day_dir)
    for day_dir in (
        r"F:\2020-09-12",
        r"F:\2020-09-13",
        r"F:\2020-09-14",
        r"F:\2020-09-15",
        r"F:\2020-09-16",
        r"F:\2020-09-17",
        r"F:\2020-09-18",
        r"F:\2020-09-19",
        r"F:\2020-09-21",
        r"F:\2020-09-22",
        r"F:\2020-09-23",
    )
]
# Regex that pulls the node id (digits after "_b") and the trailing 10-digit
# timestamp out of a file name. Matching is case-insensitive.
# Example: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
_H5_NAME_REGEX = r'_b(\d+)_.*?(\d{10})\.h5$'
PATTERN = re.compile(_H5_NAME_REGEX, flags=re.IGNORECASE)
def _format_date_range(first_ts, last_ts):
    """Return a 'start -> end' label for two Unix timestamps (local time).

    The end shows only HH:MM when both timestamps fall on the same calendar
    day; otherwise the end date is spelled out so a multi-day range is not
    ambiguous.
    """
    start = datetime.fromtimestamp(first_ts)
    end = datetime.fromtimestamp(last_ts)
    end_format = '%H:%M' if start.date() == end.date() else '%Y-%m-%d %H:%M'
    return f"{start.strftime('%Y-%m-%d %H:%M')} -> {end.strftime(end_format)}"


def main():
    """Scan DATA_ROOTS for HDF5 files and print an inventory report.

    Every ``*.h5`` file found recursively under an existing root is matched
    against PATTERN; files whose name does not match are ignored. The report
    printed to stdout contains:

    * a per-folder summary (node count, file count, total size),
    * global statistics and the overall time range,
    * a per-node table for the nodes with the most files,
    * the list of calendar days covered by the timestamps.

    Timestamps are converted with ``datetime.fromtimestamp``, i.e. they are
    rendered in the local timezone of the machine running the script.
    """
    # Compiled once here rather than looked up again for every file.
    channel_re = re.compile(r'_ch(\d+)_')
    # Number of rows shown in the per-node table.
    max_nodes_shown = 30

    print("=" * 70)
    print("INVENTAIRE DES FICHIERS HDF5")
    print("=" * 70)

    # Structure: folder -> node_id -> list of per-file records (dicts)
    inventory = defaultdict(lambda: defaultdict(list))

    # Global statistics
    total_files = 0
    total_size = 0
    nodes_set = set()
    timestamps_set = set()

    for root in DATA_ROOTS:
        if not root.exists():
            continue

        folder_name = root.name

        for h5_file in root.rglob("*.h5"):
            match = PATTERN.search(h5_file.name)
            if not match:
                continue

            node_id = match.group(1)
            timestamp = int(match.group(2))

            # Determine the file type (data or aux) from the file name
            if "_data_" in h5_file.name:
                file_type = "data"
            elif "_aux_" in h5_file.name:
                file_type = "aux"
            else:
                file_type = "unknown"

            # Extract the channel if the name carries one
            ch_match = channel_re.search(h5_file.name)
            channel = f"ch{ch_match.group(1)}" if ch_match else "?"

            file_size = h5_file.stat().st_size

            inventory[folder_name][node_id].append({
                'timestamp': timestamp,
                'datetime': datetime.fromtimestamp(timestamp),
                'type': file_type,
                'channel': channel,
                'filename': h5_file.name,
                'size': file_size
            })

            total_files += 1
            total_size += file_size
            nodes_set.add(node_id)
            timestamps_set.add(timestamp)

    # Per-folder report
    print(f"\n{'DOSSIER':<15} {'NODES':<10} {'FICHIERS':<10} {'TAILLE':<15}")
    print("-" * 50)

    for folder in sorted(inventory):
        folder_data = inventory[folder]
        n_nodes = len(folder_data)
        n_files = sum(len(files) for files in folder_data.values())
        folder_size = sum(f['size'] for files in folder_data.values() for f in files)
        print(f"{folder:<15} {n_nodes:<10} {n_files:<10} {folder_size / 1e9:.2f} GB")

    # Global statistics
    print("\n" + "=" * 70)
    print("STATISTIQUES GLOBALES")
    print("=" * 70)
    print(f"Fichiers H5 totaux: {total_files}")
    print(f"Taille totale: {total_size / 1e9:.2f} GB")
    print(f"Nodes uniques: {len(nodes_set)}")

    # Overall time range
    if timestamps_set:
        min_ts = min(timestamps_set)
        max_ts = max(timestamps_set)
        print("\nPlage temporelle des données:")
        print(f" Début: {datetime.fromtimestamp(min_ts)} (timestamp: {min_ts})")
        print(f" Fin: {datetime.fromtimestamp(max_ts)} (timestamp: {max_ts})")

    # Per-node detail (nodes with the most files first)
    print("\n" + "=" * 70)
    print("DETAIL PAR NODE (nodes avec le plus de fichiers)")
    print("=" * 70)

    # Aggregate per node across all folders
    node_stats = defaultdict(lambda: {'files': 0, 'size': 0, 'timestamps': set(), 'folders': set()})

    for folder, folder_data in inventory.items():
        for node_id, files in folder_data.items():
            stats = node_stats[node_id]
            stats['files'] += len(files)
            stats['size'] += sum(f['size'] for f in files)
            stats['timestamps'].update(f['timestamp'] for f in files)
            stats['folders'].add(folder)

    # Sort by file count, largest first
    sorted_nodes = sorted(node_stats.items(), key=lambda x: x[1]['files'], reverse=True)

    print(f"\n{'NODE':<8} {'FICHIERS':<10} {'TAILLE':<12} {'DATES':<25} {'DOSSIERS'}")
    print("-" * 90)

    for node_id, stats in sorted_nodes[:max_nodes_shown]:
        node_timestamps = stats['timestamps']
        if node_timestamps:
            # A node can appear in several date folders, so the range may
            # span more than one day; the helper spells out the end date then.
            date_range = _format_date_range(min(node_timestamps), max(node_timestamps))
        else:
            date_range = "N/A"

        folders = ", ".join(sorted(stats['folders']))
        # Pad the size to the width used by the 'TAILLE' header so the
        # following columns line up.
        size_label = f"{stats['size'] / 1e6:.1f} MB"
        print(f"b{node_id:<7} {stats['files']:<10} {size_label:<12} {date_range:<25} {folders}")

    # Unique calendar days
    print("\n" + "=" * 70)
    print("JOURS DE DONNEES DISPONIBLES (basé sur timestamps)")
    print("=" * 70)

    # Count unique timestamps per day in a single pass.
    day_counts = defaultdict(int)
    for ts in timestamps_set:
        day_counts[datetime.fromtimestamp(ts).strftime('%Y-%m-%d')] += 1

    for day in sorted(day_counts):
        print(f" {day}: ~{day_counts[day]} timestamps uniques")
# Run the inventory only when executed as a script, not when imported.
if __name__ == "__main__":
    main()