def parse_filename(filename):
    """Extract bumper, channel, epoch and file type from an HDF5 file name.

    Supported name layouts:
      - auto_260_061316_b0_13_212626_data_rsn84614_seq1_ch0_1598976585.h5
        (bumper = 13)
      - auto_255_061140_b119_12_230609_data_rsn5725_seq1_ch0_1599065292.h5
        (bumper = 119)

    Args:
        filename: File name to parse, as a string.

    Returns:
        A dict with the keys ``bumper_id`` (digit string or None),
        ``channel`` (e.g. ``"ch0"``, or None), ``epoch_time`` (int or None)
        and ``file_type`` (``"data"``, ``"aux"`` or ``"unknown"``).
    """
    # The "_b0_<id>_" layout has to be tried first: the generic "_b<id>_"
    # pattern would also match it and report the bumper as "0".
    bumper_match = (
        re.search(r'_b0_(\d+)_', filename)
        or re.search(r'_b(\d+)_', filename)
    )
    bumper_id = bumper_match.group(1) if bumper_match else None

    # Channel token such as ch0, ch1, ..., ch15.
    channel_match = re.search(r'_(ch\d+)_', filename)
    channel = channel_match.group(1) if channel_match else None

    # Epoch time: the 10-digit number immediately before the ".h5" suffix.
    epoch_match = re.search(r'_(\d{10})\.h5$', filename)
    epoch_time = int(epoch_match.group(1)) if epoch_match else None

    # File type is taken from the "_data_" / "_aux_" marker in the name.
    if '_data_' in filename:
        file_type = 'data'
    elif '_aux_' in filename:
        file_type = 'aux'
    else:
        file_type = 'unknown'

    return {
        'bumper_id': bumper_id,
        'channel': channel,
        'epoch_time': epoch_time,
        'file_type': file_type,
    }


def get_hdf5_info(filepath):
    """Return the number of samples stored in an HDF5 file.

    The sample count is the first dimension of the ``adc_values`` dataset.

    Args:
        filepath: Path of the HDF5 file to open read-only.

    Returns:
        A dict ``{'samples': int, 'error': str | None}``. ``samples`` is 0
        and ``error`` holds a message when ``adc_values`` is missing or when
        opening/reading the file raises.
    """
    try:
        with h5py.File(filepath, 'r') as f:
            if 'adc_values' not in f:
                # Report the top-level entries that are present to help
                # diagnose unexpected file layouts.
                datasets = list(f.keys())
                return {
                    'samples': 0,
                    'error': f'No adc_values, found: {datasets}',
                }
            return {'samples': f['adc_values'].shape[0], 'error': None}
    except Exception as e:
        # Deliberately best-effort: any failure is reported in the result
        # instead of being raised to the caller.
        return {'samples': 0, 'error': str(e)}


def format_datetime(epoch_time):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    Args:
        epoch_time: Seconds since the epoch, or a falsy value (None, 0).

    Returns:
        The formatted string, or ``"N/A"`` when ``epoch_time`` is falsy.
    """
    if not epoch_time:
        return "N/A"
    return datetime.fromtimestamp(epoch_time).strftime('%Y-%m-%d %H:%M:%S')


def format_duration(seconds):
    """Format a duration in seconds as ``"Hh Mm Ss"``, ``"Mm Ss"`` or ``"Ss"``.

    Fractional seconds are truncated. Negative durations are rendered as the
    magnitude with a leading ``-`` (e.g. ``-5`` -> ``"-5s"``); previously the
    floor-division arithmetic produced unrelated values for them.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The human-readable duration string.
    """
    whole = int(abs(seconds))
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)

    if hours > 0:
        text = f"{hours}h {minutes}m {secs}s"
    elif minutes > 0:
        text = f"{minutes}m {secs}s"
    else:
        text = f"{secs}s"

    # Only show a sign when something non-zero remains after truncation,
    # so that e.g. -0.4 is rendered as "0s" rather than "-0s".
    if seconds < 0 and whole:
        return "-" + text
    return text


def scan_directory(data_dir):
    """List the ``*.h5`` files located in the ``data`` subfolder of a directory.

    Args:
        data_dir: Directory that is expected to contain a ``data`` subfolder.

    Returns:
        A list of ``Path`` objects sorted by path, so the result does not
        depend on filesystem enumeration order. The list is empty (and a
        message is printed) when the ``data`` subfolder does not exist.
    """
    data_path = Path(data_dir) / 'data'
    if not data_path.exists():
        print(f" Directory not found: {data_path}")
        return []
    return sorted(data_path.glob('*.h5'))
channel_stats[ch]['samples'] += item['samples'] if item['bumper_id']: channel_stats[ch]['bumpers'].add(item['bumper_id']) html = f"""
Généré le {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
| Bumper | Canal | Type | Début (epoch) | Début (date/heure) | Fin (date/heure) | Durée | Samples | Fichier |
|---|---|---|---|---|---|---|---|---|
| b{bumper} | {channel.upper()} | {item['file_type'].upper()} | {item['epoch_time'] or 'N/A'} | {format_datetime(item['epoch_time'])} | {format_datetime(end_time)} | {format_duration(duration_sec)} | {item['samples']:,} | {item['filename']}{error_html} |