Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage

This commit is contained in:
Floppyrj45
2026-02-19 14:53:10 +01:00
parent 61b25ab734
commit bbd6a22b57
80 changed files with 27884 additions and 1 deletions

Binary file not shown.

22
scripts/check_node29.py Executable file
View File

@@ -0,0 +1,22 @@
import json
# Debug helper: show what the web-app index knows about node 29, then list
# every node that has at least one date with files.
# The file is opened in a ``with`` block so the handle is closed right after
# parsing, and decoded explicitly as UTF-8 instead of the platform default.
with open(r'F:\seismic_webapp\data\index.json', encoding='utf-8') as index_file:
    data = json.load(index_file)

node29 = data['nodes'].get('29')
if node29:
    print(f"Node 29:")
    print(f" Position: {node29.get('position')}")
    print(f" Dates disponibles: {list(node29.get('dates', {}).keys())}")
    for date, files in node29.get('dates', {}).items():
        print(f" {date}: {len(files)} fichiers")
        # Only the first two files per date are shown to keep the output short.
        for f in files[:2]:
            print(f" - {f['path']}")
else:
    print("Node 29 non trouvé dans l'index")

print("\n--- Tous les nodes avec données ---")
for node_id, node in data['nodes'].items():
    if node.get('dates') and len(node['dates']) > 0:
        has_pos = node.get('position') is not None
        dates = list(node['dates'].keys())
        print(f"Node {node_id}: pos={has_pos}, dates={dates}")

22
scripts/check_positions.py Executable file
View File

@@ -0,0 +1,22 @@
import json
# Debug helper: report which nodes with data have a usable position.
# The index is read through a context manager (handle closed immediately) and
# decoded explicitly as UTF-8 rather than with the platform default encoding.
with open(r'F:\seismic_webapp\data\index.json', encoding='utf-8') as index_file:
    data = json.load(index_file)

# A node "has data" when its 'dates' mapping exists and is non-empty.
nodes_with_data = [n for n in data['nodes'].values() if n.get('dates') and len(n['dates']) > 0]
print(f'Nodes avec donnees: {len(nodes_with_data)}')

print('\n--- Nodes avec donnees et leurs positions ---')
for n in nodes_with_data[:10]:
    pos = n.get('position')
    # bool(): print True/False, not the last operand of the `and` chain
    # (which would be a coordinate value or None).
    has_pos = bool(pos and pos.get('easting') and pos.get('northing'))
    print(f"Node {n['id']}: hasPos={has_pos}, pos={pos}")

print('\n--- Nodes avec donnees SANS position valide ---')
no_pos_count = 0
for n in nodes_with_data:
    pos = n.get('position')
    # A position is considered invalid when it is missing or when either
    # coordinate is missing/falsy.
    if not pos or not pos.get('easting') or not pos.get('northing'):
        print(f"Node {n['id']}: pos={pos}")
        no_pos_count += 1
print(f'\nTotal nodes sans position valide: {no_pos_count}')

35
scripts/debug_inventory.py Executable file
View File

@@ -0,0 +1,35 @@
import json
from collections import defaultdict

# Debug helper: print a few sanity checks about the generated inventory.
# The file is read through a context manager (handle closed immediately) and
# decoded explicitly as UTF-8 rather than with the platform default encoding.
with open(r'F:\seismic_webapp\inventory.json', encoding='utf-8') as inventory_file:
    d = json.load(inventory_file)

# Show a few sample entries
print("=== EXEMPLES DE FICHIERS ===")
for f in d[:5]:
    print(f"File: {f['filename']}")
    print(f" Bumper: {f['bumper_id']}, Channel: {f['channel']}")
    print(f" Samples: {f['samples']}, Epoch: {f['epoch_time']}")
    print()

# Count distinct bumpers (entries without a bumper id are ignored)
bumpers = set(f['bumper_id'] for f in d if f['bumper_id'])
print(f"Bumpers uniques: {len(bumpers)}")
# Numeric ids sort numerically; anything else is pushed to the end via 999.
print(f"Liste: {sorted(bumpers, key=lambda x: int(x) if x and x.isdigit() else 999)[:30]}")

# Look at files with a suspiciously large sample count
print("\n=== FICHIERS AVEC GROS SAMPLES ===")
big_files = [f for f in d if f['samples'] > 100000000]
for f in big_files[:5]:
    # samples / 200 / 3600 converts a sample count to hours at 200 samples per second.
    print(f" {f['filename']}: {f['samples']} samples = {f['samples']/200/3600:.1f}h")

# Per-bumper statistics: number of files and set of channels seen
by_bumper = defaultdict(lambda: {'files': 0, 'channels': set()})
for f in d:
    if f['bumper_id']:
        by_bumper[f['bumper_id']]['files'] += 1
        if f['channel']:
            by_bumper[f['bumper_id']]['channels'].add(f['channel'])

print(f"\n=== PAR BUMPER (premiers 20) ===")
for b in sorted(by_bumper.keys(), key=lambda x: int(x) if x.isdigit() else 999)[:20]:
    s = by_bumper[b]
    print(f" b{b}: {s['files']} files, channels: {sorted(s['channels'])}")

View File

@@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
Script d'extraction de données H5 calibrées (format 2026).
Lit calibrated_data/channel_X (valeurs physiques avec unités).
"""
import argparse
import json
import sys
import h5py
import numpy as np
def extract_window(file_path: str, channel: int, start_ts: int, duration_sec: int) -> dict:
    """Read a window of calibrated samples from an H5 file.

    Args:
        file_path: Path of the H5 file to open.
        channel: Channel number; the data is read from
            ``calibrated_data/channel_<channel>``.
        start_ts: Offset from the start of the file, in seconds. Values <= 0
            mean "from the first sample".
        duration_sec: Window length in seconds. Values <= 0 mean "up to the
            end of the file".

    Returns:
        A dict with the samples, the index bounds, file metadata and basic
        statistics. The statistics are ``None`` when the window is empty.
        On any failure the dict is ``{"error": <message>}`` instead.
    """
    try:
        with h5py.File(file_path, 'r') as f:
            # File-level metadata
            meta = f['metadata']
            sample_rate = meta.attrs['sample_rate_hz']
            file_duration = meta.attrs['duration_sec']
            total_samples = int(meta.attrs['n_samples'])

            # Calibrated dataset for the requested channel
            dataset = f[f'calibrated_data/channel_{channel}']

            # Index computation. The start index is clamped to the file length
            # so a start offset past the end yields an empty window instead of
            # an inverted slice.
            start_idx = int(start_ts * sample_rate) if start_ts > 0 else 0
            start_idx = min(start_idx, total_samples)
            num_samples = int(duration_sec * sample_rate) if duration_sec > 0 else total_samples
            end_idx = min(start_idx + num_samples, total_samples)

            samples = dataset[start_idx:end_idx]

        # Channels 1-3 are reported as geophones (m/s), any other channel as
        # the hydrophone (Pa).
        is_geophone = channel in (1, 2, 3)
        unit = 'm/s' if is_geophone else 'Pa'
        channel_name = f'Geophone {channel}' if is_geophone else 'Hydrophone'

        # NumPy reductions raise on empty input, so an empty window reports
        # None statistics rather than turning into an error result.
        values = np.asarray(samples, dtype=np.float64)
        if values.size > 0:
            stats = {
                "min": float(np.min(values)),
                "max": float(np.max(values)),
                "mean": float(np.mean(values)),
                "std": float(np.std(values)),
                "rms": float(np.sqrt(np.mean(values ** 2))),
            }
        else:
            stats = {"min": None, "max": None, "mean": None, "std": None, "rms": None}

        return {
            "samples": samples.tolist(),
            "start_idx": int(start_idx),
            "end_idx": int(end_idx),
            "total_samples": int(total_samples),
            "sample_rate": int(sample_rate),
            "duration_sec": float(file_duration),
            "channel": channel,
            "channel_name": channel_name,
            "unit": unit,
            "stats": stats,
            "source": "calibrated_h5_2026"
        }
    except Exception as e:
        # Errors are reported in-band: the result is printed as JSON by main().
        return {"error": str(e)}
def main():
    """Command-line entry point: parse the arguments, extract the window and print it as JSON."""
    cli = argparse.ArgumentParser(description='Extraction H5 calibré')
    cli.add_argument('--file', required=True, help='Fichier H5')
    cli.add_argument('--channel', type=int, required=True, help='Canal 1-4')
    cli.add_argument('--start', type=int, default=0, help='Offset secondes (0=début)')
    cli.add_argument('--duration', type=int, default=0, help='Durée secondes (0=tout)')
    opts = cli.parse_args()

    # The JSON document on stdout is the only output of the script.
    payload = extract_window(opts.file, opts.channel, opts.start, opts.duration)
    print(json.dumps(payload))


if __name__ == '__main__':
    main()

132
scripts/extract_hdf5_window.py Executable file
View File

@@ -0,0 +1,132 @@
"""
Script d'extraction de fenêtres de données HDF5.
Appelé par le backend Node.js pour lire des portions de données ADC
sans charger tout le fichier en mémoire.
Usage:
python extract_hdf5_window.py --file <path> --channel <ch0-ch3> --start <timestamp> --duration <seconds>
"""
import argparse
import json
import sys
from pathlib import Path
try:
import h5py
import numpy as np
except ImportError as e:
print(json.dumps({"error": f"Module manquant: {e}"}))
sys.exit(1)
# Fixed sampling rate used to convert seconds into sample indices; it is a
# constant of this script, not a value read from the HDF5 file.
SAMPLE_RATE = 200 # Hz
def extract_window(file_path: str, channel: str, start_ts: int, duration_sec: int) -> dict:
    """Extract a window of ADC samples from an HDF5 file.

    Only the requested slice is read from the dataset, so the whole file is
    never loaded into memory.

    Args:
        file_path: Path to the H5 file.
        channel: Channel label (ch0, ch1, ch2, ch3). It is only echoed back in
            the result: each file holds a single ``adc_values`` dataset.
        start_ts: Start timestamp in seconds, compared with the file's
            ``timestamp`` / ``start_time`` attribute when one is present.
        duration_sec: Window length in seconds (capped at 60 seconds).

    Returns:
        A dict with the samples, index bounds and statistics, or
        ``{"error": <message>}`` when the file or dataset cannot be read.
        Statistics are ``None`` when the window is empty.
    """
    file_path = Path(file_path)
    if not file_path.exists():
        return {"error": f"Fichier non trouvé: {file_path}"}

    try:
        with h5py.File(file_path, 'r') as f:
            if 'adc_values' not in f:
                # List the datasets that do exist to help debugging.
                available = []

                def visit(name, obj):
                    if isinstance(obj, h5py.Dataset):
                        available.append(name)

                f.visititems(visit)
                return {"error": f"Dataset 'adc_values' non trouvé. Disponibles: {available}"}

            dataset = f['adc_values']

            # Start time of the file: dataset attributes take priority over
            # file attributes, and 'timestamp' over 'start_time'.
            file_start_ts = None
            for holder in (dataset, f):
                for key in ('timestamp', 'start_time'):
                    if key in holder.attrs:
                        file_start_ts = int(holder.attrs[key])
                        break
                if file_start_ts is not None:
                    break

            total_samples = int(dataset.shape[0])

            if file_start_ts is not None:
                # Offset relative to the start of the file (never negative).
                offset_sec = max(0, start_ts - file_start_ts)
                start_idx = int(offset_sec * SAMPLE_RATE)
            else:
                # No time information: read from the beginning.
                start_idx = 0
            # Clamp so a start past the end of the file gives an empty window
            # rather than an inverted slice.
            start_idx = min(start_idx, total_samples)

            num_samples = max(0, int(duration_sec * SAMPLE_RATE))
            end_idx = min(start_idx + num_samples, total_samples)

            # Limit the payload size (60 seconds = 12000 samples at 200 Hz).
            max_samples = 60 * SAMPLE_RATE
            if end_idx - start_idx > max_samples:
                end_idx = start_idx + max_samples

            # Partial read: only this slice is loaded.
            samples_array = np.asarray(dataset[start_idx:end_idx])

        # Statistics are computed in float64: squaring values in a narrow
        # integer dtype can wrap around silently and corrupt the RMS.
        values = samples_array.astype(np.float64)
        has_data = values.size > 0

        return {
            "samples": samples_array.tolist(),
            "start_idx": start_idx,
            "end_idx": end_idx,
            "total_samples": total_samples,
            "file_start_ts": file_start_ts,
            "channel": channel,
            "stats": {
                "min": float(np.min(values)) if has_data else None,
                "max": float(np.max(values)) if has_data else None,
                "mean": float(np.mean(values)) if has_data else None,
                "rms": float(np.sqrt(np.mean(values ** 2))) if has_data else None,
            }
        }
    except Exception as e:
        # Errors are reported in-band: the result is printed as JSON by main().
        return {"error": str(e)}
def main():
    """Command-line entry point: parse the arguments and print the extracted window as JSON."""
    cli = argparse.ArgumentParser(description='Extraction de fenêtre HDF5')
    cli.add_argument('--file', required=True, help='Chemin du fichier H5')
    cli.add_argument('--channel', required=True, help='Canal (ch0-ch3)')
    cli.add_argument('--start', type=int, required=True, help='Timestamp de début')
    cli.add_argument('--duration', type=int, default=10, help='Durée en secondes')
    opts = cli.parse_args()

    # JSON on stdout is what the calling backend reads.
    payload = extract_window(opts.file, opts.channel, opts.start, opts.duration)
    print(json.dumps(payload))


if __name__ == '__main__':
    main()

479
scripts/generate_inventory.py Executable file
View File

@@ -0,0 +1,479 @@
#!/usr/bin/env python3
"""
Script pour générer un inventaire HTML de tous les fichiers HDF5.
Affiche: numéro de bumper, canal, date/heure début, date/heure fin, durée, nombre d'échantillons.
"""
import os
import sys
import json
import h5py
import re
from datetime import datetime
from pathlib import Path
from collections import defaultdict
# Configuration
# Sampling rate used to turn sample counts into durations (samples / SAMPLE_RATE).
SAMPLE_RATE = 200 # Hz
# Default directories added to the scan list when they exist; the HDF5 files
# are looked up in the 'data' subfolder of each directory.
DATA_DIRS = [
    r"F:\2020-09-11",
    r"E:\2020-09-11",
    r"E:\2020-09-14",
]
def parse_filename(filename):
    """Extract bumper id, channel, epoch time and file type from an HDF5 file name.

    Supported name formats:
    - auto_260_061316_b0_13_212626_data_rsn84614_seq1_ch0_1598976585.h5 (bumper = 13)
    - auto_255_061140_b119_12_230609_data_rsn5725_seq1_ch0_1599065292.h5 (bumper = 119)

    Returns a dict with the keys 'bumper_id', 'channel', 'epoch_time' and
    'file_type'; a field that cannot be found is None ('unknown' for the type).
    """
    def first_group(pattern):
        # Captured text of the first group, or None when the pattern is absent.
        found = re.search(pattern, filename)
        return found.group(1) if found else None

    # The "_b0_XX_" form is tried first, then the plain "_bXXX_" form.
    bumper_id = first_group(r'_b0_(\d+)_') or first_group(r'_b(\d+)_')

    # Channel label such as ch0, ch1, ... ch15
    channel = first_group(r'_(ch\d+)_')

    # Epoch time: the 10-digit number right before the .h5 extension
    epoch_text = first_group(r'_(\d{10})\.h5$')
    epoch_time = None if epoch_text is None else int(epoch_text)

    # File type (data or aux)
    if '_data_' in filename:
        file_type = 'data'
    elif '_aux_' in filename:
        file_type = 'aux'
    else:
        file_type = 'unknown'

    return {
        'bumper_id': bumper_id,
        'channel': channel,
        'epoch_time': epoch_time,
        'file_type': file_type
    }
def get_hdf5_info(filepath):
    """Open an HDF5 file and report how many samples its 'adc_values' dataset holds.

    Returns a dict {'samples': int, 'error': str | None}. When the dataset is
    missing or the file cannot be read, 'samples' is 0 and 'error' explains why.
    """
    try:
        with h5py.File(filepath, 'r') as h5:
            if 'adc_values' not in h5:
                # Report the top-level names that are present instead.
                return {'samples': 0, 'error': f'No adc_values, found: {list(h5.keys())}'}
            return {'samples': h5['adc_values'].shape[0], 'error': None}
    except Exception as exc:
        return {'samples': 0, 'error': str(exc)}
def format_datetime(epoch_time):
    """Render an epoch timestamp as 'YYYY-MM-DD HH:MM:SS', or "N/A" when it is missing/falsy."""
    if epoch_time:
        return datetime.fromtimestamp(epoch_time).strftime('%Y-%m-%d %H:%M:%S')
    return "N/A"
def format_duration(seconds):
    """Render a duration in seconds as 'Xh Ym Zs', dropping leading units that are zero."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)

    # Guard clauses, from the largest unit down.
    if hours > 0:
        return f"{hours}h {minutes}m {secs}s"
    if minutes > 0:
        return f"{minutes}m {secs}s"
    return f"{secs}s"
def scan_directory(data_dir):
    """Return the *.h5 files found directly in the 'data' subfolder of data_dir.

    An empty list is returned (and a message printed) when that subfolder
    does not exist.
    """
    data_path = Path(data_dir) / 'data'
    if data_path.exists():
        return list(data_path.glob('*.h5'))
    print(f" Directory not found: {data_path}")
    return []
def generate_html(inventory, output_path):
    """Write the HTML inventory report.

    Args:
        inventory: List of dicts, one per file, with the keys 'bumper_id',
            'channel', 'epoch_time', 'file_type', 'samples', 'error' and
            'filename'.
        output_path: Destination of the HTML document (written as UTF-8).

    File names and error messages are HTML-escaped before being inserted in
    the page. In the client-side filter, rows whose channel or type has no
    matching checkbox always stay visible.
    """
    from html import escape

    # Group by bumper, then by channel
    by_bumper = defaultdict(lambda: defaultdict(list))
    for item in inventory:
        bumper = item['bumper_id'] or 'unknown'
        channel = item['channel'] or 'unknown'
        by_bumper[bumper][channel].append(item)

    # Numeric bumper ids sort numerically; anything else goes last (999).
    sorted_bumpers = sorted(by_bumper.keys(), key=lambda x: int(x) if x.isdigit() else 999)

    # Global statistics
    total_files = len(inventory)
    total_samples = sum(i['samples'] for i in inventory)
    total_duration = total_samples / SAMPLE_RATE
    total_errors = sum(1 for i in inventory if i['error'])

    # Per-channel counters
    channel_stats = defaultdict(lambda: {'files': 0, 'samples': 0, 'bumpers': set()})
    for item in inventory:
        ch = item['channel'] or 'unknown'
        channel_stats[ch]['files'] += 1
        channel_stats[ch]['samples'] += item['samples']
        if item['bumper_id']:
            channel_stats[ch]['bumpers'].add(item['bumper_id'])

    # The page is assembled as a list of fragments joined once at the end.
    parts = [f"""<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Inventaire Fichiers HDF5 Sismiques</title>
<style>
* {{ box-sizing: border-box; }}
body {{
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: #0a0a1a;
color: #eee;
margin: 0;
padding: 20px;
}}
h1 {{
color: #4ade80;
border-bottom: 2px solid #4ade80;
padding-bottom: 10px;
}}
h2 {{
color: #e94560;
margin-top: 30px;
}}
h3 {{
color: #fbbf24;
margin-top: 20px;
}}
.stats {{
background: #16213e;
padding: 20px;
border-radius: 8px;
margin-bottom: 30px;
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
}}
.stat-box {{
background: #0f3460;
padding: 15px;
border-radius: 6px;
text-align: center;
}}
.stat-value {{
font-size: 2rem;
font-weight: bold;
color: #4ade80;
}}
.stat-label {{
color: #888;
font-size: 0.9rem;
}}
.channel-summary {{
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 10px;
margin-bottom: 30px;
}}
.channel-box {{
background: #16213e;
padding: 15px;
border-radius: 6px;
text-align: center;
}}
.channel-box h4 {{
margin: 0 0 10px 0;
color: #4ade80;
}}
table {{
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
font-size: 0.9rem;
}}
th, td {{
padding: 10px;
text-align: left;
border-bottom: 1px solid #1a1a2e;
}}
th {{
background: #16213e;
color: #4ade80;
position: sticky;
top: 0;
}}
tr:hover {{
background: #16213e;
}}
.ch0 {{ color: #4ade80; }}
.ch1 {{ color: #60a5fa; }}
.ch2 {{ color: #fbbf24; }}
.ch3 {{ color: #f472b6; }}
.data {{ color: #4ade80; }}
.aux {{ color: #888; }}
.error {{ color: #e94560; font-size: 0.8rem; }}
.bumper-section {{
background: #0f3460;
padding: 15px;
border-radius: 8px;
margin-bottom: 20px;
}}
.filter-controls {{
background: #16213e;
padding: 15px;
border-radius: 8px;
margin-bottom: 20px;
display: flex;
gap: 20px;
flex-wrap: wrap;
}}
.filter-controls label {{
display: flex;
align-items: center;
gap: 8px;
cursor: pointer;
}}
input[type="checkbox"] {{
width: 18px;
height: 18px;
}}
.summary-table {{
width: auto;
margin: 0 auto;
}}
.summary-table td {{
padding: 5px 15px;
}}
</style>
</head>
<body>
<h1>📊 Inventaire Fichiers HDF5 Sismiques</h1>
<p>Généré le {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
<div class="stats">
<div class="stat-box">
<div class="stat-value">{total_files}</div>
<div class="stat-label">Fichiers HDF5</div>
</div>
<div class="stat-box">
<div class="stat-value">{len(sorted_bumpers)}</div>
<div class="stat-label">Bumpers (nodes)</div>
</div>
<div class="stat-box">
<div class="stat-value">{total_samples:,}</div>
<div class="stat-label">Échantillons total</div>
</div>
<div class="stat-box">
<div class="stat-value">{format_duration(total_duration)}</div>
<div class="stat-label">Durée totale @ 200Hz</div>
</div>
<div class="stat-box">
<div class="stat-value">{total_errors}</div>
<div class="stat-label">Erreurs lecture</div>
</div>
</div>
<h2>📡 Résumé par Canal</h2>
<div class="channel-summary">
"""]

    # Summary boxes exist only for the four channels below.
    for ch in ['ch0', 'ch1', 'ch2', 'ch3']:
        stats = channel_stats.get(ch, {'files': 0, 'samples': 0, 'bumpers': set()})
        duration = stats['samples'] / SAMPLE_RATE
        parts.append(f"""
<div class="channel-box">
<h4 class="{ch}">{ch.upper()}</h4>
<div><strong>{stats['files']}</strong> fichiers</div>
<div><strong>{len(stats['bumpers'])}</strong> bumpers</div>
<div><strong>{stats['samples']:,}</strong> samples</div>
<div>{format_duration(duration)}</div>
</div>
""")

    parts.append("""
</div>
<h2>📋 Détail par Bumper</h2>
<div class="filter-controls">
<label><input type="checkbox" id="showCh0" checked onchange="filterTable()"> <span class="ch0">CH0</span></label>
<label><input type="checkbox" id="showCh1" checked onchange="filterTable()"> <span class="ch1">CH1</span></label>
<label><input type="checkbox" id="showCh2" checked onchange="filterTable()"> <span class="ch2">CH2</span></label>
<label><input type="checkbox" id="showCh3" checked onchange="filterTable()"> <span class="ch3">CH3</span></label>
<label><input type="checkbox" id="showData" checked onchange="filterTable()"> <span class="data">DATA</span></label>
<label><input type="checkbox" id="showAux" checked onchange="filterTable()"> <span class="aux">AUX</span></label>
</div>
<table id="mainTable">
<thead>
<tr>
<th>Bumper</th>
<th>Canal</th>
<th>Type</th>
<th>Début (epoch)</th>
<th>Début (date/heure)</th>
<th>Fin (date/heure)</th>
<th>Durée</th>
<th>Samples</th>
<th>Fichier</th>
</tr>
</thead>
<tbody>
""")

    for bumper in sorted_bumpers:
        channels = by_bumper[bumper]
        for channel in sorted(channels.keys()):
            # Within a channel, rows are ordered by start time (missing times first).
            items = sorted(channels[channel], key=lambda x: x['epoch_time'] or 0)
            for item in items:
                duration_sec = item['samples'] / SAMPLE_RATE
                end_time = (item['epoch_time'] + duration_sec) if item['epoch_time'] else None
                # Escape free-form text so '<', '>' or '&' cannot break the markup.
                filename_html = escape(str(item['filename']))
                error_html = f'<div class="error">{escape(str(item["error"]))}</div>' if item['error'] else ''
                parts.append(f"""
<tr class="row-{channel} row-{item['file_type']}">
<td><strong>b{bumper}</strong></td>
<td class="{channel}">{channel.upper()}</td>
<td class="{item['file_type']}">{item['file_type'].upper()}</td>
<td>{item['epoch_time'] or 'N/A'}</td>
<td>{format_datetime(item['epoch_time'])}</td>
<td>{format_datetime(end_time)}</td>
<td>{format_duration(duration_sec)}</td>
<td>{item['samples']:,}</td>
<td style="font-size: 0.8rem; color: #888;">{filename_html}{error_html}</td>
</tr>
""")

    # Rows whose channel/type has no checkbox (other channels, 'unknown') are
    # never hidden by the filter: there would be no way to show them again.
    parts.append("""
</tbody>
</table>
<script>
function filterTable() {
const showCh0 = document.getElementById('showCh0').checked;
const showCh1 = document.getElementById('showCh1').checked;
const showCh2 = document.getElementById('showCh2').checked;
const showCh3 = document.getElementById('showCh3').checked;
const showData = document.getElementById('showData').checked;
const showAux = document.getElementById('showAux').checked;
const rows = document.querySelectorAll('#mainTable tbody tr');
rows.forEach(row => {
const isCh0 = row.classList.contains('row-ch0');
const isCh1 = row.classList.contains('row-ch1');
const isCh2 = row.classList.contains('row-ch2');
const isCh3 = row.classList.contains('row-ch3');
const isData = row.classList.contains('row-data');
const isAux = row.classList.contains('row-aux');
const hasChannelFilter = isCh0 || isCh1 || isCh2 || isCh3;
const hasTypeFilter = isData || isAux;
const channelVisible = !hasChannelFilter || (isCh0 && showCh0) || (isCh1 && showCh1) ||
(isCh2 && showCh2) || (isCh3 && showCh3);
const typeVisible = !hasTypeFilter || (isData && showData) || (isAux && showAux);
row.style.display = (channelVisible && typeVisible) ? '' : 'none';
});
}
</script>
</body>
</html>
""")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("".join(parts))
    print(f"\nHTML genere: {output_path}")
def main():
    """Scan every known data directory and write the HTML and JSON inventories.

    The directories to scan are the grandparent folders of the file paths
    listed in the existing index (when it exists) plus the DATA_DIRS
    defaults; only directories that exist are kept.
    """
    print("=" * 60)
    print("INVENTAIRE DES FICHIERS HDF5 SISMIQUES")
    print("=" * 60)

    # Load the existing index to discover the directories it references
    index_path = Path(r"F:\seismic_webapp\data\index.json")
    all_dirs = set()
    if index_path.exists():
        with open(index_path, 'r', encoding='utf-8') as f:
            index = json.load(f)
        for node_data in index.get('nodes', {}).values():
            for files_list in node_data.get('dates', {}).values():
                # Each date is expected to map directly to a list of file entries.
                if not isinstance(files_list, list):
                    continue
                for file_info in files_list:
                    raw_path = file_info.get('path', '')
                    if not raw_path:
                        # Path('') is '.', whose grandparent is still '.' and
                        # always exists: without this guard an entry with no
                        # path would add the current directory to the scan.
                        continue
                    date_dir = Path(raw_path).parent.parent
                    if date_dir.exists():
                        all_dirs.add(str(date_dir))

    # Add the default directories
    for d in DATA_DIRS:
        if Path(d).exists():
            all_dirs.add(d)

    print(f"\nRépertoires à scanner: {len(all_dirs)}")
    for d in sorted(all_dirs):
        print(f" - {d}")

    # Scan all files
    inventory = []
    for data_dir in sorted(all_dirs):
        print(f"\nScanning {data_dir}...")
        files = scan_directory(data_dir)
        print(f" Found {len(files)} HDF5 files")
        for i, filepath in enumerate(files):
            # Progress message every 50 files
            if i % 50 == 0:
                print(f" Processing {i}/{len(files)}...")
            parsed = parse_filename(filepath.name)
            hdf5_info = get_hdf5_info(filepath)
            inventory.append({
                'filepath': str(filepath),
                'filename': filepath.name,
                'directory': data_dir,
                'bumper_id': parsed['bumper_id'],
                'channel': parsed['channel'],
                'epoch_time': parsed['epoch_time'],
                'file_type': parsed['file_type'],
                'samples': hdf5_info['samples'],
                'error': hdf5_info['error']
            })
    print(f"\nTotal: {len(inventory)} fichiers")

    # Generate the HTML report
    output_path = Path(r"F:\seismic_webapp\inventory.html")
    generate_html(inventory, output_path)

    # Also save the raw inventory as JSON for reference
    json_path = Path(r"F:\seismic_webapp\inventory.json")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(inventory, f, indent=2, ensure_ascii=False)
    print(f"JSON genere: {json_path}")


if __name__ == '__main__':
    main()

125
scripts/h5_api_server.py Executable file
View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
import h5py
import json
from pathlib import Path
import re
# Flask application serving the H5 data and campaign documents.
app = Flask(__name__)
# Apply flask-cors to the whole application with its default settings.
CORS(app)
# Folder scanned for *.h5 files by the /api/h5/* endpoints.
H5_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5')
# Folder holding the campaign manifest and the documents served by /api/docs/*.
DOCS_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/docs')
@app.route('/api/h5/files', methods=['GET'])
def list_files():
    """List the *.h5 files of H5_DIR with the node id and date parsed from each name.

    Responds with {'files': [...], 'count': n}, or {'error': message} and
    HTTP 500 when the listing fails.
    """
    def describe(h5_path):
        # Node id: digits following 'rsn'; date: a 6-digit field between underscores.
        node = re.search(r'rsn(\d+)', h5_path.name)
        stamp = re.search(r'_(\d{6})_', h5_path.name)
        return {
            'filename': h5_path.name,
            'nodeId': node.group(1) if node else 'unknown',
            'date': stamp.group(1) if stamp else '',
            'path': str(h5_path)
        }

    try:
        entries = [describe(h5_path) for h5_path in sorted(H5_DIR.glob('*.h5'))]
        return jsonify({'files': entries, 'count': len(entries)})
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@app.route('/api/h5/data', methods=['GET'])
def get_data():
    """Return a window of calibrated samples from one H5 file.

    Query parameters:
        file: Name of a file located directly in H5_DIR (required).
        channel: Channel number (default 1).
        start: Offset from the start of the file in seconds (default 0).
        duration: Window length in seconds (default 10; <= 0 means "to the end").

    Responses:
        200 with the samples, index bounds, metadata and statistics
            (statistics are null when the window is empty);
        400 when a parameter is missing or malformed;
        404 when the file does not exist;
        500 with {'error': message} on any other failure.
    """
    import numpy as np

    try:
        filename = request.args.get('file')
        if not filename:
            return jsonify({'error': "Missing 'file' parameter"}), 400
        # 'file' comes straight from the query string: accept only a bare file
        # name so the request cannot reach outside H5_DIR ('../x', '/etc/x').
        if filename in ('.', '..') or Path(filename).name != filename:
            return jsonify({'error': 'Invalid file name'}), 400

        try:
            channel = int(request.args.get('channel', 1))
            start = int(request.args.get('start', 0))
            duration = int(request.args.get('duration', 10))
        except ValueError:
            return jsonify({'error': 'channel, start and duration must be integers'}), 400

        filepath = H5_DIR / filename
        if not filepath.is_file():
            return jsonify({'error': 'File not found'}), 404

        with h5py.File(filepath, 'r') as f:
            meta = f['metadata']
            sample_rate = meta.attrs['sample_rate_hz']
            file_duration = meta.attrs['duration_sec']
            total_samples = int(meta.attrs['n_samples'])
            dataset = f[f'calibrated_data/channel_{channel}']

            # Clamp the start index so an offset past the end of the file
            # yields an empty window instead of an inverted slice.
            start_idx = int(start * sample_rate) if start > 0 else 0
            start_idx = min(start_idx, total_samples)
            num_samples = int(duration * sample_rate) if duration > 0 else total_samples
            end_idx = min(start_idx + num_samples, total_samples)
            samples = dataset[start_idx:end_idx]

        # Channels 1-3 are reported as geophones (m/s), any other as the hydrophone (Pa).
        is_geophone = channel in (1, 2, 3)
        unit = 'm/s' if is_geophone else 'Pa'
        channel_name = f'Geophone {channel}' if is_geophone else 'Hydrophone'

        # NumPy reductions raise on empty input: report null statistics instead.
        values = np.asarray(samples, dtype=np.float64)
        if values.size > 0:
            stats = {
                'min': float(np.min(values)),
                'max': float(np.max(values)),
                'mean': float(np.mean(values)),
                'std': float(np.std(values)),
                'rms': float(np.sqrt(np.mean(values ** 2)))
            }
        else:
            stats = {'min': None, 'max': None, 'mean': None, 'std': None, 'rms': None}

        return jsonify({
            'samples': samples.tolist(),
            'start_idx': int(start_idx),
            'end_idx': int(end_idx),
            'total_samples': int(total_samples),
            'sample_rate': int(sample_rate),
            'duration_sec': float(file_duration),
            'channel': channel,
            'channel_name': channel_name,
            'unit': unit,
            'stats': stats
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/api/docs/manifest', methods=['GET'])
def get_manifest():
    """Serve the content of campaign_manifest.json from DOCS_DIR as JSON.

    Responds with {'error': message} and HTTP 500 when the file cannot be
    read or parsed.
    """
    try:
        with open(DOCS_DIR / 'campaign_manifest.json', 'r') as manifest:
            payload = json.load(manifest)
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@app.route('/api/docs/<filename>', methods=['GET'])
def get_document(filename):
    """Serve one document from DOCS_DIR.

    Responds with 404 when ``filename`` does not name a regular file in
    DOCS_DIR, and with {'error': message} and HTTP 500 on any other failure.
    """
    try:
        doc_file = DOCS_DIR / filename
        # is_file() rather than exists(): a name that resolves to a directory
        # (e.g. '..') is answered with 404 instead of failing inside
        # send_file() and surfacing as a 500.
        if not doc_file.is_file():
            return jsonify({'error': 'File not found'}), 404
        return send_file(str(doc_file))
    except Exception as e:
        return jsonify({'error': str(e)}), 500
# === Endpoints pour la carte ===
@app.route('/api/nodes', methods=['GET'])
def get_nodes():
    """Return the list of nodes with their positions (hard-coded values)."""
    # (id, latitude, longitude) of each node exposed to the map.
    coordinates = (
        ('80274', 43.40, 3.70),
        ('2221', 43.41, 3.71),
        ('3541', 43.39, 3.69),
    )
    nodes = [
        {'id': node_id, 'lat': lat, 'lon': lon, 'name': f'Node {node_id}'}
        for node_id, lat, lon in coordinates
    ]
    return jsonify(nodes)
@app.route('/api/dates', methods=['GET'])
def get_dates():
    """Return the available dates (hard-coded list)."""
    available_dates = ['2020-08-08', '2020-08-09', '2020-08-10']
    return jsonify(available_dates)
@app.route('/api/migration-status', methods=['GET'])
def migration_status():
    """Migration status endpoint (disabled): always reports 'complete'."""
    status = {'status': 'complete'}
    return jsonify(status)


if __name__ == '__main__':
    # Listen on every interface, port 3004, with debug mode off.
    app.run(host='0.0.0.0', port=3004, debug=False)

64
scripts/index_h5_2026.py Executable file
View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python3
"""
Indexation des fichiers H5 format 2026 avec métadonnées complètes.
Génère un index JSON pour le viewer web.
"""
import h5py
import json
from pathlib import Path
from datetime import datetime
# Folder whose *.h5 files are indexed.
H5_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5')
# Destination of the generated JSON index.
OUTPUT = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5_index.json')
def index_h5_files():
    """Index every *.h5 file of H5_DIR and write the result to OUTPUT as JSON.

    Each entry combines fields parsed from the file name (node id, date) with
    values read from the file's 'metadata' attributes. A file that cannot be
    read is reported on stdout and left out of the index.
    """
    import re

    # (id, type, unit, name) of the channels advertised for every file.
    channel_specs = (
        (1, 'geophone', 'm/s', 'Geophone 1'),
        (2, 'geophone', 'm/s', 'Geophone 2'),
        (3, 'geophone', 'm/s', 'Geophone 3'),
        (4, 'hydrophone', 'Pa', 'Hydrophone'),
    )

    entries = []
    for h5_path in sorted(H5_DIR.glob('*.h5')):
        try:
            with h5py.File(h5_path, 'r') as h5:
                attrs = h5['metadata'].attrs
                # Node id: digits following 'rsn' in the file name
                node_match = re.search(r'rsn(\d+)', h5_path.name)
                # Date: 6-digit field (YYMMDD) between underscores
                date_match = re.search(r'_(\d{6})_', h5_path.name)
                entries.append({
                    'filename': h5_path.name,
                    'path': str(h5_path),
                    'node_id': node_match.group(1) if node_match else 'unknown',
                    'date': date_match.group(1) if date_match else '',
                    'duration_sec': float(attrs['duration_sec']),
                    'sample_rate': int(attrs['sample_rate_hz']),
                    'channels': int(attrs['n_channels']),
                    'samples': int(attrs['n_samples']),
                    'size_mb': round(h5_path.stat().st_size / (1024*1024), 2),
                    'channel_info': [
                        {'id': cid, 'type': ctype, 'unit': cunit, 'name': cname}
                        for cid, ctype, cunit, cname in channel_specs
                    ]
                })
        except Exception as exc:
            print(f'Error indexing {h5_path.name}: {exc}')

    total_seconds = sum(entry['duration_sec'] for entry in entries)
    index = {
        'generated': datetime.now().isoformat(),
        'total_files': len(entries),
        'total_duration_hours': total_seconds / 3600,
        'files': entries
    }
    OUTPUT.write_text(json.dumps(index, indent=2))
    print(f'✅ Indexed {len(entries)} files → {OUTPUT}')
    print(f'📊 Total duration: {index["total_duration_hours"]:.1f} hours')


if __name__ == '__main__':
    index_h5_files()

231
scripts/index_h5_files.py Executable file
View File

@@ -0,0 +1,231 @@
"""
Script d'indexation des fichiers HDF5 sismiques.
Parcourt les dossiers de données, extrait les métadonnées (node_id, date, canaux)
et génère un index JSON utilisé par l'API backend.
"""
import os
import re
import json
import csv
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any
# Pattern used to extract information from a file name.
# Example: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
# or: auto_255_125334_b4_rsn13696_seq1_1599045513.h5
# Group 3 (digits after "_b") is used as the node id and group 4 (the
# 10-digit number before ".h5") as the timestamp.
FILENAME_PATTERN = re.compile(
    r'auto_(\d+)_(\d{6})_b(\d+).*?_(\d{10})\.h5$',
    re.IGNORECASE
)
# Root folders containing the H5 data
DATA_ROOTS = [
    Path(r"F:\2020-09-12"),
    Path(r"F:\2020-09-13"),
    Path(r"F:\2020-09-14"),
    Path(r"F:\2020-09-15"),
    Path(r"F:\2020-09-16"),
    Path(r"F:\2020-09-17"),
    Path(r"F:\2020-09-18"),
    Path(r"F:\2020-09-19"),
    Path(r"F:\2020-09-21"),
    Path(r"F:\2020-09-22"),
    Path(r"F:\2020-09-23"),
]
# CSV file holding the node positions
POSITIONS_CSV = Path(r"F:\Copie de SETE_AUV_DARFV4-Copier(1).csv")
# Output index file
OUTPUT_INDEX = Path(r"F:\seismic_webapp\data\index.json")
def load_node_positions(csv_path: Path) -> Dict[str, Dict[str, Any]]:
    """Load the node positions from the CSV file.

    The file is expected to have three preamble lines, the column names on
    line 4 and the data from line 5 onwards. "Aslaid" columns are preferred;
    "Preplot" columns are used when no "Aslaid Easting" column exists. If the
    required columns cannot be found, fixed column indices are used instead.

    The file is parsed with the ``csv`` module so quoted fields containing
    commas do not shift the columns.

    Args:
        csv_path: Path of the positions CSV.

    Returns:
        Mapping node code -> {'easting', 'northing', 'depth'}. Rows without a
        node code, with unparsable numbers, or with a missing/zero easting or
        northing are skipped. A missing or blank depth is stored as 0.0.
    """
    positions: Dict[str, Dict[str, Any]] = {}

    # newline='' is the mode the csv module expects for its input files.
    with open(csv_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
        rows = list(csv.reader(f))

    # Line 4 (index 3) holds the real headers; data starts on line 5.
    if len(rows) < 5:
        return positions
    headers = [name.strip() for name in rows[3]]

    # Locate the relevant columns.
    try:
        node_code_idx = headers.index('NodeCode')
        # Aslaid positions first, otherwise Preplot positions.
        if 'Aslaid Easting' in headers:
            easting_idx = headers.index('Aslaid Easting')
            northing_idx = headers.index('Aslaid Northing')
            depth_idx = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None
            print("Utilisation des coordonnées Aslaid (positions réelles)")
        else:
            easting_idx = headers.index('Preplot Easting')
            northing_idx = headers.index('Preplot Northing')
            depth_idx = headers.index('Preplot Depth') if 'Preplot Depth' in headers else None
            print("Utilisation des coordonnées Preplot (positions planifiées)")
    except ValueError as e:
        print(f"Colonne manquante dans le CSV: {e}")
        # Fall back on fixed column indices (Aslaid)
        node_code_idx = 3
        easting_idx = 9   # Aslaid Easting
        northing_idx = 10  # Aslaid Northing
        depth_idx = 11  # Aslaid Depth

    last_required_idx = max(node_code_idx, easting_idx, northing_idx)
    for parts in rows[4:]:
        if len(parts) <= last_required_idx:
            continue
        node_code = parts[node_code_idx].strip()
        if not node_code:
            continue

        easting_text = parts[easting_idx].strip()
        northing_text = parts[northing_idx].strip()
        # "is not None" so a depth column at index 0 is still honoured; a row
        # too short to reach the depth column simply gets the default depth.
        has_depth = depth_idx is not None and depth_idx < len(parts)
        depth_text = parts[depth_idx].strip() if has_depth else ''

        try:
            easting = float(easting_text) if easting_text else None
            northing = float(northing_text) if northing_text else None
            depth = float(depth_text) if depth_text else 0.0
        except ValueError:
            continue

        if easting and northing:
            positions[node_code] = {
                'easting': easting,
                'northing': northing,
                'depth': depth,
            }

    print(f"Chargé {len(positions)} positions de nodes")
    return positions
def scan_h5_files(data_roots: List[Path]) -> Dict[str, Any]:
    """Walk the data folders recursively and index every recognised H5 file.

    Returns a nested dict: node_id -> date ('YYYY-MM-DD') -> list of file
    entries ('path', 'timestamp', 'channels', 'size_bytes'). Files whose name
    matches neither the main pattern nor the simpler fallback are ignored.
    """
    index = {}
    file_count = 0
    for root in data_roots:
        if not root.exists():
            print(f"Dossier non trouvé: {root}")
            continue
        print(f"Scan de {root}...")
        for h5_file in root.rglob("*.h5"):
            strict = FILENAME_PATTERN.search(h5_file.name)
            if strict:
                node_id, raw_timestamp = strict.group(3), strict.group(4)
            else:
                # Fall back on a simpler pattern
                loose = re.search(r'_b(\d+)_.*?(\d{10})\.h5$', h5_file.name, re.IGNORECASE)
                if not loose:
                    continue
                node_id, raw_timestamp = loose.group(1), loose.group(2)
            timestamp = int(raw_timestamp)

            # The date bucket is derived from the timestamp
            date_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')

            # Channels are not detected from the file: ch0-ch3 is assumed for now
            channels = ['ch0', 'ch1', 'ch2', 'ch3']

            # Structure: node_id -> date -> list of files
            per_date = index.setdefault(node_id, {})
            per_date.setdefault(date_str, []).append({
                'path': str(h5_file),
                'timestamp': timestamp,
                'channels': channels,
                'size_bytes': h5_file.stat().st_size if h5_file.exists() else 0
            })
            file_count += 1
    print(f"Indexé {file_count} fichiers H5")
    return index
def build_full_index(positions: Dict, files_index: Dict) -> Dict[str, Any]:
    """
    Merge node positions and the per-node file index into one document.

    Every node id present in either input gets an entry with its ``id``, its
    ``position`` (``None`` when unknown) and its ``dates`` mapping (empty
    when the node has no files).  The top-level ``dates`` key is the sorted
    list of all dates that appear in ``files_index``.
    """
    stamp = datetime.now().isoformat()
    seen_dates = set()
    merged_nodes = {}

    for node_id in set(files_index.keys()) | set(positions.keys()):
        per_date = files_index[node_id] if node_id in files_index else {}
        if node_id in files_index:
            seen_dates.update(per_date.keys())
        merged_nodes[node_id] = {
            'id': node_id,
            'position': positions.get(node_id, None),
            'dates': per_date,
        }

    return {
        'generated_at': stamp,
        'sample_rate_hz': 200,
        'nodes': merged_nodes,
        # The set is emitted as a sorted list so it can be serialised.
        'dates': sorted(seen_dates),
    }
def main():
    """Build the seismic HDF5 index and write it to ``OUTPUT_INDEX`` as JSON.

    Runs the four pipeline stages in order: load node positions, scan the
    data roots for H5 files, merge both into the full index, then save it.
    """
    print("=== Indexation des fichiers HDF5 sismiques ===\n")

    # Stage 1: node positions
    print("1. Chargement des positions des nodes...")
    node_positions = load_node_positions(POSITIONS_CSV)

    # Stage 2: H5 file scan
    print("\n2. Scan des fichiers H5...")
    h5_index = scan_h5_files(DATA_ROOTS)

    # Stage 3: merge
    print("\n3. Construction de l'index...")
    document = build_full_index(node_positions, h5_index)

    # Stage 4: save (the target folder is created when missing)
    print(f"\n4. Sauvegarde vers {OUTPUT_INDEX}...")
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w', encoding='utf-8') as out:
        json.dump(document, out, indent=2, ensure_ascii=False)

    node_total = len(document['nodes'])
    date_total = len(document['dates'])
    print(f"\nTerminé! Index généré avec {node_total} nodes et {date_total} dates.")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,87 @@
import os, re, json, h5py
from pathlib import Path
from datetime import datetime
from tqdm import tqdm
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")
SAMPLE_RATE = 200
def load_pos(csv_path=None):
    """Load node positions from the survey CSV.

    The column header is read from the fourth line of the file and node rows
    from the fifth line onward.  As-laid coordinates are preferred; the
    pre-plot columns are used when the as-laid ones are absent.  Depth is
    0.0 when the file has no ``Aslaid Depth`` column.

    Args:
        csv_path: Optional path of the CSV to read; defaults to
            ``POSITIONS_CSV``.

    Returns:
        ``{node_code: {'easting', 'northing', 'depth'}}``.  An empty dict is
        returned when the file is missing, has fewer than five lines, or
        lacks a required column.  Rows that are too short or hold
        non-numeric values are skipped.
    """
    source = POSITIONS_CSV if csv_path is None else Path(csv_path)
    if not source.exists():
        return {}
    with open(source, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()
    if len(lines) < 5:
        return {}

    headers = lines[3].strip().split(',')
    try:
        ni = headers.index('NodeCode')
        ei = headers.index('Aslaid Easting') if 'Aslaid Easting' in headers else headers.index('Preplot Easting')
        oi = headers.index('Aslaid Northing') if 'Aslaid Northing' in headers else headers.index('Preplot Northing')
    except ValueError:
        # A required column is missing from the header line.
        return {}
    di = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else -1

    positions = {}
    for line in lines[4:]:
        parts = line.strip().split(',')
        try:
            nid = parts[ni].strip()
            positions[nid] = {
                'easting': float(parts[ei]),
                'northing': float(parts[oi]),
                'depth': float(parts[di]) if di != -1 else 0.0
            }
        except (ValueError, IndexError):
            # Short row or non-numeric field: skip this node only.
            continue
    return positions
def scan():
    """Index every HDF5 file under ``DATA_ROOTS`` and write the JSON index.

    For each ``*.h5`` file whose name contains ``_b<node>_``, the
    ``adc_values`` dataset is opened to read its ``timestamp`` attribute and
    its length; the end time is the start plus ``length / SAMPLE_RATE``.
    Files without the dataset or with a zero/missing timestamp are ignored.
    Files that cannot be read are reported and skipped, so one bad file does
    not abort the scan.

    The result is written to ``OUTPUT_INDEX`` (its parent folder is created
    when missing).
    """
    pos = load_pos()
    index = {}
    file_count = 0
    print(f"Scanning H5 files... Positions loaded: {len(pos)}")

    all_files = []
    for root in DATA_ROOTS:
        all_files.extend(root.rglob("*.h5"))

    for h5_path in tqdm(all_files):
        match = re.search(r'_b(\d+)_', h5_path.name)
        if not match:
            continue
        nid = match.group(1)

        try:
            with h5py.File(h5_path, 'r') as f:
                if 'adc_values' not in f:
                    continue
                ds = f['adc_values']
                start_ts = int(ds.attrs.get('timestamp', 0))
                sample_total = ds.shape[0]
        except Exception as exc:
            # Best-effort scan: report the unreadable file instead of hiding it.
            tqdm.write(f"Skipping {h5_path}: {exc}")
            continue

        if start_ts == 0:
            continue
        end_ts = start_ts + sample_total / SAMPLE_RATE

        if nid not in index:
            index[nid] = {
                'id': nid,
                'position': pos.get(nid),
                'files': []
            }
        index[nid]['files'].append({
            'path': str(h5_path),
            'start': start_ts,
            'end': end_ts,
            'channels': ['ch0', 'ch1', 'ch2', 'ch3']
        })
        file_count += 1

    # Save the index
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w') as f:
        json.dump({
            'generated_at': datetime.now().isoformat(),
            'sample_rate_hz': SAMPLE_RATE,
            'nodes': index
        }, f)

    with_pos = sum(1 for node in index.values() if node['position'] is not None)
    print(f"Index généré: {file_count} fichiers, {len(index)} nodes ({with_pos} avec positions).")


if __name__ == '__main__':
    scan()

105
scripts/index_ultimate.py Normal file
View File

@@ -0,0 +1,105 @@
import os, re, json, h5py
from pathlib import Path
from datetime import datetime, timedelta
from tqdm import tqdm
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")
SAMPLE_RATE = 200
def load_pos(csv_path=None):
    """Load node positions from the survey CSV.

    The column header is read from the fourth line of the file and node rows
    from the fifth line onward.  As-laid coordinates are preferred; the
    pre-plot columns are used when the as-laid ones are absent.  Depth is
    0.0 when the file has no ``Aslaid Depth`` column.

    Args:
        csv_path: Optional path of the CSV to read; defaults to
            ``POSITIONS_CSV``.

    Returns:
        ``{node_code: {'easting', 'northing', 'depth'}}``.  An empty dict is
        returned when the file is missing, has fewer than five lines, or
        lacks a required column.  Rows that are too short or hold
        non-numeric values are skipped.
    """
    source = POSITIONS_CSV if csv_path is None else Path(csv_path)
    if not source.exists():
        return {}
    with open(source, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()
    if len(lines) < 5:
        return {}

    headers = lines[3].strip().split(',')
    try:
        ni = headers.index('NodeCode')
        ei = headers.index('Aslaid Easting') if 'Aslaid Easting' in headers else headers.index('Preplot Easting')
        oi = headers.index('Aslaid Northing') if 'Aslaid Northing' in headers else headers.index('Preplot Northing')
    except ValueError:
        # A required column is missing from the header line.
        return {}
    # Resolved once here rather than with headers.index() on every row.
    depth_idx = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None

    positions = {}
    for line in lines[4:]:
        parts = line.strip().split(',')
        try:
            nid = parts[ni].strip()
            positions[nid] = {
                'easting': float(parts[ei]),
                'northing': float(parts[oi]),
                'depth': float(parts[depth_idx]) if depth_idx is not None else 0.0
            }
        except (ValueError, IndexError):
            # Short row or non-numeric field: skip this node only.
            continue
    return positions
def scan():
    """Index the ``_data_`` HDF5 files under ``DATA_ROOTS`` into a JSON file.

    Only files whose name contains ``_data_`` (case-insensitive) and matches
    ``auto_<day>_<HHMMSS>_b<node>_..._<10 digits>.h5`` are kept.  The start
    time is derived from the day-of-year and time fields of the file name;
    the end time adds the ``adc_values`` length divided by ``SAMPLE_RATE``.
    The channel comes from the ``_ch<N>_`` part of the name (``ch0`` when
    absent).  Unreadable files are reported and skipped.

    The index is written to ``OUTPUT_INDEX`` (its parent folder is created
    when missing).
    """
    pos = load_pos()
    nodes = {}
    all_dates = set()
    file_count = 0
    print("🔍 Scanning ONLY 'data' H5 files (ignoring 'aux')...")

    all_h5_files = []
    for root in DATA_ROOTS:
        all_h5_files.extend(root.rglob("*.h5"))

    # Compiled once instead of on every file.
    name_re = re.compile(r'auto_(\d+)_(\d{6})_b(\d+)_.*?_(\d{10})\.h5$')
    channel_re = re.compile(r'_ch(\d+)_')

    # NOTE(review): the day-of-year is resolved against a hard-coded 2020, and
    # the epoch value assumes 86400-second days counted from local midnight
    # on Jan 1 - confirm this matches how the recorders name their files.
    year_start = datetime(2020, 1, 1)
    year_start_ts = year_start.timestamp()

    for h5_path in tqdm(all_h5_files):
        name = h5_path.name
        # Filter: keep only files whose name contains "data"
        if "_data_" not in name.lower():
            continue
        match = name_re.search(name)
        if not match:
            continue

        julian_day = int(match.group(1))
        time_str = match.group(2)
        node_id = match.group(3)

        try:
            date_str = (year_start + timedelta(days=julian_day - 1)).strftime('%Y-%m-%d')
            h, m, s = int(time_str[:2]), int(time_str[2:4]), int(time_str[4:6])
            actual_start_ts = int(year_start_ts + (julian_day - 1) * 86400 + h * 3600 + m * 60 + s)

            with h5py.File(h5_path, 'r') as f:
                if 'adc_values' not in f:
                    continue
                duration = f['adc_values'].shape[0] / SAMPLE_RATE
        except Exception as exc:
            # Best-effort scan: report the problem file instead of hiding it.
            tqdm.write(f"Skipping {h5_path}: {exc}")
            continue

        actual_end_ts = actual_start_ts + duration
        all_dates.add(date_str)

        if node_id not in nodes:
            nodes[node_id] = {
                'id': node_id,
                'position': pos.get(node_id),
                'files': []
            }

        # The channel is taken from the file name for more precise matching.
        channel_match = channel_re.search(name)
        channel = f"ch{channel_match.group(1)}" if channel_match else "ch0"

        nodes[node_id]['files'].append({
            'path': str(h5_path),
            'start': actual_start_ts,
            'end': actual_end_ts,
            'julian': julian_day,
            'channel': channel  # channel specific to this file
        })
        file_count += 1

    result = {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': SAMPLE_RATE,
        'nodes': nodes,
        'dates': sorted(all_dates)
    }

    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w') as f:
        json.dump(result, f, indent=2)

    print(f"✅ Index updated: {file_count} 'data' files, {len(nodes)} nodes.")


if __name__ == '__main__':
    scan()

158
scripts/inventory_h5.py Executable file
View File

@@ -0,0 +1,158 @@
"""
Script d'inventaire des fichiers HDF5.
Extrait les timestamps des noms de fichiers et génère un rapport.
"""
import os
import re
from pathlib import Path
from datetime import datetime
from collections import defaultdict
# Root folders to scan
DATA_ROOTS = [
    Path(r"F:\2020-09-12"),
    Path(r"F:\2020-09-13"),
    Path(r"F:\2020-09-14"),
    Path(r"F:\2020-09-15"),
    Path(r"F:\2020-09-16"),
    Path(r"F:\2020-09-17"),
    Path(r"F:\2020-09-18"),
    Path(r"F:\2020-09-19"),
    Path(r"F:\2020-09-21"),
    Path(r"F:\2020-09-22"),
    Path(r"F:\2020-09-23"),
]
# Pattern extracting the node id (digits after "_b") and the 10-digit
# timestamp that precedes the ".h5" extension.
# Example: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
PATTERN = re.compile(r'_b(\d+)_.*?(\d{10})\.h5$', re.IGNORECASE)
def main():
    """Scan ``DATA_ROOTS`` for HDF5 files and print an inventory report.

    Files are matched with ``PATTERN`` to get the node id and timestamp; the
    type (data/aux/unknown) and channel are read from the file name.  The
    report has a per-folder summary, global totals, the covered time range,
    the 30 nodes with the most files, and the number of distinct timestamps
    per calendar day.  Nothing is written to disk.
    """
    print("=" * 70)
    print("INVENTAIRE DES FICHIERS HDF5")
    print("=" * 70)

    # Structure: folder -> node_id -> list of per-file records
    inventory = defaultdict(lambda: defaultdict(list))

    # Global statistics
    total_files = 0
    total_size = 0
    nodes_set = set()
    timestamps_set = set()

    for root in DATA_ROOTS:
        if not root.exists():
            continue
        folder_name = root.name

        for h5_file in root.rglob("*.h5"):
            match = PATTERN.search(h5_file.name)
            if not match:
                continue
            node_id = match.group(1)
            timestamp = int(match.group(2))

            # File type (data or aux)
            file_type = "data" if "_data_" in h5_file.name else "aux" if "_aux_" in h5_file.name else "unknown"

            # Channel, when present in the name
            ch_match = re.search(r'_ch(\d+)_', h5_file.name)
            channel = f"ch{ch_match.group(1)}" if ch_match else "?"

            file_size = h5_file.stat().st_size

            inventory[folder_name][node_id].append({
                'timestamp': timestamp,
                'datetime': datetime.fromtimestamp(timestamp),
                'type': file_type,
                'channel': channel,
                'filename': h5_file.name,
                'size': file_size
            })
            total_files += 1
            total_size += file_size
            nodes_set.add(node_id)
            timestamps_set.add(timestamp)

    # Per-folder report
    print(f"\n{'DOSSIER':<15} {'NODES':<10} {'FICHIERS':<10} {'TAILLE':<15}")
    print("-" * 50)
    for folder in sorted(inventory.keys()):
        folder_data = inventory[folder]
        n_nodes = len(folder_data)
        n_files = sum(len(files) for files in folder_data.values())
        folder_size = sum(f['size'] for files in folder_data.values() for f in files)
        print(f"{folder:<15} {n_nodes:<10} {n_files:<10} {folder_size / 1e9:.2f} GB")

    # Global statistics
    print("\n" + "=" * 70)
    print("STATISTIQUES GLOBALES")
    print("=" * 70)
    print(f"Fichiers H5 totaux: {total_files}")
    print(f"Taille totale: {total_size / 1e9:.2f} GB")
    print(f"Nodes uniques: {len(nodes_set)}")

    # Time range
    if timestamps_set:
        min_ts = min(timestamps_set)
        max_ts = max(timestamps_set)
        print(f"\nPlage temporelle des données:")
        print(f" Début: {datetime.fromtimestamp(min_ts)} (timestamp: {min_ts})")
        print(f" Fin: {datetime.fromtimestamp(max_ts)} (timestamp: {max_ts})")

    # Per-node detail (the 30 nodes with the most files)
    print("\n" + "=" * 70)
    print("DETAIL PAR NODE (nodes avec le plus de fichiers)")
    print("=" * 70)

    # Aggregate per node across folders
    node_stats = defaultdict(lambda: {'files': 0, 'size': 0, 'timestamps': set(), 'folders': set()})
    for folder, folder_data in inventory.items():
        for node_id, files in folder_data.items():
            node_stats[node_id]['files'] += len(files)
            node_stats[node_id]['size'] += sum(f['size'] for f in files)
            node_stats[node_id]['timestamps'].update(f['timestamp'] for f in files)
            node_stats[node_id]['folders'].add(folder)

    # Sort by file count, largest first
    sorted_nodes = sorted(node_stats.items(), key=lambda x: x[1]['files'], reverse=True)

    print(f"\n{'NODE':<8} {'FICHIERS':<10} {'TAILLE':<12} {'DATES':<25} {'DOSSIERS'}")
    print("-" * 90)
    for node_id, stats in sorted_nodes[:30]:
        ts_list = sorted(stats['timestamps'])
        if ts_list:
            date_range = f"{datetime.fromtimestamp(ts_list[0]).strftime('%Y-%m-%d %H:%M')} -> {datetime.fromtimestamp(ts_list[-1]).strftime('%H:%M')}"
        else:
            date_range = "N/A"
        folders = ", ".join(sorted(stats['folders']))
        print(f"b{node_id:<7} {stats['files']:<10} {stats['size']/1e6:.1f} MB {date_range:<25} {folders}")

    # Distinct days
    print("\n" + "=" * 70)
    print("JOURS DE DONNEES DISPONIBLES (basé sur timestamps)")
    print("=" * 70)

    # One pass over the timestamps: each is formatted once and counted
    # under its day, instead of rescanning the whole set for every day.
    per_day = defaultdict(int)
    for ts in timestamps_set:
        per_day[datetime.fromtimestamp(ts).strftime('%Y-%m-%d')] += 1

    for day in sorted(per_day):
        print(f" {day}: ~{per_day[day]} timestamps uniques")


if __name__ == '__main__':
    main()

45
scripts/migrate_all.py Normal file
View File

@@ -0,0 +1,45 @@
import json, psycopg2, os
from pathlib import Path
from migrate_to_db import migrate_file
INDEX_PATH = "/mnt/kingston/seismic_webapp/data/index.json"
# The connection string can be overridden with the SEISMIC_DB_URL environment
# variable so credentials do not have to live in the source; the historical
# value stays as the default so existing deployments keep working.
DB_URL = os.environ.get("SEISMIC_DB_URL", "postgresql://postgres:seismic_pass@db:5432/seismic_data")
def update_status(processed, total, current):
    """Record migration progress in row ``id = 1`` of ``migration_status``.

    Args:
        processed: Number of files handled so far.
        total: Total number of files to migrate.
        current: Label of the file being processed.

    Progress reporting is best-effort: any database error is printed and
    swallowed so that it never interrupts the migration itself.
    """
    try:
        conn = psycopg2.connect(DB_URL)
        try:
            cur = conn.cursor()
            cur.execute("UPDATE migration_status SET processed_files = %s, total_files = %s, current_file = %s, last_update = NOW() WHERE id = 1", (processed, total, current))
            conn.commit()
            cur.close()
        finally:
            # Always release the connection, even when the UPDATE fails.
            conn.close()
    except Exception as e:
        print(f"Status update error: {e}")
def main():
    """Migrate every ``_data_`` file listed in the JSON index to the database.

    The index is read from ``INDEX_PATH``.  Progress is published through
    ``update_status`` before each file; a failure on one file is printed and
    the run continues with the next one.
    """
    with open(INDEX_PATH, 'r') as fh:
        index = json.load(fh)

    pending = [
        (nid, entry)
        for nid, node in index.get('nodes', {}).items()
        for entry in node.get('files', [])
        if '_data_' in entry['path']
    ]
    total = len(pending)
    print(f"Starting migration for {total} files...")

    for done, (nid, entry) in enumerate(pending):
        filename = os.path.basename(entry['path'])
        update_status(done, total, filename)
        try:
            # Migrate one hour from each file
            migrate_file(entry['path'], nid, entry.get('channel', 'ch0'), duration_sec=3600)
        except Exception as e:
            print(f"Error migrating {filename}: {e}")

    update_status(total, total, "Terminé")


if __name__ == "__main__":
    main()

53
scripts/migrate_to_db.py Normal file
View File

@@ -0,0 +1,53 @@
import h5py
import numpy as np
import psycopg2
from psycopg2.extras import execute_values
from datetime import datetime, timezone, timedelta
import os
from tqdm import tqdm
# The connection string can be overridden with the SEISMIC_DB_URL environment
# variable so credentials do not have to live in the source; the historical
# value stays as the default so existing deployments keep working.
DB_URL = os.environ.get("SEISMIC_DB_URL", "postgresql://postgres:seismic_pass@db:5432/seismic_data")
def fix_path(p):
    """Translate a Windows-style data path to its Linux mount point.

    Backslashes become forward slashes, then a leading ``F:/`` is replaced
    by ``/mnt/kingston/`` and a leading ``E:/`` by ``/mnt/data_sdb1/``.
    Any other path is returned with only the slashes normalised.
    """
    normalized = p.replace('\\', '/')
    for drive, mount in (('F:/', '/mnt/kingston/'), ('E:/', '/mnt/data_sdb1/')):
        if normalized.startswith(drive):
            return mount + normalized[len(drive):]
    return normalized
def migrate_file(h5_path, node_id, channel, start_offset_sec=0, duration_sec=3600, sample_rate=200):
    """Copy a time slice of one HDF5 file into the ``adc_samples`` table.

    Args:
        h5_path: Path of the HDF5 file; passed through ``fix_path`` first.
        node_id: Node identifier stored with every sample.
        channel: Channel label stored with every sample.
        start_offset_sec: Offset, in seconds from the start of the file, of
            the first sample to migrate.
        duration_sec: Length of the slice in seconds.
        sample_rate: Samples per second used to turn seconds into sample
            indices and sample indices into timestamps (default 200).

    Each sample is timestamped from the ``timestamp`` attribute of the
    ``adc_values`` dataset and inserted in batches of 10000 rows.  All rows
    are committed at the end; on error nothing is committed and the
    connection is closed before the exception propagates.
    """
    h5_path = fix_path(h5_path)

    # Slice bounds are forced to int so fractional offsets remain usable.
    start_idx = int(start_offset_sec * sample_rate)
    end_idx = start_idx + int(duration_sec * sample_rate)

    # Read the file before touching the database, so that a missing or
    # corrupt file never leaves a connection open.
    with h5py.File(h5_path, 'r') as f:
        ds = f['adc_values']
        start_ts = int(ds.attrs['timestamp'])
        data = ds[start_idx:end_idx]

    # Actual start time of the slice
    actual_start = start_ts + start_offset_sec
    print(f"Migrating {len(data)} samples...")

    batch_size = 10000
    conn = psycopg2.connect(DB_URL)
    try:
        cur = conn.cursor()
        for i in range(0, len(data), batch_size):
            batch = data[i:i + batch_size]
            values = [
                (
                    datetime.fromtimestamp(actual_start + (i + j) / sample_rate, tz=timezone.utc),
                    node_id,
                    channel,
                    float(val),
                )
                for j, val in enumerate(batch)
            ]
            execute_values(cur, "INSERT INTO adc_samples (time, node_id, channel, value) VALUES %s", values)
        conn.commit()
        cur.close()
    finally:
        conn.close()
    print("Done.")


if __name__ == "__main__":
    # Manual run on a single file: migrates its first 10 minutes.
    import sys
    if len(sys.argv) < 4:
        sys.exit(f"Usage: {sys.argv[0]} H5_PATH NODE_ID CHANNEL")
    migrate_file(sys.argv[1], sys.argv[2], sys.argv[3], duration_sec=600)

62
scripts/precompute_all.py Normal file
View File

@@ -0,0 +1,62 @@
import json, sys, os, numpy as np, h5py, re
from pathlib import Path
from datetime import datetime, timedelta
from tqdm import tqdm
INDEX_PATH = Path("/mnt/kingston/seismic_webapp/data/index.json")
OUTPUT_DIR = Path("/mnt/kingston/seismic_webapp/data/rms_cache")
SAMPLE_RATE = 200
def fix_path(p):
    """Map a Windows drive path onto the matching Linux mount.

    Backslashes are turned into forward slashes; paths starting with
    ``F:/`` are re-rooted under ``/mnt/kingston/`` and those starting with
    ``E:/`` under ``/mnt/data_sdb1/``.  Other paths only get their slashes
    normalised.
    """
    unix_style = p.replace('\\', '/')
    mount = {'F:/': '/mnt/kingston/', 'E:/': '/mnt/data_sdb1/'}.get(unix_style[:3])
    if mount is None:
        return unix_style
    return mount + unix_style[3:]
def compute_rms(h5_path):
    """Compute one RMS value from the first 5000 samples of an HDF5 file.

    The start time is derived from the ``auto_<day>_<HHMMSS>_b`` part of the
    file name, with the day-of-year resolved against 2020.

    Returns:
        ``[{'ts': start_ts, 'rms': value}]``, or ``None`` when the file does
        not exist, its name does not match, it cannot be read, or its
        ``adc_values`` dataset is empty.
    """
    h5_path = fix_path(h5_path)
    if not os.path.exists(h5_path):
        return None

    # Take the start time from the file name (day-of-year + HHMMSS).
    match = re.search(r'auto_(\d+)_(\d{6})_b', os.path.basename(h5_path))
    if not match:
        return None
    julian, time_str = int(match.group(1)), match.group(2)
    h, m, s = int(time_str[:2]), int(time_str[2:4]), int(time_str[4:6])

    try:
        start_ts = int(datetime(2020, 1, 1).timestamp() + (julian - 1) * 86400 + h * 3600 + m * 60 + s)
        with h5py.File(h5_path, 'r') as f:
            samples = f['adc_values'][0:5000]
        if len(samples) == 0:
            # The mean of an empty slice is NaN, which json.dump would emit
            # as the non-standard token NaN; treat it as "no data".
            return None
        rms = float(np.sqrt(np.mean(samples.astype(np.float64) ** 2)))
    except Exception as exc:
        # Best-effort: report the unreadable file and carry on.
        print(f"RMS skipped for {h5_path}: {exc}")
        return None
    return [{'ts': start_ts, 'rms': rms}]
def main():
    """Pre-compute one RMS value per node and per date for channel ``ch0``.

    For each date of the index, the file picked for a node is the first one
    whose ``julian`` field matches that date's day-of-year and whose path
    contains ``_ch0_``; if there is none, the first file of that day is used.
    Results are written to ``OUTPUT_DIR / rms_<date>_<channel>.json``, one
    file per date; dates without any result produce no file.
    """
    with open(INDEX_PATH, 'r') as f:
        index = json.load(f)
    nodes = index.get('nodes', {})
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    channel = "ch0"
    channel_tag = f'_{channel}_'

    for date in index.get('dates', []):
        # Single braces: the date and channel must be interpolated, otherwise
        # every date would overwrite the same literal file name.
        output_file = OUTPUT_DIR / f"rms_{date}_{channel}.json"
        print(f"Processing {date}...")

        # Day-of-year of the date, computed once per date.
        target_julian = datetime.strptime(date, '%Y-%m-%d').timetuple().tm_yday

        results = {}
        for nid, node in tqdm(nodes.items(), desc=f"Nodes {date}"):
            # Keep the files of this day; entries without 'julian' are ignored.
            day_files = [entry for entry in node.get('files', []) if entry.get('julian') == target_julian]
            target = next((entry for entry in day_files if channel_tag in entry['path']), None)
            if target is None and day_files:
                target = day_files[0]

            if target:
                data = compute_rms(target['path'])
                if data:
                    results[nid] = data

        if results:
            with open(output_file, 'w') as f:
                json.dump({'date': date, 'channel': channel, 'nodes': results}, f)


if __name__ == '__main__':
    main()

189
scripts/precompute_rms.py Executable file
View File

@@ -0,0 +1,189 @@
"""
Pré-calcul des valeurs RMS ADC pour tous les nodes.
Génère un fichier JSON avec les RMS à intervalles réguliers pour une lecture rapide.
"""
import json
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any
import numpy as np
import h5py
from tqdm import tqdm
# Configuration
SAMPLE_RATE = 200 # Hz
RMS_INTERVAL_SEC = 60 # Compute one RMS value every 60 seconds (faster)
RMS_WINDOW_SEC = 5 # RMS window length (5 seconds = 1000 samples)
INDEX_PATH = Path(r"F:\seismic_webapp\data\index.json")
OUTPUT_DIR = Path(r"F:\seismic_webapp\data\rms_cache")
def compute_rms_for_file(h5_path: str, interval_sec: int = RMS_INTERVAL_SEC, window_sec: int = RMS_WINDOW_SEC, max_duration_sec: int = 3600) -> List[Dict]:
    """
    Compute RMS values at regular intervals for one HDF5 file.

    Args:
        h5_path: Path of the HDF5 file holding an ``adc_values`` dataset.
        interval_sec: Spacing, in seconds, between two RMS windows.
        window_sec: Length, in seconds, of each RMS window.
        max_duration_sec: Only this many seconds from the start of the file
            are processed, to keep the run fast.

    Returns:
        A list of ``{'ts': epoch_seconds, 'rms': value}``.  The list is empty
        when the dataset or its ``timestamp`` attribute is missing; on a read
        error the problem is printed and the values gathered so far are
        returned.
    """
    results = []
    try:
        with h5py.File(h5_path, 'r') as f:
            if 'adc_values' not in f:
                return results
            dataset = f['adc_values']
            total_samples = dataset.shape[0]

            # Start timestamp of the recording
            start_ts = None
            if 'timestamp' in dataset.attrs:
                start_ts = int(dataset.attrs['timestamp'])
            if start_ts is None:
                return results

            window_samples = window_sec * SAMPLE_RATE
            interval_samples = interval_sec * SAMPLE_RATE

            # Cap the processed duration to keep the run fast
            max_samples = min(total_samples, max_duration_sec * SAMPLE_RATE)

            # "+ 1" keeps the last full window: a window starting at
            # max_samples - window_samples ends exactly on the last usable
            # sample and was previously dropped (a file exactly one window
            # long produced no value at all).
            for idx in range(0, max_samples - window_samples + 1, interval_samples):
                # Read only the window that is needed
                samples = dataset[idx:idx + window_samples]
                rms = float(np.sqrt(np.mean(samples.astype(np.float64) ** 2)))
                # Timestamp of this point
                ts = start_ts + (idx // SAMPLE_RATE)
                results.append({
                    'ts': ts,
                    'rms': rms
                })
    except Exception as e:
        print(f"Erreur lecture {h5_path}: {e}")
    return results
def precompute_for_date(index: Dict, date: str, channel: str = 'ch0') -> Dict[str, List[Dict]]:
    """
    Pre-compute the RMS series of every node that has files for ``date``.

    For each node the file chosen is the first one whose path contains
    ``_<channel>_`` and ``_data_``; failing that, the first one that
    contains ``_<channel>_``.  Nodes with no such file, or whose file yields
    no RMS value, are left out.

    Returns ``{node_id: [{'ts': ..., 'rms': ...}, ...]}``.
    """
    # Nodes that have an entry for this date, with their file lists
    candidates = [
        (node_id, node['dates'][date])
        for node_id, node in index['nodes'].items()
        if node.get('dates') and date in node['dates']
    ]
    print(f"Traitement de {len(candidates)} nodes pour {date}, canal {channel}")

    tag = f'_{channel}_'
    rms_by_node = {}
    for node_id, files in tqdm(candidates, desc=f"Date {date}"):
        # "data" files take priority over any other file of the channel
        chosen = next((f for f in files if tag in f['path'] and '_data_' in f['path']), None)
        if not chosen:
            chosen = next((f for f in files if tag in f['path']), None)
        if not chosen:
            continue

        series = compute_rms_for_file(chosen['path'])
        if series:
            rms_by_node[node_id] = series
    return rms_by_node
def main():
    """Command-line entry point: pre-compute RMS caches and save them as JSON.

    ``--date`` (with ``--channel``) processes one date, ``--all`` processes
    every date of the index on channels ch0-ch3, and with neither option the
    first date of the index is processed on ch0.  An output file that
    already exists is skipped.  Exits with status 1 when the index file is
    missing.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Pré-calcul des RMS ADC')
    cli.add_argument('--date', help='Date spécifique (ex: 2020-09-02)')
    cli.add_argument('--channel', default='ch0', help='Canal (ch0-ch3)')
    cli.add_argument('--all', action='store_true', help='Traiter toutes les dates/canaux')
    opts = cli.parse_args()

    # Load the index
    if not INDEX_PATH.exists():
        print(f"Index non trouvé: {INDEX_PATH}")
        sys.exit(1)
    with open(INDEX_PATH, 'r') as fh:
        index = json.load(fh)
    print(f"Index chargé: {len(index['nodes'])} nodes, {len(index['dates'])} dates")

    # Make sure the output folder exists
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Work out what to process
    if opts.date:
        selected_dates, selected_channels = [opts.date], [opts.channel]
    elif opts.all:
        selected_dates, selected_channels = index['dates'], ['ch0', 'ch1', 'ch2', 'ch3']
    else:
        # Default: first available date, channel ch0
        selected_dates, selected_channels = index['dates'][:1], ['ch0']

    for date in selected_dates:
        for channel in selected_channels:
            target = OUTPUT_DIR / f"rms_{date}_{channel}.json"

            # Skip what has already been computed
            if target.exists():
                print(f"Skip {target.name} (déjà existant)")
                continue

            print(f"\n=== Traitement {date} - {channel} ===")
            results = precompute_for_date(index, date, channel)
            if not results:
                print(f"Aucune donnée pour {date} - {channel}")
                continue

            payload = {
                'date': date,
                'channel': channel,
                'interval_sec': RMS_INTERVAL_SEC,
                'window_sec': RMS_WINDOW_SEC,
                'nodes': results,
                'generated_at': datetime.now().isoformat()
            }
            with open(target, 'w') as out:
                json.dump(payload, out)
            print(f"Sauvegardé: {target.name} ({len(results)} nodes)")

    print("\n=== Terminé ===")


if __name__ == '__main__':
    main()

83
scripts/rebuild_h5_db.py Executable file
View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Rebuild H5 metadata database for the seismic viewer."""
import os
import re
import sqlite3
from datetime import datetime
H5_ROOTS = [
'/mnt/data_sdb1',
'/mnt/kingston'
]
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'
FILE_PATTERN = re.compile(r'b(\d+)_.*_ch(\d+)')
SCHEMA = [
'CREATE TABLE IF NOT EXISTS positions (node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, sample_count INTEGER, last_seen TEXT)',
'CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, FOREIGN KEY(node_code) REFERENCES positions(node_code))',
'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)'
]
def rebuild_db():
    """Rebuild the SQLite metadata database from the H5 files on disk.

    The schema is created when missing, then both tables are emptied and
    refilled: one ``files`` row per ``.h5`` file whose name matches
    ``FILE_PATTERN``, and one ``positions`` row per node code summarising
    whether data/aux files were seen, how many files, and the latest
    modification time (stored as a naive UTC ISO string).

    Everything is committed once at the end; if anything fails the
    connection is closed without committing.
    """
    # Local import: the module header only imports `datetime` itself.
    from datetime import timezone

    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        for stmt in SCHEMA:
            cur.execute(stmt)
        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        files_counter = 0
        summary = {}
        for root in H5_ROOTS:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if not filename.endswith('.h5'):
                        continue
                    filepath = os.path.join(dirpath, filename)
                    match = FILE_PATTERN.search(filename)
                    if not match:
                        continue
                    node_code = int(match.group(1))
                    channel = int(match.group(2))
                    dataset = 'aux' if 'aux' in filename else 'data'
                    stat = os.stat(filepath)
                    mtime = int(stat.st_mtime)
                    size = stat.st_size

                    node_summary = summary.setdefault(node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0})
                    node_summary['count'] += 1
                    node_summary['last'] = max(node_summary['last'], mtime)
                    if dataset == 'data':
                        node_summary['data'] = True
                    else:
                        node_summary['aux'] = True

                    cur.execute(
                        'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                        (filepath, node_code, channel, dataset, size, mtime)
                    )
                    files_counter += 1
        print(f"Indexed {files_counter} H5 files")

        for node_code, stats in summary.items():
            has_data = 1 if stats['data'] else 0
            has_aux = 1 if stats['aux'] else 0
            # Same text as the deprecated datetime.utcfromtimestamp():
            # UTC wall time without a timezone suffix.
            last_seen = datetime.fromtimestamp(stats['last'], tz=timezone.utc).replace(tzinfo=None).isoformat()
            cur.execute(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen) VALUES (?, ?, ?, ?, ?)',
                (node_code, has_data, has_aux, stats['count'], last_seen)
            )
        conn.commit()
    finally:
        # Release the database file even when the scan or an insert fails.
        conn.close()
    print(f"Rebuilt DB at {DB_PATH} with {len(summary)} positions")


if __name__ == '__main__':
    rebuild_db()

104
scripts/rebuild_h5_db_v2.py Executable file
View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""Rebuild H5 metadata database - V2 (capture ALL patterns)."""
import os
import re
import sqlite3
from datetime import datetime
H5_ROOTS = ['/mnt/data_sdb1', '/mnt/kingston']
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'
# More permissive pattern: matches any "b" followed by digits
FILE_PATTERN = re.compile(r'b(\d+)')
# Channel number: "ch" followed by digits
CHANNEL_PATTERN = re.compile(r'ch(\d+)')
SCHEMA = [
'CREATE TABLE IF NOT EXISTS positions (node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, sample_count INTEGER, last_seen TEXT)',
'CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, FOREIGN KEY(node_code) REFERENCES positions(node_code))',
'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)'
]
def rebuild_db(planned_positions=205):
    """Rebuild the SQLite metadata database from the H5 files on disk (V2).

    Every ``.h5`` file whose name matches ``FILE_PATTERN`` is recorded; the
    channel is -1 when the name has no ``ch<N>`` part.  Both tables are
    emptied and refilled, then committed once.  If anything fails, the
    connection is closed without committing.

    Args:
        planned_positions: Number of planned positions used for the coverage
            percentage printed at the end (default 205).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        for stmt in SCHEMA:
            cur.execute(stmt)
        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        files_counter = 0
        summary = {}
        for root in H5_ROOTS:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if not filename.endswith('.h5'):
                        continue
                    filepath = os.path.join(dirpath, filename)

                    # Node code
                    node_match = FILE_PATTERN.search(filename)
                    if not node_match:
                        continue
                    node_code = int(node_match.group(1))

                    # Channel (may be absent from the name)
                    channel_match = CHANNEL_PATTERN.search(filename)
                    channel = int(channel_match.group(1)) if channel_match else -1

                    # Dataset kind (data vs aux)
                    dataset = 'aux' if 'aux' in filename else 'data'

                    stat = os.stat(filepath)
                    mtime = int(stat.st_mtime)
                    size = stat.st_size

                    # Update the per-node summary
                    node_summary = summary.setdefault(node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0})
                    node_summary['count'] += 1
                    node_summary['last'] = max(node_summary['last'], mtime)
                    if dataset == 'data':
                        node_summary['data'] = True
                    else:
                        node_summary['aux'] = True

                    # Insert the file row
                    cur.execute(
                        'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                        (filepath, node_code, channel, dataset, size, mtime)
                    )
                    files_counter += 1
        print(f"✓ Indexed {files_counter} H5 files")

        # Insert the positions
        for node_code, stats in summary.items():
            has_data = 1 if stats['data'] else 0
            has_aux = 1 if stats['aux'] else 0
            last_seen = datetime.fromtimestamp(stats['last']).isoformat()
            cur.execute(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen) VALUES (?, ?, ?, ?, ?)',
                (node_code, has_data, has_aux, stats['count'], last_seen)
            )
        conn.commit()
    finally:
        # Release the database file even when the scan or an insert fails.
        conn.close()

    # Final statistics
    total_positions = len(summary)
    with_data = sum(1 for s in summary.values() if s['data'])
    with_aux = sum(1 for s in summary.values() if s['aux'])
    coverage = with_data / planned_positions * 100 if planned_positions else 0.0
    print(f"✓ Rebuilt DB: {total_positions} positions total")
    print(f" • With data files: {with_data}")
    print(f" • With aux files: {with_aux}")
    print(f" • Both: {sum(1 for s in summary.values() if s['data'] and s['aux'])}")
    print(f" • Coverage: {coverage:.1f}% (assuming {planned_positions} planned)")


if __name__ == '__main__':
    rebuild_db()

137
scripts/rebuild_h5_db_v3.py Executable file
View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""Rebuild H5 metadata database - V3 (include expected positions from CSV)."""
import os
import re
import csv
import sqlite3
from datetime import datetime
H5_ROOTS = ['/mnt/data_sdb1', '/mnt/kingston']
CSV_PATH = '/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv'
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'
FILE_PATTERN = re.compile(r'b(\d+)')
CHANNEL_PATTERN = re.compile(r'ch(\d+)')
SCHEMA = [
'CREATE TABLE IF NOT EXISTS positions (node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, sample_count INTEGER, last_seen TEXT, expected BOOLEAN)',
'CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, FOREIGN KEY(node_code) REFERENCES positions(node_code))',
'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)'
]
def rebuild_db():
    """Rebuild the SQLite metadata database (V3: with expected positions).

    Steps:
      1. Load the expected node codes from the ``NodeCode`` column of
         ``CSV_PATH``.  The header row is located by searching for the
         ``NodeCode`` cell, so preamble lines before it are tolerated.  A
         missing or unreadable CSV is reported and the run continues with
         the file scan only.
      2. Scan ``H5_ROOTS`` and record every ``.h5`` file whose name matches
         ``FILE_PATTERN``.
      3. Write one ``positions`` row per node that is expected and/or found.

    A ``positions`` table that predates the ``expected`` column is upgraded
    in place.  Everything is committed once; if anything fails the
    connection is closed without committing.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        for stmt in SCHEMA:
            cur.execute(stmt)

        # CREATE TABLE IF NOT EXISTS leaves an existing table untouched, so a
        # database created without the `expected` column must be upgraded
        # before the inserts below can succeed.
        cur.execute('PRAGMA table_info(positions)')
        if 'expected' not in {row[1] for row in cur.fetchall()}:
            cur.execute('ALTER TABLE positions ADD COLUMN expected BOOLEAN')

        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        # 1. Load the expected positions from the CSV
        expected_nodes = set()
        try:
            with open(CSV_PATH, 'r', encoding='utf-8-sig', newline='') as f:
                node_col = None
                for row in csv.reader(f):
                    cells = [cell.strip() for cell in row]
                    if node_col is None:
                        # Still looking for the header row.
                        if 'NodeCode' in cells:
                            node_col = cells.index('NodeCode')
                        continue
                    if node_col < len(cells) and cells[node_col].isdigit():
                        expected_nodes.add(int(cells[node_col]))
            print(f"✓ Loaded {len(expected_nodes)} expected positions from CSV")
        except (OSError, ValueError, csv.Error) as e:
            print(f"⚠ CSV not found or error: {e}")
            print(" Continuing with file scan only...")

        # 2. Scan the H5 files
        files_counter = 0
        found_nodes = {}
        for root in H5_ROOTS:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if not filename.endswith('.h5'):
                        continue
                    filepath = os.path.join(dirpath, filename)
                    node_match = FILE_PATTERN.search(filename)
                    if not node_match:
                        continue
                    node_code = int(node_match.group(1))
                    channel_match = CHANNEL_PATTERN.search(filename)
                    channel = int(channel_match.group(1)) if channel_match else -1
                    dataset = 'aux' if 'aux' in filename else 'data'
                    stat = os.stat(filepath)
                    mtime = int(stat.st_mtime)
                    size = stat.st_size

                    node_summary = found_nodes.setdefault(node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0})
                    node_summary['count'] += 1
                    node_summary['last'] = max(node_summary['last'], mtime)
                    if dataset == 'data':
                        node_summary['data'] = True
                    else:
                        node_summary['aux'] = True

                    cur.execute(
                        'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                        (filepath, node_code, channel, dataset, size, mtime)
                    )
                    files_counter += 1
        print(f"✓ Indexed {files_counter} H5 files")
        print(f"✓ Found {len(found_nodes)} positions with data")

        # 3. One row for EVERY position (expected + found)
        all_nodes = expected_nodes | set(found_nodes.keys())
        for node_code in all_nodes:
            is_expected = node_code in expected_nodes
            if node_code in found_nodes:
                stats = found_nodes[node_code]
                has_data = 1 if stats['data'] else 0
                has_aux = 1 if stats['aux'] else 0
                last_seen = datetime.fromtimestamp(stats['last']).isoformat()
                sample_count = stats['count']
            else:
                # Expected position without any file
                has_data = 0
                has_aux = 0
                last_seen = None
                sample_count = 0
            cur.execute(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen, expected) VALUES (?, ?, ?, ?, ?, ?)',
                (node_code, has_data, has_aux, sample_count, last_seen, 1 if is_expected else 0)
            )
        conn.commit()

        # Final statistics
        cur.execute('SELECT COUNT(*) FROM positions')
        total = cur.fetchone()[0]
        cur.execute('SELECT COUNT(*) FROM positions WHERE has_data = 1')
        with_data = cur.fetchone()[0]
        cur.execute('SELECT COUNT(*) FROM positions WHERE expected = 1')
        expected_count = cur.fetchone()[0]
        cur.execute('SELECT COUNT(*) FROM positions WHERE expected = 1 AND has_data = 0')
        missing = cur.fetchone()[0]
        print(f"\n📊 Database Summary:")
        print(f" • Total positions in DB: {total}")
        print(f" • Expected (from CSV): {expected_count}")
        print(f" • With H5 data: {with_data}")
        print(f" • Missing (expected but no data): {missing}")
        print(f" • Coverage: {(with_data/expected_count*100 if expected_count else 0):.1f}%")
    finally:
        # Release the database file even when a step fails.
        conn.close()


if __name__ == '__main__':
    rebuild_db()

114
scripts/rebuild_index.py Executable file
View File

@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""
Reconstruit l'index.json de la webapp à partir de l'inventaire complet.
Prend en compte tous les fichiers HDF5 sur tous les disques.
"""
import json
from pathlib import Path
from datetime import datetime
from collections import defaultdict
def main(inv_path=None, idx_path=None):
    """Rebuild the webapp index JSON from the full HDF5 inventory.

    Node positions are carried over from the existing index, while the
    per-date file lists are rebuilt from scratch from the inventory. The
    previous index is re-serialized to ``<index>.json.bak`` before the
    index file is overwritten.

    Args:
        inv_path: Path of the inventory JSON, a list of records exposing
            the ``bumper_id``, ``epoch_time``, ``channel`` and ``filepath``
            keys. Defaults to the script's original hard-coded location.
        idx_path: Path of the index JSON that is read and then overwritten.
            Defaults to the script's original hard-coded location.

    Raises:
        OSError: If either input file cannot be read or an output file
            cannot be written.
        KeyError: If an inventory record lacks one of the expected keys.
    """
    inv_path = Path(
        r'F:\seismic_webapp\inventory.json' if inv_path is None else inv_path
    )
    idx_path = Path(
        r'F:\seismic_webapp\data\index.json' if idx_path is None else idx_path
    )

    # Load the inventory (context managers close the handles promptly).
    with open(inv_path, encoding='utf-8') as fh:
        inv = json.load(fh)
    print(f"Inventaire charge: {len(inv)} fichiers")

    # Load the existing index so the known node positions can be kept.
    with open(idx_path, encoding='utf-8') as fh:
        old_idx = json.load(fh)
    print(f"Index existant: {len(old_idx.get('nodes', {}))} nodes")

    # Start the new index with the existing positions and empty file lists.
    nodes = {
        node_id: {
            'position': node_data.get('position'),
            'dates': {},
            'hasDates': False,
        }
        for node_id, node_data in old_idx.get('nodes', {}).items()
    }

    # Paths already indexed per (node, date): O(1) duplicate detection
    # instead of rebuilding a list of paths for every inventory record.
    seen_paths = defaultdict(set)

    for entry in inv:
        bumper_id = entry['bumper_id']
        if not bumper_id:
            continue

        # Create the node if the old index did not know it.
        # NOTE(review): assumes bumper_id has the same type as the index's
        # node ids (JSON object keys, i.e. strings) -- confirm against the
        # inventory producer.
        if bumper_id not in nodes:
            nodes[bumper_id] = {
                'position': None,
                'dates': {},
                'hasDates': False,
            }

        # Records without a timestamp cannot be filed under a date.
        epoch_time = entry['epoch_time']
        if not epoch_time:
            continue
        # Local-time calendar day of the recording.
        date_str = datetime.fromtimestamp(epoch_time).strftime('%Y-%m-%d')
        day_files = nodes[bumper_id]['dates'].setdefault(date_str, [])

        channel = entry['channel']
        filepath = entry['filepath']

        # Skip files already listed for this node and date.
        already_seen = seen_paths[bumper_id, date_str]
        if filepath in already_seen:
            continue
        already_seen.add(filepath)

        day_files.append({
            'path': filepath,
            'timestamp': epoch_time,
            'channels': [channel] if channel else [],
            'size_bytes': 0,  # The inventory does not provide the size.
        })

    # Flag the nodes that ended up with at least one date.
    for node_data in nodes.values():
        node_data['hasDates'] = bool(node_data['dates'])

    # Summary statistics.
    nodes_with_data = sum(1 for n in nodes.values() if n['hasDates'])
    total_files = sum(
        len(files)
        for n in nodes.values()
        for files in n['dates'].values()
    )
    print(f"\nNouvel index:")
    print(f" Nodes total: {len(nodes)}")
    print(f" Nodes avec donnees: {nodes_with_data}")
    print(f" Fichiers indexes: {total_files}")

    new_idx = {
        'nodes': nodes,
        'sampleRateHz': old_idx.get('sampleRateHz', 200),
        'generated': datetime.now().isoformat(),
    }

    # Back up the previous index before overwriting it.
    backup_path = idx_path.with_suffix('.json.bak')
    with open(backup_path, 'w', encoding='utf-8') as fh:
        json.dump(old_idx, fh)
    print(f"\nBackup sauvegarde: {backup_path}")

    # Write the new index.
    with open(idx_path, 'w', encoding='utf-8') as fh:
        json.dump(new_idx, fh, indent=2)
    print(f"Nouvel index sauvegarde: {idx_path}")


if __name__ == '__main__':
    main()

39
scripts/show_stats.py Executable file
View File

@@ -0,0 +1,39 @@
import json
from collections import defaultdict
# Print a per-channel summary of the HDF5 inventory: number of data files,
# number of auxiliary files and number of distinct bumpers, followed by the
# global totals and a short list of files that carry a read error.

# Load the inventory; the context manager closes the handle promptly.
with open(r'F:\seismic_webapp\inventory.json', encoding='utf-8') as inventory_file:
    d = json.load(inventory_file)

by_channel = defaultdict(lambda: {'data': 0, 'aux': 0, 'bumpers': set()})
for f in d:
    ch = f['channel'] or 'unknown'
    # Any file type other than 'data' is counted as auxiliary.
    if f['file_type'] == 'data':
        by_channel[ch]['data'] += 1
    else:
        by_channel[ch]['aux'] += 1
    if f['bumper_id']:
        by_channel[ch]['bumpers'].add(f['bumper_id'])


def _channel_sort_key(channel):
    """Sort 'chN' names by their number, then any other name alphabetically."""
    name = str(channel)
    number = name[2:]
    if name.startswith('ch') and number.isdecimal():
        return (0, int(number), name)
    return (1, 0, name)


print('=== RESUME PAR CANAL ===')
print('Canal DATA AUX Bumpers')
print('-' * 35)
# List every channel present in the inventory so the rows always add up to
# the TOTAL line (a fixed channel list would silently drop unexpected ones).
for ch in sorted(by_channel, key=_channel_sort_key):
    s = by_channel[ch]
    print(f'{ch:8} {s["data"]:4} {s["aux"]:4} {len(s["bumpers"]):3}')

# Global statistics
total_data = sum(s['data'] for s in by_channel.values())
total_aux = sum(s['aux'] for s in by_channel.values())
all_bumpers = set()
for s in by_channel.values():
    all_bumpers.update(s['bumpers'])
print('-' * 35)
print(f'TOTAL {total_data:4} {total_aux:4} {len(all_bumpers):3}')

# Files whose inventory record carries a truthy 'error' value.
errors = [f for f in d if f['error']]
print(f'\nErreurs de lecture: {len(errors)} fichiers')
if errors:
    # Only show the first few file names, truncated to 50 characters.
    for e in errors[:5]:
        print(f' - {e["filename"][:50]}...')

31
scripts/test_hdf5.py Executable file
View File

@@ -0,0 +1,31 @@
import h5py
import numpy as np
# Manual inspection of one HDF5 recording: list its datasets, then dump the
# shape, dtype, attributes and basic statistics of the 'adc_values' dataset.

# Test file
filepath = r'F:\2020-09-22\data\auto_266_143513_b29_13_213605_data_rsn2648_seq1_ch0_1599039547.h5'
with h5py.File(filepath, 'r') as f:
    print("=== Structure du fichier ===")
    print("Datasets:", list(f.keys()))
    if 'adc_values' in f:
        d = f['adc_values']
        print("\n=== Dataset adc_values ===")
        print("Shape:", d.shape)
        print("Dtype:", d.dtype)
        print("\n=== Attributs du dataset ===")
        for k, v in d.attrs.items():
            print(f" {k}: {v}")
        # Load a sample (at most the first 2000 entries along the first axis).
        sample = d[:2000]
        print("\n=== Statistiques (premiers 2000 samples) ===")
        print("Min:", np.min(sample))
        print("Max:", np.max(sample))
        print("Mean:", np.mean(sample))
        print("Std:", np.std(sample))
        # Square in float64: squaring in the dataset's own dtype would wrap
        # around silently if the samples are stored as fixed-width integers.
        print("RMS:", np.sqrt(np.mean(np.square(sample.astype(np.float64)))))
        print("\n=== Premiers 20 valeurs ===")
        print(sample[:20])