Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage
This commit is contained in:
BIN
scripts/__pycache__/migrate_to_db.cpython-311.pyc
Normal file
BIN
scripts/__pycache__/migrate_to_db.cpython-311.pyc
Normal file
Binary file not shown.
22
scripts/check_node29.py
Executable file
22
scripts/check_node29.py
Executable file
@@ -0,0 +1,22 @@
|
||||
import json

# Diagnostic script: prints what the index knows about node "29", then lists
# every node that has at least one date with data.

# Read the index inside a context manager so the file handle is closed
# deterministically; JSON is decoded as UTF-8 regardless of the platform
# default encoding.
with open(r'F:\seismic_webapp\data\index.json', encoding='utf-8') as index_file:
    data = json.load(index_file)

node29 = data['nodes'].get('29')
if node29:
    print("Node 29:")
    print(f" Position: {node29.get('position')}")
    print(f" Dates disponibles: {list(node29.get('dates', {}).keys())}")
    for date, files in node29.get('dates', {}).items():
        print(f" {date}: {len(files)} fichiers")
        # Only the first two files of each date are shown to keep the output short.
        for f in files[:2]:
            print(f" - {f['path']}")
else:
    print("Node 29 non trouvé dans l'index")

print("\n--- Tous les nodes avec données ---")
for node_id, node in data['nodes'].items():
    # A node is listed only when its 'dates' mapping exists and is non-empty.
    if node.get('dates'):
        has_pos = node.get('position') is not None
        dates = list(node['dates'].keys())
        print(f"Node {node_id}: pos={has_pos}, dates={dates}")
|
||||
22
scripts/check_positions.py
Executable file
22
scripts/check_positions.py
Executable file
@@ -0,0 +1,22 @@
|
||||
import json

# Diagnostic script: reports which nodes with data have a usable position.


def has_valid_position(pos):
    """Return True when *pos* has a truthy 'easting' and a truthy 'northing'.

    The test is truthiness-based, so a missing key, ``None`` and a coordinate
    equal to 0 are all treated as "no position".
    """
    return bool(pos and pos.get('easting') and pos.get('northing'))


# Read the index inside a context manager so the handle is always closed.
with open(r'F:\seismic_webapp\data\index.json', encoding='utf-8') as index_file:
    data = json.load(index_file)

# Nodes whose 'dates' mapping exists and is non-empty.
nodes_with_data = [n for n in data['nodes'].values() if n.get('dates')]
print(f'Nodes avec donnees: {len(nodes_with_data)}')

print('\n--- Nodes avec donnees et leurs positions ---')
for n in nodes_with_data[:10]:
    pos = n.get('position')
    # Print a real boolean rather than the raw northing value / None.
    has_pos = has_valid_position(pos)
    print(f"Node {n['id']}: hasPos={has_pos}, pos={pos}")

print('\n--- Nodes avec donnees SANS position valide ---')
no_pos_count = 0
for n in nodes_with_data:
    pos = n.get('position')
    if not has_valid_position(pos):
        print(f"Node {n['id']}: pos={pos}")
        no_pos_count += 1

print(f'\nTotal nodes sans position valide: {no_pos_count}')
|
||||
35
scripts/debug_inventory.py
Executable file
35
scripts/debug_inventory.py
Executable file
@@ -0,0 +1,35 @@
|
||||
import json
from collections import defaultdict

# Diagnostic script: prints samples of the generated inventory, the unique
# bumper ids, suspiciously large files and per-bumper statistics.

# Sample rate used to convert a sample count into hours in the report below.
SAMPLE_RATE_HZ = 200


def bumper_sort_key(bumper_id):
    """Sort numeric bumper ids numerically; anything else goes last (999)."""
    return int(bumper_id) if bumper_id and bumper_id.isdigit() else 999


# The inventory is written as UTF-8 with ensure_ascii=False, so it must be
# read back as UTF-8; the context manager also closes the handle.
with open(r'F:\seismic_webapp\inventory.json', encoding='utf-8') as inventory_file:
    d = json.load(inventory_file)

# Show a few file entries
print("=== EXEMPLES DE FICHIERS ===")
for f in d[:5]:
    print(f"File: {f['filename']}")
    print(f" Bumper: {f['bumper_id']}, Channel: {f['channel']}")
    print(f" Samples: {f['samples']}, Epoch: {f['epoch_time']}")
    print()

# Count the unique bumpers
bumpers = {f['bumper_id'] for f in d if f['bumper_id']}
print(f"Bumpers uniques: {len(bumpers)}")
print(f"Liste: {sorted(bumpers, key=bumper_sort_key)[:30]}")

# Look for files with an implausibly large sample count
print("\n=== FICHIERS AVEC GROS SAMPLES ===")
big_files = [f for f in d if f['samples'] > 100000000]
for f in big_files[:5]:
    hours = f['samples'] / SAMPLE_RATE_HZ / 3600
    print(f" {f['filename']}: {f['samples']} samples = {hours:.1f}h")

# Per-bumper statistics: file count and set of channels seen
by_bumper = defaultdict(lambda: {'files': 0, 'channels': set()})
for f in d:
    if f['bumper_id']:
        by_bumper[f['bumper_id']]['files'] += 1
        if f['channel']:
            by_bumper[f['bumper_id']]['channels'].add(f['channel'])

print("\n=== PAR BUMPER (premiers 20) ===")
for b in sorted(by_bumper, key=bumper_sort_key)[:20]:
    s = by_bumper[b]
    print(f" b{b}: {s['files']} files, channels: {sorted(s['channels'])}")
|
||||
71
scripts/extract_h5_calibrated.py
Executable file
71
scripts/extract_h5_calibrated.py
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script d'extraction de données H5 calibrées (format 2026).
|
||||
Lit calibrated_data/channel_X (valeurs physiques avec unités).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import h5py
|
||||
import numpy as np
|
||||
|
||||
def extract_window(file_path: str, channel: int, start_ts: int, duration_sec: int) -> dict:
    """Read a window of calibrated samples from an H5 file.

    Args:
        file_path: Path of the H5 file to open.
        channel: Channel number; the dataset read is
            ``calibrated_data/channel_<channel>``.
        start_ts: Offset in seconds from the start of the file. Values <= 0
            mean "from the first sample".
        duration_sec: Window length in seconds. Values <= 0 mean "up to the
            end of the file".

    Returns:
        A JSON-serialisable dict with the samples, the index range, file
        metadata and basic statistics. When the requested window contains no
        sample, ``samples`` is empty and every statistic is ``None``. On any
        failure the dict is ``{"error": <message>}``.
    """
    try:
        with h5py.File(file_path, 'r') as f:
            # File-level metadata stored as attributes of the 'metadata' object
            meta = f['metadata']
            sample_rate = meta.attrs['sample_rate_hz']
            file_duration = meta.attrs['duration_sec']
            total_samples = meta.attrs['n_samples']

            # Calibrated dataset for the requested channel
            dataset = f[f'calibrated_data/channel_{channel}']

            # Index range of the window, clipped to the end of the file
            start_idx = int(start_ts * sample_rate) if start_ts > 0 else 0
            num_samples = int(duration_sec * sample_rate) if duration_sec > 0 else total_samples
            end_idx = min(start_idx + num_samples, total_samples)

            # Partial read: only the requested slice is loaded
            samples = dataset[start_idx:end_idx]

            # Unit and label depend on the channel: 1-3 are reported as
            # geophones, anything else as the hydrophone.
            is_geophone = channel in [1, 2, 3]
            unit = 'm/s' if is_geophone else 'Pa'
            channel_name = f'Geophone {channel}' if is_geophone else 'Hydrophone'

            if len(samples) > 0:
                stats = {
                    "min": float(np.min(samples)),
                    "max": float(np.max(samples)),
                    "mean": float(np.mean(samples)),
                    "std": float(np.std(samples)),
                    "rms": float(np.sqrt(np.mean(samples**2))),
                }
            else:
                # numpy reductions raise on empty input; report an empty
                # window explicitly instead of a cryptic numpy error.
                stats = {"min": None, "max": None, "mean": None, "std": None, "rms": None}

            return {
                "samples": samples.tolist(),
                "start_idx": int(start_idx),
                "end_idx": int(end_idx),
                "total_samples": int(total_samples),
                "sample_rate": int(sample_rate),
                "duration_sec": float(file_duration),
                "channel": channel,
                "channel_name": channel_name,
                "unit": unit,
                "stats": stats,
                "source": "calibrated_h5_2026",
            }
    except Exception as e:
        # Top-level boundary: the caller expects JSON, so every failure is
        # reported as an error payload rather than a traceback.
        return {"error": str(e)}
|
||||
|
||||
def main():
    """Command-line entry point: parse the options, extract, print JSON."""
    cli = argparse.ArgumentParser(description='Extraction H5 calibré')
    option_specs = (
        ('--file', {'required': True, 'help': 'Fichier H5'}),
        ('--channel', {'type': int, 'required': True, 'help': 'Canal 1-4'}),
        ('--start', {'type': int, 'default': 0, 'help': 'Offset secondes (0=début)'}),
        ('--duration', {'type': int, 'default': 0, 'help': 'Durée secondes (0=tout)'}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    opts = cli.parse_args()

    # The result (or error payload) is written to stdout as a single JSON document.
    payload = extract_window(opts.file, opts.channel, opts.start, opts.duration)
    print(json.dumps(payload))


if __name__ == '__main__':
    main()
|
||||
132
scripts/extract_hdf5_window.py
Executable file
132
scripts/extract_hdf5_window.py
Executable file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Script d'extraction de fenêtres de données HDF5.
|
||||
Appelé par le backend Node.js pour lire des portions de données ADC
|
||||
sans charger tout le fichier en mémoire.
|
||||
|
||||
Usage:
|
||||
python extract_hdf5_window.py --file <path> --channel <ch0-ch3> --start <timestamp> --duration <seconds>
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import h5py
|
||||
import numpy as np
|
||||
except ImportError as e:
|
||||
print(json.dumps({"error": f"Module manquant: {e}"}))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
SAMPLE_RATE = 200 # Hz
|
||||
|
||||
|
||||
def _read_file_start_ts(dataset, h5_file):
    """Return the start timestamp stored in the HDF5 attributes, or None.

    The dataset attributes are checked before the file attributes; within
    each, 'timestamp' takes precedence over 'start_time'.
    """
    for attrs in (dataset.attrs, h5_file.attrs):
        for key in ('timestamp', 'start_time'):
            if key in attrs:
                return int(attrs[key])
    return None


def _window_stats(samples_array):
    """Compute min/max/mean/rms of the window; all None for an empty window."""
    if len(samples_array) == 0:
        return {"min": None, "max": None, "mean": None, "rms": None}
    # Square in float64: if the dataset holds fixed-width integers
    # (presumably the case for raw ADC counts - TODO confirm), squaring in
    # the native dtype can silently wrap around and corrupt the RMS.
    as_float = samples_array.astype(np.float64)
    return {
        "min": float(np.min(samples_array)),
        "max": float(np.max(samples_array)),
        "mean": float(np.mean(samples_array)),
        "rms": float(np.sqrt(np.mean(as_float ** 2))),
    }


def extract_window(file_path: str, channel: str, start_ts: int, duration_sec: int) -> dict:
    """Extract a window of ADC samples from an HDF5 file.

    Args:
        file_path: Path of the H5 file.
        channel: Channel label (ch0, ch1, ch2, ch3). It is only echoed back
            in the result; the data always comes from the 'adc_values'
            dataset of the given file.
        start_ts: Window start as a timestamp in seconds. It is compared with
            the start timestamp found in the file attributes; when the file
            carries none, the window starts at the first sample.
        duration_sec: Window length in seconds. Negative values are treated
            as 0 and the window is capped at 60 seconds.

    Returns:
        dict with the samples, the index range, the file start timestamp, the
        channel and basic statistics, or ``{"error": <message>}`` on failure.
    """
    file_path = Path(file_path)

    if not file_path.exists():
        return {"error": f"Fichier non trouvé: {file_path}"}

    try:
        with h5py.File(file_path, 'r') as f:
            # Each HDF5 file holds a single 'adc_values' dataset; the channel
            # is identified by the file name, not by an internal path.
            if 'adc_values' not in f:
                # List the datasets that do exist to help debugging
                available = []

                def visit(name, obj):
                    if isinstance(obj, h5py.Dataset):
                        available.append(name)

                f.visititems(visit)
                return {"error": f"Dataset 'adc_values' non trouvé. Disponibles: {available}"}

            dataset = f['adc_values']
            file_start_ts = _read_file_start_ts(dataset, f)
            total_samples = dataset.shape[0]

            if file_start_ts is not None:
                # Offset relative to the start of the file (never negative)
                offset_sec = max(0, start_ts - file_start_ts)
                start_idx = int(offset_sec * SAMPLE_RATE)
            else:
                # No time information: read from the beginning
                start_idx = 0

            # A negative duration would give a negative slice end, which
            # counts from the end of the dataset and bypasses the payload
            # cap, so it is clamped to an empty window instead.
            num_samples = max(0, int(duration_sec * SAMPLE_RATE))
            # Cap the payload at 60 seconds of data
            num_samples = min(num_samples, 60 * SAMPLE_RATE)
            end_idx = min(start_idx + num_samples, total_samples)

            # Partial read: only the requested slice is loaded in memory
            samples = dataset[start_idx:end_idx]
            samples_array = samples if isinstance(samples, np.ndarray) else np.array(samples)

            return {
                "samples": samples.tolist() if isinstance(samples, np.ndarray) else samples,
                "start_idx": start_idx,
                "end_idx": end_idx,
                "total_samples": total_samples,
                "file_start_ts": file_start_ts,
                "channel": channel,
                "stats": _window_stats(samples_array),
            }

    except Exception as e:
        # Top-level boundary: the caller parses stdout as JSON, so failures
        # are reported as an error payload.
        return {"error": str(e)}
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse the options, extract, print JSON."""
    cli = argparse.ArgumentParser(description='Extraction de fenêtre HDF5')
    option_specs = (
        ('--file', {'required': True, 'help': 'Chemin du fichier H5'}),
        ('--channel', {'required': True, 'help': 'Canal (ch0-ch3)'}),
        ('--start', {'type': int, 'required': True, 'help': 'Timestamp de début'}),
        ('--duration', {'type': int, 'default': 10, 'help': 'Durée en secondes'}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    opts = cli.parse_args()

    payload = extract_window(opts.file, opts.channel, opts.start, opts.duration)

    # JSON on stdout is the contract with the Node.js backend
    print(json.dumps(payload))


if __name__ == '__main__':
    main()
|
||||
479
scripts/generate_inventory.py
Executable file
479
scripts/generate_inventory.py
Executable file
@@ -0,0 +1,479 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script pour générer un inventaire HTML de tous les fichiers HDF5.
|
||||
Affiche: numéro de bumper, canal, date/heure début, date/heure fin, durée, nombre d'échantillons.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import h5py
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# Configuration
|
||||
SAMPLE_RATE = 200 # Hz
|
||||
DATA_DIRS = [
|
||||
r"F:\2020-09-11",
|
||||
r"E:\2020-09-11",
|
||||
r"E:\2020-09-14",
|
||||
]
|
||||
|
||||
def parse_filename(filename):
    """
    Extract bumper id, channel, epoch time and file type from an HDF5 file name.

    Supported formats:
    - auto_260_061316_b0_13_212626_data_rsn84614_seq1_ch0_1598976585.h5 (bumper = 13)
    - auto_255_061140_b119_12_230609_data_rsn5725_seq1_ch0_1599065292.h5 (bumper = 119)

    Every field that cannot be found is None, except 'file_type' which
    falls back to 'unknown'.
    """
    # Bumper id: try the "_b0_XX_" form first, then the plain "_bXXX_" form.
    bumper_id = None
    for pattern in (r'_b0_(\d+)_', r'_b(\d+)_'):
        found = re.search(pattern, filename)
        if found:
            bumper_id = found.group(1)
            break

    # Channel token such as ch0, ch1, ..., ch15
    channel = None
    found = re.search(r'_(ch\d+)_', filename)
    if found:
        channel = found.group(1)

    # Epoch time: the ten digits right before the ".h5" suffix
    epoch_time = None
    found = re.search(r'_(\d{10})\.h5$', filename)
    if found:
        epoch_time = int(found.group(1))

    # File type marker (data or aux)
    if '_data_' in filename:
        file_type = 'data'
    elif '_aux_' in filename:
        file_type = 'aux'
    else:
        file_type = 'unknown'

    return {
        'bumper_id': bumper_id,
        'channel': channel,
        'epoch_time': epoch_time,
        'file_type': file_type,
    }
|
||||
|
||||
def get_hdf5_info(filepath):
    """
    Open the HDF5 file and report the length of its 'adc_values' dataset.

    Returns a dict with 'samples' (first dimension of the dataset, 0 on
    failure) and 'error' (None on success, otherwise a message).
    """
    try:
        with h5py.File(filepath, 'r') as h5:
            if 'adc_values' not in h5:
                # Report the top-level names that are present instead
                datasets = list(h5.keys())
                return {'samples': 0, 'error': f'No adc_values, found: {datasets}'}
            return {'samples': h5['adc_values'].shape[0], 'error': None}
    except Exception as exc:
        return {'samples': 0, 'error': str(exc)}
|
||||
|
||||
def format_datetime(epoch_time):
    """Return the timestamp as 'YYYY-MM-DD HH:MM:SS', or "N/A" when it is falsy."""
    if epoch_time:
        # datetime.fromtimestamp() without tz yields local time
        return datetime.fromtimestamp(epoch_time).strftime('%Y-%m-%d %H:%M:%S')
    return "N/A"
|
||||
|
||||
def format_duration(seconds):
    """Render a duration in seconds as 'Xh Ym Zs', 'Ym Zs' or 'Zs'."""
    whole_hours = int(seconds // 3600)
    whole_minutes = int((seconds % 3600) // 60)
    leftover = int(seconds % 60)

    # Seconds are always shown; minutes and hours are prepended only when
    # the larger units are present.
    pieces = [f"{leftover}s"]
    if whole_hours > 0 or whole_minutes > 0:
        pieces.insert(0, f"{whole_minutes}m")
    if whole_hours > 0:
        pieces.insert(0, f"{whole_hours}h")
    return " ".join(pieces)
|
||||
|
||||
def scan_directory(data_dir):
    """Return the ``*.h5`` files found directly in ``<data_dir>/data``.

    When that sub-directory does not exist a message is printed and an
    empty list is returned.
    """
    hdf5_dir = Path(data_dir) / 'data'
    if hdf5_dir.exists():
        return list(hdf5_dir.glob('*.h5'))

    print(f" Directory not found: {hdf5_dir}")
    return []
|
||||
|
||||
# Stylesheet of the inventory page (plain string: braces are literal CSS).
_INVENTORY_CSS = """
        * { box-sizing: border-box; }
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background: #0a0a1a;
            color: #eee;
            margin: 0;
            padding: 20px;
        }
        h1 {
            color: #4ade80;
            border-bottom: 2px solid #4ade80;
            padding-bottom: 10px;
        }
        h2 {
            color: #e94560;
            margin-top: 30px;
        }
        h3 {
            color: #fbbf24;
            margin-top: 20px;
        }
        .stats {
            background: #16213e;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 30px;
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
        }
        .stat-box {
            background: #0f3460;
            padding: 15px;
            border-radius: 6px;
            text-align: center;
        }
        .stat-value {
            font-size: 2rem;
            font-weight: bold;
            color: #4ade80;
        }
        .stat-label {
            color: #888;
            font-size: 0.9rem;
        }
        .channel-summary {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 10px;
            margin-bottom: 30px;
        }
        .channel-box {
            background: #16213e;
            padding: 15px;
            border-radius: 6px;
            text-align: center;
        }
        .channel-box h4 {
            margin: 0 0 10px 0;
            color: #4ade80;
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin-bottom: 20px;
            font-size: 0.9rem;
        }
        th, td {
            padding: 10px;
            text-align: left;
            border-bottom: 1px solid #1a1a2e;
        }
        th {
            background: #16213e;
            color: #4ade80;
            position: sticky;
            top: 0;
        }
        tr:hover {
            background: #16213e;
        }
        .ch0 { color: #4ade80; }
        .ch1 { color: #60a5fa; }
        .ch2 { color: #fbbf24; }
        .ch3 { color: #f472b6; }
        .data { color: #4ade80; }
        .aux { color: #888; }
        .error { color: #e94560; font-size: 0.8rem; }
        .bumper-section {
            background: #0f3460;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
        }
        .filter-controls {
            background: #16213e;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            display: flex;
            gap: 20px;
            flex-wrap: wrap;
        }
        .filter-controls label {
            display: flex;
            align-items: center;
            gap: 8px;
            cursor: pointer;
        }
        input[type="checkbox"] {
            width: 18px;
            height: 18px;
        }
        .summary-table {
            width: auto;
            margin: 0 auto;
        }
        .summary-table td {
            padding: 5px 15px;
        }
"""

# Static middle part of the page: end of the channel summary, filter
# checkboxes and the table header.
_INVENTORY_TABLE_HEAD = """
    </div>

    <h2>📋 Détail par Bumper</h2>

    <div class="filter-controls">
        <label><input type="checkbox" id="showCh0" checked onchange="filterTable()"> <span class="ch0">CH0</span></label>
        <label><input type="checkbox" id="showCh1" checked onchange="filterTable()"> <span class="ch1">CH1</span></label>
        <label><input type="checkbox" id="showCh2" checked onchange="filterTable()"> <span class="ch2">CH2</span></label>
        <label><input type="checkbox" id="showCh3" checked onchange="filterTable()"> <span class="ch3">CH3</span></label>
        <label><input type="checkbox" id="showData" checked onchange="filterTable()"> <span class="data">DATA</span></label>
        <label><input type="checkbox" id="showAux" checked onchange="filterTable()"> <span class="aux">AUX</span></label>
    </div>

    <table id="mainTable">
        <thead>
            <tr>
                <th>Bumper</th>
                <th>Canal</th>
                <th>Type</th>
                <th>Début (epoch)</th>
                <th>Début (date/heure)</th>
                <th>Fin (date/heure)</th>
                <th>Durée</th>
                <th>Samples</th>
                <th>Fichier</th>
            </tr>
        </thead>
        <tbody>
"""

# Static end of the page, including the row filter script. Rows whose channel
# or file type has no checkbox (e.g. ch5, ch15, "unknown") always stay
# visible; otherwise the first filter change would hide them for good.
_INVENTORY_FOOTER = """
        </tbody>
    </table>

    <script>
        function filterTable() {
            const showCh0 = document.getElementById('showCh0').checked;
            const showCh1 = document.getElementById('showCh1').checked;
            const showCh2 = document.getElementById('showCh2').checked;
            const showCh3 = document.getElementById('showCh3').checked;
            const showData = document.getElementById('showData').checked;
            const showAux = document.getElementById('showAux').checked;

            const rows = document.querySelectorAll('#mainTable tbody tr');
            rows.forEach(row => {
                const isCh0 = row.classList.contains('row-ch0');
                const isCh1 = row.classList.contains('row-ch1');
                const isCh2 = row.classList.contains('row-ch2');
                const isCh3 = row.classList.contains('row-ch3');
                const isData = row.classList.contains('row-data');
                const isAux = row.classList.contains('row-aux');

                const hasChannelFilter = isCh0 || isCh1 || isCh2 || isCh3;
                const hasTypeFilter = isData || isAux;

                const channelVisible = !hasChannelFilter ||
                                       (isCh0 && showCh0) || (isCh1 && showCh1) ||
                                       (isCh2 && showCh2) || (isCh3 && showCh3);
                const typeVisible = !hasTypeFilter ||
                                    (isData && showData) || (isAux && showAux);

                row.style.display = (channelVisible && typeVisible) ? '' : 'none';
            });
        }
    </script>
</body>
</html>
"""


def generate_html(inventory, output_path):
    """Write the HTML inventory report.

    Args:
        inventory: List of dicts with the keys 'bumper_id', 'channel',
            'epoch_time', 'file_type', 'samples', 'error' and 'filename'.
        output_path: Destination of the HTML file (written as UTF-8).

    The page shows global statistics, a summary for channels ch0-ch3 and one
    table row per file, ordered by bumper, channel and start time.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from html import escape

    # Group by bumper, then by channel
    by_bumper = defaultdict(lambda: defaultdict(list))
    for item in inventory:
        bumper = item['bumper_id'] or 'unknown'
        channel = item['channel'] or 'unknown'
        by_bumper[bumper][channel].append(item)

    # Numeric bumper ids sort numerically; anything else sorts as 999
    sorted_bumpers = sorted(by_bumper, key=lambda x: int(x) if x.isdigit() else 999)

    # Global statistics
    total_files = len(inventory)
    total_samples = sum(i['samples'] for i in inventory)
    total_duration = total_samples / SAMPLE_RATE
    total_errors = sum(1 for i in inventory if i['error'])

    # Per-channel counters
    channel_stats = defaultdict(lambda: {'files': 0, 'samples': 0, 'bumpers': set()})
    for item in inventory:
        ch = item['channel'] or 'unknown'
        channel_stats[ch]['files'] += 1
        channel_stats[ch]['samples'] += item['samples']
        if item['bumper_id']:
            channel_stats[ch]['bumpers'].add(item['bumper_id'])

    # The page is assembled as a list of fragments and joined once at the
    # end, which avoids repeated string concatenation for large inventories.
    parts = [f"""<!DOCTYPE html>
<html lang="fr">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Inventaire Fichiers HDF5 Sismiques</title>
    <style>{_INVENTORY_CSS}    </style>
</head>
<body>
    <h1>📊 Inventaire Fichiers HDF5 Sismiques</h1>
    <p>Généré le {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>

    <div class="stats">
        <div class="stat-box">
            <div class="stat-value">{total_files}</div>
            <div class="stat-label">Fichiers HDF5</div>
        </div>
        <div class="stat-box">
            <div class="stat-value">{len(sorted_bumpers)}</div>
            <div class="stat-label">Bumpers (nodes)</div>
        </div>
        <div class="stat-box">
            <div class="stat-value">{total_samples:,}</div>
            <div class="stat-label">Échantillons total</div>
        </div>
        <div class="stat-box">
            <div class="stat-value">{format_duration(total_duration)}</div>
            <div class="stat-label">Durée totale @ {SAMPLE_RATE}Hz</div>
        </div>
        <div class="stat-box">
            <div class="stat-value">{total_errors}</div>
            <div class="stat-label">Erreurs lecture</div>
        </div>
    </div>

    <h2>📡 Résumé par Canal</h2>
    <div class="channel-summary">
"""]

    # Only the four main channels get a summary box
    for ch in ['ch0', 'ch1', 'ch2', 'ch3']:
        stats = channel_stats.get(ch, {'files': 0, 'samples': 0, 'bumpers': set()})
        duration = stats['samples'] / SAMPLE_RATE
        parts.append(f"""
        <div class="channel-box">
            <h4 class="{ch}">{ch.upper()}</h4>
            <div><strong>{stats['files']}</strong> fichiers</div>
            <div><strong>{len(stats['bumpers'])}</strong> bumpers</div>
            <div><strong>{stats['samples']:,}</strong> samples</div>
            <div>{format_duration(duration)}</div>
        </div>
""")

    parts.append(_INVENTORY_TABLE_HEAD)

    for bumper in sorted_bumpers:
        channels = by_bumper[bumper]
        for channel in sorted(channels):
            # Files without an epoch time sort first
            items = sorted(channels[channel], key=lambda x: x['epoch_time'] or 0)
            for item in items:
                duration_sec = item['samples'] / SAMPLE_RATE
                end_time = (item['epoch_time'] + duration_sec) if item['epoch_time'] else None

                # File names and error messages come from the file system and
                # from exceptions, so they are escaped before being embedded.
                error_html = (
                    f'<div class="error">{escape(str(item["error"]))}</div>'
                    if item['error'] else ''
                )

                parts.append(f"""
            <tr class="row-{channel} row-{item['file_type']}">
                <td><strong>b{bumper}</strong></td>
                <td class="{channel}">{channel.upper()}</td>
                <td class="{item['file_type']}">{item['file_type'].upper()}</td>
                <td>{item['epoch_time'] or 'N/A'}</td>
                <td>{format_datetime(item['epoch_time'])}</td>
                <td>{format_datetime(end_time)}</td>
                <td>{format_duration(duration_sec)}</td>
                <td>{item['samples']:,}</td>
                <td style="font-size: 0.8rem; color: #888;">{escape(item['filename'])}{error_html}</td>
            </tr>
""")

    parts.append(_INVENTORY_FOOTER)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"\nHTML genere: {output_path}")
|
||||
|
||||
def _dirs_from_index(index_path):
    """Collect the data root directories referenced by the JSON index.

    For every file listed under nodes -> dates, the directory two levels
    above the file is kept when it exists on disk. A missing index file
    yields an empty set.
    """
    dirs = set()
    if not index_path.exists():
        return dirs

    with open(index_path, 'r', encoding='utf-8') as f:
        index = json.load(f)

    for node_data in index.get('nodes', {}).values():
        for files_list in node_data.get('dates', {}).values():
            # Each date is expected to map directly to a list of files
            if not isinstance(files_list, list):
                continue
            for file_info in files_list:
                raw_path = file_info.get('path')
                if not raw_path:
                    # Path('') is the current directory, which always exists;
                    # skip entries without a path so it is never scanned.
                    continue
                root = Path(raw_path).parent.parent
                if root.exists():
                    dirs.add(str(root))
    return dirs


def main():
    """Scan every known data directory and write the HTML and JSON inventory."""
    print("=" * 60)
    print("INVENTAIRE DES FICHIERS HDF5 SISMIQUES")
    print("=" * 60)

    # Directories referenced by the existing index
    index_path = Path(r"F:\seismic_webapp\data\index.json")
    all_dirs = _dirs_from_index(index_path)

    # Plus the default directories that exist on this machine
    for d in DATA_DIRS:
        if Path(d).exists():
            all_dirs.add(d)

    print(f"\nRépertoires à scanner: {len(all_dirs)}")
    for d in sorted(all_dirs):
        print(f" - {d}")

    # Scan all files
    inventory = []

    for data_dir in sorted(all_dirs):
        print(f"\nScanning {data_dir}...")
        files = scan_directory(data_dir)
        print(f" Found {len(files)} HDF5 files")

        for i, filepath in enumerate(files):
            # Progress line every 50 files
            if i % 50 == 0:
                print(f" Processing {i}/{len(files)}...")

            parsed = parse_filename(filepath.name)
            hdf5_info = get_hdf5_info(filepath)

            inventory.append({
                'filepath': str(filepath),
                'filename': filepath.name,
                'directory': data_dir,
                'bumper_id': parsed['bumper_id'],
                'channel': parsed['channel'],
                'epoch_time': parsed['epoch_time'],
                'file_type': parsed['file_type'],
                'samples': hdf5_info['samples'],
                'error': hdf5_info['error']
            })

    print(f"\nTotal: {len(inventory)} fichiers")

    # Write the HTML report
    output_path = Path(r"F:\seismic_webapp\inventory.html")
    generate_html(inventory, output_path)

    # Also save the raw inventory as JSON for reference
    json_path = Path(r"F:\seismic_webapp\inventory.json")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(inventory, f, indent=2, ensure_ascii=False)
    print(f"JSON genere: {json_path}")


if __name__ == '__main__':
    main()
|
||||
125
scripts/h5_api_server.py
Executable file
125
scripts/h5_api_server.py
Executable file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
from flask import Flask, jsonify, request, send_file
|
||||
from flask_cors import CORS
|
||||
import h5py
|
||||
import json
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
H5_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5')
|
||||
DOCS_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/docs')
|
||||
|
||||
@app.route('/api/h5/files', methods=['GET'])
def list_files():
    """List the ``*.h5`` files of H5_DIR, sorted, as JSON.

    Each entry carries the file name, the node id taken from the
    ``rsn<digits>`` part of the name ('unknown' when absent), the first
    six-digit group delimited by underscores ('' when absent) and the
    full path. Any failure is returned as a JSON error with status 500.
    """
    try:
        entries = []
        for path in sorted(H5_DIR.glob('*.h5')):
            name = path.name
            node_match = re.search(r'rsn(\d+)', name)
            date_match = re.search(r'_(\d{6})_', name)
            entries.append({
                'filename': name,
                'nodeId': node_match.group(1) if node_match else 'unknown',
                'date': date_match.group(1) if date_match else '',
                'path': str(path),
            })
        return jsonify({'files': entries, 'count': len(entries)})
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
||||
|
||||
@app.route('/api/h5/data', methods=['GET'])
def get_data():
    """Return a window of calibrated samples from one H5 file as JSON.

    Query parameters:
        file: File name relative to H5_DIR (required).
        channel: Channel number, default 1; the dataset read is
            ``calibrated_data/channel_<channel>``.
        start: Offset in seconds, default 0 (values <= 0 mean from the start).
        duration: Window length in seconds, default 10 (values <= 0 mean up
            to the end of the file).

    Responses:
        200 with samples, index range, metadata and statistics (statistics
        are null when the window is empty); 400 for a missing/invalid
        parameter or a path outside H5_DIR; 404 when the file does not
        exist; 500 for any other failure.
    """
    filename = request.args.get('file')
    if not filename:
        return jsonify({'error': "Missing 'file' parameter"}), 400

    try:
        channel = int(request.args.get('channel', 1))
        start = int(request.args.get('start', 0))
        duration = int(request.args.get('duration', 10))
    except ValueError as e:
        return jsonify({'error': f'Invalid numeric parameter: {e}'}), 400

    try:
        # 'file' comes straight from the request: an absolute path or '..'
        # segments would escape H5_DIR, so the resolved path must stay
        # inside it.
        base_dir = H5_DIR.resolve()
        filepath = (base_dir / filename).resolve()
        if base_dir not in filepath.parents:
            return jsonify({'error': 'Invalid file path'}), 400
        if not filepath.is_file():
            return jsonify({'error': 'File not found'}), 404

        import numpy as np

        with h5py.File(filepath, 'r') as f:
            meta = f['metadata']
            sample_rate = meta.attrs['sample_rate_hz']
            file_duration = meta.attrs['duration_sec']
            total_samples = meta.attrs['n_samples']

            dataset = f[f'calibrated_data/channel_{channel}']

            # Index range of the window, clipped to the end of the file
            start_idx = int(start * sample_rate) if start > 0 else 0
            num_samples = int(duration * sample_rate) if duration > 0 else total_samples
            end_idx = min(start_idx + num_samples, total_samples)

            samples = dataset[start_idx:end_idx]

            # Channels 1-3 are reported as geophones, anything else as the hydrophone
            is_geophone = channel in [1, 2, 3]
            unit = 'm/s' if is_geophone else 'Pa'
            channel_name = f'Geophone {channel}' if is_geophone else 'Hydrophone'

            if len(samples) > 0:
                stats = {
                    'min': float(np.min(samples)),
                    'max': float(np.max(samples)),
                    'mean': float(np.mean(samples)),
                    'std': float(np.std(samples)),
                    'rms': float(np.sqrt(np.mean(samples**2))),
                }
            else:
                # numpy reductions raise on empty input; an empty window is
                # reported with null statistics instead of a 500 error.
                stats = {'min': None, 'max': None, 'mean': None, 'std': None, 'rms': None}

            return jsonify({
                'samples': samples.tolist(),
                'start_idx': int(start_idx),
                'end_idx': int(end_idx),
                'total_samples': int(total_samples),
                'sample_rate': int(sample_rate),
                'duration_sec': float(file_duration),
                'channel': channel,
                'channel_name': channel_name,
                'unit': unit,
                'stats': stats,
            })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
@app.route('/api/docs/manifest', methods=['GET'])
def get_manifest():
    """Return the content of ``campaign_manifest.json`` from DOCS_DIR as JSON.

    Any failure (missing file, invalid JSON, ...) is returned as a JSON
    error with status 500.
    """
    try:
        with open(DOCS_DIR / 'campaign_manifest.json', 'r') as handle:
            manifest = json.load(handle)
        return jsonify(manifest)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
||||
|
||||
@app.route('/api/docs/<filename>', methods=['GET'])
def get_document(filename):
    """Send one document stored in DOCS_DIR.

    Args:
        filename: Name of the document, taken from the URL.

    Responses:
        The file content on success; 404 when the name does not designate a
        regular file inside DOCS_DIR; 500 for any other failure.
    """
    try:
        docs_root = DOCS_DIR.resolve()
        doc_file = (docs_root / filename).resolve()
        # The name comes from the URL: require a regular file located inside
        # DOCS_DIR. A bare exists() check also accepts directories such as
        # '..', which send_file cannot serve.
        if docs_root not in doc_file.parents or not doc_file.is_file():
            return jsonify({'error': 'File not found'}), 404
        return send_file(str(doc_file))
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
# === Endpoints pour la carte ===
|
||||
@app.route('/api/nodes', methods=['GET'])
def get_nodes():
    """Return the hard-coded list of nodes with their positions."""
    # (id, latitude, longitude) of each node exposed to the map
    positions = (
        ('80274', 43.40, 3.70),
        ('2221', 43.41, 3.71),
        ('3541', 43.39, 3.69),
    )
    nodes = [
        {'id': node_id, 'lat': lat, 'lon': lon, 'name': f'Node {node_id}'}
        for node_id, lat, lon in positions
    ]
    return jsonify(nodes)
|
||||
|
||||
@app.route('/api/dates', methods=['GET'])
def get_dates():
    """Return the hard-coded list of available dates."""
    available_dates = ['2020-08-08', '2020-08-09', '2020-08-10']
    return jsonify(available_dates)
|
||||
|
||||
@app.route('/api/migration-status', methods=['GET'])
def migration_status():
    """Report the migration status; the answer is always 'complete'."""
    payload = {'status': 'complete'}
    return jsonify(payload)
|
||||
|
||||
# Start the Flask server when the file is executed directly: it listens on
# all interfaces (0.0.0.0), port 3004, with debug mode off.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=3004, debug=False)
|
||||
64
scripts/index_h5_2026.py
Executable file
64
scripts/index_h5_2026.py
Executable file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Indexation des fichiers H5 format 2026 avec métadonnées complètes.
|
||||
Génère un index JSON pour le viewer web.
|
||||
"""
|
||||
|
||||
import h5py
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
H5_DIR = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5')
|
||||
OUTPUT = Path('/home/floppyrj45/docker/seismic-nodes-viewer/data/h5_index.json')
|
||||
|
||||
def index_h5_files():
    """Index every ``*.h5`` file of H5_DIR and write the result to OUTPUT.

    For each file, the node id and the date are taken from the file name and
    the duration / sample rate / channel count / sample count from the
    attributes of the ``metadata`` object. Files that cannot be read are
    reported and left out of the index.
    """
    import re

    def _first_group(pattern, text, default):
        # Group 1 of the first match of `pattern` in `text`, else `default`.
        found = re.search(pattern, text)
        return found.group(1) if found else default

    entries = []

    for path in sorted(H5_DIR.glob('*.h5')):
        try:
            with h5py.File(path, 'r') as handle:
                attrs = handle['metadata'].attrs
                entry = {
                    'filename': path.name,
                    'path': str(path),
                    # Node id comes from the "rsn<digits>" part of the name.
                    'node_id': _first_group(r'rsn(\d+)', path.name, 'unknown'),
                    # Date is the six-digit (YYMMDD) field of the name.
                    'date': _first_group(r'_(\d{6})_', path.name, ''),
                    'duration_sec': float(attrs['duration_sec']),
                    'sample_rate': int(attrs['sample_rate_hz']),
                    'channels': int(attrs['n_channels']),
                    'samples': int(attrs['n_samples']),
                    'size_mb': round(path.stat().st_size / (1024*1024), 2),
                    'channel_info': [
                        {'id': 1, 'type': 'geophone', 'unit': 'm/s', 'name': 'Geophone 1'},
                        {'id': 2, 'type': 'geophone', 'unit': 'm/s', 'name': 'Geophone 2'},
                        {'id': 3, 'type': 'geophone', 'unit': 'm/s', 'name': 'Geophone 3'},
                        {'id': 4, 'type': 'hydrophone', 'unit': 'Pa', 'name': 'Hydrophone'}
                    ]
                }
        except Exception as err:
            print(f'Error indexing {path.name}: {err}')
            continue
        entries.append(entry)

    total_seconds = sum(entry['duration_sec'] for entry in entries)
    index = {
        'generated': datetime.now().isoformat(),
        'total_files': len(entries),
        'total_duration_hours': total_seconds / 3600,
        'files': entries
    }

    OUTPUT.write_text(json.dumps(index, indent=2))
    print(f'✅ Indexed {len(entries)} files → {OUTPUT}')
    print(f'📊 Total duration: {index["total_duration_hours"]:.1f} hours')
|
||||
|
||||
if __name__ == '__main__':
|
||||
index_h5_files()
|
||||
231
scripts/index_h5_files.py
Executable file
231
scripts/index_h5_files.py
Executable file
@@ -0,0 +1,231 @@
|
||||
"""
|
||||
Script d'indexation des fichiers HDF5 sismiques.
|
||||
Parcourt les dossiers de données, extrait les métadonnées (node_id, date, canaux)
|
||||
et génère un index JSON utilisé par l'API backend.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# Pattern pour extraire les infos du nom de fichier
|
||||
# Exemple: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
|
||||
# ou: auto_255_125334_b4_rsn13696_seq1_1599045513.h5
|
||||
FILENAME_PATTERN = re.compile(
|
||||
r'auto_(\d+)_(\d{6})_b(\d+).*?_(\d{10})\.h5$',
|
||||
re.IGNORECASE
|
||||
)
|
||||
|
||||
# Dossiers racine contenant les données H5
|
||||
DATA_ROOTS = [
|
||||
Path(r"F:\2020-09-12"),
|
||||
Path(r"F:\2020-09-13"),
|
||||
Path(r"F:\2020-09-14"),
|
||||
Path(r"F:\2020-09-15"),
|
||||
Path(r"F:\2020-09-16"),
|
||||
Path(r"F:\2020-09-17"),
|
||||
Path(r"F:\2020-09-18"),
|
||||
Path(r"F:\2020-09-19"),
|
||||
Path(r"F:\2020-09-21"),
|
||||
Path(r"F:\2020-09-22"),
|
||||
Path(r"F:\2020-09-23"),
|
||||
]
|
||||
|
||||
# Fichier CSV des positions
|
||||
POSITIONS_CSV = Path(r"F:\Copie de SETE_AUV_DARFV4-Copier(1).csv")
|
||||
|
||||
# Sortie
|
||||
OUTPUT_INDEX = Path(r"F:\seismic_webapp\data\index.json")
|
||||
|
||||
|
||||
def load_node_positions(csv_path: Path) -> Dict[str, Dict[str, Any]]:
    """Load node positions from the survey CSV.

    The fourth line of the file holds the column headers and data starts on
    the fifth line. "Aslaid" (measured) coordinates are preferred over
    "Preplot" (planned) ones when the Aslaid columns exist. If an expected
    header is missing, fixed column indices (3, 9, 10, 11) are used instead.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Mapping ``node_code -> {'easting', 'northing', 'depth'}``. Rows with
        an empty node code, a missing/zero easting or northing, or a
        non-numeric value are skipped. A missing or empty depth becomes 0.0.
    """
    positions: Dict[str, Dict[str, Any]] = {}

    # The csv module handles quoted fields (and commas inside them), which a
    # plain split(',') does not.
    with open(csv_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
        rows = list(csv.reader(f))

    # Three preamble lines + header line + at least one data line.
    if len(rows) < 5:
        return positions

    headers = [h.strip() for h in rows[3]]

    try:
        node_code_idx = headers.index('NodeCode')
        # Measured (Aslaid) positions take priority over planned (Preplot) ones.
        if 'Aslaid Easting' in headers:
            easting_idx = headers.index('Aslaid Easting')
            northing_idx = headers.index('Aslaid Northing')
            depth_idx = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None
            print("Utilisation des coordonnées Aslaid (positions réelles)")
        else:
            easting_idx = headers.index('Preplot Easting')
            northing_idx = headers.index('Preplot Northing')
            depth_idx = headers.index('Preplot Depth') if 'Preplot Depth' in headers else None
            print("Utilisation des coordonnées Preplot (positions planifiées)")
    except ValueError as e:
        print(f"Colonne manquante dans le CSV: {e}")
        # Fall back on the known Aslaid column layout.
        node_code_idx = 3
        easting_idx = 9
        northing_idx = 10
        depth_idx = 11

    last_required_idx = max(node_code_idx, easting_idx, northing_idx)

    for parts in rows[4:]:
        if len(parts) <= last_required_idx:
            continue

        node_code = parts[node_code_idx].strip()
        if not node_code:
            continue

        try:
            easting = float(parts[easting_idx]) if parts[easting_idx] else None
            northing = float(parts[northing_idx]) if parts[northing_idx] else None
            depth = 0.0
            # `is not None` so that a depth column at index 0 is still read;
            # a row too short to hold the depth cell simply has no depth.
            if depth_idx is not None and depth_idx < len(parts) and parts[depth_idx]:
                depth = float(parts[depth_idx])
        except ValueError:
            continue

        # Empty and zero coordinates are both treated as "no position".
        if easting and northing:
            positions[node_code] = {
                'easting': easting,
                'northing': northing,
                'depth': depth,
            }

    print(f"Chargé {len(positions)} positions de nodes")
    return positions
|
||||
|
||||
|
||||
def scan_h5_files(data_roots: List[Path]) -> Dict[str, Any]:
    """Walk the data folders and index every H5 file found.

    The node id and the ten-digit timestamp are parsed from the file name,
    first with FILENAME_PATTERN and then with a looser pattern; files
    matching neither are ignored.

    Args:
        data_roots: Folders to scan recursively. Missing folders are reported
            and skipped.

    Returns:
        Nested mapping ``node_id -> date (YYYY-MM-DD) -> list of file entries``.
    """
    index: Dict[str, Any] = {}
    file_count = 0

    for root in data_roots:
        if not root.exists():
            print(f"Dossier non trouvé: {root}")
            continue

        print(f"Scan de {root}...")

        for h5_file in root.rglob("*.h5"):
            strict = FILENAME_PATTERN.search(h5_file.name)
            if strict:
                node_id, raw_ts = strict.group(3), strict.group(4)
            else:
                # Try a simpler pattern before giving up on the file.
                loose = re.search(r'_b(\d+)_.*?(\d{10})\.h5$', h5_file.name, re.IGNORECASE)
                if not loose:
                    continue
                node_id, raw_ts = loose.group(1), loose.group(2)

            timestamp = int(raw_ts)
            # The calendar day is derived from the timestamp in local time.
            date_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')

            # Channels are not read from the file; ch0-ch3 is assumed for now.
            channels = ['ch0', 'ch1', 'ch2', 'ch3']

            day_files = index.setdefault(node_id, {}).setdefault(date_str, [])
            day_files.append({
                'path': str(h5_file),
                'timestamp': timestamp,
                'channels': channels,
                'size_bytes': h5_file.stat().st_size if h5_file.exists() else 0
            })
            file_count += 1

    print(f"Indexé {file_count} fichiers H5")
    return index
|
||||
|
||||
|
||||
def build_full_index(positions: Dict, files_index: Dict) -> Dict[str, Any]:
    """Combine node positions and the per-node file index into one document.

    Args:
        positions: Mapping ``node_id -> position dict``.
        files_index: Mapping ``node_id -> date -> list of file entries``.

    Returns:
        A dict with ``generated_at``, ``sample_rate_hz``, ``nodes`` (one entry
        per node id present in either input, with ``position`` set to None
        and/or ``dates`` left empty when unknown) and ``dates`` (sorted list
        of every date that has at least one indexed node).
    """
    nodes: Dict[str, Any] = {}
    all_dates = set()

    # Sorted so that the generated index is identical from one run to the
    # next; plain set iteration order varies with string hash randomization.
    for node_id in sorted(set(files_index) | set(positions)):
        node_dates = files_index.get(node_id, {})
        all_dates.update(node_dates)
        nodes[node_id] = {
            'id': node_id,
            'position': positions.get(node_id, None),
            'dates': node_dates,
        }

    return {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': 200,
        'nodes': nodes,
        'dates': sorted(all_dates),
    }
|
||||
|
||||
|
||||
def main():
    """Run the whole indexing pipeline and write the JSON index to OUTPUT_INDEX."""
    print("=== Indexation des fichiers HDF5 sismiques ===\n")

    # Step 1: node positions from the CSV.
    print("1. Chargement des positions des nodes...")
    node_positions = load_node_positions(POSITIONS_CSV)

    # Step 2: H5 files found under the data folders.
    print("\n2. Scan des fichiers H5...")
    scanned_files = scan_h5_files(DATA_ROOTS)

    # Step 3: merge both into the final structure.
    print("\n3. Construction de l'index...")
    full_index = build_full_index(node_positions, scanned_files)

    # Step 4: write it out, creating the target folder when needed.
    print(f"\n4. Sauvegarde vers {OUTPUT_INDEX}...")
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(full_index, indent=2, ensure_ascii=False)
    OUTPUT_INDEX.write_text(serialized, encoding='utf-8')

    node_total = len(full_index['nodes'])
    date_total = len(full_index['dates'])
    print(f"\nTerminé! Index généré avec {node_total} nodes et {date_total} dates.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
87
scripts/index_time_ranges.py
Normal file
87
scripts/index_time_ranges.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import os, re, json, h5py
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from tqdm import tqdm
|
||||
|
||||
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
|
||||
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
|
||||
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")
|
||||
SAMPLE_RATE = 200
|
||||
|
||||
def load_pos():
    """Load node positions from POSITIONS_CSV.

    The fourth line of the file holds the headers and data starts on the
    fifth. Aslaid easting/northing columns are used when present, Preplot
    ones otherwise; depth is read from 'Aslaid Depth' when that column exists
    and is 0.0 otherwise.

    Returns:
        Mapping ``node_id -> {'easting', 'northing', 'depth'}``; an empty dict
        when the file is missing, too short, or lacks the expected headers.
        Rows with a blank node code or non-numeric values are skipped.
    """
    if not POSITIONS_CSV.exists():
        return {}
    with open(POSITIONS_CSV, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()
    if len(lines) < 5:
        return {}

    headers = lines[3].strip().split(',')
    try:
        ni = headers.index('NodeCode')
        ei = headers.index('Aslaid Easting') if 'Aslaid Easting' in headers else headers.index('Preplot Easting')
        oi = headers.index('Aslaid Northing') if 'Aslaid Northing' in headers else headers.index('Preplot Northing')
        di = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else -1
    except ValueError:
        # list.index() raises ValueError only: a required header is missing.
        return {}

    positions = {}
    for line in lines[4:]:
        parts = line.strip().split(',')
        try:
            nid = parts[ni].strip()
            entry = {
                'easting': float(parts[ei]),
                'northing': float(parts[oi]),
                'depth': float(parts[di]) if di != -1 else 0.0
            }
        except (ValueError, IndexError):
            # Short row or non-numeric cell: ignore this row only.
            continue
        if nid:
            positions[nid] = entry
    return positions
|
||||
|
||||
def scan():
    """Index the time range covered by every H5 file under DATA_ROOTS.

    The node id is parsed from the ``_b<digits>_`` part of the file name. The
    start time is the ``timestamp`` attribute of the ``adc_values`` dataset
    and the end time is derived from the dataset length and SAMPLE_RATE.
    Files without a node id, without ``adc_values`` or without a timestamp
    are skipped. The result is written as JSON to OUTPUT_INDEX.
    """
    pos = load_pos()
    index = {}
    file_count = 0
    print(f"Scanning H5 files... Positions loaded: {len(pos)}")

    all_files = []
    for root in DATA_ROOTS:
        all_files.extend(root.rglob("*.h5"))

    for h5_path in tqdm(all_files):
        try:
            match = re.search(r'_b(\d+)_', h5_path.name)
            if not match:
                continue
            nid = match.group(1)

            with h5py.File(h5_path, 'r') as f:
                if 'adc_values' not in f:
                    continue
                ds = f['adc_values']
                start_ts = int(ds.attrs.get('timestamp', 0))
                if start_ts == 0:
                    continue

                duration = ds.shape[0] / SAMPLE_RATE
                end_ts = start_ts + duration

            if nid not in index:
                index[nid] = {
                    'id': nid,
                    'position': pos.get(nid),
                    'files': []
                }

            index[nid]['files'].append({
                'path': str(h5_path),
                'start': start_ts,
                'end': end_ts,
                'channels': ['ch0', 'ch1', 'ch2', 'ch3']
            })
            file_count += 1
        except Exception as e:
            # Best effort: one unreadable file must not abort the scan, but it
            # is reported, and Ctrl-C is no longer swallowed as it was by the
            # former bare `except`. tqdm.write keeps the progress bar intact.
            tqdm.write(f"Skipping {h5_path}: {e}")
            continue

    # Save the index, creating the target folder when needed.
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w') as f:
        json.dump({
            'generated_at': datetime.now().isoformat(),
            'sample_rate_hz': SAMPLE_RATE,
            'nodes': index
        }, f)

    print(f"Index généré: {file_count} fichiers, {len(index)} nodes avec positions.")
|
||||
|
||||
if __name__ == '__main__': scan()
|
||||
105
scripts/index_ultimate.py
Normal file
105
scripts/index_ultimate.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import os, re, json, h5py
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from tqdm import tqdm
|
||||
|
||||
DATA_ROOTS = [Path("/mnt/kingston"), Path("/mnt/data_sdb1")]
|
||||
POSITIONS_CSV = Path("/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv")
|
||||
OUTPUT_INDEX = Path("/mnt/kingston/seismic_webapp/data/index.json")
|
||||
SAMPLE_RATE = 200
|
||||
|
||||
def load_pos():
    """Load node positions from POSITIONS_CSV.

    The fourth line of the file holds the headers and data starts on the
    fifth. Aslaid easting/northing columns are used when present, Preplot
    ones otherwise; depth is read from 'Aslaid Depth' when that column exists
    and is 0.0 otherwise.

    Returns:
        Mapping ``node_id -> {'easting', 'northing', 'depth'}``; an empty dict
        when the file is missing, too short, or lacks the expected headers.
        Rows with a blank node code or non-numeric values are skipped.
    """
    if not POSITIONS_CSV.exists():
        return {}
    with open(POSITIONS_CSV, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()
    if len(lines) < 5:
        return {}

    headers = lines[3].strip().split(',')
    try:
        ni = headers.index('NodeCode')
        ei = headers.index('Aslaid Easting') if 'Aslaid Easting' in headers else headers.index('Preplot Easting')
        oi = headers.index('Aslaid Northing') if 'Aslaid Northing' in headers else headers.index('Preplot Northing')
    except ValueError:
        # list.index() raises ValueError only: a required header is missing.
        return {}
    # Resolved once here instead of on every data row.
    di = headers.index('Aslaid Depth') if 'Aslaid Depth' in headers else None

    positions = {}
    for line in lines[4:]:
        parts = line.strip().split(',')
        try:
            nid = parts[ni].strip()
            entry = {
                'easting': float(parts[ei]),
                'northing': float(parts[oi]),
                'depth': float(parts[di]) if di is not None else 0.0
            }
        except (ValueError, IndexError):
            # Short row or non-numeric cell: ignore this row only.
            continue
        if nid:
            positions[nid] = entry
    return positions
|
||||
|
||||
def scan():
    """Index the "data" H5 files found under DATA_ROOTS.

    Only files whose name contains ``_data_`` are considered. The day of year,
    the HHMMSS start time and the node id are parsed from the file name
    (``auto_<day>_<HHMMSS>_b<node>_..._<10 digits>.h5``); the year is taken
    to be 2020. The end time is derived from the length of the ``adc_values``
    dataset and SAMPLE_RATE. The result is written as JSON to OUTPUT_INDEX.
    """
    pos = load_pos()
    nodes = {}
    all_dates = set()
    file_count = 0

    print("🔍 Scanning ONLY 'data' H5 files (ignoring 'aux')...")
    all_h5_files = []
    for root in DATA_ROOTS:
        all_h5_files.extend(root.rglob("*.h5"))

    # Compiled once instead of being looked up for every file.
    name_re = re.compile(r'auto_(\d+)_(\d{6})_b(\d+)_.*?_(\d{10})\.h5$')
    channel_re = re.compile(r'_ch(\d+)_')
    year_start = datetime(2020, 1, 1)

    for h5_path in tqdm(all_h5_files):
        # Filter: only files whose name contains "data".
        if "_data_" not in h5_path.name.lower():
            continue

        try:
            match = name_re.search(h5_path.name)
            if not match:
                continue

            julian_day = int(match.group(1))
            time_str = match.group(2)
            node_id = match.group(3)

            date_ref = year_start + timedelta(days=julian_day - 1)
            date_str = date_ref.strftime('%Y-%m-%d')

            h, m, s = int(time_str[:2]), int(time_str[2:4]), int(time_str[4:6])
            actual_start_ts = int(year_start.timestamp() + (julian_day - 1) * 86400 + h * 3600 + m * 60 + s)

            with h5py.File(h5_path, 'r') as f:
                if 'adc_values' not in f:
                    continue
                duration = f['adc_values'].shape[0] / SAMPLE_RATE
                actual_end_ts = actual_start_ts + duration

            all_dates.add(date_str)

            if node_id not in nodes:
                nodes[node_id] = {
                    'id': node_id,
                    'position': pos.get(node_id),
                    'files': []
                }

            # The channel is taken from the file name for more precise matching.
            channel_match = channel_re.search(h5_path.name)
            channel = f"ch{channel_match.group(1)}" if channel_match else "ch0"

            nodes[node_id]['files'].append({
                'path': str(h5_path),
                'start': actual_start_ts,
                'end': actual_end_ts,
                'julian': julian_day,
                'channel': channel
            })
            file_count += 1
        except Exception as e:
            # Best effort: one unreadable file must not abort the scan, but it
            # is reported, and Ctrl-C is no longer swallowed as it was by the
            # former bare `except`. tqdm.write keeps the progress bar intact.
            tqdm.write(f"Skipping {h5_path}: {e}")
            continue

    result = {
        'generated_at': datetime.now().isoformat(),
        'sample_rate_hz': SAMPLE_RATE,
        'nodes': nodes,
        'dates': sorted(all_dates)
    }

    # Create the target folder when needed before writing.
    OUTPUT_INDEX.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_INDEX, 'w') as f:
        json.dump(result, f, indent=2)

    print(f"✅ Index updated: {file_count} 'data' files, {len(nodes)} nodes.")
|
||||
|
||||
if __name__ == '__main__': scan()
|
||||
158
scripts/inventory_h5.py
Executable file
158
scripts/inventory_h5.py
Executable file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
Script d'inventaire des fichiers HDF5.
|
||||
Extrait les timestamps des noms de fichiers et génère un rapport.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
# Dossiers racine
|
||||
DATA_ROOTS = [
|
||||
Path(r"F:\2020-09-12"),
|
||||
Path(r"F:\2020-09-13"),
|
||||
Path(r"F:\2020-09-14"),
|
||||
Path(r"F:\2020-09-15"),
|
||||
Path(r"F:\2020-09-16"),
|
||||
Path(r"F:\2020-09-17"),
|
||||
Path(r"F:\2020-09-18"),
|
||||
Path(r"F:\2020-09-19"),
|
||||
Path(r"F:\2020-09-21"),
|
||||
Path(r"F:\2020-09-22"),
|
||||
Path(r"F:\2020-09-23"),
|
||||
]
|
||||
|
||||
# Pattern pour extraire node_id et timestamp
|
||||
# Exemple: auto_256_070617_b67_14_025708_data_rsn6027_seq1_ch0_1599057453.h5
|
||||
PATTERN = re.compile(r'_b(\d+)_.*?(\d{10})\.h5$', re.IGNORECASE)
|
||||
|
||||
|
||||
def main():
    """Scan DATA_ROOTS for H5 files and print an inventory report.

    The report has five parts: a per-folder summary, global statistics, the
    overall time range, the 30 nodes with the most files, and the number of
    distinct timestamps per calendar day.
    """
    banner = "=" * 70

    def as_datetime(ts):
        return datetime.fromtimestamp(ts)

    print(banner)
    print("INVENTAIRE DES FICHIERS HDF5")
    print(banner)

    # folder -> node_id -> list of file records
    inventory = defaultdict(lambda: defaultdict(list))

    # Global counters
    total_files = 0
    total_size = 0
    nodes_set = set()
    timestamps_set = set()

    for root in DATA_ROOTS:
        if not root.exists():
            continue

        folder_name = root.name

        for h5_file in root.rglob("*.h5"):
            name = h5_file.name
            match = PATTERN.search(name)
            if not match:
                continue

            node_id = match.group(1)
            timestamp = int(match.group(2))

            # File type: data, aux, or unknown
            if "_data_" in name:
                file_type = "data"
            elif "_aux_" in name:
                file_type = "aux"
            else:
                file_type = "unknown"

            # Channel, when the name carries one
            ch_match = re.search(r'_ch(\d+)_', name)
            channel = f"ch{ch_match.group(1)}" if ch_match else "?"

            file_size = h5_file.stat().st_size

            inventory[folder_name][node_id].append({
                'timestamp': timestamp,
                'datetime': as_datetime(timestamp),
                'type': file_type,
                'channel': channel,
                'filename': name,
                'size': file_size
            })

            total_files += 1
            total_size += file_size
            nodes_set.add(node_id)
            timestamps_set.add(timestamp)

    # Per-folder report
    print(f"\n{'DOSSIER':<15} {'NODES':<10} {'FICHIERS':<10} {'TAILLE':<15}")
    print("-" * 50)

    for folder in sorted(inventory):
        per_node = inventory[folder]
        n_nodes = len(per_node)
        n_files = 0
        folder_size = 0
        for records in per_node.values():
            n_files += len(records)
            folder_size += sum(record['size'] for record in records)
        print(f"{folder:<15} {n_nodes:<10} {n_files:<10} {folder_size / 1e9:.2f} GB")

    # Global statistics
    print("\n" + banner)
    print("STATISTIQUES GLOBALES")
    print(banner)
    print(f"Fichiers H5 totaux: {total_files}")
    print(f"Taille totale: {total_size / 1e9:.2f} GB")
    print(f"Nodes uniques: {len(nodes_set)}")

    # Time range
    if timestamps_set:
        min_ts = min(timestamps_set)
        max_ts = max(timestamps_set)
        print(f"\nPlage temporelle des données:")
        print(f" Début: {as_datetime(min_ts)} (timestamp: {min_ts})")
        print(f" Fin: {as_datetime(max_ts)} (timestamp: {max_ts})")

    # Per-node detail
    print("\n" + banner)
    print("DETAIL PAR NODE (nodes avec le plus de fichiers)")
    print(banner)

    # Aggregate over folders, in folder-then-node insertion order.
    node_stats = defaultdict(lambda: {'files': 0, 'size': 0, 'timestamps': set(), 'folders': set()})

    for folder, per_node in inventory.items():
        for node_id, records in per_node.items():
            entry = node_stats[node_id]
            entry['files'] += len(records)
            entry['size'] += sum(record['size'] for record in records)
            entry['timestamps'].update(record['timestamp'] for record in records)
            entry['folders'].add(folder)

    # Most files first
    sorted_nodes = sorted(node_stats.items(), key=lambda item: item[1]['files'], reverse=True)

    print(f"\n{'NODE':<8} {'FICHIERS':<10} {'TAILLE':<12} {'DATES':<25} {'DOSSIERS'}")
    print("-" * 90)

    for node_id, stats in sorted_nodes[:30]:
        ts_list = sorted(stats['timestamps'])
        if ts_list:
            first = as_datetime(ts_list[0]).strftime('%Y-%m-%d %H:%M')
            last = as_datetime(ts_list[-1]).strftime('%H:%M')
            date_range = f"{first} -> {last}"
        else:
            date_range = "N/A"

        folders = ", ".join(sorted(stats['folders']))
        print(f"b{node_id:<7} {stats['files']:<10} {stats['size']/1e6:.1f} MB {date_range:<25} {folders}")

    # Distinct days
    print("\n" + banner)
    print("JOURS DE DONNEES DISPONIBLES (basé sur timestamps)")
    print(banner)

    # Number of distinct timestamps falling on each calendar day.
    per_day = defaultdict(int)
    for ts in timestamps_set:
        per_day[as_datetime(ts).strftime('%Y-%m-%d')] += 1

    for day in sorted(per_day):
        day_files = per_day[day]
        print(f" {day}: ~{day_files} timestamps uniques")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
45
scripts/migrate_all.py
Normal file
45
scripts/migrate_all.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import json, psycopg2, os
|
||||
from pathlib import Path
|
||||
from migrate_to_db import migrate_file
|
||||
|
||||
INDEX_PATH = "/mnt/kingston/seismic_webapp/data/index.json"
|
||||
DB_URL = "postgresql://postgres:seismic_pass@db:5432/seismic_data"
|
||||
|
||||
def update_status(processed, total, current):
    """Record migration progress in row ``id = 1`` of ``migration_status``.

    Failures are printed and otherwise ignored so that a status problem never
    interrupts the migration itself.

    Args:
        processed: Number of files handled so far.
        total: Total number of files to migrate.
        current: Name of the file being processed (or a final label).
    """
    conn = None
    try:
        conn = psycopg2.connect(DB_URL)
        with conn.cursor() as cur:
            cur.execute("UPDATE migration_status SET processed_files = %s, total_files = %s, current_file = %s, last_update = NOW() WHERE id = 1", (processed, total, current))
        conn.commit()
    except Exception as e:
        print(f"Status update error: {e}")
    finally:
        # Always release the connection, including when the UPDATE fails;
        # this function runs once per migrated file.
        if conn is not None:
            conn.close()
|
||||
|
||||
def main():
    """Migrate every "data" file listed in the index at INDEX_PATH.

    One hour of each file is migrated; progress is recorded before each file
    and once more at the end. A failure on one file is printed and the loop
    moves on to the next.
    """
    with open(INDEX_PATH, 'r') as index_fh:
        index = json.load(index_fh)

    # (node id, file entry) pairs restricted to "_data_" files.
    pending = [
        (node_id, entry)
        for node_id, node in index.get('nodes', {}).items()
        for entry in node.get('files', [])
        if '_data_' in entry['path']
    ]

    total = len(pending)
    print(f"Starting migration for {total} files...")

    for done, (node_id, entry) in enumerate(pending):
        source = entry['path']
        filename = os.path.basename(source)
        update_status(done, total, filename)
        try:
            # Migrate one hour of each file.
            migrate_file(source, node_id, entry.get('channel', 'ch0'), duration_sec=3600)
        except Exception as err:
            print(f"Error migrating {filename}: {err}")

    update_status(total, total, "Terminé")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
53
scripts/migrate_to_db.py
Normal file
53
scripts/migrate_to_db.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import h5py
|
||||
import numpy as np
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_values
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import os
|
||||
from tqdm import tqdm
|
||||
|
||||
DB_URL = "postgresql://postgres:seismic_pass@db:5432/seismic_data"
|
||||
|
||||
def fix_path(p):
    """Translate a Windows-style path into its mount point on this host.

    Backslashes become forward slashes, then a leading ``F:/`` is replaced by
    ``/mnt/kingston/`` and a leading ``E:/`` by ``/mnt/data_sdb1/``. Any other
    path is returned with only the slash conversion applied.
    """
    unix_style = p.replace('\\', '/')
    drive_mounts = (
        ('F:/', '/mnt/kingston/'),
        ('E:/', '/mnt/data_sdb1/'),
    )
    for drive, mount in drive_mounts:
        if unix_style.startswith(drive):
            return mount + unix_style[len(drive):]
    return unix_style
|
||||
|
||||
def migrate_file(h5_path, node_id, channel, start_offset_sec=0, duration_sec=3600, *, sample_rate=200):
    """Copy a slice of one H5 file's ``adc_values`` into ``adc_samples``.

    Args:
        h5_path: Path of the H5 file (Windows-style paths go through fix_path).
        node_id: Node identifier stored with every sample.
        channel: Channel label stored with every sample.
        start_offset_sec: Offset from the start of the file, in seconds.
        duration_sec: Length of the slice to migrate, in seconds.
        sample_rate: Samples per second of the dataset (200 by default).

    Each sample is stamped with the dataset's ``timestamp`` attribute plus its
    offset, as a UTC datetime. Rows are inserted and committed in batches of
    10000. Exceptions propagate to the caller; the database connection is
    closed in every case.
    """
    h5_path = fix_path(h5_path)

    # Read the slice first: a missing or unreadable file then fails before
    # any database connection has been opened.
    with h5py.File(h5_path, 'r') as f:
        ds = f['adc_values']
        start_ts = int(ds.attrs['timestamp'])
        # int() keeps the slice bounds valid when offsets are given as floats.
        start_idx = int(start_offset_sec * sample_rate)
        end_idx = start_idx + int(duration_sec * sample_rate)
        data = ds[start_idx:end_idx]

    # Actual start time of the migrated slice.
    actual_start = start_ts + start_offset_sec
    print(f"Migrating {len(data)} samples...")

    batch_size = 10000
    conn = psycopg2.connect(DB_URL)
    try:
        with conn.cursor() as cur:
            for i in range(0, len(data), batch_size):
                values = [
                    (
                        datetime.fromtimestamp(actual_start + (i + j) / sample_rate, tz=timezone.utc),
                        node_id,
                        channel,
                        float(val),
                    )
                    for j, val in enumerate(data[i:i + batch_size])
                ]
                execute_values(cur, "INSERT INTO adc_samples (time, node_id, channel, value) VALUES %s", values)
                conn.commit()
    finally:
        # Released even when an insert fails part-way through.
        conn.close()

    print("Done.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Test sur Node 193, 1er septembre (Julian 245), 10 minutes
|
||||
# On cherche un fichier du node 193
|
||||
import sys
|
||||
migrate_file(sys.argv[1], sys.argv[2], sys.argv[3], duration_sec=600)
|
||||
62
scripts/precompute_all.py
Normal file
62
scripts/precompute_all.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import json, sys, os, numpy as np, h5py, re
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from tqdm import tqdm
|
||||
|
||||
INDEX_PATH = Path("/mnt/kingston/seismic_webapp/data/index.json")
|
||||
OUTPUT_DIR = Path("/mnt/kingston/seismic_webapp/data/rms_cache")
|
||||
SAMPLE_RATE = 200
|
||||
|
||||
def fix_path(p):
    """Map a Windows-style index path onto the local mount points.

    Backslashes are converted to forward slashes; a path starting with
    ``F:/`` is relocated under ``/mnt/kingston/`` and one starting with
    ``E:/`` under ``/mnt/data_sdb1/``. Other paths only get the slash
    conversion.
    """
    converted = p.replace('\\', '/')
    # Both known prefixes are exactly three characters long.
    mount = {'F:/': '/mnt/kingston/', 'E:/': '/mnt/data_sdb1/'}.get(converted[:3])
    if mount is None:
        return converted
    return mount + converted[3:]
|
||||
|
||||
def compute_rms(h5_path):
    """Compute one RMS value over the first 5000 samples of an H5 file.

    The start timestamp is rebuilt from the file name
    (``auto_<day of year>_<HHMMSS>_b...``), taking the year to be 2020.

    Args:
        h5_path: Path of the file (Windows-style paths go through fix_path).

    Returns:
        ``[{'ts': start_ts, 'rms': rms}]``, or None when the file is missing,
        its name does not match, it holds no samples, or it cannot be read.
    """
    h5_path = fix_path(h5_path)
    if not os.path.exists(h5_path):
        return None
    try:
        # Start time from the file name (day of year + HHMMSS).
        match = re.search(r'auto_(\d+)_(\d{6})_b', os.path.basename(h5_path))
        if not match:
            return None
        julian, time_str = int(match.group(1)), match.group(2)
        h, m, s = int(time_str[:2]), int(time_str[2:4]), int(time_str[4:6])
        start_ts = int(datetime(2020, 1, 1).timestamp() + (julian - 1) * 86400 + h * 3600 + m * 60 + s)

        with h5py.File(h5_path, 'r') as f:
            samples = f['adc_values'][0:5000]

        # The mean of an empty array is NaN, which is not valid JSON.
        if len(samples) == 0:
            return None

        rms = float(np.sqrt(np.mean(samples.astype(np.float64)**2)))
        return [{'ts': start_ts, 'rms': rms}]
    except Exception as e:
        # Still best effort, but the failure is reported and Ctrl-C is no
        # longer swallowed as it was by the former bare `except`.
        tqdm.write(f"RMS failed for {h5_path}: {e}")
        return None
|
||||
|
||||
def main():
    """Pre-compute one RMS value per node for every date of the index.

    For each date, the first file of each node recorded on that day of year
    is used, preferring a file whose path names the channel. Results are
    written to ``OUTPUT_DIR/rms_<date>_<channel>.json``; dates without any
    result produce no file.
    """
    with open(INDEX_PATH, 'r') as f:
        index = json.load(f)
    nodes = index.get('nodes', {})
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    channel = "ch0"
    channel_tag = f'_{channel}_'

    for date in index.get('dates', []):
        # Single braces: the former "{{date}}"/"{{channel}}" produced the
        # literal name "rms_{date}_{channel}.json", so every date overwrote
        # the same file and the channel filter below could never match.
        output_file = OUTPUT_DIR / f"rms_{date}_{channel}.json"
        print(f"Processing {date}...")

        # Day of year for this date; the same for every node.
        target_julian = datetime.strptime(date, '%Y-%m-%d').timetuple().tm_yday

        results = {}
        for nid, node in tqdm(nodes.items(), desc=f"Nodes {date}"):
            # .get(): entries written by an indexer that records no 'julian'
            # field are skipped instead of raising KeyError.
            day_files = [f for f in node.get('files', []) if f.get('julian') == target_julian]

            target = next((f for f in day_files if channel_tag in f['path']), None)
            if target is None and day_files:
                target = day_files[0]

            if target:
                data = compute_rms(target['path'])
                if data:
                    results[nid] = data

        if results:
            with open(output_file, 'w') as f:
                json.dump({'date': date, 'channel': channel, 'nodes': results}, f)
|
||||
|
||||
if __name__ == '__main__': main()
|
||||
189
scripts/precompute_rms.py
Executable file
189
scripts/precompute_rms.py
Executable file
@@ -0,0 +1,189 @@
|
||||
"""
|
||||
Pré-calcul des valeurs RMS ADC pour tous les nodes.
|
||||
Génère un fichier JSON avec les RMS à intervalles réguliers pour une lecture rapide.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any
|
||||
import numpy as np
|
||||
import h5py
|
||||
from tqdm import tqdm
|
||||
|
||||
# Configuration
|
||||
SAMPLE_RATE = 200 # Hz
|
||||
RMS_INTERVAL_SEC = 60 # Calculer RMS toutes les 60 secondes (plus rapide)
|
||||
RMS_WINDOW_SEC = 5 # Fenêtre de calcul RMS (5 secondes = 1000 samples)
|
||||
|
||||
INDEX_PATH = Path(r"F:\seismic_webapp\data\index.json")
|
||||
OUTPUT_DIR = Path(r"F:\seismic_webapp\data\rms_cache")
|
||||
|
||||
|
||||
def compute_rms_for_file(h5_path: str, interval_sec: int = RMS_INTERVAL_SEC, window_sec: int = RMS_WINDOW_SEC, max_duration_sec: int = 3600) -> List[Dict]:
    """Compute RMS values at regular intervals for one HDF5 file.

    Args:
        h5_path: Path of the HDF5 file.
        interval_sec: Spacing between two RMS points, in seconds.
        window_sec: Length of the window each RMS is computed over, in seconds.
        max_duration_sec: Only this many seconds from the start of the file
            are processed, to keep the run fast.

    Returns:
        List of ``{'ts', 'rms'}`` dicts, where ``ts`` is the dataset's
        ``timestamp`` attribute plus the window offset in whole seconds. The
        list is empty when the file has no ``adc_values`` dataset or no
        ``timestamp`` attribute; if reading fails, the error is printed and
        the points computed so far are returned.
    """
    results: List[Dict] = []

    try:
        with h5py.File(h5_path, 'r') as f:
            if 'adc_values' not in f:
                return results

            dataset = f['adc_values']

            # Start timestamp; without it the points cannot be dated.
            if 'timestamp' not in dataset.attrs:
                return results
            start_ts = int(dataset.attrs['timestamp'])

            window_samples = window_sec * SAMPLE_RATE
            interval_samples = interval_sec * SAMPLE_RATE

            # Cap the processed duration to keep the run fast.
            max_samples = min(dataset.shape[0], max_duration_sec * SAMPLE_RATE)

            # "+ 1": a window starting at max_samples - window_samples still
            # fits entirely, so that start index must be included.
            for idx in range(0, max_samples - window_samples + 1, interval_samples):
                # Read only the window that is needed.
                samples = dataset[idx:idx + window_samples]
                rms = float(np.sqrt(np.mean(samples.astype(np.float64) ** 2)))

                results.append({
                    'ts': start_ts + (idx // SAMPLE_RATE),
                    'rms': rms
                })

    except Exception as e:
        print(f"Erreur lecture {h5_path}: {e}")

    return results
|
||||
|
||||
|
||||
def precompute_for_date(index: Dict, date: str, channel: str = 'ch0') -> Dict[str, List[Dict]]:
    """Precompute the RMS series of every node that has files for ``date``.

    For each such node, the first file whose path contains both
    ``_<channel>_`` and ``_data_`` is used; if there is none, the first file
    whose path contains ``_<channel>_`` is used instead.

    Returns:
        ``{node_id: [{'ts': ..., 'rms': ...}, ...]}``, containing only the
        nodes for which a file was found and a non-empty series was computed.
    """
    # (node_id, file list) pairs for the nodes that have data on this date.
    candidates = [
        (node_id, node['dates'][date])
        for node_id, node in index['nodes'].items()
        if node.get('dates') and date in node['dates']
    ]

    print(f"Traitement de {len(candidates)} nodes pour {date}, canal {channel}")

    marker = f'_{channel}_'
    rms_by_node = {}

    for node_id, files in tqdm(candidates, desc=f"Date {date}"):
        # "data" files for the requested channel take priority ...
        chosen = next(
            (entry for entry in files
             if marker in entry['path'] and '_data_' in entry['path']),
            None,
        )
        # ... otherwise fall back to any file of that channel.
        if not chosen:
            chosen = next(
                (entry for entry in files if marker in entry['path']),
                None,
            )
        if not chosen:
            continue

        series = compute_rms_for_file(chosen['path'])
        if series:
            rms_by_node[node_id] = series

    return rms_by_node
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: precompute RMS cache files.

    With ``--date`` only that date and ``--channel`` are processed; with
    ``--all`` every indexed date is processed for ch0 to ch3; otherwise only
    the first indexed date is processed for ch0. A (date, channel) pair whose
    output file already exists is skipped.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Pré-calcul des RMS ADC')
    cli.add_argument('--date', help='Date spécifique (ex: 2020-09-02)')
    cli.add_argument('--channel', default='ch0', help='Canal (ch0-ch3)')
    cli.add_argument('--all', action='store_true', help='Traiter toutes les dates/canaux')
    options = cli.parse_args()

    # The index is mandatory: stop right away when it is missing.
    if not INDEX_PATH.exists():
        print(f"Index non trouvé: {INDEX_PATH}")
        sys.exit(1)

    with open(INDEX_PATH, 'r') as index_file:
        index = json.load(index_file)

    print(f"Index chargé: {len(index['nodes'])} nodes, {len(index['dates'])} dates")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Work out which (date, channel) pairs to process.
    if options.date:
        dates_to_process, channels_to_process = [options.date], [options.channel]
    elif options.all:
        dates_to_process, channels_to_process = index['dates'], ['ch0', 'ch1', 'ch2', 'ch3']
    else:
        # Default: first available date only, channel ch0.
        dates_to_process, channels_to_process = index['dates'][:1], ['ch0']

    for date in dates_to_process:
        for channel in channels_to_process:
            target = OUTPUT_DIR / f"rms_{date}_{channel}.json"

            # An existing cache file is never recomputed.
            if target.exists():
                print(f"Skip {target.name} (déjà existant)")
                continue

            print(f"\n=== Traitement {date} - {channel} ===")

            node_rms = precompute_for_date(index, date, channel)
            if not node_rms:
                print(f"Aucune donnée pour {date} - {channel}")
                continue

            payload = {
                'date': date,
                'channel': channel,
                'interval_sec': RMS_INTERVAL_SEC,
                'window_sec': RMS_WINDOW_SEC,
                'nodes': node_rms,
                'generated_at': datetime.now().isoformat()
            }
            with open(target, 'w') as out:
                json.dump(payload, out)

            print(f"Sauvegardé: {target.name} ({len(node_rms)} nodes)")

    print("\n=== Terminé ===")


if __name__ == '__main__':
    main()
|
||||
83
scripts/rebuild_h5_db.py
Executable file
83
scripts/rebuild_h5_db.py
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Rebuild H5 metadata database for the seismic viewer."""
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
# Directories walked recursively when looking for .h5 files.
H5_ROOTS = ['/mnt/data_sdb1', '/mnt/kingston']

# SQLite database file that gets rebuilt.
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# Captures the node code (digits after "b") and the channel number
# (digits after "_ch") from a file name.
FILE_PATTERN = re.compile(r'b(\d+)_.*_ch(\d+)')

# DDL statements executed, in order, before the tables are refilled.
SCHEMA = [
    (
        'CREATE TABLE IF NOT EXISTS positions ('
        'node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, '
        'sample_count INTEGER, last_seen TEXT)'
    ),
    (
        'CREATE TABLE IF NOT EXISTS files ('
        'id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, '
        'channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, '
        'FOREIGN KEY(node_code) REFERENCES positions(node_code))'
    ),
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]
|
||||
|
||||
|
||||
def rebuild_db():
    """Rebuild the H5 metadata database from the files found under H5_ROOTS.

    Creates the schema when missing, empties the ``files`` and ``positions``
    tables, walks every root for ``.h5`` files whose name matches
    FILE_PATTERN, inserts one ``files`` row per file and one ``positions``
    row per node code, then commits. The connection is always closed; when
    an error interrupts the run nothing is committed.
    """
    # Local import: the module itself only imports `datetime` from datetime.
    from datetime import timezone

    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        for stmt in SCHEMA:
            cur.execute(stmt)

        # Full rebuild: start from empty tables.
        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        files_counter = 0
        # node_code -> {'data': bool, 'aux': bool, 'count': int, 'last': mtime}
        summary = {}

        for root in H5_ROOTS:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if not filename.endswith('.h5'):
                        continue
                    match = FILE_PATTERN.search(filename)
                    if not match:
                        continue

                    filepath = os.path.join(dirpath, filename)
                    node_code = int(match.group(1))
                    channel = int(match.group(2))
                    dataset = 'aux' if 'aux' in filename else 'data'

                    try:
                        stat = os.stat(filepath)
                    except OSError as exc:
                        # Broken symlink or file removed during the walk:
                        # skip it instead of aborting the whole rebuild.
                        print(f"Skipping unreadable file {filepath}: {exc}")
                        continue
                    mtime = int(stat.st_mtime)
                    size = stat.st_size

                    node_stats = summary.setdefault(
                        node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0}
                    )
                    node_stats['count'] += 1
                    node_stats['last'] = max(node_stats['last'], mtime)
                    node_stats[dataset] = True

                    cur.execute(
                        'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                        (filepath, node_code, channel, dataset, size, mtime)
                    )
                    files_counter += 1

        print(f"Indexed {files_counter} H5 files")

        for node_code, stats in summary.items():
            has_data = 1 if stats['data'] else 0
            has_aux = 1 if stats['aux'] else 0
            # Same naive-UTC ISO string as the deprecated utcfromtimestamp().
            last_seen = (
                datetime.fromtimestamp(stats['last'], tz=timezone.utc)
                .replace(tzinfo=None)
                .isoformat()
            )
            cur.execute(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen) VALUES (?, ?, ?, ?, ?)',
                (node_code, has_data, has_aux, stats['count'], last_seen)
            )

        conn.commit()
    finally:
        conn.close()

    print(f"Rebuilt DB at {DB_PATH} with {len(summary)} positions")


if __name__ == '__main__':
    rebuild_db()
|
||||
104
scripts/rebuild_h5_db_v2.py
Executable file
104
scripts/rebuild_h5_db_v2.py
Executable file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Rebuild H5 metadata database - V2 (capture ALL patterns)."""
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
# Directories walked recursively when looking for .h5 files.
H5_ROOTS = [
    '/mnt/data_sdb1',
    '/mnt/kingston',
]

# SQLite database file that gets rebuilt.
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# Permissive patterns: any "b<digits>" gives the node code, and the channel
# is looked up separately because it may be absent from the file name.
FILE_PATTERN = re.compile(r'b(\d+)')
CHANNEL_PATTERN = re.compile(r'ch(\d+)')

# DDL statements executed, in order, before the tables are refilled.
SCHEMA = [
    (
        'CREATE TABLE IF NOT EXISTS positions ('
        'node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, '
        'sample_count INTEGER, last_seen TEXT)'
    ),
    (
        'CREATE TABLE IF NOT EXISTS files ('
        'id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, '
        'channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, '
        'FOREIGN KEY(node_code) REFERENCES positions(node_code))'
    ),
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]
|
||||
|
||||
def rebuild_db(planned_positions: int = 205):
    """Rebuild the H5 metadata database (V2: permissive file-name matching).

    Creates the schema when missing, empties the ``files`` and ``positions``
    tables, walks every root in H5_ROOTS for ``.h5`` files whose name
    contains a ``b<digits>`` node code, inserts one ``files`` row per file
    and one ``positions`` row per node code, commits, then prints a summary.
    The connection is always closed; when an error interrupts the run
    nothing is committed.

    Args:
        planned_positions: Number of planned positions used as the
            denominator of the printed coverage percentage.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        for stmt in SCHEMA:
            cur.execute(stmt)

        # Full rebuild: start from empty tables.
        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        files_counter = 0
        # node_code -> {'data': bool, 'aux': bool, 'count': int, 'last': mtime}
        summary = {}

        for root in H5_ROOTS:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if not filename.endswith('.h5'):
                        continue

                    node_match = FILE_PATTERN.search(filename)
                    if not node_match:
                        continue
                    node_code = int(node_match.group(1))

                    # The channel may be absent from the name: -1 marks "unknown".
                    channel_match = CHANNEL_PATTERN.search(filename)
                    channel = int(channel_match.group(1)) if channel_match else -1

                    # Anything not flagged "aux" in its name counts as "data".
                    dataset = 'aux' if 'aux' in filename else 'data'

                    filepath = os.path.join(dirpath, filename)
                    try:
                        stat = os.stat(filepath)
                    except OSError as exc:
                        # Broken symlink or file removed during the walk:
                        # skip it instead of aborting the whole rebuild.
                        print(f"Skipping unreadable file {filepath}: {exc}")
                        continue
                    mtime = int(stat.st_mtime)
                    size = stat.st_size

                    node_stats = summary.setdefault(
                        node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0}
                    )
                    node_stats['count'] += 1
                    node_stats['last'] = max(node_stats['last'], mtime)
                    node_stats[dataset] = True

                    cur.execute(
                        'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                        (filepath, node_code, channel, dataset, size, mtime)
                    )
                    files_counter += 1

        print(f"✓ Indexed {files_counter} H5 files")

        for node_code, stats in summary.items():
            has_data = 1 if stats['data'] else 0
            has_aux = 1 if stats['aux'] else 0
            # Naive local-time ISO string of the most recent file mtime.
            last_seen = datetime.fromtimestamp(stats['last']).isoformat()
            cur.execute(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen) VALUES (?, ?, ?, ?, ?)',
                (node_code, has_data, has_aux, stats['count'], last_seen)
            )

        conn.commit()
    finally:
        conn.close()

    # Final statistics.
    total_positions = len(summary)
    with_data = sum(1 for s in summary.values() if s['data'])
    with_aux = sum(1 for s in summary.values() if s['aux'])
    with_both = sum(1 for s in summary.values() if s['data'] and s['aux'])
    # Guard the division so a zero denominator cannot crash the report.
    coverage = with_data / planned_positions * 100 if planned_positions else 0

    print(f"✓ Rebuilt DB: {total_positions} positions total")
    print(f" • With data files: {with_data}")
    print(f" • With aux files: {with_aux}")
    print(f" • Both: {with_both}")
    print(f" • Coverage: {coverage:.1f}% (assuming {planned_positions} planned)")


if __name__ == '__main__':
    rebuild_db()
|
||||
137
scripts/rebuild_h5_db_v3.py
Executable file
137
scripts/rebuild_h5_db_v3.py
Executable file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Rebuild H5 metadata database - V3 (include expected positions from CSV)."""
|
||||
import os
|
||||
import re
|
||||
import csv
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
# Directories walked recursively when looking for .h5 files.
H5_ROOTS = [
    '/mnt/data_sdb1',
    '/mnt/kingston',
]

# CSV file read for the expected node codes ("NodeCode" column).
CSV_PATH = '/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv'

# SQLite database file that gets rebuilt.
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# Node code: digits after "b". Channel: digits after "ch".
FILE_PATTERN = re.compile(r'b(\d+)')
CHANNEL_PATTERN = re.compile(r'ch(\d+)')

# DDL statements executed, in order, before the tables are refilled.
SCHEMA = [
    (
        'CREATE TABLE IF NOT EXISTS positions ('
        'node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, '
        'sample_count INTEGER, last_seen TEXT, expected BOOLEAN)'
    ),
    (
        'CREATE TABLE IF NOT EXISTS files ('
        'id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, '
        'channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, '
        'FOREIGN KEY(node_code) REFERENCES positions(node_code))'
    ),
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]
|
||||
|
||||
def rebuild_db():
    """Rebuild the H5 metadata database (V3: adds expected positions).

    Steps:
      1. Create the schema when missing and empty both tables.
      2. Load the expected node codes from the ``NodeCode`` column of
         CSV_PATH (best-effort: on error the run continues with the file
         scan only).
      3. Walk H5_ROOTS and insert one ``files`` row per ``.h5`` file whose
         name contains a ``b<digits>`` node code.
      4. Insert one ``positions`` row per node that is expected and/or
         found, commit, and print a summary.

    The connection is always closed; when an error interrupts the run
    nothing is committed.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        for stmt in SCHEMA:
            cur.execute(stmt)

        # A `positions` table created without the `expected` column is left
        # untouched by CREATE TABLE IF NOT EXISTS, and the INSERT below would
        # then fail, so add the column when it is missing.
        columns = {row[1] for row in cur.execute('PRAGMA table_info(positions)')}
        if 'expected' not in columns:
            cur.execute('ALTER TABLE positions ADD COLUMN expected BOOLEAN')

        # Full rebuild: start from empty tables.
        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        # 1. Load the expected positions from the CSV.
        expected_nodes = set()
        try:
            with open(CSV_PATH, 'r', encoding='utf-8-sig') as f:
                for row in csv.DictReader(f):
                    # DictReader yields None for cells missing from a short
                    # row; treat that like an empty cell.
                    node_code = (row.get('NodeCode') or '').strip()
                    if node_code and node_code.isdigit():
                        expected_nodes.add(int(node_code))
            print(f"✓ Loaded {len(expected_nodes)} expected positions from CSV")
        except Exception as e:
            # Deliberate best-effort: the CSV is optional for the rebuild.
            print(f"⚠ CSV not found or error: {e}")
            print(" Continuing with file scan only...")

        # 2. Scan the H5 files.
        files_counter = 0
        # node_code -> {'data': bool, 'aux': bool, 'count': int, 'last': mtime}
        found_nodes = {}

        for root in H5_ROOTS:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if not filename.endswith('.h5'):
                        continue

                    node_match = FILE_PATTERN.search(filename)
                    if not node_match:
                        continue
                    node_code = int(node_match.group(1))

                    # The channel may be absent from the name: -1 marks "unknown".
                    channel_match = CHANNEL_PATTERN.search(filename)
                    channel = int(channel_match.group(1)) if channel_match else -1
                    dataset = 'aux' if 'aux' in filename else 'data'

                    filepath = os.path.join(dirpath, filename)
                    try:
                        stat = os.stat(filepath)
                    except OSError as exc:
                        # Broken symlink or file removed during the walk:
                        # skip it instead of aborting the whole rebuild.
                        print(f"Skipping unreadable file {filepath}: {exc}")
                        continue
                    mtime = int(stat.st_mtime)
                    size = stat.st_size

                    node_stats = found_nodes.setdefault(
                        node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0}
                    )
                    node_stats['count'] += 1
                    node_stats['last'] = max(node_stats['last'], mtime)
                    node_stats[dataset] = True

                    cur.execute(
                        'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                        (filepath, node_code, channel, dataset, size, mtime)
                    )
                    files_counter += 1

        print(f"✓ Indexed {files_counter} H5 files")
        print(f"✓ Found {len(found_nodes)} positions with data")

        # 3. One row for EVERY position: expected ones plus found ones.
        for node_code in expected_nodes | set(found_nodes):
            stats = found_nodes.get(node_code)
            if stats is not None:
                has_data = 1 if stats['data'] else 0
                has_aux = 1 if stats['aux'] else 0
                last_seen = datetime.fromtimestamp(stats['last']).isoformat()
                sample_count = stats['count']
            else:
                # Expected position for which no file was found.
                has_data = 0
                has_aux = 0
                last_seen = None
                sample_count = 0

            cur.execute(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen, expected) VALUES (?, ?, ?, ?, ?, ?)',
                (node_code, has_data, has_aux, sample_count, last_seen,
                 1 if node_code in expected_nodes else 0)
            )

        conn.commit()

        # Final statistics, read back from the database.
        cur.execute('SELECT COUNT(*) FROM positions')
        total = cur.fetchone()[0]

        cur.execute('SELECT COUNT(*) FROM positions WHERE has_data = 1')
        with_data = cur.fetchone()[0]

        cur.execute('SELECT COUNT(*) FROM positions WHERE expected = 1')
        expected_count = cur.fetchone()[0]

        cur.execute('SELECT COUNT(*) FROM positions WHERE expected = 1 AND has_data = 0')
        missing = cur.fetchone()[0]
    finally:
        conn.close()

    print(f"\n📊 Database Summary:")
    print(f" • Total positions in DB: {total}")
    print(f" • Expected (from CSV): {expected_count}")
    print(f" • With H5 data: {with_data}")
    print(f" • Missing (expected but no data): {missing}")
    print(f" • Coverage: {(with_data/expected_count*100 if expected_count else 0):.1f}%")


if __name__ == '__main__':
    rebuild_db()
|
||||
114
scripts/rebuild_index.py
Executable file
114
scripts/rebuild_index.py
Executable file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Reconstruit l'index.json de la webapp à partir de l'inventaire complet.
|
||||
Prend en compte tous les fichiers HDF5 sur tous les disques.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
def main():
    """Rebuild the webapp's index.json from the full file inventory.

    Node positions are carried over from the existing index; every inventory
    entry that has a bumper id and an epoch time is filed under its node and
    calendar date. The previous index is saved next to the new one with a
    ``.json.bak`` suffix before being overwritten.
    """
    # Load the inventory (a list with one entry per HDF5 file).
    inv_path = Path(r'F:\seismic_webapp\inventory.json')
    with open(inv_path) as inv_file:
        inv = json.load(inv_file)
    print(f"Inventaire charge: {len(inv)} fichiers")

    # Load the existing index so the known positions can be kept.
    idx_path = Path(r'F:\seismic_webapp\data\index.json')
    with open(idx_path) as idx_file:
        old_idx = json.load(idx_file)
    print(f"Index existant: {len(old_idx.get('nodes', {}))} nodes")

    # Start from the existing nodes, keeping only their position.
    nodes = {
        node_id: {
            'position': node_data.get('position'),
            'dates': {},
            'hasDates': False
        }
        for node_id, node_data in old_idx.get('nodes', {}).items()
    }

    # (bumper_id, date) -> paths already added, for O(1) duplicate checks.
    seen_paths = {}

    for f in inv:
        bumper_id = f['bumper_id']
        if not bumper_id:
            continue

        # NOTE(review): assumes bumper_id has the same type as the keys of
        # the existing index (strings after a JSON load) - confirm.
        if bumper_id not in nodes:
            nodes[bumper_id] = {
                'position': None,
                'dates': {},
                'hasDates': False
            }

        # Entries without an epoch time cannot be assigned to a date.
        if not f['epoch_time']:
            continue
        # Calendar date in the machine's local timezone.
        date_str = datetime.fromtimestamp(f['epoch_time']).strftime('%Y-%m-%d')

        day_files = nodes[bumper_id]['dates'].setdefault(date_str, [])

        channel = f['channel']
        channels = [channel] if channel else []

        # Skip paths already recorded for this node and date.
        known = seen_paths.setdefault((bumper_id, date_str), set())
        if f['filepath'] in known:
            continue
        known.add(f['filepath'])

        day_files.append({
            'path': f['filepath'],
            'timestamp': f['epoch_time'],
            'channels': channels,
            'size_bytes': 0  # The inventory does not provide the size.
        })

    # Flag the nodes that ended up with at least one date.
    for node_data in nodes.values():
        node_data['hasDates'] = len(node_data['dates']) > 0

    # Statistics.
    nodes_with_data = sum(1 for n in nodes.values() if n['hasDates'])
    total_files = sum(
        len(files)
        for n in nodes.values()
        for files in n['dates'].values()
    )

    print(f"\nNouvel index:")
    print(f" Nodes total: {len(nodes)}")
    print(f" Nodes avec donnees: {nodes_with_data}")
    print(f" Fichiers indexes: {total_files}")

    # Top-level list of every date that has data: the RMS precompute
    # script reads index['dates'], so the rebuilt index must provide it.
    all_dates = sorted({date for n in nodes.values() for date in n['dates']})

    new_idx = {
        'nodes': nodes,
        'dates': all_dates,
        'sampleRateHz': old_idx.get('sampleRateHz', 200),
        'generated': datetime.now().isoformat()
    }

    # Back up the previous index first.
    backup_path = idx_path.with_suffix('.json.bak')
    with open(backup_path, 'w') as f:
        json.dump(old_idx, f)
    print(f"\nBackup sauvegarde: {backup_path}")

    # Write the new index.
    with open(idx_path, 'w') as f:
        json.dump(new_idx, f, indent=2)
    print(f"Nouvel index sauvegarde: {idx_path}")


if __name__ == '__main__':
    main()
|
||||
39
scripts/show_stats.py
Executable file
39
scripts/show_stats.py
Executable file
@@ -0,0 +1,39 @@
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
# Load the inventory (a list with one entry per file) and close the handle.
with open(r'F:\seismic_webapp\inventory.json') as inventory_file:
    d = json.load(inventory_file)

# Per channel: number of "data" files, number of other ("aux") files, and
# the set of bumper ids seen on that channel.
by_channel = defaultdict(lambda: {'data': 0, 'aux': 0, 'bumpers': set()})
for f in d:
    ch = f['channel'] or 'unknown'
    if f['file_type'] == 'data':
        by_channel[ch]['data'] += 1
    else:
        by_channel[ch]['aux'] += 1
    if f['bumper_id']:
        by_channel[ch]['bumpers'].add(f['bumper_id'])


def _channel_sort_key(ch):
    """Order "ch<N>" names numerically, other names next, 'unknown' last."""
    name = str(ch)
    if name == 'unknown':
        return (2, 0, name)
    suffix = name[2:]
    if name.startswith('ch') and suffix.isdecimal():
        return (0, int(suffix), name)
    return (1, 0, name)


print('=== RESUME PAR CANAL ===')
print('Canal DATA AUX Bumpers')
print('-' * 35)
# Print every channel actually present, so the rows always add up to the
# TOTAL line even when a channel outside the usual set shows up.
for ch in sorted(by_channel, key=_channel_sort_key):
    s = by_channel[ch]
    print(f'{ch:8} {s["data"]:4} {s["aux"]:4} {len(s["bumpers"]):3}')

# Global statistics.
total_data = sum(s['data'] for s in by_channel.values())
total_aux = sum(s['aux'] for s in by_channel.values())
all_bumpers = set()
for s in by_channel.values():
    all_bumpers.update(s['bumpers'])

print('-' * 35)
print(f'TOTAL {total_data:4} {total_aux:4} {len(all_bumpers):3}')

# Files whose inventory entry carries a truthy 'error' value.
errors = [f for f in d if f['error']]
print(f'\nErreurs de lecture: {len(errors)} fichiers')
if errors:
    # Show at most the first five, with the file name cut to 50 characters.
    for e in errors[:5]:
        print(f' - {e["filename"][:50]}...')
|
||||
31
scripts/test_hdf5.py
Executable file
31
scripts/test_hdf5.py
Executable file
@@ -0,0 +1,31 @@
|
||||
import h5py
|
||||
import numpy as np
|
||||
|
||||
# File inspected by this script.
filepath = r'F:\2020-09-22\data\auto_266_143513_b29_13_213605_data_rsn2648_seq1_ch0_1599039547.h5'

with h5py.File(filepath, 'r') as f:
    print("=== Structure du fichier ===")
    print("Datasets:", list(f.keys()))

    if 'adc_values' in f:
        d = f['adc_values']
        print("\n=== Dataset adc_values ===")
        print("Shape:", d.shape)
        print("Dtype:", d.dtype)

        print("\n=== Attributs du dataset ===")
        for k, v in d.attrs.items():
            print(f" {k}: {v}")

        # Load a small sample: the first 2000 values only.
        sample = d[:2000]
        print("\n=== Statistiques (premiers 2000 samples) ===")
        print("Min:", np.min(sample))
        print("Max:", np.max(sample))
        print("Mean:", np.mean(sample))
        print("Std:", np.std(sample))
        # Square in float64: squaring in the dataset's own dtype can wrap
        # around silently when the samples are stored as integers.
        print("RMS:", np.sqrt(np.mean(sample.astype(np.float64) ** 2)))

        print("\n=== Premiers 20 valeurs ===")
        print(sample[:20])
|
||||
Reference in New Issue
Block a user