Fix coverage: add /api/coverage route, remove stray gather code from loadCoverage
This commit is contained in:
137
scripts/rebuild_h5_db_v3.py
Executable file
137
scripts/rebuild_h5_db_v3.py
Executable file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Rebuild H5 metadata database - V3 (include expected positions from CSV)."""
|
||||
import os
|
||||
import re
|
||||
import csv
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
# Directories walked recursively by rebuild_db() when looking for ``.h5`` files.
H5_ROOTS = ['/mnt/data_sdb1', '/mnt/kingston']
# CSV listing the expected positions; rebuild_db() reads its ``NodeCode`` column.
CSV_PATH = '/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv'
# SQLite file that rebuild_db() empties and repopulates.
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# First "b<digits>" run found in a file name; the digits become the node code.
FILE_PATTERN = re.compile(r'b(\d+)')
# First "ch<digits>" run found in a file name; the digits become the channel.
CHANNEL_PATTERN = re.compile(r'ch(\d+)')

# DDL statements executed in order before each rebuild. All are idempotent
# (IF NOT EXISTS), so running them against an existing database is harmless.
# Adjacent string literals are concatenated by Python; the resulting SQL text
# is exactly one statement per list entry.
SCHEMA = [
    (
        'CREATE TABLE IF NOT EXISTS positions ('
        'node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, '
        'sample_count INTEGER, last_seen TEXT, expected BOOLEAN)'
    ),
    (
        'CREATE TABLE IF NOT EXISTS files ('
        'id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, '
        'channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, '
        'FOREIGN KEY(node_code) REFERENCES positions(node_code))'
    ),
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]
|
||||
|
||||
def rebuild_db(db_path=None, csv_path=None, h5_roots=None):
    """Rebuild the H5 metadata database from the expected-positions CSV and a disk scan.

    The ``files`` and ``positions`` tables are emptied and repopulated, and the
    result is committed once at the end. If anything fails before that commit
    the connection is closed without committing, so the partial rebuild is
    discarded.

    Args:
        db_path: SQLite file to (re)build. Defaults to ``DB_PATH``.
        csv_path: CSV whose ``NodeCode`` column lists the expected positions.
            Defaults to ``CSV_PATH``.
        h5_roots: Iterable of directories walked recursively for ``.h5``
            files. Defaults to ``H5_ROOTS``.

    Returns:
        None. Progress messages and a final summary are printed to stdout.

    Raises:
        sqlite3.Error: If the database cannot be opened or written.
    """
    # Resolve defaults at call time so explicit arguments never touch the
    # module-level paths.
    db_path = DB_PATH if db_path is None else db_path
    csv_path = CSV_PATH if csv_path is None else csv_path
    h5_roots = H5_ROOTS if h5_roots is None else h5_roots

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()

        for stmt in SCHEMA:
            cur.execute(stmt)

        # Start from empty tables; files first because it references positions.
        cur.execute('DELETE FROM files')
        cur.execute('DELETE FROM positions')

        # 1. Load the expected positions from the CSV.
        expected_nodes = _load_expected_nodes(csv_path)

        # 2. Scan the H5 files and index each one in the files table.
        files_counter, found_nodes = _index_h5_files(cur, h5_roots)
        print(f"✓ Indexed {files_counter} H5 files")
        print(f"✓ Found {len(found_nodes)} positions with data")

        # 3. Create one positions row for EVERY node (expected + found).
        cur.executemany(
            'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen, expected) VALUES (?, ?, ?, ?, ?, ?)',
            _position_rows(expected_nodes, found_nodes),
        )

        conn.commit()

        _print_summary(cur)
    finally:
        # Always release the database handle, even when the rebuild fails.
        conn.close()


def _load_expected_nodes(csv_path):
    """Return the set of integer node codes found in the CSV's ``NodeCode`` column.

    Cells that are absent, empty or not purely decimal digits are ignored. If
    the file cannot be opened, decoded or parsed, a warning is printed and the
    codes collected so far (possibly none) are returned.
    """
    expected_nodes = set()
    try:
        # newline='' lets the csv module handle embedded line breaks itself.
        with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # DictReader fills columns missing from a short row with None,
                # so guard before calling .strip().
                node_code = (row.get('NodeCode') or '').strip()
                if node_code.isdecimal():
                    expected_nodes.add(int(node_code))
        print(f"✓ Loaded {len(expected_nodes)} expected positions from CSV")
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"⚠ CSV not found or error: {e}")
        print(" Continuing with file scan only...")
    return expected_nodes


def _index_h5_files(cur, h5_roots):
    """Walk *h5_roots*, insert one ``files`` row per matching ``.h5`` file.

    A file is indexed when its name ends with ``.h5`` and matches
    ``FILE_PATTERN``. Returns ``(files_counter, found_nodes)`` where
    ``found_nodes`` maps node code -> ``{'data', 'aux', 'count', 'last'}``
    (dataset kinds seen, number of files, newest mtime in whole seconds).
    """
    files_counter = 0
    found_nodes = {}

    for root in h5_roots:
        for dirpath, _, filenames in os.walk(root):
            for filename in filenames:
                if not filename.endswith('.h5'):
                    continue

                node_match = FILE_PATTERN.search(filename)
                if not node_match:
                    continue

                filepath = os.path.join(dirpath, filename)
                try:
                    stat = os.stat(filepath)
                except OSError as e:
                    # File vanished or is a broken symlink: skip it instead of
                    # aborting the whole rebuild.
                    print(f"⚠ Skipping unreadable file {filepath}: {e}")
                    continue

                node_code = int(node_match.group(1))
                channel_match = CHANNEL_PATTERN.search(filename)
                # -1 marks a file whose name carries no channel number.
                channel = int(channel_match.group(1)) if channel_match else -1
                dataset = 'aux' if 'aux' in filename else 'data'
                mtime = int(stat.st_mtime)

                stats = found_nodes.setdefault(
                    node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0}
                )
                stats['count'] += 1
                stats['last'] = max(stats['last'], mtime)
                # dataset is either 'data' or 'aux', i.e. exactly the flag key.
                stats[dataset] = True

                cur.execute(
                    'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                    (filepath, node_code, channel, dataset, stat.st_size, mtime),
                )
                files_counter += 1

    return files_counter, found_nodes


def _position_rows(expected_nodes, found_nodes):
    """Yield one ``positions`` row tuple per node that is expected or found on disk."""
    for node_code in sorted(expected_nodes | set(found_nodes)):
        expected = 1 if node_code in expected_nodes else 0
        stats = found_nodes.get(node_code)
        if stats is None:
            # Expected position without any file on disk.
            yield (node_code, 0, 0, 0, None, expected)
        else:
            yield (
                node_code,
                1 if stats['data'] else 0,
                1 if stats['aux'] else 0,
                stats['count'],
                # Naive timestamp in the machine's local time zone.
                datetime.fromtimestamp(stats['last']).isoformat(),
                expected,
            )


def _print_summary(cur):
    """Print the final position counts and the coverage of expected positions."""
    def count(where=''):
        return cur.execute(f'SELECT COUNT(*) FROM positions{where}').fetchone()[0]

    total = count()
    with_data = count(' WHERE has_data = 1')
    expected_count = count(' WHERE expected = 1')
    missing = count(' WHERE expected = 1 AND has_data = 0')

    # Coverage only counts EXPECTED positions that have data; positions found
    # on disk but absent from the CSV must not push it above 100%.
    covered = expected_count - missing
    coverage = covered / expected_count * 100 if expected_count else 0

    print("\n📊 Database Summary:")
    print(f" • Total positions in DB: {total}")
    print(f" • Expected (from CSV): {expected_count}")
    print(f" • With H5 data: {with_data}")
    print(f" • Missing (expected but no data): {missing}")
    print(f" • Coverage: {coverage:.1f}%")
|
||||
|
||||
# Script entry point: rebuild the database only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    rebuild_db()
|
||||
Reference in New Issue
Block a user