#!/usr/bin/env python3
"""Rebuild H5 metadata database - V3 (include expected positions from CSV)."""

import os
import re
import csv
import sqlite3
from datetime import datetime
from typing import Dict, Iterable, List, Set, Tuple

H5_ROOTS = ['/mnt/data_sdb1', '/mnt/kingston']
CSV_PATH = '/mnt/kingston/Copie de SETE_AUV_DARFV4-Copier(1).csv'
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# The node code is the first run of digits following a "b" in the file name;
# the channel is the first run of digits following "ch".
FILE_PATTERN = re.compile(r'b(\d+)')
CHANNEL_PATTERN = re.compile(r'ch(\d+)')

SCHEMA = [
    'CREATE TABLE IF NOT EXISTS positions (node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, sample_count INTEGER, last_seen TEXT, expected BOOLEAN)',
    'CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, FOREIGN KEY(node_code) REFERENCES positions(node_code))',
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]


def _load_expected_nodes(csv_path: str) -> Set[int]:
    """Return the node codes listed in the ``NodeCode`` column of the CSV.

    Loading is best-effort: if the file cannot be opened, decoded or parsed,
    a warning is printed and whatever was read so far is returned.
    """
    expected_nodes: Set[int] = set()
    try:
        with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # DictReader fills the missing cells of a short row with
                # None, so fall back to '' before stripping.
                node_code = (row.get('NodeCode') or '').strip()
                # isdecimal() only accepts characters that int() can parse
                # (isdigit() also accepts e.g. superscript digits).
                if node_code.isdecimal():
                    expected_nodes.add(int(node_code))
    except (OSError, csv.Error, UnicodeError) as e:
        print(f"⚠ CSV not found or error: {e}")
        print(" Continuing with file scan only...")
    else:
        print(f"✓ Loaded {len(expected_nodes)} expected positions from CSV")
    return expected_nodes


def _scan_h5_files(h5_roots: Iterable[str]) -> Tuple[List[tuple], Dict[int, dict]]:
    """Walk the roots and collect one record per matching ``.h5`` file.

    Returns ``(file_rows, found_nodes)`` where ``file_rows`` holds
    ``(path, node_code, channel, dataset, size, mtime)`` tuples and
    ``found_nodes`` maps each node code to its aggregated stats
    (``data``/``aux`` flags, file ``count`` and ``last`` modification time).
    """
    file_rows: List[tuple] = []
    found_nodes: Dict[int, dict] = {}
    for root in h5_roots:
        for dirpath, _, filenames in os.walk(root):
            for filename in filenames:
                if not filename.endswith('.h5'):
                    continue
                node_match = FILE_PATTERN.search(filename)
                if not node_match:
                    continue
                filepath = os.path.join(dirpath, filename)
                try:
                    stat = os.stat(filepath)
                except OSError as e:
                    # Broken symlink or file removed during the walk: skip it
                    # instead of aborting the whole rebuild.
                    print(f"⚠ Skipping unreadable file {filepath}: {e}")
                    continue

                node_code = int(node_match.group(1))
                channel_match = CHANNEL_PATTERN.search(filename)
                channel = int(channel_match.group(1)) if channel_match else -1
                dataset = 'aux' if 'aux' in filename else 'data'
                mtime = int(stat.st_mtime)

                stats = found_nodes.setdefault(
                    node_code, {'data': False, 'aux': False, 'count': 0, 'last': 0}
                )
                stats['count'] += 1
                stats['last'] = max(stats['last'], mtime)
                stats[dataset] = True

                file_rows.append(
                    (filepath, node_code, channel, dataset, stat.st_size, mtime)
                )
    return file_rows, found_nodes


def _build_position_rows(expected_nodes: Set[int], found_nodes: Dict[int, dict]) -> List[tuple]:
    """Build one ``positions`` row for every expected or discovered node."""
    rows = []
    for node_code in sorted(expected_nodes | set(found_nodes)):
        stats = found_nodes.get(node_code)
        if stats is not None:
            has_data = 1 if stats['data'] else 0
            has_aux = 1 if stats['aux'] else 0
            sample_count = stats['count']
            last_seen = datetime.fromtimestamp(stats['last']).isoformat()
        else:
            # Expected position for which no file was found.
            has_data = has_aux = sample_count = 0
            last_seen = None
        expected = 1 if node_code in expected_nodes else 0
        rows.append((node_code, has_data, has_aux, sample_count, last_seen, expected))
    return rows


def _print_summary(cur: sqlite3.Cursor) -> None:
    """Print position counts and the coverage of expected positions."""
    cur.execute('SELECT COUNT(*) FROM positions')
    total = cur.fetchone()[0]
    cur.execute('SELECT COUNT(*) FROM positions WHERE has_data = 1')
    with_data = cur.fetchone()[0]
    cur.execute('SELECT COUNT(*) FROM positions WHERE expected = 1')
    expected_count = cur.fetchone()[0]
    cur.execute('SELECT COUNT(*) FROM positions WHERE expected = 1 AND has_data = 0')
    missing = cur.fetchone()[0]

    # Coverage only counts expected positions that have data, so unexpected
    # nodes found on disk cannot push the figure above 100%.
    covered = expected_count - missing
    coverage = covered / expected_count * 100 if expected_count else 0

    print("\n📊 Database Summary:")
    print(f" • Total positions in DB: {total}")
    print(f" • Expected (from CSV): {expected_count}")
    print(f" • With H5 data: {with_data}")
    print(f" • Missing (expected but no data): {missing}")
    print(f" • Coverage: {coverage:.1f}%")


def rebuild_db(db_path=DB_PATH, csv_path=CSV_PATH, h5_roots=H5_ROOTS):
    """Rebuild the ``positions`` and ``files`` tables from scratch.

    Args:
        db_path: SQLite database file to (re)populate.
        csv_path: CSV file whose ``NodeCode`` column lists expected positions.
        h5_roots: Directories scanned recursively for ``.h5`` files.

    The CSV is loaded and the filesystem scanned first; the tables are then
    emptied and refilled inside a single transaction, so a failure while
    writing leaves the previous content in place. A summary is printed at
    the end.
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        for stmt in SCHEMA:
            cur.execute(stmt)

        # 1. Load the expected positions from the CSV.
        expected_nodes = _load_expected_nodes(csv_path)

        # 2. Scan the H5 files.
        file_rows, found_nodes = _scan_h5_files(h5_roots)
        print(f"✓ Indexed {len(file_rows)} H5 files")
        print(f"✓ Found {len(found_nodes)} positions with data")

        # 3. Create entries for ALL positions (expected + found).
        position_rows = _build_position_rows(expected_nodes, found_nodes)

        # "with conn" commits on success and rolls back on error.
        with conn:
            cur.execute('DELETE FROM files')
            cur.execute('DELETE FROM positions')
            cur.executemany(
                'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                file_rows,
            )
            cur.executemany(
                'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen, expected) VALUES (?, ?, ?, ?, ?, ?)',
                position_rows,
            )

        # Final stats.
        _print_summary(cur)
    finally:
        conn.close()


if __name__ == '__main__':
    rebuild_db()