105 lines
3.9 KiB
Python
Executable File
105 lines
3.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Rebuild H5 metadata database - V2 (capture ALL patterns)."""
|
|
import os
|
|
import re
|
|
import sqlite3
|
|
from datetime import datetime
|
|
|
|
# Directory trees that are scanned recursively for *.h5 files by default.
H5_ROOTS = ['/mnt/data_sdb1', '/mnt/kingston']
# SQLite database file that receives the rebuilt index by default.
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# Deliberately permissive patterns: the first "b<digits>" (node code) and
# "ch<digits>" (channel) run found anywhere in a file name is used.
FILE_PATTERN = re.compile(r'b(\d+)')
CHANNEL_PATTERN = re.compile(r'ch(\d+)')

# DDL executed on every run; every statement is idempotent (IF NOT EXISTS).
SCHEMA = [
    'CREATE TABLE IF NOT EXISTS positions (node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, sample_count INTEGER, last_seen TEXT)',
    'CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, FOREIGN KEY(node_code) REFERENCES positions(node_code))',
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]


def _parse_h5_name(filename):
    """Return ``(node_code, channel, dataset)`` for an indexable file name.

    Returns ``None`` when *filename* does not end in ``.h5`` or contains no
    ``b<digits>`` node code.  ``channel`` is ``-1`` when the name has no
    ``ch<digits>`` part; ``dataset`` is ``'aux'`` when the name contains the
    substring ``aux`` and ``'data'`` otherwise.
    """
    if not filename.endswith('.h5'):
        return None
    node_match = FILE_PATTERN.search(filename)
    if node_match is None:
        return None
    channel_match = CHANNEL_PATTERN.search(filename)
    channel = int(channel_match.group(1)) if channel_match else -1
    dataset = 'aux' if 'aux' in filename else 'data'
    return int(node_match.group(1)), channel, dataset


def rebuild_db(roots=None, db_path=None, planned_positions=205):
    """Rebuild the H5 metadata database from the files found on disk.

    The ``files`` and ``positions`` tables are created if missing, emptied,
    and repopulated: one ``files`` row per matching ``.h5`` file and one
    ``positions`` row per distinct node code.  A short report is printed to
    standard output.

    Args:
        roots: Iterable of directories to walk recursively.  Defaults to
            ``H5_ROOTS``.
        db_path: Path of the SQLite database file.  Defaults to ``DB_PATH``.
        planned_positions: Number of planned positions used as the
            denominator of the printed coverage percentage.

    Raises:
        ValueError: If *planned_positions* is not positive.
        sqlite3.Error: If the database cannot be opened or written.  The
            delete/insert transaction is rolled back in that case, so the
            previous table contents are kept.
    """
    if planned_positions <= 0:
        raise ValueError('planned_positions must be a positive number')

    scan_roots = H5_ROOTS if roots is None else roots
    target_db = DB_PATH if db_path is None else db_path

    files_counter = 0
    summary = {}

    conn = sqlite3.connect(target_db)
    try:
        # "with conn" commits when the body succeeds and rolls back when it
        # raises, so a failed rebuild never leaves half-filled tables behind.
        with conn:
            cur = conn.cursor()

            for stmt in SCHEMA:
                cur.execute(stmt)

            cur.execute('DELETE FROM files')
            cur.execute('DELETE FROM positions')

            for root in scan_roots:
                for dirpath, _, filenames in os.walk(root):
                    for filename in filenames:
                        parsed = _parse_h5_name(filename)
                        if parsed is None:
                            continue
                        node_code, channel, dataset = parsed

                        filepath = os.path.join(dirpath, filename)
                        try:
                            stat = os.stat(filepath)
                        except OSError as exc:
                            # Dangling symlink, file removed during the walk,
                            # permission problem...: skip this one file
                            # instead of aborting the whole rebuild.
                            print(f"⚠ Skipped unreadable file {filepath}: {exc}")
                            continue
                        mtime = int(stat.st_mtime)
                        size = stat.st_size

                        # Update the per-node summary.
                        entry = summary.setdefault(
                            node_code,
                            {'data': False, 'aux': False, 'count': 0, 'last': 0},
                        )
                        entry['count'] += 1
                        entry['last'] = max(entry['last'], mtime)
                        entry[dataset] = True  # dataset is 'data' or 'aux'

                        # Insert the file row.
                        cur.execute(
                            'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                            (filepath, node_code, channel, dataset, size, mtime),
                        )
                        files_counter += 1

            print(f"✓ Indexed {files_counter} H5 files")

            # Insert one positions row per node code seen during the scan.
            for node_code, stats in summary.items():
                has_data = 1 if stats['data'] else 0
                has_aux = 1 if stats['aux'] else 0
                last_seen = datetime.fromtimestamp(stats['last']).isoformat()
                cur.execute(
                    'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen) VALUES (?, ?, ?, ?, ?)',
                    (node_code, has_data, has_aux, stats['count'], last_seen),
                )
    finally:
        # Always release the database handle, even when the rebuild failed.
        conn.close()

    # Final statistics.
    total_positions = len(summary)
    with_data = sum(1 for s in summary.values() if s['data'])
    with_aux = sum(1 for s in summary.values() if s['aux'])
    with_both = sum(1 for s in summary.values() if s['data'] and s['aux'])

    print(f"✓ Rebuilt DB: {total_positions} positions total")
    print(f" • With data files: {with_data}")
    print(f" • With aux files: {with_aux}")
    print(f" • Both: {with_both}")
    print(f" • Coverage: {(with_data/planned_positions*100):.1f}% (assuming {planned_positions} planned)")
|
|
|
|
# Script entry point: rebuild the index only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    rebuild_db()
|