#!/usr/bin/env python3
"""Rebuild H5 metadata database for the seismic viewer."""

import os
import re
import sqlite3
import sys
from contextlib import closing
from datetime import datetime, timezone

# Directory trees that are walked recursively for ``*.h5`` files.
H5_ROOTS = [
    '/mnt/data_sdb1',
    '/mnt/kingston',
]

# SQLite database file that is emptied and repopulated on every run.
DB_PATH = '/home/floppyrj45/docker/seismic-nodes-viewer/h5_data.db'

# First capture group -> node code, second capture group -> channel number.
FILE_PATTERN = re.compile(r'b(\d+)_.*_ch(\d+)')

# Statements executed (idempotently) before the tables are repopulated.
SCHEMA = [
    'CREATE TABLE IF NOT EXISTS positions (node_code INTEGER PRIMARY KEY, has_data BOOLEAN, has_aux BOOLEAN, sample_count INTEGER, last_seen TEXT)',
    'CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, node_code INTEGER, channel INTEGER, dataset TEXT, size INTEGER, mtime INTEGER, FOREIGN KEY(node_code) REFERENCES positions(node_code))',
    'CREATE INDEX IF NOT EXISTS idx_files_node ON files(node_code)',
]


def _iter_h5_files(roots):
    """Yield one ``files`` row per matching H5 file found under *roots*.

    Each yielded tuple is ``(path, node_code, channel, dataset, size, mtime)``.
    Files whose name does not end in ``.h5`` or does not match
    ``FILE_PATTERN`` are ignored. Files that cannot be stat'ed are reported
    on stderr and skipped instead of aborting the scan.
    """
    for root in roots:
        if not os.path.isdir(root):
            # os.walk() would silently yield nothing for this root; say so,
            # because the rebuilt database will not contain its files.
            print(f"Warning: H5 root {root} is not a directory, skipping",
                  file=sys.stderr)
            continue
        for dirpath, _, filenames in os.walk(root):
            for filename in filenames:
                if not filename.endswith('.h5'):
                    continue
                match = FILE_PATTERN.search(filename)
                if not match:
                    continue
                filepath = os.path.join(dirpath, filename)
                try:
                    info = os.stat(filepath)
                except OSError as exc:
                    # Broken symlink, permission problem, or the file was
                    # removed between the directory listing and the stat.
                    print(f"Warning: cannot stat {filepath}: {exc}",
                          file=sys.stderr)
                    continue
                dataset = 'aux' if 'aux' in filename else 'data'
                yield (
                    filepath,
                    int(match.group(1)),
                    int(match.group(2)),
                    dataset,
                    info.st_size,
                    int(info.st_mtime),
                )


def rebuild_db(db_path=None, roots=None):
    """Empty and repopulate the ``files`` and ``positions`` tables.

    Every matching H5 file under *roots* becomes one row in ``files``; the
    rows are then aggregated per node code into ``positions`` (whether any
    'data' / 'aux' file was seen, the number of files, and the newest mtime
    rendered as a naive UTC ISO-8601 string).

    All deletes and inserts run in a single transaction: if anything raises,
    the transaction is rolled back, and the connection is closed either way.

    Args:
        db_path: SQLite database file to rebuild. Defaults to ``DB_PATH``.
        roots: Iterable of directories to scan. Defaults to ``H5_ROOTS``.
    """
    if db_path is None:
        db_path = DB_PATH
    if roots is None:
        roots = H5_ROOTS

    files_counter = 0
    summary = {}

    # closing() guarantees conn.close(); the inner ``with conn`` commits on
    # success and rolls back on error (it does not close the connection).
    with closing(sqlite3.connect(db_path)) as conn:
        with conn:
            cur = conn.cursor()
            for stmt in SCHEMA:
                cur.execute(stmt)
            cur.execute('DELETE FROM files')
            cur.execute('DELETE FROM positions')

            for row in _iter_h5_files(roots):
                _, node_code, _, dataset, _, mtime = row
                stats = summary.setdefault(
                    node_code,
                    {'data': False, 'aux': False, 'count': 0, 'last': 0},
                )
                stats['count'] += 1
                stats['last'] = max(stats['last'], mtime)
                stats[dataset] = True  # dataset is either 'data' or 'aux'
                cur.execute(
                    'INSERT INTO files (path, node_code, channel, dataset, size, mtime) VALUES (?, ?, ?, ?, ?, ?)',
                    row,
                )
                files_counter += 1

            print(f"Indexed {files_counter} H5 files")

            for node_code, stats in summary.items():
                # Same text as the deprecated datetime.utcfromtimestamp():
                # convert in UTC, then drop tzinfo so no "+00:00" suffix is
                # appended to the stored string.
                last_seen = (
                    datetime.fromtimestamp(stats['last'], tz=timezone.utc)
                    .replace(tzinfo=None)
                    .isoformat()
                )
                cur.execute(
                    'INSERT INTO positions (node_code, has_data, has_aux, sample_count, last_seen) VALUES (?, ?, ?, ?, ?)',
                    (
                        node_code,
                        1 if stats['data'] else 0,
                        1 if stats['aux'] else 0,
                        stats['count'],
                        last_seen,
                    ),
                )

    print(f"Rebuilt DB at {db_path} with {len(summary)} positions")


if __name__ == '__main__':
    rebuild_db()