#!/usr/bin/env python3
"""
Flask API server for SeaKESP H5 data and node metadata
Provides both H5 data access and node/date endpoints
"""

from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
import h5py
import numpy as np
import math
import os
import json
from pathlib import Path
from datetime import datetime

app = Flask(__name__)
CORS(app)

# Paths
DATA_DIR = Path("/data")
H5_DIR = DATA_DIR / "h5"
DOCS_DIR = DATA_DIR / "docs"
INDEX_FILE = DATA_DIR / "index.json"

# Load node index
nodes_data = {}
dates_list = []


def load_index():
    """Load node and date data from index.json into the module globals.

    On a missing, unreadable or malformed index file a message is printed
    and the globals are left untouched, so the server still starts.
    """
    global nodes_data, dates_list

    if not INDEX_FILE.exists():
        print(f"Warning: {INDEX_FILE} not found")
        return

    try:
        with open(INDEX_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        print(f"Error loading index.json: {e}")
        return

    if not isinstance(data, dict):
        print("Error loading index.json: top-level JSON value is not an object")
        return

    nodes_data = data.get('nodes', {})
    dates_list = data.get('dates', [])
    print(f"Loaded {len(nodes_data)} nodes and {len(dates_list)} dates from index.json")


# Load index on startup
load_index()


# ============================================================================
# Internal helpers
# ============================================================================

def _count_h5_files():
    """Return the number of *.h5 files directly inside H5_DIR (0 if absent)."""
    if not H5_DIR.exists():
        return 0
    return sum(1 for _ in H5_DIR.glob("*.h5"))


def _parse_h5_stem(stem):
    """Split a file stem of the form node_NODEID_YYYYMMDD into (node_id, date).

    Missing components are reported as the string "unknown".
    """
    parts = stem.split('_')
    node_id = parts[1] if len(parts) > 1 else "unknown"
    date_str = parts[2] if len(parts) > 2 else "unknown"
    return node_id, date_str


def _resolve_within(base_dir, user_path):
    """Resolve *user_path* against *base_dir*, refusing to leave *base_dir*.

    Returns the resolved Path, or None when the result would fall outside
    *base_dir* (``..`` segments, absolute paths elsewhere, symlink escapes)
    or when the path cannot be resolved at all.
    """
    try:
        base = base_dir.resolve()
        candidate = (base_dir / user_path).resolve()
        candidate.relative_to(base)
    except (ValueError, OSError, RuntimeError):
        # ValueError: outside base or embedded NUL; RuntimeError: symlink loop.
        return None
    return candidate


def _find_data_file(filename):
    """Locate a client-supplied data file name under DATA_DIR.

    The name is tried relative to DATA_DIR first (this also accepts absolute
    paths that point inside DATA_DIR, as returned by the file listing) and
    then relative to H5_DIR, so a bare H5 file name works too.

    Returns the resolved Path of an existing regular file, or None.
    """
    for base in (DATA_DIR, H5_DIR):
        candidate = _resolve_within(base, filename)
        if candidate is not None and candidate.is_file():
            return candidate
    return None


def _to_jsonable(value):
    """Convert an HDF5 attribute value into something jsonify can emit.

    Numeric scalars become floats (as before); multi-element arrays become
    lists instead of raising; bytes are decoded as UTF-8 rather than being
    rendered as "b'...'"; everything else is stringified.
    """
    if isinstance(value, np.ndarray):
        if value.size != 1:
            return [_to_jsonable(item) for item in value]
        # Single-element arrays are treated like the scalar they contain.
        value = value.reshape(-1)[0]

    if isinstance(value, bytes):
        return value.decode('utf-8', errors='replace')

    if hasattr(value, '__float__'):
        try:
            return float(value)
        except (TypeError, ValueError):
            pass  # has __float__ but is not actually numeric; fall through

    return str(value)


# ============================================================================
# Node & Date Endpoints (replacing Node.js backend)
# ============================================================================

@app.route('/api/nodes', methods=['GET'])
def get_nodes():
    """Return list of all nodes with metadata"""
    nodes_list = [
        {
            'id': node_id,
            'position': node_info.get('position', {}),
            'file_count': len(node_info.get('files', []))
        }
        for node_id, node_info in nodes_data.items()
    ]
    return jsonify(nodes_list)


@app.route('/api/dates', methods=['GET'])
def get_dates():
    """Return list of available dates"""
    return jsonify(dates_list)


@app.route('/api/migration-status', methods=['GET'])
def migration_status():
    """Return H5 migration status summary"""
    return jsonify({
        'total_h5_files': _count_h5_files(),
        'nodes_count': len(nodes_data),
        'dates_count': len(dates_list),
        'last_updated': datetime.now().isoformat()
    })


@app.route('/api/rms-timeline', methods=['GET'])
def rms_timeline():
    """Placeholder for RMS timeline (requires processing)"""
    return jsonify([])


@app.route('/api/global-history', methods=['GET'])
def global_history():
    """Placeholder for global history (requires processing)"""
    return jsonify([])


# ============================================================================
# H5 Data Endpoints
# ============================================================================

@app.route('/api/h5/files', methods=['GET'])
def list_h5_files():
    """List all H5 files with metadata

    Files that cannot be opened or lack a 'metadata' group are logged to
    stdout and left out of the listing.
    """
    if not H5_DIR.exists():
        return jsonify([])

    files = []
    for h5_file in sorted(H5_DIR.glob("*.h5")):
        try:
            with h5py.File(h5_file, 'r') as f:
                metadata = dict(f['metadata'].attrs)

            # Extract node ID and date from filename
            # Format: node_NODEID_YYYYMMDD.h5
            node_id, date_str = _parse_h5_stem(h5_file.stem)

            files.append({
                'filename': h5_file.name,
                'path': str(h5_file),
                'nodeId': node_id,
                'date': date_str,
                'duration_sec': float(metadata.get('duration_sec', 0)),
                'sample_rate_hz': int(metadata.get('sample_rate_hz', 500)),
                'n_channels': int(metadata.get('n_channels', 4)),
                'n_samples': int(metadata.get('n_samples', 0))
            })
        except Exception as e:
            # Best effort: one unreadable file must not break the listing.
            print(f"Error reading {h5_file}: {e}")

    return jsonify(files)


@app.route('/api/h5/data', methods=['GET'])
def get_h5_data():
    """Get waveform data from H5 file

    Query parameters:
        file:     path of the H5 file, relative to the data directory
                  (required).
        channel:  dataset name under 'calibrated_data' (default 'channel_1').
        start:    window start in seconds, >= 0 (default 0).
        duration: window length in seconds, > 0 (default 10).

    Returns the samples, a matching time axis and summary statistics.
    Responds 400 for missing/invalid parameters or a window with no samples,
    404 for an unknown file or channel, and 500 for any other read failure.
    """
    filename = request.args.get('file')
    channel = request.args.get('channel', 'channel_1')

    if not filename:
        return jsonify({'error': 'Missing file parameter'}), 400

    try:
        start = float(request.args.get('start', 0))
        duration = float(request.args.get('duration', 10))
    except ValueError:
        return jsonify({'error': 'start and duration must be numbers'}), 400

    if not (math.isfinite(start) and math.isfinite(duration)):
        return jsonify({'error': 'start and duration must be finite'}), 400
    if start < 0 or duration <= 0:
        return jsonify({'error': 'start must be >= 0 and duration must be > 0'}), 400

    h5_path = _find_data_file(filename)
    if h5_path is None:
        return jsonify({'error': 'File not found'}), 404

    try:
        with h5py.File(h5_path, 'r') as f:
            # Get metadata
            sample_rate = int(f['metadata'].attrs['sample_rate_hz'])
            if sample_rate <= 0:
                return jsonify({'error': 'Invalid sample rate in file metadata'}), 500

            if 'calibrated_data' not in f or channel not in f['calibrated_data']:
                return jsonify({'error': 'Channel not found'}), 404

            # Calculate sample range
            start_sample = int(start * sample_rate)
            n_samples = int(duration * sample_rate)
            end_sample = start_sample + n_samples

            # Read calibrated data
            data = f['calibrated_data'][channel][start_sample:end_sample]

        if len(data) == 0:
            # Reductions over an empty window would raise or yield NaN.
            return jsonify({'error': 'Requested range is outside the available data'}), 400

        # Calculate statistics on a float64 copy so that squaring integer
        # samples for the RMS cannot overflow.
        values = np.asarray(data, dtype=np.float64)
        stats = {
            'mean': float(np.mean(values)),
            'std': float(np.std(values)),
            'min': float(np.min(values)),
            'max': float(np.max(values)),
            'rms': float(np.sqrt(np.mean(values ** 2)))
        }

        # Create time array
        time = np.arange(len(data)) / sample_rate + start

        return jsonify({
            'time': time.tolist(),
            'data': data.tolist(),
            'stats': stats,
            'sample_rate': sample_rate,
            'channel': channel,
            'start': start,
            'duration': duration,
            'n_samples': len(data)
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/api/h5/coverage', methods=['GET'])
def h5_coverage():
    """Get H5 coverage summary by node and date

    Files that cannot be read are logged to stdout and skipped.
    """
    if not H5_DIR.exists():
        return jsonify([])

    coverage = {}

    for h5_file in H5_DIR.glob("*.h5"):
        try:
            node_id, date_str = _parse_h5_stem(h5_file.stem)
            key = f"{node_id}_{date_str}"

            with h5py.File(h5_file, 'r') as f:
                duration = float(f['metadata'].attrs.get('duration_sec', 0))

            entry = coverage.setdefault(key, {
                'nodeId': node_id,
                'date': date_str,
                'total_duration_hours': 0,
                'file_count': 0
            })
            entry['total_duration_hours'] += duration / 3600
            entry['file_count'] += 1

        except Exception as e:
            # Best effort: keep summarising the remaining files.
            print(f"Error processing {h5_file}: {e}")

    return jsonify(list(coverage.values()))


@app.route('/api/h5/gaps', methods=['GET'])
def h5_gaps():
    """Identify gaps in H5 data coverage"""
    # TODO: Implement gap detection logic
    return jsonify([])


# ============================================================================
# Campaign Documentation Endpoints
# ============================================================================

@app.route('/api/docs/manifest', methods=['GET'])
def get_docs_manifest():
    """Get campaign documentation manifest"""
    manifest_path = DOCS_DIR / "campaign_manifest.json"

    if not manifest_path.exists():
        return jsonify({'error': 'Manifest not found'}), 404

    try:
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)
        return jsonify(manifest)
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/api/docs/<path:filename>', methods=['GET'])
def download_doc(filename):
    """Download a campaign document

    *filename* is resolved inside DOCS_DIR; anything that would escape that
    directory, or that is not a regular file, is reported as not found.
    """
    doc_path = _resolve_within(DOCS_DIR, filename)

    if doc_path is None or not doc_path.is_file():
        return jsonify({'error': 'Document not found'}), 404

    return send_file(doc_path, as_attachment=True)


# ============================================================================
# Health Check
# ============================================================================

@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint"""
    return jsonify({
        'status': 'ok',
        'h5_files': _count_h5_files(),
        'nodes_loaded': len(nodes_data),
        'dates_loaded': len(dates_list)
    })


# === Additional routes for frontend compatibility ===

@app.route('/api/files', methods=['GET'])
def list_files_alias():
    """Alias for /api/h5/files — used by the SeiSee frontend"""
    return list_h5_files()


@app.route('/api/file/<path:filename>', methods=['GET'])
def get_file_info(filename):
    """Get detailed file info including datasets — needed for channel selector

    Responds 404 when the file does not exist under the data directory and
    500 when it exists but cannot be read.
    """
    h5_path = _find_data_file(filename)
    if h5_path is None:
        return jsonify({'error': 'File not found'}), 404

    try:
        with h5py.File(h5_path, 'r') as f:
            metadata = dict(f['metadata'].attrs) if 'metadata' in f else {}
            calibration = dict(f['calibration'].attrs) if 'calibration' in f else {}

            datasets = []

            def collect_datasets(name, obj):
                # visititems walks every group and dataset; keep datasets only.
                if isinstance(obj, h5py.Dataset):
                    datasets.append({
                        'path': name,
                        'shape': list(obj.shape),
                        'dtype': str(obj.dtype),
                        'chunks': list(obj.chunks) if obj.chunks else None,
                        'compression': obj.compression
                    })

            f.visititems(collect_datasets)

        duration_sec = float(metadata.get('duration_sec', 0))
        sample_rate = int(metadata.get('sample_rate_hz', 500))
        n_channels = int(metadata.get('n_channels', 4))
        n_samples = int(metadata.get('n_samples', 0))

        # Human readable duration
        hours = int(duration_sec // 3600)
        mins = int((duration_sec % 3600) // 60)
        duration_human = f'{hours}h{mins:02d}' if hours else f'{mins}min'

        return jsonify({
            'filename': filename,
            'type': 'h5',
            'duration_sec': duration_sec,
            'duration_human': duration_human,
            'sample_rate_hz': sample_rate,
            'num_channels': n_channels,
            'samples_per_channel': n_samples,
            'datasets': datasets,
            'calibration': {k: _to_jsonable(v) for k, v in calibration.items()},
            'metadata': {k: _to_jsonable(v) for k, v in metadata.items()}
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500


# Keep the entry point last: app.run() blocks, so any route defined after it
# would never be registered when this file is executed as a script.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=3004, debug=False, threaded=True)