# Source: seisee/h5_api_server.py (326 lines, 11 KiB, Python)

#!/usr/bin/env python3
"""
Flask API server for SeaKESP H5 data and node metadata
Provides both H5 data access and node/date endpoints
"""
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
import h5py
import numpy as np
import os
import json
from pathlib import Path
from datetime import datetime
# Flask application object; every route in this module is registered on it.
app = Flask(__name__)
# Enable CORS on the app; no options are passed, so flask-cors defaults apply.
CORS(app)
# Paths: everything this API reads lives under DATA_DIR.
DATA_DIR = Path("/data")
H5_DIR = DATA_DIR / "h5"  # scanned for "*.h5" files by the H5 endpoints
DOCS_DIR = DATA_DIR / "docs"  # campaign manifest and downloadable documents
INDEX_FILE = DATA_DIR / "index.json"  # read by load_index()
# Load node index: module-level state rebound by load_index(); it keeps
# these empty defaults when index.json is missing.
nodes_data = {}  # taken from the "nodes" key of index.json
dates_list = []  # taken from the "dates" key of index.json
def load_index():
    """Load node and date data from index.json into the module globals.

    Reads ``INDEX_FILE`` as UTF-8 JSON. When it holds an object, ``nodes_data``
    is rebound to its ``"nodes"`` mapping and ``dates_list`` to its ``"dates"``
    list; a missing key yields an empty dict / list respectively.

    The globals are rebound only after the whole file has been parsed and its
    shape validated, so a malformed index cannot leave them half-updated or
    bound to a type the endpoints cannot iterate (e.g. ``null``).

    Problems (missing file, unreadable file, invalid JSON, wrong shape) are
    reported on stdout and the previous values are kept; nothing is raised to
    the caller.
    """
    global nodes_data, dates_list
    if not INDEX_FILE.exists():
        print(f"Warning: {INDEX_FILE} not found")
        return
    try:
        with open(INDEX_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError("top-level JSON value must be an object")
        nodes = data.get('nodes', {})
        dates = data.get('dates', [])
        # get_nodes() calls .items() on nodes and len() is taken of both, so
        # reject anything that is not the expected container type.
        if not isinstance(nodes, dict):
            raise ValueError("'nodes' must be an object")
        if not isinstance(dates, list):
            raise ValueError("'dates' must be an array")
    except Exception as e:
        # Best-effort startup load: report and keep serving with old state.
        print(f"Error loading index.json: {e}")
        return
    nodes_data = nodes
    dates_list = dates
    print(f"Loaded {len(nodes_data)} nodes and {len(dates_list)} dates from index.json")
# Load index on startup: this module-level call runs once at import time, so
# nodes_data and dates_list are filled before the routes below are defined.
load_index()
# ============================================================================
# Node & Date Endpoints (replacing Node.js backend)
# ============================================================================
@app.route('/api/nodes', methods=['GET'])
def get_nodes():
    """Serve one summary object per node in the loaded index.

    Each entry carries the node's id, its ``position`` (empty object when the
    index has none) and ``file_count``, the length of its ``files`` list.
    """
    summaries = [
        {
            'id': node_id,
            'position': info.get('position', {}),
            'file_count': len(info.get('files', [])),
        }
        for node_id, info in nodes_data.items()
    ]
    return jsonify(summaries)
@app.route('/api/dates', methods=['GET'])
def get_dates():
    """Serve the dates held in the module-level ``dates_list`` as JSON."""
    available_dates = dates_list
    return jsonify(available_dates)
@app.route('/api/migration-status', methods=['GET'])
def migration_status():
    """Summarise the H5 migration: file, node and date counts plus a timestamp.

    ``total_h5_files`` counts ``*.h5`` entries directly inside ``H5_DIR`` (0
    when the directory does not exist); ``last_updated`` is the server's
    current local time in ISO format.
    """
    if H5_DIR.exists():
        h5_total = len(list(H5_DIR.glob("*.h5")))
    else:
        h5_total = 0

    summary = {
        'total_h5_files': h5_total,
        'nodes_count': len(nodes_data),
        'dates_count': len(dates_list),
        'last_updated': datetime.now().isoformat(),
    }
    return jsonify(summary)
@app.route('/api/rms-timeline', methods=['GET'])
def rms_timeline():
    """Placeholder endpoint for the RMS timeline; always serves an empty list."""
    no_entries = []
    return jsonify(no_entries)
@app.route('/api/global-history', methods=['GET'])
def global_history():
    """Placeholder endpoint for global history; always serves an empty list."""
    no_entries = []
    return jsonify(no_entries)
# ============================================================================
# H5 Data Endpoints
# ============================================================================
@app.route('/api/h5/files', methods=['GET'])
def list_h5_files():
    """Serve a metadata summary for every ``*.h5`` file in ``H5_DIR``.

    Files are visited in sorted order. For each one the attributes of its
    ``metadata`` object are read, and the node id and date are taken from the
    second and third ``_``-separated parts of the file stem ("unknown" when a
    part is absent). A file that cannot be read or converted is reported on
    stdout and left out of the response. A missing ``H5_DIR`` yields ``[]``.
    """
    if not H5_DIR.exists():
        return jsonify([])

    entries = []
    for h5_path in sorted(H5_DIR.glob("*.h5")):
        try:
            with h5py.File(h5_path, 'r') as h5:
                attrs = dict(h5['metadata'].attrs)

            # Expected stem layout: node_NODEID_YYYYMMDD. Padding with two
            # placeholders makes indexes 1 and 2 always valid.
            padded = h5_path.stem.split('_') + ["unknown", "unknown"]
            node_id, date_str = padded[1], padded[2]

            entry = {
                'filename': h5_path.name,
                'path': str(h5_path),
                'nodeId': node_id,
                'date': date_str,
                'duration_sec': float(attrs.get('duration_sec', 0)),
                'sample_rate_hz': int(attrs.get('sample_rate_hz', 500)),
                'n_channels': int(attrs.get('n_channels', 4)),
                'n_samples': int(attrs.get('n_samples', 0)),
            }
            entries.append(entry)
        except Exception as err:
            print(f"Error reading {h5_path}: {err}")
    return jsonify(entries)
@app.route('/api/h5/data', methods=['GET'])
def get_h5_data():
    """Serve a time window of calibrated samples from one H5 file.

    Query parameters:
        file: path of the H5 file, interpreted relative to ``DATA_DIR``
            (required). Absolute paths are accepted only when they point
            inside ``DATA_DIR``.
        channel: dataset name under ``calibrated_data`` (default
            ``channel_1``).
        start: window start in seconds; finite and >= 0 (default 0).
        duration: window length in seconds; finite and > 0 (default 10).

    Responses:
        200: ``time``, ``data``, ``stats`` (mean/std/min/max/rms),
            ``sample_rate``, ``channel``, ``start``, ``duration``,
            ``n_samples``.
        400: missing ``file``, invalid ``start``/``duration``, or a window
            that contains no samples.
        404: file outside ``DATA_DIR``, file not found, or unknown channel.
        500: any error raised while reading the file (message in ``error``).
    """
    filename = request.args.get('file')
    channel = request.args.get('channel', 'channel_1')
    if not filename:
        return jsonify({'error': 'Missing file parameter'}), 400

    # Parse numbers inside a try so garbage input is a 400, not an unhandled
    # ValueError.
    try:
        start = float(request.args.get('start', 0))
        duration = float(request.args.get('duration', 10))
    except ValueError:
        return jsonify({'error': 'start and duration must be numbers'}), 400
    # float() accepts "nan"/"inf"; a negative start would index from the end
    # of the dataset instead of failing.
    if not (np.isfinite(start) and np.isfinite(duration)) or start < 0 or duration <= 0:
        return jsonify({'error': 'start must be >= 0 and duration must be > 0'}), 400

    # Confine the request to DATA_DIR. The check is lexical (".." and absolute
    # paths are normalised, symlinks are not followed) so a deliberately
    # symlinked data directory keeps working.
    data_root = os.path.abspath(DATA_DIR)
    candidate = os.path.abspath(os.path.join(data_root, filename))
    try:
        inside_root = os.path.commonpath([data_root, candidate]) == data_root
    except ValueError:
        inside_root = False
    h5_path = Path(candidate)
    if not inside_root or not h5_path.is_file():
        return jsonify({'error': 'File not found'}), 404

    try:
        with h5py.File(h5_path, 'r') as f:
            # Get metadata
            sample_rate = int(f['metadata'].attrs['sample_rate_hz'])
            if sample_rate <= 0:
                return jsonify({'error': 'Invalid sample rate in file metadata'}), 500

            channels = f['calibrated_data']
            if channel not in channels:
                return jsonify({'error': f'Channel not found: {channel}'}), 404

            # Calculate sample range
            start_sample = int(start * sample_rate)
            end_sample = start_sample + int(duration * sample_rate)

            # Read calibrated data
            data = channels[channel][start_sample:end_sample]

        if len(data) == 0:
            # Window lies past the end of the recording (or is shorter than
            # one sample); min/max of an empty array would raise.
            return jsonify({'error': 'Requested window contains no samples'}), 400

        # Compute statistics in float64 so squaring integer samples for the
        # RMS cannot overflow the storage dtype.
        samples = np.asarray(data, dtype=np.float64)
        stats = {
            'mean': float(np.mean(samples)),
            'std': float(np.std(samples)),
            'min': float(np.min(samples)),
            'max': float(np.max(samples)),
            'rms': float(np.sqrt(np.mean(samples ** 2))),
        }

        # Create time array: seconds from file start for each returned sample
        time = np.arange(len(data)) / sample_rate + start
        return jsonify({
            'time': time.tolist(),
            'data': data.tolist(),
            'stats': stats,
            'sample_rate': sample_rate,
            'channel': channel,
            'start': start,
            'duration': duration,
            'n_samples': len(data)
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/api/h5/coverage', methods=['GET'])
def h5_coverage():
    """Serve recorded hours and file counts grouped by node and date.

    Node id and date come from the second and third ``_``-separated parts of
    each file stem ("unknown" when absent); the duration comes from the
    ``duration_sec`` attribute of the file's ``metadata`` object. Files that
    fail to process are reported on stdout and do not contribute. A missing
    ``H5_DIR`` yields ``[]``.
    """
    if not H5_DIR.exists():
        return jsonify([])

    by_node_date = {}
    for h5_path in H5_DIR.glob("*.h5"):
        try:
            padded = h5_path.stem.split('_') + ["unknown", "unknown"]
            node_id, date_str = padded[1], padded[2]
            bucket_key = f"{node_id}_{date_str}"

            with h5py.File(h5_path, 'r') as h5:
                seconds = float(h5['metadata'].attrs.get('duration_sec', 0))

            # The bucket is created only after the file was read successfully,
            # so unreadable files never produce an empty entry.
            bucket = by_node_date.setdefault(bucket_key, {
                'nodeId': node_id,
                'date': date_str,
                'total_duration_hours': 0,
                'file_count': 0
            })
            bucket['total_duration_hours'] += seconds / 3600
            bucket['file_count'] += 1
        except Exception as err:
            print(f"Error processing {h5_path}: {err}")
    return jsonify([*by_node_date.values()])
@app.route('/api/h5/gaps', methods=['GET'])
def h5_gaps():
    """Placeholder endpoint for coverage gaps; always serves an empty list."""
    # TODO: Implement gap detection logic
    no_gaps = []
    return jsonify(no_gaps)
# ============================================================================
# Campaign Documentation Endpoints
# ============================================================================
@app.route('/api/docs/manifest', methods=['GET'])
def get_docs_manifest():
    """Serve the campaign documentation manifest.

    Reads ``campaign_manifest.json`` from ``DOCS_DIR`` as UTF-8 JSON.

    Responses:
        200: the parsed manifest.
        404: the manifest file does not exist.
        500: the file could not be read or parsed (message in ``error``).
    """
    manifest_path = DOCS_DIR / "campaign_manifest.json"
    try:
        # Open directly instead of checking exists() first: the file can
        # disappear between the check and the open.
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)
    except FileNotFoundError:
        return jsonify({'error': 'Manifest not found'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    return jsonify(manifest)
@app.route('/api/docs/<filename>', methods=['GET'])
def download_doc(filename):
    """Send one campaign document from ``DOCS_DIR`` as an attachment.

    Args:
        filename: name of the document, taken from the URL.

    Responses:
        200: the file, sent with ``as_attachment=True``.
        404: the name points outside ``DOCS_DIR``, or is not an existing
            regular file (a directory such as ``..`` is rejected rather than
            handed to ``send_file``).
    """
    # Lexical containment check: normalise ".." without following symlinks
    # and require the result to stay under DOCS_DIR.
    docs_root = os.path.abspath(DOCS_DIR)
    candidate = os.path.abspath(os.path.join(docs_root, filename))
    try:
        inside_root = os.path.commonpath([docs_root, candidate]) == docs_root
    except ValueError:
        inside_root = False
    doc_path = Path(candidate)
    if not inside_root or not doc_path.is_file():
        return jsonify({'error': 'Document not found'}), 404
    return send_file(doc_path, as_attachment=True)
# ============================================================================
# Health Check
# ============================================================================
@app.route('/health', methods=['GET'])
def health():
    """Health check: report status "ok" with H5 file, node and date counts.

    ``h5_files`` counts ``*.h5`` entries directly inside ``H5_DIR`` and is 0
    when that directory does not exist.
    """
    if H5_DIR.exists():
        h5_total = len(list(H5_DIR.glob("*.h5")))
    else:
        h5_total = 0

    report = {
        'status': 'ok',
        'h5_files': h5_total,
        'nodes_loaded': len(nodes_data),
        'dates_loaded': len(dates_list),
    }
    return jsonify(report)
# === Additional routes for frontend compatibility ===
@app.route('/api/files', methods=['GET'])
def list_files_alias():
    """Alias for /api/h5/files — used by the SeiSee frontend"""
    return list_h5_files()


def _attr_to_json(value):
    """Convert one HDF5 attribute value to a JSON-friendly float or string.

    Values exposing ``__float__`` are converted with ``float()``; when that
    conversion fails (multi-element array, non-numeric text) or the value has
    no ``__float__``, its ``str()`` form is used instead of raising.
    """
    if hasattr(value, '__float__'):
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
    return str(value)


@app.route('/api/file/<path:filename>', methods=['GET'])
def get_file_info(filename):
    """Get detailed file info including datasets — needed for channel selector

    Args:
        filename: path of the H5 file, interpreted relative to ``DATA_DIR``
            (may contain slashes).

    Responses:
        200: duration (seconds and human-readable), sample rate, channel and
            sample counts, a description of every dataset in the file, and
            the ``calibration`` / ``metadata`` attributes.
        404: the path points outside ``DATA_DIR`` or is not an existing file.
        500: any error raised while reading the file (message in ``error``).
    """
    # Lexical containment check: normalise ".." and absolute paths without
    # following symlinks, and require the result to stay under DATA_DIR.
    data_root = os.path.abspath(DATA_DIR)
    candidate = os.path.abspath(os.path.join(data_root, filename))
    try:
        inside_root = os.path.commonpath([data_root, candidate]) == data_root
    except ValueError:
        inside_root = False
    h5_path = Path(candidate)
    if not inside_root or not h5_path.is_file():
        return jsonify({'error': 'File not found'}), 404

    try:
        with h5py.File(h5_path, 'r') as f:
            metadata = dict(f['metadata'].attrs) if 'metadata' in f else {}
            calibration = dict(f['calibration'].attrs) if 'calibration' in f else {}
            datasets = []

            def collect_datasets(name, obj):
                # visititems() calls this for every object in the file; only
                # datasets are recorded, groups are skipped.
                if isinstance(obj, h5py.Dataset):
                    datasets.append({
                        'path': name,
                        'shape': list(obj.shape),
                        'dtype': str(obj.dtype),
                        'chunks': list(obj.chunks) if obj.chunks else None,
                        'compression': obj.compression
                    })

            f.visititems(collect_datasets)

        duration_sec = float(metadata.get('duration_sec', 0))
        sample_rate = int(metadata.get('sample_rate_hz', 500))
        n_channels = int(metadata.get('n_channels', 4))
        n_samples = int(metadata.get('n_samples', 0))

        # Human readable duration: "2h05" from one hour up, "12min" below.
        hours = int(duration_sec // 3600)
        mins = int((duration_sec % 3600) // 60)
        duration_human = f'{hours}h{mins:02d}' if hours else f'{mins}min'

        return jsonify({
            'filename': filename,
            'type': 'h5',
            'duration_sec': duration_sec,
            'duration_human': duration_human,
            'sample_rate_hz': sample_rate,
            'num_channels': n_channels,
            'samples_per_channel': n_samples,
            'datasets': datasets,
            'calibration': {k: _attr_to_json(v) for k, v in calibration.items()},
            'metadata': {k: _attr_to_json(v) for k, v in metadata.items()}
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500


# Start the server only after every route has been registered: app.run()
# blocks, so any @app.route placed after this guard would never be served
# when the file is executed as a script.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=3004, debug=False, threaded=True)