# seisee/h5_api_server.py

#!/usr/bin/env python3
"""
Flask API server for SeaKESP H5 data and node metadata
Provides both H5 data access and node/date endpoints
"""

from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
import h5py
import numpy as np
import os
import json
from pathlib import Path
from datetime import datetime

# Flask application object; every route in this module is registered on it.
app = Flask(__name__)
# Apply flask_cors to the whole app (no per-route options are passed here).
CORS(app)

# Paths
# Root of the served data tree; the H5 folder, the documentation folder and
# the node index all live beneath it.
DATA_DIR = Path("/data")
H5_DIR = DATA_DIR / "h5"  # scanned for *.h5 files by the H5 endpoints
DOCS_DIR = DATA_DIR / "docs"  # campaign manifest and downloadable documents
INDEX_FILE = DATA_DIR / "index.json"  # node/date index read by load_index()

# In-memory copy of the node index. Both start empty and are filled in by
# load_index(); the endpoints only ever read them.
nodes_data = {}
dates_list = []

def load_index():
    """Load node and date data from index.json into the module-level caches.

    Reads INDEX_FILE and, when it parses to the expected structure, replaces
    ``nodes_data`` and ``dates_list`` together. On any failure (missing file,
    unreadable file, invalid JSON, unexpected structure) a message is printed
    and both caches keep their previous values, so the endpoints that call
    ``len()`` / ``.items()`` on them never see ``None`` or a half-applied
    update.

    Returns:
        None. Results are published through the ``nodes_data`` and
        ``dates_list`` globals.
    """
    global nodes_data, dates_list

    if not INDEX_FILE.exists():
        print(f"Warning: {INDEX_FILE} not found")
        return

    try:
        # JSON is UTF-8 by specification; do not depend on the process locale.
        with open(INDEX_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading index.json: {e}")
        return

    if not isinstance(data, dict):
        print(
            "Error loading index.json: expected a JSON object, "
            f"got {type(data).__name__}"
        )
        return

    # A JSON null is treated the same as a missing key.
    nodes = data.get('nodes')
    dates = data.get('dates')
    nodes = {} if nodes is None else nodes
    dates = [] if dates is None else dates

    if not isinstance(nodes, dict) or not isinstance(dates, list):
        print(
            "Error loading index.json: 'nodes' must be an object "
            "and 'dates' must be a list"
        )
        return

    # Publish both values only after validation so readers never observe a
    # partially updated index.
    nodes_data, dates_list = nodes, dates
    print(f"Loaded {len(nodes_data)} nodes and {len(dates_list)} dates from index.json")

# Populate nodes_data / dates_list once, at import time. No other call to
# load_index() is visible in this module, so later edits to index.json are
# presumably only picked up after a restart.
load_index()

# ============================================================================
# Node & Date Endpoints (replacing Node.js backend)
# ============================================================================

@app.route('/api/nodes', methods=['GET'])
def get_nodes():
    """Serve a JSON array with one summary object per indexed node.

    Each object carries the node id, its ``position`` mapping (empty when the
    index has none) and ``file_count``, the length of the node's ``files``
    list (0 when absent).
    """
    summaries = [
        {
            'id': node_id,
            'position': info.get('position', {}),
            'file_count': len(info.get('files', [])),
        }
        for node_id, info in nodes_data.items()
    ]
    return jsonify(summaries)

@app.route('/api/dates', methods=['GET'])
def get_dates():
    """Serve the date list held in ``dates_list`` as a JSON array."""
    available_dates = dates_list
    return jsonify(available_dates)

@app.route('/api/migration-status', methods=['GET'])
def migration_status():
    """Summarise the H5 migration as JSON.

    Reports how many ``*.h5`` files sit directly in H5_DIR (0 when the
    directory is missing), how many nodes and dates are loaded, and the
    server's current local time in ISO format.
    """
    h5_total = 0
    if H5_DIR.exists():
        # Count lazily instead of building a throwaway list.
        h5_total = sum(1 for _ in H5_DIR.glob("*.h5"))

    summary = {
        'total_h5_files': h5_total,
        'nodes_count': len(nodes_data),
        'dates_count': len(dates_list),
        'last_updated': datetime.now().isoformat(),
    }
    return jsonify(summary)

@app.route('/api/rms-timeline', methods=['GET'])
def rms_timeline():
    """Stub for the RMS timeline; always answers with an empty JSON array."""
    # No processing is implemented yet, so there are no timeline entries.
    timeline = []
    return jsonify(timeline)

@app.route('/api/global-history', methods=['GET'])
def global_history():
    """Stub for the global history; always answers with an empty JSON array."""
    # No processing is implemented yet, so there are no history entries.
    history = []
    return jsonify(history)

# ============================================================================
# H5 Data Endpoints
# ============================================================================

@app.route('/api/h5/files', methods=['GET'])
def list_h5_files():
    """Serve a JSON array describing every ``*.h5`` file in H5_DIR.

    Files are visited in sorted path order. Node id and date come from the
    filename; duration, sample rate, channel count and sample count come from
    the attributes of the file's ``metadata`` object. A file that cannot be
    read is reported on stdout and left out of the listing. When H5_DIR does
    not exist the result is an empty array.
    """
    if not H5_DIR.exists():
        return jsonify([])

    entries = []
    for h5_file in sorted(H5_DIR.glob("*.h5")):
        # Filename convention: node_NODEID_YYYYMMDD.h5
        tokens = h5_file.stem.split('_')
        node_id = tokens[1] if len(tokens) > 1 else "unknown"
        date_str = tokens[2] if len(tokens) > 2 else "unknown"

        try:
            with h5py.File(h5_file, 'r') as handle:
                attrs = dict(handle['metadata'].attrs)
                entry = {
                    'filename': h5_file.name,
                    'path': str(h5_file),
                    'nodeId': node_id,
                    'date': date_str,
                    'duration_sec': float(attrs.get('duration_sec', 0)),
                    'sample_rate_hz': int(attrs.get('sample_rate_hz', 500)),
                    'n_channels': int(attrs.get('n_channels', 4)),
                    'n_samples': int(attrs.get('n_samples', 0)),
                }
        except Exception as e:
            print(f"Error reading {h5_file}: {e}")
            continue

        entries.append(entry)

    return jsonify(entries)

@app.route('/api/h5/data', methods=['GET'])
def get_h5_data():
    """Return a window of calibrated waveform data from one H5 file.

    Query parameters:
        file: path of the H5 file. Relative paths are taken from DATA_DIR;
            absolute paths are accepted only when they point inside DATA_DIR.
        channel: dataset name under ``calibrated_data`` (default
            ``channel_1``).
        start: window start in seconds, >= 0 (default 0).
        duration: window length in seconds, > 0 (default 10).

    Returns:
        JSON with the ``time`` and ``data`` arrays, summary ``stats``
        (mean, std, min, max, rms), and the echoed request parameters.

    Error responses:
        400 for a missing ``file``, non-numeric / non-finite / out-of-range
        ``start`` or ``duration``, or a window that contains no samples;
        404 when the file is missing, lies outside DATA_DIR, or the channel
        does not exist; 500 for any failure while reading the file.
    """
    filename = request.args.get('file')
    if not filename:
        return jsonify({'error': 'Missing file parameter'}), 400

    channel = request.args.get('channel', 'channel_1')

    # Reject malformed numbers here instead of letting float() raise an
    # unhandled exception.
    try:
        start = float(request.args.get('start', 0))
        duration = float(request.args.get('duration', 10))
    except (TypeError, ValueError):
        return jsonify({'error': 'start and duration must be numbers'}), 400

    if not (np.isfinite(start) and np.isfinite(duration)):
        return jsonify({'error': 'start and duration must be finite'}), 400
    if start < 0 or duration <= 0:
        return jsonify({'error': 'start must be >= 0 and duration must be > 0'}), 400

    # Path.__truediv__ lets an absolute or "../" component escape DATA_DIR, so
    # normalise lexically and require the result to stay under the data root.
    # (Lexical on purpose: symlinked data folders keep working.)
    root = os.path.normpath(str(DATA_DIR))
    candidate = os.path.normpath(os.path.join(root, filename))
    try:
        inside_root = os.path.commonpath([root, candidate]) == root
    except ValueError:
        inside_root = False

    h5_path = Path(candidate)
    if not inside_root or not h5_path.is_file():
        return jsonify({'error': 'File not found'}), 404

    try:
        with h5py.File(h5_path, 'r') as f:
            # Get metadata
            sample_rate = int(f['metadata'].attrs['sample_rate_hz'])
            if sample_rate <= 0:
                return jsonify({'error': 'Invalid sample_rate_hz in file metadata'}), 500

            channels = f['calibrated_data']
            if channel not in channels:
                return jsonify({'error': f'Channel not found: {channel}'}), 404

            # Calculate sample range
            start_sample = int(start * sample_rate)
            end_sample = start_sample + int(duration * sample_rate)

            # Read calibrated data
            data = channels[channel][start_sample:end_sample]

            # An empty window would make np.min / np.max raise.
            if data.size == 0:
                return jsonify({'error': 'Requested range contains no samples'}), 400

            # Compute statistics in float64 so squaring cannot overflow when
            # the dataset is stored with an integer dtype.
            values = np.asarray(data, dtype=np.float64)
            stats = {
                'mean': float(np.mean(values)),
                'std': float(np.std(values)),
                'min': float(np.min(values)),
                'max': float(np.max(values)),
                'rms': float(np.sqrt(np.mean(np.square(values))))
            }

            # Create time array
            time = np.arange(len(data)) / sample_rate + start

            return jsonify({
                'time': time.tolist(),
                'data': data.tolist(),
                'stats': stats,
                'sample_rate': sample_rate,
                'channel': channel,
                'start': start,
                'duration': duration,
                'n_samples': len(data)
            })

    except Exception as e:
        # Request boundary: report read/format failures as JSON.
        return jsonify({'error': str(e)}), 500

@app.route('/api/h5/coverage', methods=['GET'])
def h5_coverage():
    """Serve per-node, per-date recording coverage as a JSON array.

    Every ``*.h5`` file in H5_DIR is grouped by the node id and date taken
    from its filename; each group accumulates the files' ``duration_sec``
    attribute (converted to hours) and a file count. Files that cannot be
    read are reported on stdout and skipped. When H5_DIR does not exist the
    result is an empty array.
    """
    if not H5_DIR.exists():
        return jsonify([])

    by_node_date = {}

    for h5_file in H5_DIR.glob("*.h5"):
        try:
            tokens = h5_file.stem.split('_')
            node_id = tokens[1] if len(tokens) > 1 else "unknown"
            date_str = tokens[2] if len(tokens) > 2 else "unknown"

            with h5py.File(h5_file, 'r') as handle:
                seconds = float(handle['metadata'].attrs.get('duration_sec', 0))

            # The bucket is only created once the file has been read, so an
            # unreadable file never leaves an empty entry behind.
            bucket = by_node_date.setdefault(f"{node_id}_{date_str}", {
                'nodeId': node_id,
                'date': date_str,
                'total_duration_hours': 0,
                'file_count': 0
            })
            bucket['total_duration_hours'] += seconds / 3600
            bucket['file_count'] += 1

        except Exception as e:
            print(f"Error processing {h5_file}: {e}")

    return jsonify(list(by_node_date.values()))

@app.route('/api/h5/gaps', methods=['GET'])
def h5_gaps():
    """Stub for coverage-gap detection; always answers with an empty JSON array."""
    # TODO: Implement gap detection logic
    gaps = []
    return jsonify(gaps)

# ============================================================================
# Campaign Documentation Endpoints
# ============================================================================

@app.route('/api/docs/manifest', methods=['GET'])
def get_docs_manifest():
    """Serve the parsed contents of ``campaign_manifest.json`` from DOCS_DIR.

    Answers 404 when the manifest file is absent and 500 (with the exception
    text) when it cannot be read or parsed.
    """
    manifest_path = DOCS_DIR / "campaign_manifest.json"

    if manifest_path.exists():
        try:
            with open(manifest_path, 'r') as fh:
                payload = json.load(fh)
            return jsonify(payload)
        except Exception as e:
            return jsonify({'error': str(e)}), 500

    return jsonify({'error': 'Manifest not found'}), 404

@app.route('/api/docs/<filename>', methods=['GET'])
def download_doc(filename):
    """Send one campaign document from DOCS_DIR as an attachment.

    Args:
        filename: single path segment naming the document (the route's
            default converter does not match ``/``).

    Returns:
        The file as a download, or a 404 JSON error when ``filename`` does
        not name a regular file in DOCS_DIR.
    """
    doc_path = DOCS_DIR / filename

    # is_file() rather than exists(): "." and ".." (or any sub-directory
    # name) exist but are directories, and handing them to send_file would
    # fail with a 500 instead of a clean 404.
    if not doc_path.is_file():
        return jsonify({'error': 'Document not found'}), 404

    return send_file(doc_path, as_attachment=True)

# ============================================================================
# Health Check
# ============================================================================

@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report ``ok`` plus H5 file, node and date counts."""
    h5_total = 0
    if H5_DIR.exists():
        h5_total = sum(1 for _ in H5_DIR.glob("*.h5"))

    report = {
        'status': 'ok',
        'h5_files': h5_total,
        'nodes_loaded': len(nodes_data),
        'dates_loaded': len(dates_list),
    }
    return jsonify(report)

# === Additional routes for frontend compatibility ===

@app.route('/api/files', methods=['GET'])
def list_files_alias():
    """Alias for /api/h5/files — used by the SeiSee frontend"""
    return list_h5_files()


def _attr_to_json(value):
    """Convert one HDF5 attribute value into something JSON can carry.

    Numeric scalars become ``float`` (as before); byte strings are decoded
    instead of being rendered as ``"b'...'"``; multi-element arrays become
    lists, converted element by element; anything else falls back to
    ``str(value)``.
    """
    if isinstance(value, np.ndarray):
        # float() on a multi-element array raises, so unpack arrays first.
        value = value.item() if value.size == 1 else value.tolist()
    if isinstance(value, (list, tuple)):
        return [_attr_to_json(item) for item in value]
    if isinstance(value, bytes):
        return value.decode('utf-8', errors='replace')
    if hasattr(value, '__float__'):
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
    return str(value)


@app.route('/api/file/<path:filename>', methods=['GET'])
def get_file_info(filename):
    """Get detailed file info including datasets — needed for channel selector

    Args:
        filename: path of the H5 file relative to DATA_DIR.

    Returns:
        JSON with duration, sample rate, channel and sample counts, a list of
        every dataset in the file (path, shape, dtype, chunks, compression)
        and the ``calibration`` / ``metadata`` attributes. 404 when the path
        is not a regular file inside DATA_DIR; 500 with the exception text
        when the file cannot be read.
    """
    # Normalise lexically and require the result to stay under the data root
    # so "../" segments cannot reach files outside DATA_DIR.
    root = os.path.normpath(str(DATA_DIR))
    candidate = os.path.normpath(os.path.join(root, filename))
    try:
        inside_root = os.path.commonpath([root, candidate]) == root
    except ValueError:
        inside_root = False

    h5_path = Path(candidate)
    if not inside_root or not h5_path.is_file():
        return jsonify({'error': 'File not found'}), 404

    try:
        with h5py.File(h5_path, 'r') as f:
            metadata = dict(f['metadata'].attrs) if 'metadata' in f else {}
            calibration = dict(f['calibration'].attrs) if 'calibration' in f else {}

            datasets = []

            def collect_datasets(name, obj):
                # visititems() calls this for every group and dataset;
                # only datasets are reported.
                if isinstance(obj, h5py.Dataset):
                    datasets.append({
                        'path': name,
                        'shape': list(obj.shape),
                        'dtype': str(obj.dtype),
                        'chunks': list(obj.chunks) if obj.chunks else None,
                        'compression': obj.compression
                    })
            f.visititems(collect_datasets)

            duration_sec = float(metadata.get('duration_sec', 0))
            sample_rate = int(metadata.get('sample_rate_hz', 500))
            n_channels = int(metadata.get('n_channels', 4))
            n_samples = int(metadata.get('n_samples', 0))

            # Human readable duration
            hours = int(duration_sec // 3600)
            mins = int((duration_sec % 3600) // 60)
            duration_human = f'{hours}h{mins:02d}' if hours else f'{mins}min'

            return jsonify({
                'filename': filename,
                'type': 'h5',
                'duration_sec': duration_sec,
                'duration_human': duration_human,
                'sample_rate_hz': sample_rate,
                'num_channels': n_channels,
                'samples_per_channel': n_samples,
                'datasets': datasets,
                'calibration': {k: _attr_to_json(v) for k, v in calibration.items()},
                'metadata': {k: _attr_to_json(v) for k, v in metadata.items()}
            })
    except Exception as e:
        # Request boundary: report read/format failures as JSON.
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    # Keep this guard at the very end of the module: app.run() blocks, so any
    # route defined after it would never be registered when the file is run
    # as a script.
    app.run(host='0.0.0.0', port=3004, debug=False, threaded=True)