# seisee/app.py.final

from flask import Flask, jsonify, send_from_directory, request, send_file, abort
from flask_cors import CORS
import os
import re
import h5py
import numpy as np
from datetime import datetime, timedelta
import base64
import struct
import traceback
import json
import io
import mimetypes
from pyproj import Transformer
import shutil
import threading
import uuid
import time
import zipfile
import csv as csv_mod

# Flask application; the front-end assets in ./static are served from the URL root.
app = Flask(__name__, static_folder='static', static_url_path='')
CORS(app)

# Directory scanned for .h5 / .segy / .sgy recordings (overridable via the environment).
DATA_DIR = os.environ.get('DATA_DIR', '/data')
LOCATIONS_FILE = '/home/floppyrj45/seismic-viewer/locations.json'
DARF_FILE = os.path.join(os.path.dirname(__file__), 'static', 'darf_nodes.json')
TEMP_DIR = '/tmp/extracts'

# exist_ok=True removes the check-then-create race when several workers
# import this module at the same time.
os.makedirs(TEMP_DIR, exist_ok=True)

# Global task store
TASKS = {}

# ========== UTILITY FUNCTIONS ==========

def cleanup_old_tasks():
    """Remove tasks older than 1 hour, deleting their output file if any.

    The sweep works on a snapshot of ``TASKS`` so that entries added or
    removed elsewhere while it runs cannot raise "dictionary changed size
    during iteration". File removal is best effort: a file that is already
    gone (or cannot be removed) does not stop the task from being dropped.
    """
    now = time.time()
    for tid, task in list(TASKS.items()):
        if now - task['created_at'] <= 3600:
            continue
        file_path = task.get('file_path')
        if file_path:
            try:
                os.remove(file_path)
            except OSError:
                # Missing file or permission problem: nothing more to do here.
                pass
        # pop() tolerates the entry having been removed concurrently.
        TASKS.pop(tid, None)

def load_darf_nodes():
    """Read the DARF node metadata JSON; any failure yields an empty dict."""
    nodes = {}
    try:
        with open(DARF_FILE, 'r') as handle:
            nodes = json.load(handle)
    except Exception:
        nodes = {}
    return nodes

def extract_board_id(filename):
    """Return the digits of the ``_b<digits>_`` token in *filename*, or None.

    Expected naming pattern: auto_XXX_HHMMSS_b{BOARD_ID}_rsnXXX_...
    """
    found = re.search(r'_b(\d+)_', filename)
    if found is None:
        return None
    return found.group(1)

def load_locations():
    """Load locations from JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file is missing,
        unreadable, or does not contain valid JSON.
    """
    try:
        with open(LOCATIONS_FILE, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file (no separate exists() check, so no
        # race with a concurrent delete). ValueError: JSONDecodeError and
        # text-decoding failures.
        return {}

def save_locations(locations):
    """Save locations to JSON file.

    Args:
        locations: JSON-serialisable mapping to persist.

    Raises:
        TypeError / ValueError: if *locations* cannot be serialised; the
            existing file is left untouched in that case.
        OSError: if the file cannot be written.
    """
    # Serialise first: open(..., 'w') truncates immediately, so serialising
    # while writing would wipe the stored locations on a bad value.
    payload = json.dumps(locations, indent=2)
    with open(LOCATIONS_FILE, 'w') as f:
        f.write(payload)

def _h5_attr_to_json(value):
    """Convert one HDF5 attribute value into something ``jsonify`` can emit."""
    # Byte strings (including numpy byte scalars) are not JSON serialisable.
    if isinstance(value, bytes):
        return value.decode('utf-8', errors='replace')
    try:
        converted = value.item() if hasattr(value, 'item') else str(value)
    except Exception:
        # e.g. multi-element arrays, for which .item() raises
        return str(value)
    if isinstance(converted, bytes):
        return converted.decode('utf-8', errors='replace')
    return converted


def _h5_attrs_to_dict(attrs):
    """Return a plain dict with every attribute converted for JSON output."""
    return {k: _h5_attr_to_json(v) for k, v in attrs.items()}


def get_h5_metadata(filepath):
    """Extract comprehensive metadata from H5 file.

    Collects, in this order (later keys overwrite earlier ones): file-level
    attributes, attributes of the ``metadata`` group, a ``calibration`` dict
    from the ``calibration`` group, a ``channels`` list describing the
    datasets under ``calibrated_data`` and ``raw_data``, and derived
    ``start_timestamp`` / ``end_timestamp`` / ``duration_human`` values when
    ``creation_date`` (and ``duration_sec``) are present.

    Args:
        filepath: Path of the HDF5 file to inspect.

    Returns:
        The metadata dict, or ``{'error': <message>}`` if the file cannot be
        read. This function never raises.
    """
    metadata = {}
    try:
        with h5py.File(filepath, 'r') as f:
            # File-level attributes
            metadata.update(_h5_attrs_to_dict(f.attrs))

            # Metadata group attributes
            if 'metadata' in f:
                metadata.update(_h5_attrs_to_dict(f['metadata'].attrs))

            # Calibration group attributes
            if 'calibration' in f:
                metadata['calibration'] = _h5_attrs_to_dict(f['calibration'].attrs)

            # Channel information
            channels = []
            for group_name in ('calibrated_data', 'raw_data'):
                if group_name not in f:
                    continue
                for ds_name, ds in f[group_name].items():
                    # Sub-groups have no shape/dtype; skip them instead of
                    # failing the whole metadata request.
                    if not isinstance(ds, h5py.Dataset):
                        continue
                    ch_info = {
                        'name': f"{group_name}/{ds_name}",
                        'shape': list(ds.shape),
                        'dtype': str(ds.dtype),
                        'samples': int(ds.shape[0]) if len(ds.shape) > 0 else 0
                    }
                    ch_info.update(_h5_attrs_to_dict(ds.attrs))
                    channels.append(ch_info)
            metadata['channels'] = channels

            # Compute timestamps
            if 'creation_date' in metadata:
                try:
                    start_time = datetime.fromisoformat(metadata['creation_date'].replace('Z', '+00:00'))
                    metadata['start_timestamp'] = start_time.isoformat()

                    if 'duration_sec' in metadata:
                        duration_sec = float(metadata['duration_sec'])
                        end_time = start_time + timedelta(seconds=duration_sec)
                        metadata['end_timestamp'] = end_time.isoformat()

                        # Human-readable duration
                        hours = int(duration_sec // 3600)
                        minutes = int((duration_sec % 3600) // 60)
                        seconds = int(duration_sec % 60)
                        metadata['duration_human'] = f"{hours}h {minutes}m {seconds}s"
                except Exception as e:
                    # Keep the rest of the metadata; just report why the
                    # derived timestamps are missing.
                    metadata['timestamp_error'] = str(e)

            return metadata
    except Exception as e:
        return {'error': str(e)}

# ========== ROUTES ==========

@app.route('/')
def index():
    """Serve the front-end entry page from the static directory."""
    static_dir, landing_page = 'static', 'index.html'
    return send_from_directory(static_dir, landing_page)

@app.route('/api/files')
def list_files():
    """List the data files (.segy, .sgy, .h5) found directly in DATA_DIR.

    Returns:
        JSON ``{'files': [...], 'count': n}`` with name, size, modification
        time and type per file, sorted by name; ``{'error': ...}`` with status
        500 if the directory itself cannot be read.
    """
    try:
        files = []
        with os.scandir(DATA_DIR) as entries:
            for entry in entries:
                fn = entry.name
                if not fn.endswith(('.segy', '.sgy', '.h5')):
                    continue
                try:
                    # Skip directories and broken symlinks: one bad entry
                    # must not turn the whole listing into a 500.
                    if not entry.is_file():
                        continue
                    stat = entry.stat()
                except OSError:
                    continue
                files.append({
                    'name': fn, 'size': stat.st_size,
                    'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
                    'type': 'h5' if fn.endswith('.h5') else 'segy'
                })
        files.sort(key=lambda x: x['name'])
        return jsonify({'files': files, 'count': len(files)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/file/<filename>/info')
def file_info(filename):
    """Summarise one data file: size, type and a quick structural overview.

    H5 files get a dataset listing, channel counts and, when the ``metadata``
    group carries ``sample_rate_hz``, a duration estimate. SEG-Y files get
    trace count and sampling details read through obspy. Responds 404 when
    the file does not exist and 500 on unexpected errors.
    """
    try:
        path = os.path.join(DATA_DIR, filename)
        if not os.path.exists(path):
            return jsonify({'error': 'Not found'}), 404

        st_info = os.stat(path)
        is_h5 = filename.endswith('.h5')
        info = {
            'name': filename,
            'size': st_info.st_size,
            'size_mb': round(st_info.st_size / 1048576, 2),
            'modified': datetime.fromtimestamp(st_info.st_mtime).isoformat(),
            'type': 'h5' if is_h5 else 'segy',
        }

        if is_h5:
            with h5py.File(path, 'r') as h5:
                found = []

                def collect(name, node):
                    # Only datasets are reported; groups are walked through.
                    if not isinstance(node, h5py.Dataset):
                        return
                    found.append({
                        'path': name,
                        'shape': list(node.shape),
                        'dtype': str(node.dtype),
                        'samples': int(node.shape[0]) if len(node.shape) > 0 else 0
                    })

                h5.visititems(collect)
                calibrated = [entry for entry in found if 'calibrated' in entry['path']]
                raw_sets = [entry for entry in found if 'raw' in entry['path']]

                info['datasets'] = found
                info['num_datasets'] = len(found)
                info['calibrated_channels'] = len(calibrated)
                info['raw_channels'] = len(raw_sets)
                if calibrated:
                    info['samples_per_channel'] = calibrated[0]['samples']

                # Sample rate and duration, used by the sidebar display
                if 'metadata' in h5:
                    rate = h5['metadata'].attrs.get('sample_rate_hz', None)
                    if rate and calibrated:
                        length_sec = float(calibrated[0]['samples']) / float(rate)
                        whole_hours = int(length_sec // 3600)
                        whole_minutes = int((length_sec % 3600) // 60)
                        info['duration_human'] = f"{whole_hours}h{whole_minutes:02d}m"
                        info['sample_rate_hz'] = int(rate)
                        info['num_channels'] = int(len(calibrated))
        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy import read as obspy_read
                stream = obspy_read(path, headonly=True)
                info['num_traces'] = len(stream)
                if len(stream) > 0:
                    first_stats = stream[0].stats
                    info['samples_per_trace'] = first_stats.npts
                    info['sample_rate'] = first_stats.sampling_rate
                    info['delta'] = first_stats.delta
            except Exception as segy_exc:
                info['segy_error'] = str(segy_exc)

        return jsonify(info)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

@app.route('/api/file/<filename>/metadata')
def file_metadata(filename):
    """Return file-system facts plus format-specific metadata for one file.

    H5 files are described through ``get_h5_metadata``; SEG-Y files through
    obspy (first trace timing and sampling). Responds 404 when the file does
    not exist and 500 on unexpected errors.
    """
    try:
        path = os.path.join(DATA_DIR, filename)
        if not os.path.exists(path):
            return jsonify({'error': 'Not found'}), 404

        st_info = os.stat(path)
        result = {
            'filename': filename,
            'file_size_bytes': st_info.st_size,
            'file_size_mb': round(st_info.st_size / 1048576, 2),
            'modified': datetime.fromtimestamp(st_info.st_mtime).isoformat(),
        }

        if filename.endswith('.h5'):
            result.update(get_h5_metadata(path))
        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy import read as obspy_read
                stream = obspy_read(path, headonly=True)
                result['num_traces'] = len(stream)
                if len(stream) > 0:
                    first_stats = stream[0].stats
                    result['sample_rate'] = first_stats.sampling_rate
                    result['samples_per_trace'] = first_stats.npts
                    result['start_timestamp'] = first_stats.starttime.isoformat()
                    result['end_timestamp'] = first_stats.endtime.isoformat()
                    span = (stream[0].stats.endtime - stream[0].stats.starttime)
                    h_part = int(span // 3600)
                    m_part = int((span % 3600) // 60)
                    s_part = int(span % 60)
                    result['duration_human'] = f"{h_part}h {m_part}m {s_part}s"
                    result['duration_sec'] = span
            except Exception as segy_exc:
                result['segy_error'] = str(segy_exc)

        return jsonify(result)
    except Exception as exc:
        traceback.print_exc()
        return jsonify({'error': str(exc)}), 500

@app.route('/api/locations', methods=['GET'])
def get_locations():
    """Return the stored file locations as JSON (500 with an error on failure)."""
    try:
        return jsonify(load_locations())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

@app.route('/api/locations', methods=['POST'])
def update_location():
    """Update location for a file.

    Expects a JSON object with ``filename``, ``lat`` and ``lon``.

    Returns:
        ``{'success': True, 'locations': {...}}`` on success; status 400 when
        the body is not a JSON object, a field is missing, or lat/lon are not
        finite numbers; status 500 on unexpected errors.
    """
    import math  # local import: only needed for the finite-number check

    try:
        # silent=True yields None for a missing/invalid JSON body instead of
        # raising, so malformed requests become a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        filename = data.get('filename')
        lat = data.get('lat')
        lon = data.get('lon')

        if not filename or lat is None or lon is None:
            return jsonify({'error': 'Missing filename, lat, or lon'}), 400

        try:
            lat = float(lat)
            lon = float(lon)
        except (TypeError, ValueError):
            return jsonify({'error': 'lat and lon must be numeric'}), 400
        # NaN/inf would be written as non-standard JSON tokens.
        if not (math.isfinite(lat) and math.isfinite(lon)):
            return jsonify({'error': 'lat and lon must be numeric'}), 400

        locations = load_locations()
        locations[filename] = {
            'lat': lat,
            'lon': lon,
            'updated': datetime.now().isoformat()
        }
        save_locations(locations)

        return jsonify({'success': True, 'locations': locations})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/file/<filename>/extract')
def extract_segment(filename):
    """Extract a time segment from a file and return as CSV or H5.

    Query parameters:
        start_sec: Segment start in seconds (default 0).
        duration_sec: Segment length in seconds (default 1200).
        channel: Dataset path inside the file
            (default ``calibrated_data/channel_1``).
        format: ``csv`` or ``h5`` (default ``csv``).

    The sample rate comes from ``metadata/sample_rate_hz`` and falls back to
    500 when the attribute or the whole ``metadata`` group is absent. The
    requested range is clamped to the data and always holds at least one
    sample position.

    Returns:
        The extract as a file download; 400 for invalid parameters or
        non-H5 files; 404 for a missing file or channel; 500 otherwise.
    """
    import math  # local import: only needed for the finite-number check

    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        if not filename.endswith('.h5'):
            return jsonify({'error': 'Extract only supported for H5 files'}), 400

        # Parameters
        try:
            start_sec = float(request.args.get('start_sec', 0))
            duration_sec = float(request.args.get('duration_sec', 1200))  # Default 20 min
        except ValueError:
            return jsonify({'error': 'start_sec and duration_sec must be numbers'}), 400
        # NaN/inf cannot be converted to sample indices.
        if not (math.isfinite(start_sec) and math.isfinite(duration_sec)):
            return jsonify({'error': 'start_sec and duration_sec must be numbers'}), 400

        channel = request.args.get('channel', 'calibrated_data/channel_1')
        format_type = request.args.get('format', 'csv')  # csv or h5
        # Reject an unknown format before touching the data.
        if format_type not in ('csv', 'h5'):
            return jsonify({'error': 'Invalid format. Use csv or h5'}), 400

        # Strip only the trailing extension for the download name.
        stem = filename[:-len('.h5')]
        download_base = f'{stem}_extract_{int(start_sec)}_{int(duration_sec)}s'

        with h5py.File(fp, 'r') as f:
            if channel not in f or not isinstance(f[channel], h5py.Dataset):
                return jsonify({'error': f'Channel {channel} not found'}), 404

            # Get metadata (the group is optional)
            has_metadata = 'metadata' in f
            sample_rate = f['metadata'].attrs.get('sample_rate_hz', 500) if has_metadata else 500
            if not sample_rate > 0:
                return jsonify({'error': 'Invalid sample rate in file metadata'}), 500
            total_samples = f[channel].shape[0]

            # Calculate sample range
            start_sample = int(start_sec * sample_rate)
            end_sample = int((start_sec + duration_sec) * sample_rate)

            # Clamp to valid range
            start_sample = max(0, min(start_sample, total_samples - 1))
            end_sample = max(start_sample + 1, min(end_sample, total_samples))

            # Extract data
            data = f[channel][start_sample:end_sample]

            if format_type == 'csv':
                # Generate CSV
                output = io.StringIO()
                output.write(f"# Channel: {channel}\n")
                output.write(f"# Start: {start_sec}s, Duration: {duration_sec}s\n")
                output.write(f"# Sample rate: {sample_rate} Hz\n")
                output.write(f"# Samples: {len(data)}\n")
                output.write("sample,time_sec,value\n")
                output.writelines(
                    f"{start_sample + i},{(start_sample + i) / sample_rate:.6f},{val}\n"
                    for i, val in enumerate(data)
                )

                return send_file(
                    io.BytesIO(output.getvalue().encode('utf-8')),
                    mimetype='text/csv',
                    as_attachment=True,
                    download_name=f'{download_base}.csv'
                )

            # format_type == 'h5': generate an in-memory HDF5 file
            output = io.BytesIO()
            with h5py.File(output, 'w') as out_f:
                # Copy metadata (if any) and record the extract window
                meta_group = out_f.create_group('metadata')
                if has_metadata:
                    for k, v in f['metadata'].attrs.items():
                        meta_group.attrs[k] = v
                meta_group.attrs['extract_start_sec'] = start_sec
                meta_group.attrs['extract_duration_sec'] = duration_sec
                meta_group.attrs['extract_start_sample'] = start_sample
                meta_group.attrs['extract_end_sample'] = end_sample

                # Create dataset
                ds = out_f.create_dataset('data', data=data, compression='gzip')
                ds.attrs['channel'] = channel
                ds.attrs['sample_rate_hz'] = sample_rate

                # Copy channel attributes (may override the two set above)
                for k, v in f[channel].attrs.items():
                    ds.attrs[k] = v

            output.seek(0)
            return send_file(
                output,
                mimetype='application/x-hdf5',
                as_attachment=True,
                download_name=f'{download_base}.h5'
            )

    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

def _section_downsample(section, max_samples):
    """Block-average each trace down to ``max_samples`` samples if it is longer.

    Returns the (possibly reduced) float32 section and its samples-per-trace.
    """
    count, spt = section.shape
    if spt <= max_samples:
        return section, spt
    factor = spt // max_samples
    # Drop the tail that does not fill a whole averaging block.
    trimmed = section[:, :factor * max_samples]
    reduced = trimmed.reshape(count, max_samples, factor).mean(axis=2).astype(np.float32)
    return reduced, max_samples


def _section_response(section, trace_start, total_traces, total_samples, max_samples):
    """Downsample *section* and wrap it in the JSON payload the viewer expects."""
    section, actual_spt = _section_downsample(section, max_samples)
    return jsonify({
        'num_traces': int(section.shape[0]),
        'samples_per_trace': actual_spt,
        'trace_start': trace_start,
        'total_traces': total_traces,
        'total_samples': int(total_samples),
        'vmin': float(np.min(section)), 'vmax': float(np.max(section)),
        'data_b64': base64.b64encode(section.tobytes()).decode('ascii'),
        'dtype': 'float32'
    })


@app.route('/api/file/<filename>/section')
def file_section(filename):
    """Return 2D section data as base64 float32 array.

    Query parameters:
        trace_start: First trace index (default 0, clamped to the data).
        trace_count: Number of traces (default 200).
        samples_per_trace: H5 only; samples folded into one trace
            (default 1000, capped at 50000).
        channel: H5 only; dataset path (default ``calibrated_data/channel_1``).
        max_samples: Traces longer than this are block-averaged down to it
            (default 500).

    Returns:
        JSON with the encoded section and its dimensions; 400 for invalid
        parameters, an empty range or an unsupported format; 404 for a
        missing file or channel; 500 otherwise.
    """
    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        try:
            trace_start = int(request.args.get('trace_start', 0))
            trace_count = int(request.args.get('trace_count', 200))
            samples_per_trace = min(int(request.args.get('samples_per_trace', 1000)), 50000)
            max_samples = int(request.args.get('max_samples', 500))
        except ValueError:
            return jsonify({'error': 'trace_start, trace_count, samples_per_trace and max_samples must be integers'}), 400
        # Both values are used as divisors below.
        if samples_per_trace < 1 or max_samples < 1:
            return jsonify({'error': 'samples_per_trace and max_samples must be positive'}), 400
        channel = request.args.get('channel', 'calibrated_data/channel_1')

        if filename.endswith('.h5'):
            with h5py.File(fp, 'r') as f:
                if channel not in f:
                    avail = []
                    f.visititems(lambda n, o: avail.append(n) if isinstance(o, h5py.Dataset) else None)
                    return jsonify({'error': 'Channel not found', 'available': avail}), 404

                data = f[channel]
                total_samples = data.shape[0]
                total_traces = total_samples // samples_per_trace

                trace_start = max(0, min(trace_start, max(0, total_traces - 1)))
                trace_end = min(trace_start + trace_count, total_traces)
                actual_count = trace_end - trace_start

                if actual_count <= 0:
                    return jsonify({'error': 'No traces in range'}), 400

                # The continuous recording is cut into fixed-length pseudo-traces.
                raw = data[trace_start * samples_per_trace:trace_end * samples_per_trace]
                section = raw.reshape(actual_count, samples_per_trace).astype(np.float32)

                return _section_response(section, trace_start, total_traces,
                                         total_samples, max_samples)

        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy import read as obspy_read
                st = obspy_read(fp)
                total_traces = len(st)

                trace_start = max(0, min(trace_start, total_traces - 1))
                trace_end = min(trace_start + trace_count, total_traces)
                actual_count = trace_end - trace_start

                if actual_count <= 0:
                    return jsonify({'error': 'No traces'}), 400

                # The first trace defines the section width; shorter traces
                # are zero-padded, longer ones truncated.
                spt = st[0].stats.npts
                section = np.zeros((actual_count, spt), dtype=np.float32)
                for i in range(actual_count):
                    tr = st[trace_start + i]
                    n = min(spt, tr.stats.npts)
                    section[i, :n] = tr.data[:n].astype(np.float32)

                return _section_response(section, trace_start, total_traces,
                                         total_traces * spt, max_samples)
            except Exception as e:
                return jsonify({'error': f'SEGY read error: {e}'}), 500

        return jsonify({'error': 'Unsupported format'}), 400
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/file/<filename>/headers')
def file_headers(filename):
    """Return trace headers for SEGY or dataset attributes for H5.

    H5: file attributes, every dataset (path, shape, dtype, size and
    ``attr:``-prefixed attributes) and every group that carries attributes.
    SEG-Y: selected binary-file-header fields plus the headers of the first
    50 traces.

    Returns:
        JSON description; 404 for a missing file; 400 for an unsupported
        extension; 500 on read errors.
    """
    def to_json(value):
        # Byte strings (including numpy byte scalars) are not JSON
        # serialisable; decode them instead of letting jsonify fail.
        if isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        try:
            converted = value.item() if hasattr(value, 'item') else str(value)
        except Exception:
            # e.g. multi-element arrays, for which .item() raises
            return str(value)
        if isinstance(converted, bytes):
            return converted.decode('utf-8', errors='replace')
        return converted

    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        if filename.endswith('.h5'):
            with h5py.File(fp, 'r') as f:
                attrs = {k: to_json(v) for k, v in f.attrs.items()}

                datasets = []
                groups = []

                def visitor(name, obj):
                    # One traversal fills both lists; each keeps visit order.
                    if isinstance(obj, h5py.Dataset):
                        ds_info = {
                            'path': name,
                            'shape': list(obj.shape),
                            'dtype': str(obj.dtype),
                            'size': int(np.prod(obj.shape)),
                        }
                        for ak, av in obj.attrs.items():
                            ds_info[f'attr:{ak}'] = to_json(av)
                        datasets.append(ds_info)
                    elif isinstance(obj, h5py.Group):
                        g_info = {'path': name}
                        for ak, av in obj.attrs.items():
                            g_info[f'attr:{ak}'] = to_json(av)
                        # Only groups that actually carry attributes are listed.
                        if len(g_info) > 1:
                            groups.append(g_info)

                f.visititems(visitor)

                return jsonify({
                    'type': 'h5',
                    'file_attrs': attrs,
                    'datasets': datasets,
                    'groups': groups
                })

        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy.io.segy.segy import _read_segy

                # Only headers are needed, so skip loading the trace data.
                segy = _read_segy(fp, headonly=True)

                bfh = {}
                if hasattr(segy, 'binary_file_header'):
                    h = segy.binary_file_header
                    wanted = ('job_identification_number', 'line_number', 'reel_number',
                              'number_of_data_traces_per_ensemble', 'number_of_auxiliary_traces_per_ensemble',
                              'sample_interval_in_microseconds', 'number_of_samples_per_data_trace',
                              'data_sample_format_code')
                    bfh = {attr: getattr(h, attr) for attr in wanted if hasattr(h, attr)}

                trace_headers = []
                for i, trace in enumerate(segy.traces[:50]):
                    th = trace.header
                    trace_headers.append({
                        'trace': i,
                        'inline': getattr(th, 'trace_sequence_number_within_line', None),
                        'crossline': getattr(th, 'trace_sequence_number_within_segy_file', None),
                        'field_record': getattr(th, 'original_field_record_number', None),
                        'trace_number': getattr(th, 'trace_number_within_the_original_field_record', None),
                        'cdp': getattr(th, 'ensemble_number', None),
                        'trace_in_ensemble': getattr(th, 'trace_number_within_the_ensemble', None),
                        'offset': getattr(th, 'distance_from_center_of_the_source_point_to_the_center_of_the_receiver_group', None),
                        'source_x': getattr(th, 'source_coordinate_x', None),
                        'source_y': getattr(th, 'source_coordinate_y', None),
                        'group_x': getattr(th, 'group_coordinate_x', None),
                        'group_y': getattr(th, 'group_coordinate_y', None),
                        'num_samples': getattr(th, 'number_of_samples_in_this_trace', None),
                        'sample_interval': getattr(th, 'sample_interval_in_ms_for_this_trace', None),
                    })

                return jsonify({
                    'type': 'segy',
                    'binary_header': bfh,
                    'trace_headers': trace_headers,
                    'total_traces': len(segy.traces)
                })
            except Exception as e:
                return jsonify({'error': f'SEGY header read error: {e}'}), 500

        return jsonify({'error': 'Unsupported'}), 400
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/file/<filename>/waveform')
def file_waveform(filename):
    """Return (optionally decimated) waveform samples for one or more H5 channels.

    Query parameters:
        points: Maximum number of points per channel (default 5000).
        channels: Comma-separated dataset paths; takes precedence over
            ``channel``.
        channel: Single dataset path (default ``calibrated_data/channel_1``).
        start: Window start in seconds (default: beginning of the data).
        duration: Window length in seconds, or ``all`` (default: to the end).

    The sample rate is read from the file attribute ``sample_rate``, then
    ``acquisition/sample_rate``, then ``metadata/sample_rate_hz``, and
    defaults to 500. Windows longer than ``points`` are decimated by taking
    every ``n // points``-th sample; statistics are then computed on the
    decimated values.

    Returns:
        JSON with per-channel x/y arrays and statistics (mirrored at top
        level for a single channel); 400 for invalid parameters, an empty
        window or a non-H5 file; 404 for a missing file or channel; 500
        otherwise.
    """
    import math  # local import: only needed for the finite-number check

    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404
        if not filename.endswith('.h5'):
            return jsonify({'error': 'Waveform only for H5'}), 400

        try:
            max_pts = int(request.args.get('points', 5000))
            start_val = float(request.args.get('start', 0))
        except ValueError:
            return jsonify({'error': 'points must be an integer and start a number'}), 400
        # max_pts is used as a divisor; NaN/inf cannot become a sample index.
        if max_pts < 1 or not math.isfinite(start_val):
            return jsonify({'error': 'points must be positive and start finite'}), 400

        channels_param = request.args.get('channels', '')
        channel = request.args.get('channel', 'calibrated_data/channel_1')
        duration_sec = request.args.get('duration', None)

        with h5py.File(fp, 'r') as f:
            # Determine sample rate from file metadata
            sample_rate = 500
            if 'sample_rate' in f.attrs:
                sample_rate = float(f.attrs['sample_rate'])
            elif 'acquisition' in f and 'sample_rate' in f['acquisition'].attrs:
                sample_rate = float(f['acquisition'].attrs['sample_rate'])
            elif 'metadata' in f and 'sample_rate_hz' in f['metadata'].attrs:
                # Same attribute the info/extract endpoints rely on.
                sample_rate = float(f['metadata'].attrs['sample_rate_hz'])
            if not sample_rate > 0:
                return jsonify({'error': 'Invalid sample rate in file metadata'}), 500

            # Multi-channel mode; a blank list falls back to the single channel
            ch_list = [c.strip() for c in channels_param.split(',') if c.strip()] or [channel]

            # Validate all channels exist
            for ch in ch_list:
                if ch not in f or not isinstance(f[ch], h5py.Dataset):
                    return jsonify({'error': f'Channel not found: {ch}'}), 404

            total_samples = f[ch_list[0]].shape[0]

            # Compute sample window from start/duration
            s0 = max(0, int(start_val * sample_rate))
            s1 = total_samples
            if duration_sec is not None and duration_sec != 'all':
                try:
                    s1 = min(total_samples, s0 + int(float(duration_sec) * sample_rate))
                except (ValueError, OverflowError):
                    # Unparseable or non-finite duration: read to the end.
                    s1 = total_samples
            if s1 <= s0:
                return jsonify({'error': 'Requested window contains no samples'}), 400

            result = {
                'name': filename,
                'sample_rate': int(sample_rate),
                'total_samples': total_samples,
                'channels': {}
            }

            n = s1 - s0
            for ch in ch_list:
                dataset = f[ch]
                if n > max_pts:
                    # One strided HDF5 read instead of max_pts single-element
                    # reads; float64 matches the previous per-value float().
                    step = n // max_pts
                    data = np.asarray(dataset[s0:s1:step][:max_pts], dtype=np.float64)
                else:
                    step = 1
                    data = dataset[s0:s1]
                if data.size == 0:
                    # This channel is shorter than the first one.
                    return jsonify({'error': f'No samples in requested window for channel: {ch}'}), 400

                values = data.tolist()
                times = [(s0 + i * step) / sample_rate for i in range(len(values))]
                result['channels'][ch] = {
                    'channel': ch,
                    'displayed_points': len(values),
                    'min': float(np.min(data)),
                    'max': float(np.max(data)),
                    'mean': float(np.mean(data)),
                    'std': float(np.std(data)),
                    'x': times,
                    'y': values
                }

            # Backward compat: if single channel, also put x/y at top level
            if len(ch_list) == 1:
                ch_data = result['channels'][ch_list[0]]
                result['channel'] = ch_list[0]
                for key in ('displayed_points', 'min', 'max', 'mean', 'std', 'x', 'y'):
                    result[key] = ch_data[key]

            return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500


# ========== GEOSUP ENDPOINTS ==========

# UTM Zone 31N (EPSG:32631) to WGS84 (EPSG:4326) transformer.
# always_xy=True fixes the axis order to (x, y): transform(easting, northing)
# is unpacked as (lon, lat) by geosup_positions().
utm_to_wgs84 = Transformer.from_crs("EPSG:32631", "EPSG:4326", always_xy=True)

# Directory the GeoSup input files are read from: the Flask static folder,
# joined with '' so the path ends with a separator.
GEOSUP_DIR = os.path.join(app.static_folder, '')  # Static folder

def parse_sps_line(line):
    """Decode one fixed-width SPS S01/R01 record.

    Fields are taken by position: record type [0], line [1:11],
    point [11:21], index [21:23], fused easting/northing [46:66] and
    depth [66:72]. Returns a dict with those fields, or None when the line
    is shorter than 65 characters or cannot be decoded.
    """
    if len(line) < 65:
        return None
    try:
        coords = line[46:66].strip()

        # Preferred layout: XXXXXX.XNNNNNNN.NN (easting immediately followed by northing)
        fused = re.match(r'(\d{5,6}\.\d{1,2})(\d{7}\.\d{1,2})', coords)
        if fused:
            easting = float(fused.group(1))
            northing = float(fused.group(2))
        else:
            # Fallback: split at the first dot and give the easting the next
            # two characters; the remainder is the northing.
            whole, dot, remainder = coords.partition('.')
            if not dot or len(remainder) <= 9:
                return None
            easting = float(whole + '.' + remainder[:2])
            northing = float(remainder[2:])

        # The length guard above already ensures the line reaches column 52.
        depth_text = line[66:72].strip()
        depth = float(depth_text) if depth_text else 0

        return {
            'type': line[0].strip(),  # S or R
            'line': line[1:11].strip(),
            'point': line[11:21].strip(),
            'index': line[21:23].strip(),
            'easting': easting,
            'northing': northing,
            'depth': depth
        }
    except Exception:
        return None

def parse_deployment_csv(filepath):
    """Read a deployment CSV (SETE-0965P1002.csv layout) into a list of dicts.

    Each line is split on commas. Lines with fewer than 8 fields are ignored.
    Fields 0-3 become ``line``, ``point``, ``index`` and ``name``; fields 5-7
    become the float values ``easting``, ``northing`` and ``depth``. When a
    line has at least 16 fields, fields 10-14 are added as the integers
    ``year``, ``day``, ``hour``, ``minute`` and ``second``.

    A line whose numeric fields do not convert is skipped. If the file cannot
    be read, whatever was parsed so far (possibly nothing) is returned.
    """
    text_keys = ('line', 'point', 'index', 'name')
    time_keys = ('year', 'day', 'hour', 'minute', 'second')
    parsed = []
    try:
        with open(filepath, 'r') as handle:
            for raw in handle:
                cols = raw.strip().split(',')
                if len(cols) < 8:
                    continue
                try:
                    entry = dict(zip(text_keys, cols[:4]))
                    entry['easting'] = float(cols[5])
                    entry['northing'] = float(cols[6])
                    entry['depth'] = float(cols[7])
                    if len(cols) >= 16:
                        for key, value in zip(time_keys, cols[10:15]):
                            entry[key] = int(value)
                except (ValueError, IndexError):
                    continue
                parsed.append(entry)
    except Exception:
        pass
    return parsed

@app.route('/api/geosup/positions')
def geosup_positions():
    """Return a GeoJSON FeatureCollection of source, receiver and deployment points.

    Sources come from ``SeteSxPreplots.s01`` (records of type ``S``), receivers
    from ``SeteRxPreplots.r01`` (type ``R``) and deployments from
    ``SETE-0965P1002.csv``; each file is optional. Coordinates are converted
    with ``utm_to_wgs84``. The response also carries per-category counts.
    Any failure is reported as a JSON error with status 500.
    """
    try:
        def point_feature(easting, northing, properties):
            # One GeoJSON point; coordinates are [lon, lat].
            lon, lat = utm_to_wgs84.transform(easting, northing)
            return {
                'type': 'Feature',
                'geometry': {'type': 'Point', 'coordinates': [lon, lat]},
                'properties': properties
            }

        def preplot_features(basename, record_type, category):
            # Features for every parsable record of the wanted type in one SPS file.
            path = os.path.join(GEOSUP_DIR, basename)
            if not os.path.exists(path):
                return []
            found = []
            with open(path, 'r') as handle:
                for raw in handle:
                    rec = parse_sps_line(raw)
                    if not rec or rec['type'] != record_type:
                        continue
                    found.append(point_feature(rec['easting'], rec['northing'], {
                        'category': category,
                        'line': rec['line'],
                        'point': rec['point'],
                        'easting': rec['easting'],
                        'northing': rec['northing'],
                        'depth': rec['depth']
                    }))
            return found

        source_features = preplot_features('SeteSxPreplots.s01', 'S', 'source')
        receiver_features = preplot_features('SeteRxPreplots.r01', 'R', 'receiver')

        deployment_features = []
        csv_path = os.path.join(GEOSUP_DIR, 'SETE-0965P1002.csv')
        if os.path.exists(csv_path):
            for dep in parse_deployment_csv(csv_path):
                props = {
                    'category': 'deployment',
                    'line': dep['line'],
                    'point': dep['point'],
                    'name': dep['name'],
                    'easting': dep['easting'],
                    'northing': dep['northing'],
                    'depth': dep['depth']
                }
                if 'year' in dep:
                    # Year plus zero-padded day number, then HH:MM:SS.
                    props['timestamp'] = f"{dep['year']}-{dep['day']:03d} {dep['hour']:02d}:{dep['minute']:02d}:{dep['second']:02d}"
                deployment_features.append(point_feature(dep['easting'], dep['northing'], props))

        features = source_features + receiver_features + deployment_features

        return jsonify({
            'type': 'FeatureCollection',
            'features': features,
            'counts': {
                'sources': len(source_features),
                'receivers': len(receiver_features),
                'deployments': len(deployment_features),
                'total': len(features)
            }
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/geosup/documents')
def geosup_documents():
    """List the office/PDF documents found directly in ``GEOSUP_DIR``.

    Entries whose lower-cased name ends in .pdf, .docx, .xlsx, .doc or .xls
    are reported with size, display type, icon, modification time and two
    flags (``is_gundalf``, ``is_params``). GUNDALF documents sort first, then
    parameter documents, then the rest by name. Failures return a JSON error
    with status 500.
    """
    try:
        # extension -> (display type, icon)
        kinds = {
            '.pdf': ('PDF', '📕'),
            '.docx': ('Word', '📘'),
            '.doc': ('Word', '📘'),
            '.xlsx': ('Excel', '📗'),
            '.xls': ('Excel', '📗'),
        }
        wanted_suffixes = ('.pdf', '.docx', '.xlsx', '.doc', '.xls')

        listing = []
        for entry in os.listdir(GEOSUP_DIR):
            if not entry.lower().endswith(wanted_suffixes):
                continue

            info = os.stat(os.path.join(GEOSUP_DIR, entry))
            suffix = os.path.splitext(entry)[1].lower()
            label, icon = kinds.get(suffix, ('Document', '📄'))

            nbytes = info.st_size
            if nbytes < 1048576:
                pretty_size = f"{nbytes / 1024:.1f} KB"
            else:
                pretty_size = f"{nbytes / 1048576:.1f} MB"

            listing.append({
                'name': entry,
                'size': nbytes,
                'size_human': pretty_size,
                'type': label,
                'icon': icon,
                'is_gundalf': 'GUNDALF' in entry.upper(),
                'is_params': 'Acquisition_Parameters' in entry or 'SpiceRack' in entry,
                'modified': datetime.fromtimestamp(info.st_mtime).isoformat()
            })

        # False sorts before True, so flagged documents come first.
        listing.sort(key=lambda d: (not d['is_gundalf'], not d['is_params'], d['name']))
        return jsonify({'documents': listing, 'count': len(listing)})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/geosup/documents/<path:filename>')
def geosup_download(filename):
    """Download a geosup document as an attachment.

    Only the final path component of ``filename`` is used, so the request
    cannot reach outside ``GEOSUP_DIR``. Responds 404 when that component is
    empty or does not name a regular file, and 500 with a JSON error on any
    other failure.
    """
    try:
        safe_name = os.path.basename(filename)
        filepath = os.path.join(GEOSUP_DIR, safe_name)
        # isfile rather than exists: a trailing-slash request yields an empty
        # basename (filepath == GEOSUP_DIR) and a sub-directory name also
        # "exists"; both used to reach send_file and surface as a 500.
        if not safe_name or not os.path.isfile(filepath):
            return jsonify({'error': 'File not found'}), 404
        mime_type, _ = mimetypes.guess_type(filepath)
        return send_file(
            filepath,
            mimetype=mime_type or 'application/octet-stream',
            as_attachment=True,
            download_name=safe_name
        )
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/geosup/acquisition-params')
def geosup_acquisition_params():
    """Return a preview of every sheet in the acquisition-parameters workbook.

    For each sheet, the first 30 rows are scanned and up to 20 rows holding
    at least one non-empty cell are kept; each kept row is reduced to its
    first 10 cells, stringified (empty cells become ''). Responds 404 when the
    workbook is missing and 500 with a JSON error on any other failure.
    """
    try:
        workbook_path = os.path.join(GEOSUP_DIR, 'SBGS_Sete_SpiceRack_Acquisition_Parameters_Spreadsheet.xlsx')
        if not os.path.exists(workbook_path):
            return jsonify({'error': 'Acquisition parameters file not found'}), 404

        from openpyxl import load_workbook
        workbook = load_workbook(workbook_path, data_only=True)

        params = {'sheets': [], 'summary': {}}
        for title in workbook.sheetnames:
            sheet = workbook[title]
            kept_rows = []
            for cells in sheet.iter_rows(max_row=30, values_only=True):
                if all(value is None for value in cells):
                    continue
                kept_rows.append(['' if value is None else str(value) for value in cells[:10]])
                if len(kept_rows) >= 20:
                    break
            params['sheets'].append({'name': title, 'rows': kept_rows})

        workbook.close()
        return jsonify(params)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/pipeline-status')
def pipeline_status():
    """Return pipeline statistics and disk usage.

    Response fields:
        counts   -- number of raw recordings, SEG-Y files and H5 files found,
                    plus the fixed ``total_expected`` target.
        disk     -- total/used/free space (GiB) and used percentage of the
                    filesystem holding the output directory; all zeros when
                    that directory does not exist.
        progress -- contents of ``/tmp/segy2h5_progress.json`` or ``None``
                    when the file is missing or unreadable.

    Unexpected failures are reported as a JSON error with status 500.
    """
    try:
        raw_dir = '/mnt/usb/Recordings'
        output_dir = '/mnt/usb/H5-Output'

        # Count files
        raw_count = 0
        if os.path.isdir(raw_dir):
            raw_count = sum(1 for n in os.listdir(raw_dir) if n.lower().endswith(('.raw', '.manta')))

        segy_count = 0
        h5_count = 0
        if os.path.isdir(output_dir):
            names = [n.lower() for n in os.listdir(output_dir)]
            segy_count = sum(1 for n in names if n.endswith(('.segy', '.sgy')))
            h5_count = sum(1 for n in names if n.endswith('.h5'))

        # Disk usage. disk_usage raises when the path is missing (e.g. the USB
        # drive is not mounted); report zeros instead of failing the whole
        # status request, since the counts above are still meaningful.
        if os.path.exists(output_dir):
            total, used, free = shutil.disk_usage(output_dir)
        else:
            total = used = free = 0
        percent = round((used / total) * 100, 1) if total else 0.0

        # Conversion progress (best effort: the file may be absent or may be
        # mid-rewrite by the converter).
        progress = None
        try:
            with open('/tmp/segy2h5_progress.json', 'r') as f:
                progress = json.load(f)
        except (OSError, ValueError):
            progress = None

        return jsonify({
            'counts': {
                'raw': raw_count,
                'segy': segy_count,
                'h5': h5_count,
                'total_expected': 345  # Hardcoded target from context
            },
            'disk': {
                'total_gb': round(total / (1024**3), 2),
                'used_gb': round(used / (1024**3), 2),
                'free_gb': round(free / (1024**3), 2),
                'percent': percent
            },
            'progress': progress
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

# ========== GATHER ENDPOINT ==========


@app.route('/api/gather_info', methods=['POST'])
def gather_info():
    """Return time window info with overlap clustering for a set of files.

    Request JSON: ``{"files": "a.h5,b.h5,..."}`` (comma-separated names
    relative to ``DATA_DIR``).

    A file contributes a window when it exists, ends in ``.h5``, carries a
    10-digit epoch in its name (``_<digits>``) and contains
    ``calibrated_data/channel_1``. The window runs from that epoch for
    ``samples / sample_rate`` seconds (sample rate read from the ``metadata``
    group, default 500).

    Windows are grouped into clusters of overlapping time spans; clusters are
    returned largest first, with ``best_group`` being the first of them.
    Responds 400 when no file yields a window and 500 on unexpected errors.
    """
    try:
        # Only datetime/timedelta are imported at module level.
        from datetime import timezone

        body = request.get_json() or {}
        files_param = body.get('files', '')
        filenames = [f.strip() for f in files_param.split(',') if f.strip()]

        ts_pattern = re.compile(r'_(\d{10})')
        windows = []
        for fn in filenames:
            fp = os.path.join(DATA_DIR, fn)
            if not os.path.exists(fp) or not fn.endswith('.h5'):
                continue
            m = ts_pattern.search(fn)
            ts = int(m.group(1)) if m else 0
            if ts == 0:
                continue
            try:
                with h5py.File(fp, 'r') as hf:
                    ch = 'calibrated_data/channel_1'
                    if ch not in hf:
                        continue
                    sr = 500
                    if 'metadata' in hf:
                        sr = int(hf['metadata'].attrs.get('sample_rate_hz', 500))
                    n = hf[ch].shape[0]
                    dur = n / sr
                    windows.append({'file': fn, 'start_ts': ts, 'end_ts': ts + int(dur), 'duration': round(dur, 1)})
            except Exception:
                # An unreadable file is left out of the result, but the reason
                # is logged instead of being discarded.
                traceback.print_exc()
                continue

        if not windows:
            return jsonify({'error': 'No valid files'}), 400

        windows.sort(key=lambda w: w['start_ts'])

        # Cluster into overlapping groups. Each cluster keeps its running
        # start/end so the bounds are not recomputed on every comparison.
        clusters = []
        for w in windows:
            for c in clusters:
                if w['start_ts'] < c['end'] and w['end_ts'] > c['start']:
                    c['members'].append(w)
                    c['start'] = min(c['start'], w['start_ts'])
                    c['end'] = max(c['end'], w['end_ts'])
                    break
            else:
                clusters.append({'start': w['start_ts'], 'end': w['end_ts'], 'members': [w]})

        # Largest cluster first; the sort is stable, so ties keep time order.
        clusters.sort(key=lambda c: len(c['members']), reverse=True)

        group_summaries = []
        for i, c in enumerate(clusters):
            union_duration = c['end'] - c['start']
            start_utc = datetime.fromtimestamp(c['start'], tz=timezone.utc)
            group_summaries.append({
                'index': i,
                'count': len(c['members']),
                'files': [x['file'] for x in c['members']],
                'start_ts': c['start'],
                'end_ts': c['end'],
                # The overlap_* fields carry the same values as the union
                # span, exactly as this endpoint reported them before.
                'overlap_start': c['start'],
                'overlap_duration': union_duration,
                'union_duration': union_duration,
                'overlap_start_utc': start_utc.strftime('%Y-%m-%d %H:%M:%S'),
                'date': start_utc.strftime('%Y-%m-%d')
            })

        best = group_summaries[0] if group_summaries else None

        return jsonify({
            'total_files': len(windows),
            'groups': group_summaries,
            'best_group': best,
            'num_groups': len(clusters)
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/gather', methods=['GET', 'POST'])
def gather():
    """Return multi-file trace data for gather view.

    Parameters (JSON body on POST, query string otherwise):
        files    -- comma-separated file names relative to ``DATA_DIR``
        channel  -- dataset path, default ``calibrated_data/channel_1``
        start    -- window start in seconds, default 0
        duration -- window length in seconds, default 60
        points   -- maximum samples returned per trace, default 2000
                    (values below 1 are treated as 1)
        sync     -- 'true' to align files on the epoch embedded in their
                    names (``_<10 digits>``), default 'true'
        abs_time -- optional absolute epoch of the window start (sync only)

    In sync mode the window starts at ``abs_time`` when given, otherwise at
    the earliest file epoch plus ``start``; each file is read at its own
    offset relative to that instant. Files without an epoch in their name, and
    all files in non-sync mode, are read at ``start`` seconds into the file.

    The response holds one entry per readable ``.h5`` file containing the
    channel (``empty: True`` when the window misses the file), a shared
    ``time_axis`` derived from the first non-empty trace, and decimation
    info. Responds 400 when no files are named and 500 on unexpected errors.
    """
    try:
        # Only datetime/timedelta are imported at module level.
        from datetime import timezone

        # A JSON body and the query string expose the same .get() interface.
        if request.method == 'POST' and request.is_json:
            params = request.get_json() or {}
        else:
            params = request.args

        files_param = params.get('files', '')
        channel = params.get('channel', 'calibrated_data/channel_1')
        start = float(params.get('start', 0))
        duration = float(params.get('duration', 60))
        # Clamp: points == 0 used to raise ZeroDivisionError for every file in
        # the decimation step, silently producing a response with no traces.
        points = max(1, int(params.get('points', 2000)))
        sync = str(params.get('sync', 'true')).lower() == 'true'

        filenames = [f.strip() for f in files_param.split(',') if f.strip()]
        if not filenames:
            return jsonify({'error': 'No files specified'}), 400

        ts_pattern = re.compile(r'_(\d{10})')
        file_timestamps = {}
        for fn in filenames:
            m = ts_pattern.search(fn)
            file_timestamps[fn] = int(m.group(1)) if m else 0

        min_ts = 0
        abs_start = start
        if sync:
            valid_ts = [ts for ts in file_timestamps.values() if ts > 0]
            min_ts = min(valid_ts) if valid_ts else 0
            abs_time_param = params.get('abs_time', None)
            if abs_time_param is not None:
                abs_start = float(abs_time_param)
            else:
                abs_start = min_ts + start

        traces = []
        time_axis = None
        sample_rate = None
        max_dur = 0

        for fn in filenames:
            fp = os.path.join(DATA_DIR, fn)
            if not os.path.exists(fp) or not fn.endswith('.h5'):
                continue

            board_id = extract_board_id(fn)

            try:
                with h5py.File(fp, 'r') as f:
                    if channel not in f:
                        continue

                    sr = 500
                    if 'metadata' in f:
                        sr = int(f['metadata'].attrs.get('sample_rate_hz', 500))

                    # The reported sample rate is that of the first usable file.
                    if sample_rate is None:
                        sample_rate = sr

                    total_samples = f[channel].shape[0]
                    file_duration = total_samples / sr

                    if sync and file_timestamps[fn] > 0:
                        file_ts = file_timestamps[fn]
                        local_start = abs_start - file_ts
                        local_end = local_start + duration
                        max_dur = max(max_dur, file_duration + (file_ts - min_ts))
                    else:
                        local_start = start
                        local_end = start + duration
                        max_dur = max(max_dur, file_duration)

                    start_sample = max(0, int(local_start * sr))
                    end_sample = min(int(local_end * sr), total_samples)

                    if end_sample <= start_sample:
                        # The window does not intersect this file.
                        traces.append({
                            'filename': fn,
                            'board_id': board_id,
                            'y': [],
                            'min': 0, 'max': 0,
                            'start_ts': file_timestamps.get(fn, 0),
                            'empty': True
                        })
                        continue

                    data = f[channel][start_sample:end_sample]
                    n = len(data)

                    # Decimate by striding: keep every step-th sample, capped
                    # at `points` values.
                    if n > points:
                        step = n // points
                        indices = range(0, n, step)[:points]
                    else:
                        indices = range(n)
                    values = [float(data[i]) for i in indices]

                    traces.append({
                        'filename': fn,
                        'board_id': board_id,
                        'y': values,
                        # min/max cover the full-resolution window, not just
                        # the decimated samples.
                        'min': float(np.min(data)),
                        'max': float(np.max(data)),
                        'start_ts': file_timestamps.get(fn, 0)
                    })

                    if time_axis is None:
                        # Sync mode: seconds from the first returned sample.
                        # Otherwise: seconds from the start of the file.
                        axis_origin = 0 if sync else local_start
                        time_axis = [axis_origin + i / sr for i in indices]

            except Exception:
                traceback.print_exc()
                continue

        actual_sr = sample_rate or 500
        requested_samples = int(duration * actual_sr)
        resp = {
            'traces': traces,
            'sample_rate': actual_sr,
            'time_axis': time_axis or [],
            'file_duration': round(max_dur, 1),
            'sync': sync,
            'decimation': {
                'display_points': points,
                'original_samples': requested_samples,
                'ratio': max(1, requested_samples // max(1, points)),
                'is_decimated': requested_samples > points
            }
        }
        if sync:
            resp['epoch_start'] = min_ts
            resp['window_start'] = min_ts + start if min_ts else start
            if min_ts:
                resp['date_start'] = datetime.fromtimestamp(min_ts, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')

        return jsonify(resp)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

# ========== BACKGROUND EXTRACT LOGIC ==========

def background_extract(task_id, filenames, start, duration, channels_opt, include_nodes, include_shots, include_metadata):
    """Worker function for async extraction.

    Builds ``<TEMP_DIR>/<task_id>.zip`` containing one full-resolution CSV
    per input file, plus optional ``shots.csv``, ``node_positions.csv`` and
    ``metadata.json``, and records progress and the outcome in
    ``TASKS[task_id]``.

    Args:
        task_id: Key of the task entry in ``TASKS``.
        filenames: H5 file names relative to ``DATA_DIR``.
        start: Window start in seconds after the earliest file epoch found
            in the names (``_<10 digits>``); for files without an epoch it is
            an offset into the file.
        duration: Window length in seconds.
        channels_opt: Channel selection key (``all8``, ``all_cal``,
            ``all_raw``, ``cal_1``..``cal_4``, ``raw_1``..``raw_4``); unknown
            keys fall back to ``all8``.
        include_nodes: Write ``node_positions.csv`` for the exported boards.
        include_shots: Load ``shot_times.json`` and write matching shots.
        include_metadata: Write ``metadata.json``.

    On success the task gets ``status='completed'``, ``file_path`` and
    ``file_name``. On failure it gets ``status='error'`` and ``error``, and
    any partially written ZIP is removed.
    """
    # Only datetime/timedelta are imported at module level.
    from datetime import timezone

    temp_zip_path = None
    try:
        TASKS[task_id]['status'] = 'processing'
        TASKS[task_id]['total_files'] = len(filenames)

        # Load resources
        darf = load_darf_nodes()
        shot_times = []
        if include_shots:
            try:
                shot_path = os.path.join(app.static_folder, 'shot_times.json')
                if os.path.exists(shot_path):
                    with open(shot_path) as sf:
                        shot_times = json.load(sf)
            except Exception:
                # Shots are optional: export without them, but log why.
                traceback.print_exc()

        ts_pattern = re.compile(r'_(\d{10})')
        file_timestamps = {}
        for fn in filenames:
            m = ts_pattern.search(fn)
            file_timestamps[fn] = int(m.group(1)) if m else 0

        valid_ts = [t for t in file_timestamps.values() if t > 0]
        min_ts = min(valid_ts) if valid_ts else 0
        abs_start = min_ts + start
        abs_end = abs_start + duration

        export_name = 'gather_extract_s%d_d%d.zip' % (int(start), int(duration))
        temp_zip_path = os.path.join(TEMP_DIR, f"{task_id}.zip")

        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            metadata = {
                # Same naive-UTC ISO string with a 'Z' suffix as before,
                # produced without the deprecated utcnow().
                'export_time': datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z',
                'start_offset_s': start,
                'duration_s': duration,
                'abs_start_epoch': abs_start,
                'abs_end_epoch': abs_end,
                'sample_rate_hz': 500,
                'files': [],
                'decimation': 'none - full resolution export'
            }

            ch_map = {
                'all8': ['calibrated_data/channel_1','calibrated_data/channel_2','calibrated_data/channel_3','calibrated_data/channel_4',
                         'raw_data/channel_1','raw_data/channel_2','raw_data/channel_3','raw_data/channel_4'],
                'all_cal': ['calibrated_data/channel_1','calibrated_data/channel_2','calibrated_data/channel_3','calibrated_data/channel_4'],
                'all_raw': ['raw_data/channel_1','raw_data/channel_2','raw_data/channel_3','raw_data/channel_4'],
                'cal_1': ['calibrated_data/channel_1'], 'cal_2': ['calibrated_data/channel_2'],
                'cal_3': ['calibrated_data/channel_3'], 'cal_4': ['calibrated_data/channel_4'],
                'raw_1': ['raw_data/channel_1'], 'raw_2': ['raw_data/channel_2'],
                'raw_3': ['raw_data/channel_3'], 'raw_4': ['raw_data/channel_4'],
            }
            all_channels = ch_map.get(channels_opt, ch_map['all8'])

            processed_count = 0
            for fn in filenames:
                processed_count += 1
                TASKS[task_id]['current_file'] = fn
                # Files account for 95% of the bar; the rest is the trailer
                # (shots, nodes, metadata) written after the loop.
                TASKS[task_id]['progress'] = int((processed_count / len(filenames)) * 95)
                TASKS[task_id]['processed_files'] = processed_count

                fp = os.path.join(DATA_DIR, fn)
                if not os.path.exists(fp) or not fn.endswith('.h5'):
                    continue

                board_id = extract_board_id(fn)
                node_info = darf.get(str(board_id), {}) if board_id else {}
                file_ts = file_timestamps.get(fn, 0)

                if file_ts > 0:
                    local_start = abs_start - file_ts
                    local_end = abs_end - file_ts
                else:
                    local_start = start
                    local_end = start + duration

                try:
                    with h5py.File(fp, 'r') as hf:
                        sr = 500
                        if 'metadata' in hf:
                            sr = int(hf['metadata'].attrs.get('sample_rate_hz', 500))

                        total_samples = hf['calibrated_data/channel_1'].shape[0]
                        s0 = max(0, int(local_start * sr))
                        s1 = min(total_samples, int(local_end * sr))

                        if s1 <= s0:
                            continue

                        n_samples = s1 - s0

                        channel_data = {}
                        available_channels = []
                        for ch in all_channels:
                            if ch in hf:
                                channel_data[ch] = hf[ch][s0:s1]
                                available_channels.append(ch)

                        if not channel_data:
                            continue

                        csv_name = fn.replace('.h5', '') + '_extract.csv'
                        csv_buf = io.StringIO()
                        writer = csv_mod.writer(csv_buf)

                        header = ['sample_index', 'time_s', 'time_utc']
                        for ch in available_channels:
                            short = ch.replace('calibrated_data/channel_', 'cal_ch').replace('raw_data/channel_', 'raw_ch')
                            header.append(short)
                        writer.writerow(header)

                        for i in range(n_samples):
                            sample_idx = s0 + i
                            time_s = round(sample_idx / sr, 4)
                            if file_ts > 0:
                                # Integer milliseconds since the epoch. Taking
                                # the fraction of a float epoch instead loses
                                # precision at this magnitude and truncates to
                                # the wrong millisecond (e.g. .003 for a
                                # 0.004 s sample at 500 Hz).
                                whole_s, ms = divmod(file_ts * 1000 + (sample_idx * 1000) // sr, 1000)
                                time_utc = datetime.fromtimestamp(whole_s, tz=timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.') + '%03d' % ms
                            else:
                                time_utc = ''

                            row = [sample_idx, time_s, time_utc]
                            for ch in available_channels:
                                row.append(channel_data[ch][i])
                            writer.writerow(row)

                        zf.writestr(csv_name, csv_buf.getvalue())

                        file_meta = {
                            'filename': fn,
                            'board_id': board_id,
                            'line': node_info.get('line'),
                            'point': node_info.get('point'),
                            'start_epoch': file_ts,
                            'sample_range': [s0, s1],
                            'n_samples': n_samples,
                            'duration_s': round(n_samples / sr, 2),
                            'channels': available_channels
                        }
                        metadata['files'].append(file_meta)

                except Exception:
                    # One bad file must not abort the whole export.
                    traceback.print_exc()
                    continue

            # Matching shots: shot times are seconds since midnight, so the
            # window is compared by time of day, wrapping across midnight.
            if shot_times:
                day_start_h = abs_start % 86400
                day_end_h = abs_end % 86400
                matching_shots = []
                for shot in shot_times:
                    shot_tod = shot['time_s']
                    if day_start_h <= day_end_h:
                        if shot_tod >= day_start_h and shot_tod <= day_end_h:
                            matching_shots.append(shot)
                    else:
                        if shot_tod >= day_start_h or shot_tod <= day_end_h:
                            matching_shots.append(shot)
                if matching_shots:
                    shot_buf = io.StringIO()
                    writer = csv_mod.writer(shot_buf)
                    writer.writerow(['line', 'time_of_day_s', 'time_str', 'easting', 'northing'])
                    for s in matching_shots:
                        writer.writerow([s.get('line',''), s.get('time_s',''), s.get('time_str',''), s.get('easting',''), s.get('northing','')])
                    zf.writestr('shots.csv', shot_buf.getvalue())
                    metadata['matching_shots'] = len(matching_shots)

            # Node positions, limited to boards that were actually exported
            if include_nodes and darf:
                node_buf = io.StringIO()
                nw = csv_mod.writer(node_buf)
                nw.writerow(['board_id','line','point','preplot_easting','preplot_northing','preplot_lat','preplot_lon',
                             'aslaid_easting','aslaid_northing','aslaid_lat','aslaid_lon','depth'])
                exported_bids = set()
                for fm in metadata['files']:
                    if fm.get('board_id'):
                        exported_bids.add(str(fm['board_id']))
                for bid, node in sorted(darf.items(), key=lambda x: (x[1].get('line',0), x[1].get('point',0))):
                    if bid in exported_bids:
                        nw.writerow([
                            bid, node.get('line',''), node.get('point',''),
                            node.get('preplot_e',''), node.get('preplot_n',''),
                            node.get('preplot_lat',''), node.get('preplot_lon',''),
                            node.get('aslaid_e',''), node.get('aslaid_n',''),
                            node.get('aslaid_lat',''), node.get('aslaid_lon',''),
                            node.get('depth','')
                        ])
                zf.writestr('node_positions.csv', node_buf.getvalue())

            if include_metadata:
                zf.writestr('metadata.json', json.dumps(metadata, indent=2, default=str))

        TASKS[task_id]['progress'] = 100
        TASKS[task_id]['status'] = 'completed'
        TASKS[task_id]['file_path'] = temp_zip_path
        TASKS[task_id]['file_name'] = export_name

    except Exception as e:
        TASKS[task_id]['status'] = 'error'
        TASKS[task_id]['error'] = str(e)
        traceback.print_exc()
        # The failed task never gets a file_path, so nothing else would ever
        # delete a partially written archive.
        if temp_zip_path and os.path.exists(temp_zip_path):
            try:
                os.remove(temp_zip_path)
            except OSError:
                pass

@app.route('/api/gather_extract', methods=['POST'])
def gather_extract():
    """Register an extraction task and run it on a daemon thread.

    Request JSON: ``files`` (comma-separated names), ``start`` and
    ``duration`` in seconds (defaults 0 and 60), plus the optional
    ``channels`` ('all8'), ``include_nodes``, ``include_shots`` and
    ``include_metadata`` (all True by default), which are handed to
    ``background_extract``.

    Returns ``{'task_id', 'status': 'pending'}``. Responds 400 when no file
    is named and 500 with a JSON error on any other failure. Expired tasks
    are purged first.
    """
    try:
        cleanup_old_tasks()

        payload = request.get_json() or {}
        files_param = payload.get('files', '')
        start = float(payload.get('start', 0))
        duration = float(payload.get('duration', 60))

        filenames = []
        for piece in files_param.split(','):
            piece = piece.strip()
            if piece:
                filenames.append(piece)

        if not filenames:
            return jsonify({'error': 'No files specified'}), 400

        task_id = str(uuid.uuid4())
        TASKS[task_id] = {
            'id': task_id,
            'status': 'pending',
            'created_at': time.time(),
            'progress': 0,
            'total_files': len(filenames),
            'processed_files': 0,
            'current_file': ''
        }

        worker_args = (
            task_id,
            filenames,
            start,
            duration,
            payload.get('channels', 'all8'),
            payload.get('include_nodes', True),
            payload.get('include_shots', True),
            payload.get('include_metadata', True),
        )
        # Daemon thread: it does not keep the process alive at shutdown.
        worker = threading.Thread(target=background_extract, args=worker_args, daemon=True)
        worker.start()

        return jsonify({'task_id': task_id, 'status': 'pending'})

    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/api/tasks/<task_id>', methods=['GET'])
def get_task_status(task_id):
    """Return the stored ``TASKS`` entry for ``task_id`` as JSON, or 404 if unknown."""
    entry = TASKS.get(task_id)
    if entry:
        return jsonify(entry)
    return jsonify({'error': 'Task not found'}), 404

@app.route('/api/tasks/<task_id>/download', methods=['GET'])
def download_task_result(task_id):
    """Send the ZIP produced by a completed task as an attachment.

    Responds 404 for an unknown task id and 400 when the task has not
    completed or its result file is no longer on disk.
    """
    entry = TASKS.get(task_id)
    if not entry:
        return jsonify({'error': 'Task not found'}), 404

    # The file is only looked up on disk once the task reports completion.
    ready = entry['status'] == 'completed' and os.path.exists(entry.get('file_path', ''))
    if not ready:
        return jsonify({'error': 'Task not ready or file missing'}), 400

    attachment_name = entry.get('file_name', 'extract.zip')
    return send_file(
        entry['file_path'],
        mimetype='application/zip',
        as_attachment=True,
        download_name=attachment_name
    )

@app.route('/api/shots')
def get_shots():
    """Return the shots from ``shot_times.json`` that fall inside a file's window.

    Query parameters: ``file`` (name ending in ``_<epoch>.h5``), ``start``
    (seconds into the file, default 0) and ``duration`` (seconds, default
    3600). Shot times are seconds since midnight, so the window is walked one
    UTC calendar day at a time and each shot is tested against the part of
    that day the window covers.

    Each hit carries ``offset_s`` (seconds from the start of the file),
    ``time_str``, ``line`` and ``date``; hits are sorted by offset. Responds
    404 when the shot file is missing and 400 when the file name carries no
    timestamp.
    """
    import re as _re
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz

    filename = request.args.get('file', '')
    start_sec = float(request.args.get('start', 0))
    duration_sec = float(request.args.get('duration', 3600))

    shot_path = os.path.join(os.path.dirname(__file__), 'static', 'shot_times.json')
    if not os.path.exists(shot_path):
        return jsonify({'error': 'shot_times.json not found'}), 404
    with open(shot_path) as handle:
        all_shots = json.load(handle)

    stamp = _re.search(r'_(\d{10,})\.h5', filename)
    if not stamp:
        return jsonify({'error': 'Cannot parse timestamp from filename'}), 400

    file_start = _dt.fromtimestamp(int(stamp.group(1)), tz=_tz.utc)
    window_open = file_start + _td(seconds=start_sec)
    window_close = file_start + _td(seconds=start_sec + duration_sec)

    hits = []
    first_day = window_open.date()
    extra_days = (window_close.date() - first_day).days
    for step in range(extra_days + 1):
        day = first_day + _td(days=step)
        midnight = _dt(day.year, day.month, day.day, tzinfo=_tz.utc)
        # Portion of this calendar day covered by the window, as seconds
        # since midnight.
        lo = (max(window_open, midnight) - midnight).total_seconds()
        hi = (min(window_close, midnight + _td(days=1)) - midnight).total_seconds()
        for shot in all_shots:
            if not (lo <= shot['time_s'] <= hi):
                continue
            fired_at = midnight + _td(seconds=shot['time_s'])
            hits.append({
                'offset_s': round((fired_at - file_start).total_seconds(), 2),
                'time_str': shot['time_str'],
                'line': shot['line'],
                'date': str(day)
            })

    hits.sort(key=lambda hit: hit['offset_s'])
    return jsonify({'shots': hits, 'count': len(hits)})


@app.route('/api/shots_by_time')
def shots_by_time():
    """Return shots matching an absolute time window (unix timestamps)."""
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz
    start_unix = float(request.args.get('start_unix', 0))
    duration = float(request.args.get('duration', 60))
    shot_path = os.path.join(os.path.dirname(__file__), 'static', 'shot_times.json')
    if not os.path.exists(shot_path):
        return jsonify({'error': 'shot_times.json not found'}), 404
    with open(shot_path) as f:
        all_shots = json.load(f)
    win_start_dt = _dt.fromtimestamp(start_unix, tz=_tz.utc)
    win_end_dt = _dt.fromtimestamp(start_unix + duration, tz=_tz.utc)
    results = []
    day = win_start_dt.date()
    end_day = win_end_dt.date()
    while day <= end_day:
        day_start = _dt(day.year, day.month, day.day, tzinfo=_tz.utc)
        eff_start = max(win_start_dt, day_start)
        eff_end = min(win_end_dt, day_start + _td(days=1))
        tod_start = (eff_start - day_start).total_seconds()
        tod_end = (eff_end - day_start).total_seconds()
        for shot in all_shots:
            if tod_start <= shot['time_s'] <= tod_end:
                abs_shot_time = day_start + _td(seconds=shot['time_s'])
                offset = (abs_shot_time - win_start_dt).total_seconds()
                results.append({
                    'offset_s': round(offset, 2),
                    'time_str': shot['time_str'],
                    'line': shot['line'],
                    'date': str(day)
                })
        day += _td(days=1)
    results.sort(key=lambda x: x['offset_s'])
    return jsonify({'shots': results, 'count': len(results)})


@app.route('/api/real_shots')
def real_shots():
    """Serve the parsed content of static/real_shots.json as JSON.

    The file is expected to hold real shot positions (GeoJSON built from SPS
    S01 files, per the original description -- not validated here). Responds
    with a 404 JSON error when the file does not exist.
    """
    static_dir = os.path.join(os.path.dirname(__file__), 'static')
    shot_path = os.path.join(static_dir, 'real_shots.json')
    if os.path.exists(shot_path):
        with open(shot_path) as handle:
            payload = json.load(handle)
        return jsonify(payload)
    return jsonify({'error': 'real_shots.json not found'}), 404


@app.route('/api/coverage')
def get_coverage():
    """Get coverage data for all H5 files in DATA_DIR.

    For every '*.h5' entry the response carries 'name', 'size' and
    'duration_sec'. When the filename matches
    'auto_<day>_<HHMMSS>_b<board>_rsn<n>_seq<n>_<n>' it also carries
    'board_id', 'day' and 'date_str', plus 'line' and 'point' when the board
    id is present in the DARF node metadata.

    Responses:
        200 -- {'files': [...], 'count': n}, sorted by 'date_str' (files
               without one sort first).
        500 -- {'error': message} on any unexpected failure.
    """
    default_duration = 8000
    name_pattern = re.compile(
        r'auto_(\d+)_(\d{2})(\d{2})(\d{2})_b(\d+)_rsn(\d+)_seq(\d+)_(\d+)')
    try:
        # DARF metadata is optional enrichment: the shared loader yields {}
        # when the file is missing or unreadable.
        darf = load_darf_nodes()
        result = []
        for f in os.listdir(DATA_DIR):
            if not f.endswith('.h5'):
                continue
            fpath = os.path.join(DATA_DIR, f)
            info = {'name': f, 'size': os.path.getsize(fpath)}
            m = name_pattern.match(f)
            if m:
                day, hh, mm, ss, board_id = m.group(1, 2, 3, 4, 5)
                info['board_id'] = int(board_id)
                info['day'] = int(day)
                # NOTE(review): the base year is hard-coded to 2020 and <day>
                # is treated as a 1-based day count from Jan 1st -- confirm.
                base = datetime(2020, 1, 1) + timedelta(
                    days=int(day) - 1, hours=int(hh), minutes=int(mm), seconds=int(ss))
                info['date_str'] = base.isoformat()
                # DARF keys are matched against the id exactly as written in the name.
                if board_id in darf:
                    info['line'] = darf[board_id].get('line')
                    info['point'] = darf[board_id].get('point')
            info['duration_sec'] = default_duration
            try:
                with h5py.File(fpath, 'r') as h5f:
                    attrs = dict(h5f.attrs)
                info['duration_sec'] = float(
                    attrs.get('duration', attrs.get('record_length', default_duration)))
            except Exception:
                # Best effort: an unreadable file or odd attribute keeps the
                # default duration instead of failing the whole listing.
                pass
            result.append(info)
        result.sort(key=lambda x: x.get('date_str', ''))
        return jsonify({'files': result, 'count': len(result)})
    except Exception as e:
        # Keep the traceback in the server log, as the extract route does.
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

# Script entry point: start the Flask app only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    # Listen on all network interfaces, port 3001, with debug mode disabled.
    app.run(host='0.0.0.0', port=3001, debug=False)