# File-viewer listing header: 1676 lines, 69 KiB, plaintext.
from flask import Flask, jsonify, send_from_directory, request, send_file, abort
|
|
from flask_cors import CORS
|
|
import os
|
|
import re
|
|
import h5py
|
|
import numpy as np
|
|
from datetime import datetime, timedelta
|
|
import base64
|
|
import struct
|
|
import traceback
|
|
import json
|
|
import io
|
|
import mimetypes
|
|
from pyproj import Transformer
|
|
import shutil
|
|
import threading
|
|
import uuid
|
|
import time
|
|
import zipfile
|
|
import csv as csv_mod
|
|
|
|
# Flask application object; files under ./static are exposed at the URL root
# because static_url_path is the empty string.
app = Flask(
    __name__,
    static_url_path='',
    static_folder='static',
)
# Wrap the app with flask-cors using its default settings.
CORS(app)
|
|
|
|
# Directory holding the data files; override with the DATA_DIR environment
# variable.
DATA_DIR = os.environ.get('DATA_DIR', '/data')
# JSON file read and written by load_locations() / save_locations().
LOCATIONS_FILE = '/home/floppyrj45/seismic-viewer/locations.json'
# DARF node metadata, stored next to this module under static/.
DARF_FILE = os.path.join(os.path.dirname(__file__), 'static', 'darf_nodes.json')
# Scratch directory (presumably for generated extract files -- confirm with
# the code that writes there).
TEMP_DIR = '/tmp/extracts'

# exist_ok=True replaces the former "check, then create" pair, which could
# raise FileExistsError when two processes start at the same moment.
os.makedirs(TEMP_DIR, exist_ok=True)

# Global task store: task id -> task dict. cleanup_old_tasks() reads each
# task's 'created_at' and optional 'file_path'.
TASKS = {}
|
|
|
|
# ========== UTILITY FUNCTIONS ==========
|
|
|
|
def cleanup_old_tasks():
    """Remove tasks older than 1 hour.

    Every entry of the global ``TASKS`` store whose ``created_at`` timestamp
    lies more than 3600 seconds in the past is dropped. If the entry has a
    ``file_path``, that file is deleted on a best-effort basis first.
    """
    now = time.time()
    # Iterate over a snapshot: another thread adding or removing a task during
    # the sweep would otherwise raise "dictionary changed size during
    # iteration".
    for tid, task in list(TASKS.items()):
        if now - task['created_at'] <= 3600:
            continue
        file_path = task.get('file_path')
        if file_path:
            try:
                os.remove(file_path)
            except OSError:
                # Already gone, or not removable: the task entry is dropped
                # regardless, exactly as before.
                pass
        # pop() with a default tolerates a concurrent removal of the same id.
        TASKS.pop(tid, None)
|
|
|
|
def load_darf_nodes():
    """Return the parsed JSON content of ``DARF_FILE``.

    Best effort: any failure while opening or decoding the file yields an
    empty dict instead of an exception.
    """
    nodes = {}
    try:
        with open(DARF_FILE, 'r') as handle:
            nodes = json.load(handle)
    except Exception:
        nodes = {}
    return nodes
|
|
|
|
def extract_board_id(filename):
    """Return the board id found in *filename*, or None when there is none.

    The id is the digit run of the first ``_b<digits>_`` token, as in
    ``auto_XXX_HHMMSS_b{BOARD_ID}_rsnXXX_...``. It is returned as a string.
    """
    found = re.search(r'_b(\d+)_', filename)
    if found is None:
        return None
    return found.group(1)
|
|
|
|
def load_locations():
    """Load locations from JSON file.

    Returns:
        The parsed content of ``LOCATIONS_FILE``, or an empty dict when the
        file is missing, unreadable, or does not contain valid JSON.
    """
    try:
        with open(LOCATIONS_FILE, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable (opening directly avoids the
        # exists-then-open race). ValueError: json.JSONDecodeError and
        # UnicodeDecodeError. Anything else is a programming error and is
        # allowed to propagate instead of being swallowed.
        return {}
|
|
|
|
def save_locations(locations):
    """Save locations to JSON file.

    The data is first written to a uniquely named temporary file next to
    ``LOCATIONS_FILE`` and then moved over it with ``os.replace``. A crash or
    serialization error in the middle of the write therefore cannot leave a
    truncated file behind (which load_locations() would read as "no
    locations").

    Args:
        locations: JSON-serializable object to store.

    Raises:
        OSError: if the file cannot be written or replaced.
        TypeError / ValueError: if *locations* is not JSON-serializable.
    """
    tmp_path = f"{LOCATIONS_FILE}.{uuid.uuid4().hex}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(locations, f, indent=2)
        os.replace(tmp_path, LOCATIONS_FILE)
    except Exception:
        # Do not leave the partial temporary file around; re-raise the
        # original error for the caller.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
def _h5_attr_to_python(value):
    """Convert one HDF5 attribute value into a JSON-friendly Python value."""
    if hasattr(value, 'item'):
        try:
            value = value.item()
        except Exception:
            # e.g. arrays holding more than one element
            return str(value)
        if isinstance(value, bytes):
            # Fixed-length HDF5 strings come back as bytes, which jsonify
            # cannot serialize and which str() would render as "b'...'".
            return value.decode('utf-8', errors='replace')
        return value
    if isinstance(value, bytes):
        return value.decode('utf-8', errors='replace')
    return str(value)


def get_h5_metadata(filepath):
    """Extract comprehensive metadata from H5 file.

    Collected, in this order (later keys overwrite earlier ones):
      * attributes of the file root;
      * attributes of the ``metadata`` group, if present;
      * ``calibration``: attributes of the ``calibration`` group, if present;
      * ``channels``: one dict per member of ``calibrated_data`` and
        ``raw_data`` (name, shape, dtype, samples, plus its attributes);
      * ``start_timestamp`` / ``end_timestamp`` / ``duration_human``, derived
        from ``creation_date`` and ``duration_sec`` when available. A failure
        in this step is reported under ``timestamp_error``.

    Args:
        filepath: path of the HDF5 file.

    Returns:
        The metadata dict, or ``{'error': <message>}`` if the file cannot be
        opened or read.
    """
    metadata = {}
    try:
        with h5py.File(filepath, 'r') as f:
            # File-level attributes
            for k, v in f.attrs.items():
                metadata[k] = _h5_attr_to_python(v)

            # Metadata group attributes
            if 'metadata' in f:
                for k, v in f['metadata'].attrs.items():
                    metadata[k] = _h5_attr_to_python(v)

            # Calibration group attributes
            if 'calibration' in f:
                metadata['calibration'] = {
                    k: _h5_attr_to_python(v)
                    for k, v in f['calibration'].attrs.items()
                }

            # Channel information
            channels = []
            for group_name in ('calibrated_data', 'raw_data'):
                if group_name not in f:
                    continue
                for ds_name in f[group_name].keys():
                    ds = f[group_name][ds_name]
                    ch_info = {
                        'name': f"{group_name}/{ds_name}",
                        'shape': list(ds.shape),
                        'dtype': str(ds.dtype),
                        'samples': int(ds.shape[0]) if len(ds.shape) > 0 else 0
                    }
                    for k, v in ds.attrs.items():
                        ch_info[k] = _h5_attr_to_python(v)
                    channels.append(ch_info)
            metadata['channels'] = channels

        # Compute timestamps
        if 'creation_date' in metadata:
            try:
                start_time = datetime.fromisoformat(metadata['creation_date'].replace('Z', '+00:00'))
                metadata['start_timestamp'] = start_time.isoformat()

                if 'duration_sec' in metadata:
                    duration_sec = float(metadata['duration_sec'])
                    end_time = start_time + timedelta(seconds=duration_sec)
                    metadata['end_timestamp'] = end_time.isoformat()

                    # Human-readable duration
                    hours = int(duration_sec // 3600)
                    minutes = int((duration_sec % 3600) // 60)
                    seconds = int(duration_sec % 60)
                    metadata['duration_human'] = f"{hours}h {minutes}m {seconds}s"
            except Exception as e:
                metadata['timestamp_error'] = str(e)

        return metadata
    except Exception as e:
        return {'error': str(e)}
|
|
|
|
# ========== ROUTES ==========
|
|
|
|
@app.route('/')
def index():
    """Serve the front-end entry page, index.html, from the static directory."""
    page = 'index.html'
    return send_from_directory('static', page)
|
|
|
|
@app.route('/api/files')
def list_files():
    """List the .segy / .sgy / .h5 entries of DATA_DIR, sorted by name.

    Each entry carries its name, size in bytes, modification time (ISO
    format) and a type of 'h5' or 'segy'. Any error yields a 500 response
    with the error message.
    """
    try:
        entries = []
        for name in os.listdir(DATA_DIR):
            if not name.endswith(('.segy', '.sgy', '.h5')):
                continue
            file_stat = os.stat(os.path.join(DATA_DIR, name))
            entries.append({
                'name': name,
                'size': file_stat.st_size,
                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
                'type': 'h5' if name.endswith('.h5') else 'segy',
            })
        entries.sort(key=lambda entry: entry['name'])
        return jsonify({'files': entries, 'count': len(entries)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/file/<filename>/info')
def file_info(filename):
    """Return a JSON summary of one file in DATA_DIR.

    The summary always holds name, size, size in MB, modification time and
    type. For ``.h5`` files every dataset is listed together with the counts
    of calibrated/raw datasets; when the ``metadata`` group provides
    ``sample_rate_hz`` and a calibrated dataset exists, duration, sample rate
    and channel count are added. For ``.segy`` / ``.sgy`` files the trace
    count and the first trace's sampling figures are read via obspy, and a
    read failure is reported under ``segy_error``.

    Responds 404 if the file does not exist and 500 on unexpected errors.
    """
    try:
        path = os.path.join(DATA_DIR, filename)
        if not os.path.exists(path):
            return jsonify({'error': 'Not found'}), 404

        is_h5 = filename.endswith('.h5')
        file_stat = os.stat(path)
        info = {
            'name': filename,
            'size': file_stat.st_size,
            'size_mb': round(file_stat.st_size / 1048576, 2),
            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
            'type': 'h5' if is_h5 else 'segy',
        }

        if is_h5:
            with h5py.File(path, 'r') as h5:
                datasets = []

                def collect_dataset(name, obj):
                    # Groups are skipped; returning None keeps the walk going.
                    if not isinstance(obj, h5py.Dataset):
                        return
                    datasets.append({
                        'path': name,
                        'shape': list(obj.shape),
                        'dtype': str(obj.dtype),
                        'samples': int(obj.shape[0]) if len(obj.shape) > 0 else 0
                    })

                h5.visititems(collect_dataset)

                calibrated = [d for d in datasets if 'calibrated' in d['path']]
                raw_only = [d for d in datasets if 'raw' in d['path']]
                info['datasets'] = datasets
                info['num_datasets'] = len(datasets)
                info['calibrated_channels'] = len(calibrated)
                info['raw_channels'] = len(raw_only)
                if calibrated:
                    info['samples_per_channel'] = calibrated[0]['samples']

                # Add sample rate and duration for sidebar display
                if 'metadata' in h5:
                    rate = h5['metadata'].attrs.get('sample_rate_hz', None)
                    if rate and calibrated:
                        n_samples = calibrated[0]['samples']
                        duration_sec = float(n_samples) / float(rate)
                        hours = int(duration_sec // 3600)
                        minutes = int((duration_sec % 3600) // 60)
                        info['duration_human'] = f"{hours}h{minutes:02d}m"
                        info['sample_rate_hz'] = int(rate)
                        info['num_channels'] = int(len(calibrated))
        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy import read as obspy_read
                stream = obspy_read(path, headonly=True)
                info['num_traces'] = len(stream)
                if len(stream) > 0:
                    first_stats = stream[0].stats
                    info['samples_per_trace'] = first_stats.npts
                    info['sample_rate'] = first_stats.sampling_rate
                    info['delta'] = first_stats.delta
            except Exception as e:
                info['segy_error'] = str(e)

        return jsonify(info)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/file/<filename>/metadata')
def file_metadata(filename):
    """Return detailed metadata for a file.

    The response starts with file name, size and modification time. ``.h5``
    files are enriched with everything get_h5_metadata() returns;
    ``.segy`` / ``.sgy`` files get trace count, sampling figures, start/end
    timestamps and duration of the first trace (read via obspy, failures
    reported under ``segy_error``). Responds 404 for a missing file and 500 on
    unexpected errors.
    """
    try:
        path = os.path.join(DATA_DIR, filename)
        if not os.path.exists(path):
            return jsonify({'error': 'Not found'}), 404

        file_stat = os.stat(path)
        size_bytes = file_stat.st_size
        result = {
            'filename': filename,
            'file_size_bytes': size_bytes,
            'file_size_mb': round(size_bytes / 1048576, 2),
            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
        }

        if filename.endswith('.h5'):
            result.update(get_h5_metadata(path))
        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy import read as obspy_read
                stream = obspy_read(path, headonly=True)
                result['num_traces'] = len(stream)
                if len(stream) > 0:
                    first_stats = stream[0].stats
                    result['sample_rate'] = first_stats.sampling_rate
                    result['samples_per_trace'] = first_stats.npts
                    result['start_timestamp'] = first_stats.starttime.isoformat()
                    result['end_timestamp'] = first_stats.endtime.isoformat()
                    duration_sec = (first_stats.endtime - first_stats.starttime)
                    hours = int(duration_sec // 3600)
                    minutes = int((duration_sec % 3600) // 60)
                    seconds = int(duration_sec % 60)
                    result['duration_human'] = f"{hours}h {minutes}m {seconds}s"
                    result['duration_sec'] = duration_sec
            except Exception as e:
                result['segy_error'] = str(e)

        return jsonify(result)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/locations', methods=['GET'])
def get_locations():
    """Return everything load_locations() yields as JSON (500 on error)."""
    try:
        return jsonify(load_locations())
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/locations', methods=['POST'])
def update_location():
    """Update location for a file.

    Expects a JSON object with ``filename``, ``lat`` and ``lon``. The entry is
    stored (with an ``updated`` timestamp) through save_locations().

    Responses:
        200: ``{'success': True, 'locations': <all locations>}``.
        400: body is not a JSON object, a field is missing, or lat/lon are
             not finite numbers.
        500: unexpected error (e.g. the locations file cannot be written).
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so the client gets a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Missing filename, lat, or lon'}), 400

        filename = data.get('filename')
        lat = data.get('lat')
        lon = data.get('lon')

        if not filename or lat is None or lon is None:
            return jsonify({'error': 'Missing filename, lat, or lon'}), 400
        if not isinstance(filename, str):
            # The filename is used as a dict/JSON key.
            return jsonify({'error': 'filename must be a string'}), 400

        try:
            lat_value = float(lat)
            lon_value = float(lon)
        except (TypeError, ValueError):
            return jsonify({'error': 'lat and lon must be numbers'}), 400
        # NaN/inf would be written as invalid JSON tokens.
        if not (np.isfinite(lat_value) and np.isfinite(lon_value)):
            return jsonify({'error': 'lat and lon must be numbers'}), 400

        locations = load_locations()
        locations[filename] = {
            'lat': lat_value,
            'lon': lon_value,
            'updated': datetime.now().isoformat()
        }
        save_locations(locations)

        return jsonify({'success': True, 'locations': locations})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/file/<filename>/extract')
def extract_segment(filename):
    """Extract a time segment from a file and return as CSV or H5.

    Query parameters:
        start_sec: window start in seconds (default 0).
        duration_sec: window length in seconds (default 1200, i.e. 20 min).
        channel: HDF5 path of the dataset (default
            ``calibrated_data/channel_1``).
        format: ``csv`` or ``h5`` (default ``csv``).

    The sample rate is taken from the ``sample_rate_hz`` attribute of the
    ``metadata`` group and falls back to 500 when the group or attribute is
    absent. The requested window is clamped to the dataset and always
    contains at least one sample position.

    Responses:
        200: the extract as an attachment.
        400: non-H5 file, invalid numeric parameter, or unknown format.
        404: file or channel not found.
        500: unexpected error.
    """
    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        # Parameters
        try:
            start_sec = float(request.args.get('start_sec', 0))
            duration_sec = float(request.args.get('duration_sec', 1200))  # Default 20 min
        except ValueError:
            return jsonify({'error': 'start_sec and duration_sec must be numbers'}), 400
        # "nan" / "inf" parse as floats but cannot be turned into sample indices.
        if not (np.isfinite(start_sec) and np.isfinite(duration_sec)):
            return jsonify({'error': 'start_sec and duration_sec must be numbers'}), 400
        channel = request.args.get('channel', 'calibrated_data/channel_1')
        format_type = request.args.get('format', 'csv')  # csv or h5

        if not filename.endswith('.h5'):
            return jsonify({'error': 'Extract only supported for H5 files'}), 400
        # Reject an unknown format before any data is read from disk.
        if format_type not in ('csv', 'h5'):
            return jsonify({'error': 'Invalid format. Use csv or h5'}), 400

        base_name = filename.replace(".h5", "")
        stem = f'{base_name}_extract_{int(start_sec)}_{int(duration_sec)}s'

        with h5py.File(fp, 'r') as f:
            if channel not in f:
                return jsonify({'error': f'Channel {channel} not found'}), 404
            source = f[channel]

            # Get metadata. The 'metadata' group is optional (the H5 output
            # below already treats it that way), so do not index it blindly.
            has_metadata = 'metadata' in f
            sample_rate = 500
            if has_metadata:
                sample_rate = f['metadata'].attrs.get('sample_rate_hz', 500)
            total_samples = source.shape[0]

            # Calculate sample range
            start_sample = int(start_sec * sample_rate)
            end_sample = int((start_sec + duration_sec) * sample_rate)

            # Clamp to valid range
            start_sample = max(0, min(start_sample, total_samples - 1))
            end_sample = max(start_sample + 1, min(end_sample, total_samples))

            # Extract data
            data = source[start_sample:end_sample]

            if format_type == 'csv':
                # Generate CSV
                output = io.StringIO()
                output.write(f"# Channel: {channel}\n")
                output.write(f"# Start: {start_sec}s, Duration: {duration_sec}s\n")
                output.write(f"# Sample rate: {sample_rate} Hz\n")
                output.write(f"# Samples: {len(data)}\n")
                output.write("sample,time_sec,value\n")
                output.writelines(
                    f"{idx},{idx / sample_rate:.6f},{val}\n"
                    for idx, val in enumerate(data, start_sample)
                )

                return send_file(
                    io.BytesIO(output.getvalue().encode('utf-8')),
                    mimetype='text/csv',
                    as_attachment=True,
                    download_name=f'{stem}.csv'
                )

            # Generate H5 (format_type == 'h5')
            output = io.BytesIO()
            with h5py.File(output, 'w') as out_f:
                # Copy metadata
                if has_metadata:
                    meta_group = out_f.create_group('metadata')
                    for k, v in f['metadata'].attrs.items():
                        meta_group.attrs[k] = v
                    meta_group.attrs['extract_start_sec'] = start_sec
                    meta_group.attrs['extract_duration_sec'] = duration_sec
                    meta_group.attrs['extract_start_sample'] = start_sample
                    meta_group.attrs['extract_end_sample'] = end_sample

                # Create dataset
                ds = out_f.create_dataset('data', data=data, compression='gzip')
                ds.attrs['channel'] = channel
                ds.attrs['sample_rate_hz'] = sample_rate

                # Copy channel attributes (these win over the two set above)
                for k, v in source.attrs.items():
                    ds.attrs[k] = v

            output.seek(0)
            return send_file(
                output,
                mimetype='application/x-hdf5',
                as_attachment=True,
                download_name=f'{stem}.h5'
            )

    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
def _encode_section(section, max_samples):
    """Block-average each trace to at most max_samples points and encode it.

    Returns the response fields that depend on the pixel data:
    samples_per_trace, vmin, vmax, data_b64 (base64 of the float32 bytes) and
    dtype.
    """
    count, spt = section.shape
    if spt > max_samples:
        # Average `factor` consecutive samples; the tail that does not fill a
        # whole block is dropped.
        factor = spt // max_samples
        trimmed = section[:, :factor * max_samples]
        section = trimmed.reshape(count, max_samples, factor).mean(axis=2).astype(np.float32)
        spt = max_samples
    return {
        'samples_per_trace': spt,
        'vmin': float(np.min(section)),
        'vmax': float(np.max(section)),
        'data_b64': base64.b64encode(section.tobytes()).decode('ascii'),
        'dtype': 'float32',
    }


@app.route('/api/file/<filename>/section')
def file_section(filename):
    """Return 2D section data as base64 float32 array.

    Query parameters:
        trace_start: index of the first trace (default 0).
        trace_count: number of traces requested (default 200).
        samples_per_trace: for H5, how many consecutive samples of the channel
            form one trace (default 1000, capped at 50000).
        channel: HDF5 dataset path (default ``calibrated_data/channel_1``).
        max_samples: maximum samples per trace in the response (default 500);
            longer traces are block-averaged.

    For H5 files the 1-D channel is cut into consecutive traces of
    ``samples_per_trace`` samples. For SEG-Y files the traces come from obspy
    and the first trace's length is used for all of them.

    Responses:
        200: section description with ``data_b64``.
        400: invalid parameters, no traces in range, or unsupported format.
        404: file or channel not found.
        500: read error.
    """
    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        try:
            trace_start = int(request.args.get('trace_start', 0))
            trace_count = int(request.args.get('trace_count', 200))
            samples_per_trace = min(int(request.args.get('samples_per_trace', 1000)), 50000)
            max_samples = int(request.args.get('max_samples', 500))
        except ValueError:
            return jsonify({'error': 'trace_start, trace_count, samples_per_trace and max_samples must be integers'}), 400
        # Both values are used as divisors below; zero or negative input used
        # to surface as a ZeroDivisionError / reshape error (HTTP 500).
        if samples_per_trace < 1 or max_samples < 1:
            return jsonify({'error': 'samples_per_trace and max_samples must be >= 1'}), 400
        channel = request.args.get('channel', 'calibrated_data/channel_1')

        if filename.endswith('.h5'):
            with h5py.File(fp, 'r') as f:
                if channel not in f:
                    avail = []
                    f.visititems(lambda n, o: avail.append(n) if isinstance(o, h5py.Dataset) else None)
                    return jsonify({'error': f'Channel not found', 'available': avail}), 404

                data = f[channel]
                total_samples = data.shape[0]
                total_traces = total_samples // samples_per_trace

                trace_start = max(0, min(trace_start, max(0, total_traces - 1)))
                trace_end = min(trace_start + trace_count, total_traces)
                actual_count = trace_end - trace_start

                if actual_count <= 0:
                    return jsonify({'error': 'No traces in range'}), 400

                start_sample = trace_start * samples_per_trace
                end_sample = trace_end * samples_per_trace
                raw = data[start_sample:end_sample]

                section = raw.reshape(actual_count, samples_per_trace).astype(np.float32)

                payload = {
                    'num_traces': int(actual_count),
                    'trace_start': trace_start,
                    'total_traces': total_traces,
                    'total_samples': int(total_samples),
                }
                payload.update(_encode_section(section, max_samples))
                return jsonify(payload)

        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy import read as obspy_read
                st = obspy_read(fp)
                total_traces = len(st)

                trace_start = max(0, min(trace_start, total_traces - 1))
                trace_end = min(trace_start + trace_count, total_traces)
                actual_count = trace_end - trace_start

                if actual_count <= 0:
                    return jsonify({'error': 'No traces'}), 400

                spt = st[0].stats.npts

                # Shorter traces are zero-padded, longer ones truncated to the
                # first trace's length.
                section = np.zeros((actual_count, spt), dtype=np.float32)
                for i in range(actual_count):
                    tr = st[trace_start + i]
                    n = min(spt, tr.stats.npts)
                    section[i, :n] = tr.data[:n].astype(np.float32)

                payload = {
                    'num_traces': int(actual_count),
                    'trace_start': trace_start,
                    'total_traces': total_traces,
                    'total_samples': int(total_traces * spt),
                }
                payload.update(_encode_section(section, max_samples))
                return jsonify(payload)
            except Exception as e:
                return jsonify({'error': f'SEGY read error: {e}'}), 500

        return jsonify({'error': 'Unsupported format'}), 400
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/file/<filename>/headers')
def file_headers(filename):
    """Return trace headers for SEGY or dataset attributes for H5.

    H5: file-level attributes, every dataset (path, shape, dtype, size and its
    attributes under ``attr:<name>`` keys) and every group that has at least
    one attribute.

    SEG-Y: selected binary file header fields, the headers of the first 50
    traces at most, and the total trace count.

    Responses: 404 for a missing file, 400 for an unsupported extension, 500
    on read errors.
    """
    def to_json_value(value):
        """Convert an HDF5 attribute value into something jsonify accepts."""
        if hasattr(value, 'item'):
            try:
                value = value.item()
            except Exception:
                # e.g. arrays with more than one element
                return str(value)
            if isinstance(value, bytes):
                # Fixed-length HDF5 strings arrive as bytes, which jsonify
                # cannot serialize.
                return value.decode('utf-8', errors='replace')
            return value
        if isinstance(value, bytes):
            return value.decode('utf-8', errors='replace')
        return str(value)

    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        if filename.endswith('.h5'):
            with h5py.File(fp, 'r') as f:
                attrs = {k: to_json_value(v) for k, v in f.attrs.items()}

                datasets = []
                groups = []

                def visitor(name, obj):
                    # One walk fills both lists; each keeps the visiting order.
                    if isinstance(obj, h5py.Dataset):
                        ds_info = {
                            'path': name,
                            'shape': list(obj.shape),
                            'dtype': str(obj.dtype),
                            'size': int(np.prod(obj.shape)),
                        }
                        for ak, av in obj.attrs.items():
                            ds_info[f'attr:{ak}'] = to_json_value(av)
                        datasets.append(ds_info)
                    elif isinstance(obj, h5py.Group):
                        g_info = {'path': name}
                        for ak, av in obj.attrs.items():
                            g_info[f'attr:{ak}'] = to_json_value(av)
                        # Groups without attributes are not reported.
                        if len(g_info) > 1:
                            groups.append(g_info)

                f.visititems(visitor)

                return jsonify({
                    'type': 'h5',
                    'file_attrs': attrs,
                    'datasets': datasets,
                    'groups': groups
                })

        elif filename.endswith(('.segy', '.sgy')):
            try:
                from obspy.io.segy.segy import _read_segy

                segy = _read_segy(fp)

                bfh = {}
                if hasattr(segy, 'binary_file_header'):
                    h = segy.binary_file_header
                    for attr in ['job_identification_number', 'line_number', 'reel_number',
                                 'number_of_data_traces_per_ensemble', 'number_of_auxiliary_traces_per_ensemble',
                                 'sample_interval_in_microseconds', 'number_of_samples_per_data_trace',
                                 'data_sample_format_code']:
                        if hasattr(h, attr):
                            bfh[attr] = getattr(h, attr)

                # Response key -> obspy trace header attribute.
                header_fields = (
                    ('inline', 'trace_sequence_number_within_line'),
                    ('crossline', 'trace_sequence_number_within_segy_file'),
                    ('field_record', 'original_field_record_number'),
                    ('trace_number', 'trace_number_within_the_original_field_record'),
                    ('cdp', 'ensemble_number'),
                    ('trace_in_ensemble', 'trace_number_within_the_ensemble'),
                    ('offset', 'distance_from_center_of_the_source_point_to_the_center_of_the_receiver_group'),
                    ('source_x', 'source_coordinate_x'),
                    ('source_y', 'source_coordinate_y'),
                    ('group_x', 'group_coordinate_x'),
                    ('group_y', 'group_coordinate_y'),
                    ('num_samples', 'number_of_samples_in_this_trace'),
                    ('sample_interval', 'sample_interval_in_ms_for_this_trace'),
                )

                trace_headers = []
                max_traces = min(50, len(segy.traces))
                for i in range(max_traces):
                    th = segy.traces[i].header
                    row = {'trace': i}
                    for key, attr in header_fields:
                        row[key] = getattr(th, attr, None)
                    trace_headers.append(row)

                return jsonify({
                    'type': 'segy',
                    'binary_header': bfh,
                    'trace_headers': trace_headers,
                    'total_traces': len(segy.traces)
                })
            except Exception as e:
                return jsonify({'error': f'SEGY header read error: {e}'}), 500

        return jsonify({'error': 'Unsupported'}), 400
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/file/<filename>/waveform')
def file_waveform(filename):
    """Return (down-sampled) waveform samples of one or more H5 channels.

    Query parameters:
        points: maximum number of points per channel (default 5000).
        channels: comma-separated dataset paths; when empty, ``channel`` is
            used.
        channel: single dataset path (default ``calibrated_data/channel_1``).
        start: window start in seconds (default: beginning of the data).
        duration: window length in seconds, or ``all`` (default: to the end).
            An unparsable value also means "to the end".

    The sample rate is looked up in the root attribute ``sample_rate``, then
    ``acquisition/sample_rate``, then ``metadata/sample_rate_hz`` (the
    attribute the info and extract endpoints use), and defaults to 500.

    When the window holds more than ``points`` samples, every ``step``-th
    sample is returned and the statistics describe those returned samples
    only.

    Responses: 404 for a missing file or channel, 400 for non-H5 files,
    invalid parameters or an empty window, 500 on unexpected errors.
    """
    try:
        fp = os.path.join(DATA_DIR, filename)
        if not os.path.exists(fp):
            return jsonify({'error': 'Not found'}), 404

        try:
            max_pts = int(request.args.get('points', 5000))
        except ValueError:
            return jsonify({'error': 'points must be an integer'}), 400
        # max_pts is a divisor below; 0 used to raise ZeroDivisionError.
        if max_pts < 1:
            return jsonify({'error': 'points must be >= 1'}), 400
        channels_param = request.args.get('channels', '')
        channel = request.args.get('channel', 'calibrated_data/channel_1')
        start_sec = request.args.get('start', None)
        duration_sec = request.args.get('duration', None)

        if not filename.endswith('.h5'):
            return jsonify({'error': 'Waveform only for H5'}), 400

        try:
            start_value = float(start_sec) if start_sec is not None else 0.0
        except ValueError:
            return jsonify({'error': 'start must be a number'}), 400
        if not np.isfinite(start_value):
            return jsonify({'error': 'start must be a number'}), 400

        with h5py.File(fp, 'r') as f:
            # Determine sample rate from file metadata
            sample_rate = 500
            if 'sample_rate' in f.attrs:
                sample_rate = float(f.attrs['sample_rate'])
            elif 'acquisition' in f and 'sample_rate' in f['acquisition'].attrs:
                sample_rate = float(f['acquisition'].attrs['sample_rate'])
            elif 'metadata' in f and 'sample_rate_hz' in f['metadata'].attrs:
                sample_rate = float(f['metadata'].attrs['sample_rate_hz'])

            # Multi-channel mode
            if channels_param:
                ch_list = [c.strip() for c in channels_param.split(',') if c.strip()]
            else:
                ch_list = [channel]

            # Validate all channels exist
            for ch in ch_list:
                if ch not in f:
                    return jsonify({'error': f'Channel not found: {ch}'}), 404

            total_samples = f[ch_list[0]].shape[0]

            # Compute sample window from start/duration
            s0 = min(total_samples, max(0, int(start_value * sample_rate)))
            s1 = total_samples
            if duration_sec is not None and duration_sec != 'all':
                try:
                    s1 = min(total_samples, s0 + int(float(duration_sec) * sample_rate))
                except (ValueError, OverflowError):
                    s1 = total_samples

            n = s1 - s0
            if n <= 0:
                # Previously this surfaced as a numpy "zero-size array" 500.
                return jsonify({'error': 'Requested window contains no samples'}), 400

            result = {
                'name': filename,
                'sample_rate': int(sample_rate),
                'total_samples': total_samples,
                'channels': {}
            }

            for ch in ch_list:
                dataset = f[ch]
                if n > max_pts:
                    # One strided read instead of one HDF5 read per point.
                    step = n // max_pts
                    stats_src = np.asarray(dataset[s0:s1:step][:max_pts], dtype=np.float64)
                    values = stats_src.tolist()
                    indices = range(s0, s1, step)[:max_pts]
                    times = [i / sample_rate for i in indices[:len(values)]]
                else:
                    stats_src = dataset[s0:s1]
                    values = stats_src.tolist()
                    times = [(s0 + i) / sample_rate for i in range(n)]
                result['channels'][ch] = {
                    'channel': ch,
                    'displayed_points': len(values),
                    'min': float(np.min(stats_src)),
                    'max': float(np.max(stats_src)),
                    'mean': float(np.mean(stats_src)),
                    'std': float(np.std(stats_src)),
                    'x': times,
                    'y': values
                }

            # Backward compat: if single channel, also put x/y at top level
            if len(ch_list) == 1:
                ch_data = result['channels'][ch_list[0]]
                result['channel'] = ch_list[0]
                for key in ('displayed_points', 'min', 'max', 'mean', 'std', 'x', 'y'):
                    result[key] = ch_data[key]

            return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
# ========== GEOSUP ENDPOINTS ==========
|
|
|
|
# Coordinate transformer from EPSG:32631 (UTM zone 31N) to EPSG:4326 (WGS84).
# always_xy=True fixes the axis order to (easting, northing) -> (lon, lat).
utm_to_wgs84 = Transformer.from_crs(
    "EPSG:32631",
    "EPSG:4326",
    always_xy=True,
)

# Geosup files are looked up directly in the Flask static folder; joining
# with '' leaves a trailing path separator on the directory.
GEOSUP_DIR = os.path.join(app.static_folder, '')  # Static folder
|
|
|
|
def parse_sps_line(line):
    """Parse one fixed-width SPS S01/R01 record.

    Fields are taken from fixed character positions: record type (0), line
    (1-10), point (11-20), index (21-22), the fused easting/northing text
    (46-65) and depth (66-71).

    Returns:
        A dict with type, line, point, index, easting, northing and depth, or
        None when the line is shorter than 65 characters or cannot be parsed.
    """
    if len(line) < 65:
        return None
    try:
        # Easting and northing are written back to back, e.g.
        # "550000.04800000.0" -> 550000.0 and 4800000.0.
        coords = line[46:66].strip()
        fused = re.match(r'(\d{5,6}\.\d{1,2})(\d{7}\.\d{1,2})', coords)
        if fused:
            easting = float(fused.group(1))
            northing = float(fused.group(2))
        else:
            # Fallback: split at the first '.', give the easting two decimals
            # and treat the remainder as the northing.
            whole, dot, tail = coords.partition('.')
            if not dot:
                return None
            if len(tail) <= 9:
                return None
            easting = float(whole + '.' + tail[:2])
            northing = float(tail[2:])

        depth_text = line[66:72].strip()
        depth = float(depth_text) if depth_text else 0

        return {
            'type': line[0].strip(),  # S or R
            'line': line[1:11].strip(),
            'point': line[11:21].strip(),
            'index': line[21:23].strip(),
            'easting': easting,
            'northing': northing,
            'depth': depth
        }
    except Exception:
        return None
|
|
|
|
def parse_deployment_csv(filepath):
    """Parse a deployment CSV file (SETE-0965P1002.csv format).

    Every line is split on commas. Lines with fewer than 8 fields, or whose
    numeric fields cannot be converted, are skipped. Lines with at least 16
    fields additionally provide year, day, hour, minute and second (fields
    10-14). If the file cannot be read, the records collected so far are
    returned (an empty list for a missing file).
    """
    records = []
    try:
        with open(filepath, 'r') as handle:
            for raw_line in handle:
                fields = raw_line.strip().split(',')
                if len(fields) < 8:
                    continue
                try:
                    entry = {
                        'line': fields[0],
                        'point': fields[1],
                        'index': fields[2],
                        'name': fields[3],
                        'easting': float(fields[5]),
                        'northing': float(fields[6]),
                        'depth': float(fields[7]),
                    }
                    if len(fields) >= 16:
                        stamp_keys = ('year', 'day', 'hour', 'minute', 'second')
                        for key, text in zip(stamp_keys, fields[10:15]):
                            entry[key] = int(text)
                except (ValueError, IndexError):
                    continue
                records.append(entry)
    except Exception:
        pass
    return records
|
|
|
|
@app.route('/api/geosup/positions')
def geosup_positions():
    """Return GeoJSON with source, receiver, and deployment positions.

    Reads, when present in GEOSUP_DIR: ``SeteSxPreplots.s01`` (records of type
    'S'), ``SeteRxPreplots.r01`` (records of type 'R') and
    ``SETE-0965P1002.csv`` (deployments). Easting/northing are converted with
    ``utm_to_wgs84`` and emitted as [lon, lat] points. The response also
    carries per-category counts. Any error yields a 500 response.
    """
    try:
        features = []

        def point_feature(easting, northing, properties):
            lon, lat = utm_to_wgs84.transform(easting, northing)
            return {
                'type': 'Feature',
                'geometry': {'type': 'Point', 'coordinates': [lon, lat]},
                'properties': properties
            }

        def add_sps_file(file_name, record_type, category):
            sps_path = os.path.join(GEOSUP_DIR, file_name)
            if not os.path.exists(sps_path):
                return
            with open(sps_path, 'r') as handle:
                for raw_line in handle:
                    record = parse_sps_line(raw_line)
                    if not record or record['type'] != record_type:
                        continue
                    features.append(point_feature(record['easting'], record['northing'], {
                        'category': category,
                        'line': record['line'],
                        'point': record['point'],
                        'easting': record['easting'],
                        'northing': record['northing'],
                        'depth': record['depth']
                    }))

        # Parse sources (S01)
        add_sps_file('SeteSxPreplots.s01', 'S', 'source')

        # Parse receivers (R01)
        add_sps_file('SeteRxPreplots.r01', 'R', 'receiver')

        # Parse deployments (CSV)
        csv_path = os.path.join(GEOSUP_DIR, 'SETE-0965P1002.csv')
        if os.path.exists(csv_path):
            for d in parse_deployment_csv(csv_path):
                props = {
                    'category': 'deployment',
                    'line': d['line'],
                    'point': d['point'],
                    'name': d['name'],
                    'easting': d['easting'],
                    'northing': d['northing'],
                    'depth': d['depth']
                }
                if 'year' in d:
                    props['timestamp'] = f"{d['year']}-{d['day']:03d} {d['hour']:02d}:{d['minute']:02d}:{d['second']:02d}"
                features.append(point_feature(d['easting'], d['northing'], props))

        tally = {'source': 0, 'receiver': 0, 'deployment': 0}
        for feature in features:
            tally[feature['properties']['category']] += 1

        return jsonify({
            'type': 'FeatureCollection',
            'features': features,
            'counts': {
                'sources': tally['source'],
                'receivers': tally['receiver'],
                'deployments': tally['deployment'],
                'total': len(features)
            }
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/geosup/documents')
def geosup_documents():
    """List the PDF / Word / Excel documents found in GEOSUP_DIR.

    Each entry has name, size, human-readable size, type label, icon, the
    ``is_gundalf`` / ``is_params`` flags and modification time. Entries are
    sorted with GUNDALF documents first, then parameter documents, then by
    name. Any error yields a 500 response.
    """
    type_labels = {'.pdf': 'PDF', '.docx': 'Word', '.doc': 'Word', '.xlsx': 'Excel', '.xls': 'Excel'}
    type_icons = {'.pdf': '📕', '.docx': '📘', '.doc': '📘', '.xlsx': '📗', '.xls': '📗'}
    try:
        documents = []
        for name in os.listdir(GEOSUP_DIR):
            if not name.lower().endswith(('.pdf', '.docx', '.xlsx', '.doc', '.xls')):
                continue
            file_stat = os.stat(os.path.join(GEOSUP_DIR, name))
            ext = os.path.splitext(name)[1].lower()
            size = file_stat.st_size
            if size < 1048576:
                size_human = f"{size / 1024:.1f} KB"
            else:
                size_human = f"{size / 1048576:.1f} MB"

            documents.append({
                'name': name,
                'size': size,
                'size_human': size_human,
                'type': type_labels.get(ext, 'Document'),
                'icon': type_icons.get(ext, '📄'),
                'is_gundalf': 'GUNDALF' in name.upper(),
                'is_params': 'Acquisition_Parameters' in name or 'SpiceRack' in name,
                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat()
            })

        # False sorts before True, hence the negated flags.
        documents.sort(key=lambda doc: (not doc['is_gundalf'], not doc['is_params'], doc['name']))
        return jsonify({'documents': documents, 'count': len(documents)})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/geosup/documents/<path:filename>')
def geosup_download(filename):
    """Download a geosup document.

    Only the base name of *filename* is used, so the request cannot address
    anything outside GEOSUP_DIR. The file is sent as an attachment with a MIME
    type guessed from its name.

    Responses: 404 when the name is empty or does not refer to a regular file,
    500 on unexpected errors.
    """
    try:
        safe_name = os.path.basename(filename)
        filepath = os.path.join(GEOSUP_DIR, safe_name)
        # isfile (not exists): a path ending in '/' has an empty base name and
        # would otherwise resolve to the directory itself, making send_file
        # fail with a 500.
        if not safe_name or not os.path.isfile(filepath):
            return jsonify({'error': 'File not found'}), 404
        mime_type, _ = mimetypes.guess_type(filepath)
        return send_file(filepath, mimetype=mime_type or 'application/octet-stream', as_attachment=True, download_name=safe_name)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/geosup/acquisition-params')
def geosup_acquisition_params():
    """Extract key parameters from the acquisition spreadsheet.

    For every sheet of
    ``SBGS_Sete_SpiceRack_Acquisition_Parameters_Spreadsheet.xlsx`` the first
    30 rows are scanned; up to 20 rows that contain at least one value are
    returned, each cut to its first 10 cells and rendered as strings.

    Responses: 404 when the spreadsheet is missing, 500 on read errors.
    """
    try:
        xlsx_path = os.path.join(GEOSUP_DIR, 'SBGS_Sete_SpiceRack_Acquisition_Parameters_Spreadsheet.xlsx')
        if not os.path.exists(xlsx_path):
            return jsonify({'error': 'Acquisition parameters file not found'}), 404
        from openpyxl import load_workbook
        wb = load_workbook(xlsx_path, data_only=True)
        params = {'sheets': [], 'summary': {}}
        try:
            for sheet_name in wb.sheetnames:
                ws = wb[sheet_name]
                sheet_data = {'name': sheet_name, 'rows': []}
                row_count = 0
                for row in ws.iter_rows(max_row=30, values_only=True):
                    if not any(cell is not None for cell in row):
                        continue
                    sheet_data['rows'].append([str(cell) if cell is not None else '' for cell in row[:10]])
                    row_count += 1
                    if row_count >= 20:
                        break
                params['sheets'].append(sheet_data)
        finally:
            # Release the workbook even when a sheet fails to iterate.
            wb.close()
        return jsonify(params)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/pipeline-status')
def pipeline_status():
    """Return pipeline statistics and disk usage.

    Counts ``.raw`` / ``.manta`` files in ``/mnt/usb/Recordings`` and
    ``.segy`` / ``.sgy`` / ``.h5`` files in ``/mnt/usb/H5-Output``, reports the
    disk usage of the output directory, and passes through the content of
    ``/tmp/segy2h5_progress.json`` when it can be read.

    When the output directory does not exist, the counts are 0 and every
    ``disk`` value is null instead of the request failing.
    """
    try:
        raw_dir = '/mnt/usb/Recordings'
        output_dir = '/mnt/usb/H5-Output'

        # Count files
        raw_count = 0
        if os.path.exists(raw_dir):
            raw_count = len([n for n in os.listdir(raw_dir) if n.lower().endswith(('.raw', '.manta'))])

        segy_count = 0
        h5_count = 0
        disk = {'total_gb': None, 'used_gb': None, 'free_gb': None, 'percent': None}
        if os.path.exists(output_dir):
            files = os.listdir(output_dir)
            segy_count = len([n for n in files if n.lower().endswith(('.segy', '.sgy'))])
            h5_count = len([n for n in files if n.lower().endswith('.h5')])

            # Disk usage: only meaningful (and only callable without raising
            # FileNotFoundError) when the directory exists.
            total, used, free = shutil.disk_usage(output_dir)
            disk = {
                'total_gb': round(total / (1024**3), 2),
                'used_gb': round(used / (1024**3), 2),
                'free_gb': round(free / (1024**3), 2),
                # Guard against a file system reporting a total size of 0.
                'percent': round((used / total) * 100, 1) if total else 0.0
            }

        # Conversion progress (optional file; unreadable or invalid -> None)
        progress = None
        try:
            with open('/tmp/segy2h5_progress.json', 'r') as f:
                progress = json.load(f)
        except (OSError, ValueError):
            progress = None

        return jsonify({
            'counts': {
                'raw': raw_count,
                'segy': segy_count,
                'h5': h5_count,
                'total_expected': 345  # Hardcoded target from context
            },
            'disk': disk,
            'progress': progress
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
# ========== GATHER ENDPOINT ==========
|
|
|
|
|
|
@app.route('/api/gather_info', methods=['POST'])
def gather_info():
    """Return time-window info with overlap clustering for a set of files.

    Request JSON: ``{"files": "<comma-separated file names>"}``. A file is
    considered only if its name ends in ``.h5``, it exists under
    ``DATA_DIR``, its name contains ``_`` followed by ten digits (read as
    the start epoch, non-zero) and it holds ``calibrated_data/channel_1``.
    The duration is the channel length divided by the ``sample_rate_hz``
    attribute of the ``metadata`` group (500 when absent).

    Windows are sorted by start and greedily added to the first existing
    group whose overall span they intersect; groups are returned largest
    first.

    Returns:
        JSON with ``total_files``, ``groups``, ``best_group`` and
        ``num_groups``; a 400 JSON error when no file qualifies; a 500 JSON
        error on an unexpected exception.
    """
    try:
        from datetime import datetime, timezone

        # silent=True: a missing or malformed JSON body is treated as an
        # empty request (400 below) instead of raising into the 500 path.
        body = request.get_json(silent=True)
        if not isinstance(body, dict):
            body = {}
        files_param = body.get('files', '')
        filenames = [f.strip() for f in files_param.split(',') if f.strip()]

        # Trailing separator so the prefix test cannot match a sibling
        # directory such as "/data2" for a root of "/data".
        data_root = os.path.join(os.path.abspath(DATA_DIR), '')
        ts_pattern = re.compile(r'_(\d{10})')

        windows = []
        for fn in filenames:
            if not fn.endswith('.h5'):
                continue
            fp = os.path.abspath(os.path.join(data_root, fn))
            # Names come from the client: ignore anything that resolves
            # outside DATA_DIR (e.g. via "..").
            if not fp.startswith(data_root) or not os.path.exists(fp):
                continue
            m = ts_pattern.search(fn)
            ts = int(m.group(1)) if m else 0
            if ts == 0:
                continue
            try:
                with h5py.File(fp, 'r') as hf:
                    ch = 'calibrated_data/channel_1'
                    if ch not in hf:
                        continue
                    sr = 500
                    if 'metadata' in hf:
                        sr = int(hf['metadata'].attrs.get('sample_rate_hz', 500))
                    dur = hf[ch].shape[0] / sr
                    windows.append({
                        'file': fn,
                        'start_ts': ts,
                        'end_ts': ts + int(dur),
                        'duration': round(dur, 1)
                    })
            except Exception:
                # One unreadable file must not fail the whole request.
                traceback.print_exc()
                continue

        if not windows:
            return jsonify({'error': 'No valid files'}), 400

        windows.sort(key=lambda w: w['start_ts'])

        # Cluster into overlapping groups, tracking each group's span so it
        # does not have to be recomputed for every candidate window.
        groups = []
        for w in windows:
            for g in groups:
                if w['start_ts'] < g['end'] and w['end_ts'] > g['start']:
                    g['members'].append(w)
                    g['start'] = min(g['start'], w['start_ts'])
                    g['end'] = max(g['end'], w['end_ts'])
                    break
            else:
                groups.append({'members': [w], 'start': w['start_ts'], 'end': w['end_ts']})

        groups.sort(key=lambda g: len(g['members']), reverse=True)

        group_summaries = []
        for i, g in enumerate(groups):
            g_start = g['start']
            union_duration = g['end'] - g_start
            start_utc = datetime.fromtimestamp(g_start, tz=timezone.utc)
            group_summaries.append({
                'index': i,
                'count': len(g['members']),
                'files': [x['file'] for x in g['members']],
                'start_ts': g_start,
                'end_ts': g['end'],
                # The overlap_* keys carry the same values as the group's
                # union span (start and total length).
                'overlap_start': g_start,
                'overlap_duration': union_duration,
                'union_duration': union_duration,
                'overlap_start_utc': start_utc.strftime('%Y-%m-%d %H:%M:%S'),
                'date': start_utc.strftime('%Y-%m-%d')
            })

        best = group_summaries[0] if group_summaries else None

        return jsonify({
            'total_files': len(windows),
            'groups': group_summaries,
            'best_group': best,
            'num_groups': len(groups)
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/gather', methods=['GET', 'POST'])
def gather():
    """Return multi-file trace data for gather view.

    Parameters are read from the JSON body (POST with a JSON content type)
    or from the query string otherwise:

    * ``files``    - comma-separated file names under ``DATA_DIR``
    * ``channel``  - HDF5 dataset path (default ``calibrated_data/channel_1``)
    * ``start``    - window start in seconds (default 0)
    * ``duration`` - window length in seconds (default 60)
    * ``points``   - maximum samples returned per trace (default 2000)
    * ``sync``     - ``'true'`` to align files on the epoch parsed from
      their names (``_`` followed by ten digits); default ``'true'``
    * ``abs_time`` - optional absolute window start (epoch seconds), only
      consulted when ``sync`` is on; otherwise the window starts ``start``
      seconds after the earliest file epoch

    Traces longer than ``points`` are decimated by taking every
    ``n // points``-th sample. A file whose data does not intersect the
    window contributes a trace with ``'empty': True``.

    Returns:
        JSON with ``traces``, ``sample_rate``, ``time_axis``,
        ``file_duration``, ``sync`` and ``decimation`` (plus
        ``epoch_start``, ``window_start`` and, when an epoch is known,
        ``date_start`` in sync mode); a 400 JSON error for missing files or
        a non-positive ``points``; a 500 JSON error on an unexpected
        exception.
    """
    try:
        from datetime import datetime, timezone

        if request.method == 'POST' and request.is_json:
            # silent=True: a malformed body becomes an empty request.
            params = request.get_json(silent=True)
            if not isinstance(params, dict):
                params = {}
        else:
            params = request.args

        files_param = params.get('files', '')
        channel = params.get('channel', 'calibrated_data/channel_1')
        start = float(params.get('start', 0))
        duration = float(params.get('duration', 60))
        points = int(params.get('points', 2000))
        sync = str(params.get('sync', 'true')).lower() == 'true'

        filenames = [f.strip() for f in files_param.split(',') if f.strip()]
        if not filenames:
            return jsonify({'error': 'No files specified'}), 400
        # points is used as a divisor and as a slice bound below.
        if points < 1:
            return jsonify({'error': 'points must be a positive integer'}), 400

        ts_pattern = re.compile(r'_(\d{10})')
        file_timestamps = {}
        for fn in filenames:
            m = ts_pattern.search(fn)
            file_timestamps[fn] = int(m.group(1)) if m else 0

        # Earliest known file epoch; 0 when no name carries a timestamp.
        valid_ts = [t for t in file_timestamps.values() if t > 0]
        min_ts = min(valid_ts) if valid_ts else 0

        abs_start = None
        if sync:
            abs_time_param = params.get('abs_time', None)
            if abs_time_param is not None:
                abs_start = float(abs_time_param)
            else:
                abs_start = min_ts + start

        # Trailing separator so "/data2" is not accepted for root "/data".
        data_root = os.path.join(os.path.abspath(DATA_DIR), '')

        traces = []
        time_axis = None
        sample_rate = None
        max_dur = 0

        for fn in filenames:
            if not fn.endswith('.h5'):
                continue
            fp = os.path.abspath(os.path.join(data_root, fn))
            # Names come from the client: ignore anything that resolves
            # outside DATA_DIR (e.g. via "..").
            if not fp.startswith(data_root) or not os.path.exists(fp):
                continue

            board_id = extract_board_id(fn)
            file_ts = file_timestamps[fn]

            try:
                with h5py.File(fp, 'r') as f:
                    if channel not in f:
                        continue

                    sr = 500
                    if 'metadata' in f:
                        sr = int(f['metadata'].attrs.get('sample_rate_hz', 500))

                    # The first readable file decides the reported rate.
                    if sample_rate is None:
                        sample_rate = sr

                    dataset = f[channel]
                    total_samples = dataset.shape[0]
                    file_duration = total_samples / sr

                    if sync and file_ts > 0:
                        # Window start expressed in this file's own time.
                        local_start = abs_start - file_ts
                        max_dur = max(max_dur, file_duration + (file_ts - min_ts))
                    else:
                        local_start = start
                        max_dur = max(max_dur, file_duration)
                    local_end = local_start + duration

                    start_sample = max(0, int(local_start * sr))
                    end_sample = min(int(local_end * sr), total_samples)

                    if end_sample <= start_sample:
                        traces.append({
                            'filename': fn,
                            'board_id': board_id,
                            'y': [],
                            'min': 0, 'max': 0,
                            'start_ts': file_ts,
                            'empty': True
                        })
                        continue

                    data = dataset[start_sample:end_sample]
                    n = len(data)

                    if n > points:
                        step = n // points
                        indices = range(0, n, step)[:points]
                        decimated = data[::step][:points]
                    else:
                        indices = range(n)
                        decimated = data
                    values = [float(v) for v in decimated]

                    traces.append({
                        'filename': fn,
                        'board_id': board_id,
                        'y': values,
                        # Extremes of the full-resolution window, not of
                        # the decimated samples.
                        'min': float(np.min(data)),
                        'max': float(np.max(data)),
                        'start_ts': file_ts
                    })

                    # The time axis is taken from the first non-empty
                    # trace; in sync mode it is relative to that trace's
                    # first returned sample.
                    if time_axis is None:
                        if sync:
                            time_axis = [(i / sr) for i in indices]
                        else:
                            time_axis = [local_start + i / sr for i in indices]

            except Exception:
                # One unreadable file must not fail the whole request.
                traceback.print_exc()
                continue

        actual_sr = sample_rate or 500
        requested_samples = int(duration * actual_sr)
        resp = {
            'traces': traces,
            'sample_rate': actual_sr,
            'time_axis': time_axis or [],
            'file_duration': round(max_dur, 1),
            'sync': sync,
            'decimation': {
                'display_points': points,
                'original_samples': requested_samples,
                'ratio': max(1, requested_samples // max(1, points)),
                'is_decimated': requested_samples > points
            }
        }
        if sync:
            resp['epoch_start'] = min_ts
            resp['window_start'] = min_ts + start
            if min_ts:
                resp['date_start'] = datetime.fromtimestamp(
                    min_ts, tz=timezone.utc
                ).strftime('%Y-%m-%d %H:%M:%S UTC')

        return jsonify(resp)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
# ========== BACKGROUND EXTRACT LOGIC ==========
|
|
|
|
def background_extract(task_id, filenames, start, duration, channels_opt, include_nodes, include_shots, include_metadata):
    """Worker function for async extraction.

    Builds ``TEMP_DIR/<task_id>.zip`` with one full-resolution CSV per
    input file, plus optional ``shots.csv``, ``node_positions.csv`` and
    ``metadata.json``, and reports progress through ``TASKS[task_id]``.

    Args:
        task_id: Key of an existing entry in ``TASKS``.
        filenames: File names under ``DATA_DIR``; only ``.h5`` files that
            exist are processed.
        start: Window start in seconds after the earliest epoch parsed
            from the file names (``_`` followed by ten digits).
        duration: Window length in seconds.
        channels_opt: Channel-set key (``all8``, ``all_cal``, ``all_raw``,
            ``cal_1``..``cal_4``, ``raw_1``..``raw_4``); unknown keys fall
            back to ``all8``.
        include_nodes: Write ``node_positions.csv`` for exported boards.
        include_shots: Load ``shot_times.json`` and write ``shots.csv``
            for shots whose time of day falls inside the window.
        include_metadata: Write ``metadata.json``.

    On success the task gets ``status='completed'``, ``file_path`` and
    ``file_name``; on failure ``status='error'`` and ``error``, and the
    partially written ZIP is removed.
    """
    from datetime import timezone

    task = TASKS[task_id]
    temp_zip_path = os.path.join(TEMP_DIR, f"{task_id}.zip")
    try:
        task['status'] = 'processing'
        task['total_files'] = len(filenames)

        # Load resources
        darf = load_darf_nodes()
        shot_times = []
        if include_shots:
            try:
                shot_path = os.path.join(app.static_folder, 'shot_times.json')
                if os.path.exists(shot_path):
                    with open(shot_path) as sf:
                        shot_times = json.load(sf)
            except Exception:
                # Shots are optional: log and export without them.
                traceback.print_exc()

        ts_pattern = re.compile(r'_(\d{10})')
        file_timestamps = {}
        for fn in filenames:
            m = ts_pattern.search(fn)
            file_timestamps[fn] = int(m.group(1)) if m else 0

        valid_ts = [t for t in file_timestamps.values() if t > 0]
        min_ts = min(valid_ts) if valid_ts else 0
        abs_start = min_ts + start
        abs_end = abs_start + duration

        export_name = 'gather_extract_s%d_d%d.zip' % (int(start), int(duration))
        # Trailing separator so "/data2" is not accepted for root "/data".
        data_root = os.path.join(os.path.abspath(DATA_DIR), '')

        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            metadata = {
                'export_time': datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z',
                'start_offset_s': start,
                'duration_s': duration,
                'abs_start_epoch': abs_start,
                'abs_end_epoch': abs_end,
                'sample_rate_hz': 500,
                'files': [],
                'decimation': 'none - full resolution export'
            }

            cal = ['calibrated_data/channel_%d' % k for k in range(1, 5)]
            raw = ['raw_data/channel_%d' % k for k in range(1, 5)]
            ch_map = {'all8': cal + raw, 'all_cal': cal, 'all_raw': raw}
            for k in range(4):
                ch_map['cal_%d' % (k + 1)] = [cal[k]]
                ch_map['raw_%d' % (k + 1)] = [raw[k]]
            all_channels = ch_map.get(channels_opt, ch_map['all8'])

            for processed_count, fn in enumerate(filenames, 1):
                task['current_file'] = fn
                # The last 5% is reserved for the trailing ZIP members.
                task['progress'] = int((processed_count / len(filenames)) * 95)
                task['processed_files'] = processed_count

                if not fn.endswith('.h5'):
                    continue
                fp = os.path.abspath(os.path.join(data_root, fn))
                # Names come from the client: ignore anything that
                # resolves outside DATA_DIR (e.g. via "..").
                if not fp.startswith(data_root) or not os.path.exists(fp):
                    continue

                board_id = extract_board_id(fn)
                node_info = darf.get(str(board_id), {}) if board_id else {}
                file_ts = file_timestamps.get(fn, 0)

                if file_ts > 0:
                    local_start = abs_start - file_ts
                    local_end = abs_end - file_ts
                else:
                    # No epoch in the name: use the offset as file-local.
                    local_start = start
                    local_end = start + duration

                try:
                    with h5py.File(fp, 'r') as hf:
                        sr = 500
                        if 'metadata' in hf:
                            sr = int(hf['metadata'].attrs.get('sample_rate_hz', 500))

                        available_channels = [ch for ch in all_channels if ch in hf]
                        if not available_channels:
                            continue

                        # Length comes from the channels actually being
                        # exported, so a file lacking
                        # calibrated_data/channel_1 is no longer dropped
                        # and a shorter channel cannot cause an index
                        # error while rows are written.
                        total_samples = min(hf[ch].shape[0] for ch in available_channels)
                        s0 = max(0, int(local_start * sr))
                        s1 = min(total_samples, int(local_end * sr))
                        if s1 <= s0:
                            continue
                        n_samples = s1 - s0

                        columns = [hf[ch][s0:s1] for ch in available_channels]

                        csv_name = fn.replace('.h5', '') + '_extract.csv'
                        csv_buf = io.StringIO()
                        writer = csv_mod.writer(csv_buf)

                        header = ['sample_index', 'time_s', 'time_utc']
                        for ch in available_channels:
                            header.append(
                                ch.replace('calibrated_data/channel_', 'cal_ch')
                                  .replace('raw_data/channel_', 'raw_ch')
                            )
                        writer.writerow(header)

                        last_second = None
                        second_text = ''
                        for offset, sample_values in enumerate(zip(*columns)):
                            sample_idx = s0 + offset
                            time_s = round(sample_idx / sr, 4)
                            if file_ts > 0:
                                # Integer arithmetic: adding a fractional
                                # offset to a ~1e9 epoch as a float and
                                # truncating could print the millisecond
                                # one too low.
                                whole, rem = divmod(sample_idx, sr)
                                if whole != last_second:
                                    second_text = datetime.fromtimestamp(
                                        file_ts + whole, tz=timezone.utc
                                    ).strftime('%Y-%m-%dT%H:%M:%S.')
                                    last_second = whole
                                time_utc = second_text + '%03d' % (rem * 1000 // sr)
                            else:
                                time_utc = ''
                            writer.writerow([sample_idx, time_s, time_utc, *sample_values])

                        zf.writestr(csv_name, csv_buf.getvalue())

                        metadata['files'].append({
                            'filename': fn,
                            'board_id': board_id,
                            'line': node_info.get('line'),
                            'point': node_info.get('point'),
                            'start_epoch': file_ts,
                            'sample_range': [s0, s1],
                            'n_samples': n_samples,
                            'duration_s': round(n_samples / sr, 2),
                            'sample_rate_hz': sr,
                            'channels': available_channels
                        })

                except Exception:
                    # One unreadable file must not abort the whole export.
                    traceback.print_exc()
                    continue

            # Matching shots: shot times are seconds since midnight, so
            # compare against the window's time of day and allow for a
            # window that crosses midnight.
            if shot_times:
                tod_start = abs_start % 86400
                tod_end = abs_end % 86400
                same_day = tod_start <= tod_end
                matching_shots = []
                for shot in shot_times:
                    shot_tod = shot['time_s']
                    if same_day:
                        hit = shot_tod >= tod_start and shot_tod <= tod_end
                    else:
                        hit = shot_tod >= tod_start or shot_tod <= tod_end
                    if hit:
                        matching_shots.append(shot)
                if matching_shots:
                    shot_buf = io.StringIO()
                    shot_writer = csv_mod.writer(shot_buf)
                    shot_writer.writerow(['line', 'time_of_day_s', 'time_str', 'easting', 'northing'])
                    for s in matching_shots:
                        shot_writer.writerow([
                            s.get('line', ''), s.get('time_s', ''), s.get('time_str', ''),
                            s.get('easting', ''), s.get('northing', '')
                        ])
                    zf.writestr('shots.csv', shot_buf.getvalue())
                    metadata['matching_shots'] = len(matching_shots)

            # Node positions, restricted to boards that were exported.
            if include_nodes and darf:
                node_buf = io.StringIO()
                nw = csv_mod.writer(node_buf)
                nw.writerow(['board_id', 'line', 'point', 'preplot_easting', 'preplot_northing', 'preplot_lat', 'preplot_lon',
                             'aslaid_easting', 'aslaid_northing', 'aslaid_lat', 'aslaid_lon', 'depth'])
                exported_bids = {
                    str(fm['board_id']) for fm in metadata['files'] if fm.get('board_id')
                }
                ordered = sorted(darf.items(), key=lambda x: (x[1].get('line', 0), x[1].get('point', 0)))
                for bid, node in ordered:
                    if bid not in exported_bids:
                        continue
                    nw.writerow([
                        bid, node.get('line', ''), node.get('point', ''),
                        node.get('preplot_e', ''), node.get('preplot_n', ''),
                        node.get('preplot_lat', ''), node.get('preplot_lon', ''),
                        node.get('aslaid_e', ''), node.get('aslaid_n', ''),
                        node.get('aslaid_lat', ''), node.get('aslaid_lon', ''),
                        node.get('depth', '')
                    ])
                zf.writestr('node_positions.csv', node_buf.getvalue())

            if include_metadata:
                zf.writestr('metadata.json', json.dumps(metadata, indent=2, default=str))

        # Publish the result location before flipping the status, so a
        # client that sees 'completed' can always download.
        task['file_path'] = temp_zip_path
        task['file_name'] = export_name
        task['progress'] = 100
        task['status'] = 'completed'

    except Exception as e:
        task['status'] = 'error'
        task['error'] = str(e)
        traceback.print_exc()
        # A failed task has no file_path, so nothing else would ever
        # remove the partial archive.
        try:
            os.remove(temp_zip_path)
        except OSError:
            pass
|
|
|
|
@app.route('/api/gather_extract', methods=['POST'])
def gather_extract():
    """Start async extraction job.

    Request JSON keys: ``files`` (comma-separated names), ``start`` and
    ``duration`` in seconds (defaults 0 and 60), ``channels`` (default
    ``'all8'``) and the booleans ``include_nodes``, ``include_shots`` and
    ``include_metadata`` (default ``True``).

    Registers a new entry in ``TASKS`` and runs ``background_extract`` in
    a daemon thread.

    Returns:
        JSON ``{'task_id', 'status': 'pending'}``; a 400 JSON error for
        missing files or non-numeric ``start``/``duration``; a 500 JSON
        error on an unexpected exception.
    """
    try:
        cleanup_old_tasks()

        # silent=True: a missing or malformed JSON body is treated as an
        # empty request (400 below) instead of raising into the 500 path.
        body = request.get_json(silent=True)
        if not isinstance(body, dict):
            body = {}

        files_param = body.get('files', '')
        try:
            start = float(body.get('start', 0))
            duration = float(body.get('duration', 60))
        except (TypeError, ValueError):
            return jsonify({'error': 'start and duration must be numbers'}), 400
        filenames = [f.strip() for f in files_param.split(',') if f.strip()]

        if not filenames:
            return jsonify({'error': 'No files specified'}), 400

        task_id = str(uuid.uuid4())
        TASKS[task_id] = {
            'id': task_id,
            'status': 'pending',
            'created_at': time.time(),
            'progress': 0,
            'total_files': len(filenames),
            'processed_files': 0,
            'current_file': ''
        }

        worker = threading.Thread(
            target=background_extract,
            args=(
                task_id, filenames, start, duration,
                body.get('channels', 'all8'),
                body.get('include_nodes', True),
                body.get('include_shots', True),
                body.get('include_metadata', True),
            ),
            daemon=True,
        )
        worker.start()

        return jsonify({'task_id': task_id, 'status': 'pending'})

    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/api/tasks/<task_id>', methods=['GET'])
def get_task_status(task_id):
    """Report the stored state of one background task.

    Returns the task's entry from ``TASKS`` as JSON, or a 404 JSON error
    when there is no (non-empty) entry for ``task_id``.
    """
    task = TASKS.get(task_id)
    if task:
        return jsonify(task)
    return jsonify({'error': 'Task not found'}), 404
|
|
|
|
@app.route('/api/tasks/<task_id>/download', methods=['GET'])
def download_task_result(task_id):
    """Serve the ZIP produced by a finished background task.

    Responds 404 when the task is unknown and 400 when it is not in the
    ``completed`` state or its result file is no longer on disk.
    """
    task = TASKS.get(task_id)
    if not task:
        return jsonify({'error': 'Task not found'}), 404

    result_path = task.get('file_path', '')
    is_ready = task['status'] == 'completed' and os.path.exists(result_path)
    if not is_ready:
        return jsonify({'error': 'Task not ready or file missing'}), 400

    attachment_name = task.get('file_name', 'extract.zip')
    return send_file(
        result_path,
        mimetype='application/zip',
        as_attachment=True,
        download_name=attachment_name
    )
|
|
|
|
@app.route('/api/shots')
def get_shots():
    """List the shots that fall inside a window of one recording.

    Query parameters: ``file`` (name containing ``_<epoch>.h5``),
    ``start`` (seconds into the file, default 0) and ``duration``
    (seconds, default 3600). Shot times in ``static/shot_times.json`` are
    seconds since midnight, so they are matched against every UTC day the
    window touches. Offsets in the result are seconds from the file start.
    """
    import re as _re
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz

    filename = request.args.get('file', '')
    start_sec = float(request.args.get('start', 0))
    duration_sec = float(request.args.get('duration', 3600))

    shot_path = os.path.join(os.path.dirname(__file__), 'static', 'shot_times.json')
    if not os.path.exists(shot_path):
        return jsonify({'error': 'shot_times.json not found'}), 404
    with open(shot_path) as f:
        all_shots = json.load(f)

    stamp = _re.search(r'_(\d{10,})\.h5', filename)
    if not stamp:
        return jsonify({'error': 'Cannot parse timestamp from filename'}), 400

    file_start_dt = _dt.fromtimestamp(int(stamp.group(1)), tz=_tz.utc)
    win_start_dt = file_start_dt + _td(seconds=start_sec)
    win_end_dt = file_start_dt + _td(seconds=start_sec + duration_sec)

    one_day = _td(days=1)
    first_day = win_start_dt.date()
    day_span = (win_end_dt.date() - first_day).days

    results = []
    for day_index in range(day_span + 1):
        day = first_day + day_index * one_day
        midnight = _dt(day.year, day.month, day.day, tzinfo=_tz.utc)
        # Portion of the window lying within this UTC day, as time of day.
        tod_start = (max(win_start_dt, midnight) - midnight).total_seconds()
        tod_end = (min(win_end_dt, midnight + one_day) - midnight).total_seconds()
        for shot in all_shots:
            if not (tod_start <= shot['time_s'] <= tod_end):
                continue
            shot_dt = midnight + _td(seconds=shot['time_s'])
            results.append({
                'offset_s': round((shot_dt - file_start_dt).total_seconds(), 2),
                'time_str': shot['time_str'],
                'line': shot['line'],
                'date': str(day)
            })

    results = sorted(results, key=lambda entry: entry['offset_s'])
    return jsonify({'shots': results, 'count': len(results)})
|
|
|
|
|
|
@app.route('/api/shots_by_time')
def shots_by_time():
    """List the shots that fall inside an absolute time window.

    Query parameters: ``start_unix`` (epoch seconds, default 0) and
    ``duration`` (seconds, default 60). Shot times in
    ``static/shot_times.json`` are seconds since midnight, so they are
    matched against every UTC day the window touches. Offsets in the
    result are seconds from the window start.
    """
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz

    start_unix = float(request.args.get('start_unix', 0))
    duration = float(request.args.get('duration', 60))

    shot_path = os.path.join(os.path.dirname(__file__), 'static', 'shot_times.json')
    if not os.path.exists(shot_path):
        return jsonify({'error': 'shot_times.json not found'}), 404
    with open(shot_path) as f:
        all_shots = json.load(f)

    win_start_dt = _dt.fromtimestamp(start_unix, tz=_tz.utc)
    win_end_dt = _dt.fromtimestamp(start_unix + duration, tz=_tz.utc)

    one_day = _td(days=1)
    first_day = win_start_dt.date()
    day_span = (win_end_dt.date() - first_day).days

    results = []
    for day_index in range(day_span + 1):
        day = first_day + day_index * one_day
        midnight = _dt(day.year, day.month, day.day, tzinfo=_tz.utc)
        # Portion of the window lying within this UTC day, as time of day.
        tod_start = (max(win_start_dt, midnight) - midnight).total_seconds()
        tod_end = (min(win_end_dt, midnight + one_day) - midnight).total_seconds()
        for shot in all_shots:
            if not (tod_start <= shot['time_s'] <= tod_end):
                continue
            shot_dt = midnight + _td(seconds=shot['time_s'])
            results.append({
                'offset_s': round((shot_dt - win_start_dt).total_seconds(), 2),
                'time_str': shot['time_str'],
                'line': shot['line'],
                'date': str(day)
            })

    results = sorted(results, key=lambda entry: entry['offset_s'])
    return jsonify({'shots': results, 'count': len(results)})
|
|
|
|
|
|
@app.route('/api/real_shots')
def real_shots():
    """Serve the contents of ``static/real_shots.json`` as JSON.

    Responds 404 with a JSON error when the file does not exist.
    """
    static_dir = os.path.join(os.path.dirname(__file__), 'static')
    shot_path = os.path.join(static_dir, 'real_shots.json')
    if os.path.exists(shot_path):
        with open(shot_path) as f:
            payload = json.load(f)
        return jsonify(payload)
    return jsonify({'error': 'real_shots.json not found'}), 404
|
|
|
|
|
|
@app.route('/api/coverage')
def get_coverage():
    """Get coverage data for all H5 files.

    For every ``.h5`` file in ``DATA_DIR`` reports its name and size. When
    the name matches ``auto_<day>_<HHMMSS>_b<board>_rsn<n>_seq<n>_<n>`` it
    also reports the board id, the day number, a ``date_str`` built from
    day and time counted from 2020-01-01, and the board's line/point from
    the DARF node file when known. ``duration_sec`` is read from the
    file's ``duration`` or ``record_length`` root attribute and defaults
    to 8000.

    Returns:
        JSON ``{'files': [...], 'count': n}`` sorted by ``date_str``, or a
        500 JSON error on an unexpected exception.
    """
    from datetime import datetime as _dt, timedelta as _td

    name_pattern = re.compile(r'auto_(\d+)_(\d{2})(\d{2})(\d{2})_b(\d+)_rsn(\d+)_seq(\d+)_(\d+)')
    try:
        darf = {}
        if os.path.exists(DARF_FILE):
            with open(DARF_FILE) as _f:
                darf = json.load(_f)

        result = []
        for f in os.listdir(DATA_DIR):
            if not f.endswith('.h5'):
                continue
            fpath = os.path.join(DATA_DIR, f)
            info = {'name': f, 'size': os.path.getsize(fpath)}

            m = name_pattern.match(f)
            if m:
                day, hh, mm, ss, board_id = m.groups()[:5]
                info['board_id'] = int(board_id)
                info['day'] = int(day)
                base = _dt(2020, 1, 1) + _td(days=int(day) - 1, hours=int(hh), minutes=int(mm), seconds=int(ss))
                info['date_str'] = base.isoformat()
                node = darf.get(str(board_id))
                if node is not None:
                    info['line'] = node.get('line')
                    info['point'] = node.get('point')

            info['duration_sec'] = 8000
            try:
                with h5py.File(fpath, 'r') as h5f:
                    attrs = dict(h5f.attrs)
                    info['duration_sec'] = float(attrs.get('duration', attrs.get('record_length', 8000)))
            except Exception:
                # Unreadable file or unusable attribute: keep the default
                # so the file is still listed.
                pass
            result.append(info)

        result.sort(key=lambda x: x.get('date_str', ''))
        return jsonify({'files': result, 'count': len(result)})
    except Exception as e:
        # Log like the other endpoints so failures are visible server-side.
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
|
|
if __name__ == '__main__':
    # Run the built-in server on every interface, port 3001, with the
    # debugger off, when this file is executed directly.
    app.run(debug=False, port=3001, host='0.0.0.0')
|