feat: add MCAP/CSV ingest, NATS publisher with stdout fallback, and CLI
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
77
src/cosma_log_analyzer/bus.py
Normal file
77
src/cosma_log_analyzer/bus.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Event bus publisher: NATS when configured, stdout otherwise."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Protocol
|
||||
|
||||
from .models import Anomaly
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Publisher(Protocol):
    """Structural interface every anomaly sink implements (NATS or stdout)."""

    def publish(self, anomaly: Anomaly) -> None: ...

    def close(self) -> None: ...
|
||||
|
||||
|
||||
class StdoutPublisher:
    """Fallback publisher: emits each anomaly as one JSON line.

    Targets sys.stdout by default; an explicit stream can be injected,
    which keeps the class trivially testable.
    """

    def __init__(self, stream=None) -> None:
        # Resolve the default lazily-ish here so tests can pass StringIO.
        if stream is None:
            stream = sys.stdout
        self._stream = stream

    def publish(self, anomaly: Anomaly) -> None:
        """Serialize *anomaly* and write it as a single JSON Line, flushed."""
        self._stream.write(f"{anomaly.to_json()}\n")
        self._stream.flush()

    def close(self) -> None:
        """Best-effort final flush; never raises (stream may be closed)."""
        try:
            self._stream.flush()
        except Exception:  # close() must stay silent on teardown
            pass
|
||||
|
||||
|
||||
class NatsPublisher:
    """Sync wrapper around nats-py async client. Keeps a dedicated loop."""

    def __init__(self, url: str) -> None:
        # Own a private event loop so synchronous callers can drive the
        # async nats-py client without touching any running loop.
        self._url = url
        self._loop = asyncio.new_event_loop()
        self._nc = None
        # Connect eagerly: a bad URL fails at construction, not first publish.
        self._loop.run_until_complete(self._connect())

    async def _connect(self) -> None:
        import nats  # lazy import

        self._nc = await nats.connect(self._url)

    def publish(self, anomaly: Anomaly) -> None:
        """Publish one anomaly to its per-rule subject (blocking).

        Raises:
            RuntimeError: if called after close() or before connect succeeded.
        """
        if self._nc is None:
            raise RuntimeError("NATS client not connected")
        payload = anomaly.to_json().encode("utf-8")
        # Blocks the caller until the async publish completes on our loop.
        self._loop.run_until_complete(self._nc.publish(anomaly.nats_subject(), payload))

    def close(self) -> None:
        """Drain the client and tear down the private loop. Idempotent."""
        if self._nc is None:
            return
        try:
            # NOTE(review): drain() presumably flushes buffered publishes and
            # closes the connection (nats-py semantics) — confirm against the
            # pinned nats-py version.
            self._loop.run_until_complete(self._nc.drain())
        except Exception as exc:
            logger.warning("NATS drain failed: %s", exc)
        finally:
            # Loop is closed even when drain fails; publisher is then unusable.
            self._loop.close()
            self._nc = None
|
||||
|
||||
|
||||
def make_publisher(nats_url: str | None = None) -> Publisher:
    """Select the publisher backend.

    An explicit *nats_url* argument wins; otherwise the NATS_URL environment
    variable is consulted. An empty or missing URL falls back to the stdout
    JSON Lines publisher.
    """
    if nats_url is not None:
        url = nats_url
    else:
        url = os.environ.get("NATS_URL", "")
    if url:
        logger.info("Connecting NATS publisher to %s", url)
        return NatsPublisher(url)
    logger.info("NATS_URL empty -> using stdout fallback publisher")
    return StdoutPublisher()
|
||||
143
src/cosma_log_analyzer/ingest.py
Normal file
143
src/cosma_log_analyzer/ingest.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""MCAP + CSV readers.
|
||||
|
||||
MCAP decoding stays schema-agnostic: we read each message's bytes and try
|
||||
JSON first (CDR/ROS2 is out of scope for v0 — field extraction relies on
|
||||
known field names once decoded). If the message body is not JSON we skip it.
|
||||
|
||||
Rules operate on pandas DataFrames with per-topic columns:
|
||||
imu -> ts, ax, ay, az, gx, gy, gz
|
||||
usbl -> ts, distance_m, snr_db
|
||||
battery -> ts, voltage_v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import Iterable, Iterator
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ROS topic names this analyzer expects inside incoming MCAP files.
TOPIC_IMU = "/mavros/imu/data"
TOPIC_USBL = "/usbl_reading/usbl_solution"
TOPIC_BATTERY = "/mavros/battery"

# The closed set of topics the ingest pipeline can extract fields from.
KNOWN_TOPICS = (TOPIC_IMU, TOPIC_USBL, TOPIC_BATTERY)
|
||||
|
||||
|
||||
def _decode_payload(data: bytes) -> dict[str, Any] | None:
|
||||
try:
|
||||
return json.loads(data.decode("utf-8"))
|
||||
except (UnicodeDecodeError, json.JSONDecodeError):
|
||||
return None
|
||||
|
||||
|
||||
def iter_mcap_messages(
    path: str | Path,
    topics: Iterable[str] | None = None,
) -> Iterator[tuple[str, float, dict[str, Any]]]:
    """Yield (topic, ts_seconds, decoded_payload) for matching messages."""
    from mcap.reader import make_reader  # lazy: lib mcap

    wanted = set(topics) if topics else None
    topic_filter = list(wanted) if wanted else None
    with open(path, "rb") as fh:
        reader = make_reader(fh)
        for schema, channel, message in reader.iter_messages(topics=topic_filter):
            decoded = _decode_payload(message.data)
            if decoded is None:
                # Non-JSON bodies (e.g. CDR) are out of scope for v0.
                continue
            # MCAP log_time is nanoseconds since epoch; rules want seconds.
            yield channel.topic, message.log_time / 1e9, decoded
|
||||
|
||||
|
||||
def _extract_imu(payload: dict[str, Any]) -> dict[str, float] | None:
|
||||
lin = payload.get("linear_acceleration") or payload.get("accel") or {}
|
||||
ang = payload.get("angular_velocity") or payload.get("gyro") or {}
|
||||
if not lin and not ang:
|
||||
return None
|
||||
return {
|
||||
"ax": float(lin.get("x", 0.0)),
|
||||
"ay": float(lin.get("y", 0.0)),
|
||||
"az": float(lin.get("z", 0.0)),
|
||||
"gx": float(ang.get("x", 0.0)),
|
||||
"gy": float(ang.get("y", 0.0)),
|
||||
"gz": float(ang.get("z", 0.0)),
|
||||
}
|
||||
|
||||
|
||||
def _extract_usbl(payload: dict[str, Any]) -> dict[str, float] | None:
|
||||
dist = payload.get("distance_m", payload.get("range_m"))
|
||||
snr = payload.get("snr_db", payload.get("snr"))
|
||||
if dist is None or snr is None:
|
||||
return None
|
||||
return {"distance_m": float(dist), "snr_db": float(snr)}
|
||||
|
||||
|
||||
def _extract_battery(payload: dict[str, Any]) -> dict[str, float] | None:
|
||||
v = payload.get("voltage_v", payload.get("voltage"))
|
||||
if v is None:
|
||||
return None
|
||||
return {"voltage_v": float(v)}
|
||||
|
||||
|
||||
# Topic -> field-extractor dispatch table used by load_mcap().
_EXTRACTORS = {
    TOPIC_IMU: _extract_imu,
    TOPIC_USBL: _extract_usbl,
    TOPIC_BATTERY: _extract_battery,
}
|
||||
|
||||
|
||||
def load_mcap(path: str | Path) -> dict[str, pd.DataFrame]:
    """Load an MCAP file into per-topic DataFrames.

    Returns a dict keyed by topic. Topics with no usable messages map to
    empty DataFrames; populated frames are sorted by their "ts" column.
    """
    buckets: dict[str, list[dict[str, Any]]] = {topic: [] for topic in KNOWN_TOPICS}
    for topic, ts, payload in iter_mcap_messages(path, KNOWN_TOPICS):
        extract = _EXTRACTORS.get(topic)
        if extract is None:
            continue  # passed the reader filter but we have no extractor
        record = extract(payload)
        if record is None:
            continue  # payload lacked the fields this topic requires
        record["ts"] = ts
        buckets[topic].append(record)

    frames: dict[str, pd.DataFrame] = {}
    for topic, records in buckets.items():
        if records:
            frames[topic] = (
                pd.DataFrame(records).sort_values("ts").reset_index(drop=True)
            )
        else:
            frames[topic] = pd.DataFrame()
    return frames
|
||||
|
||||
|
||||
def load_csv_nav(path: str | Path) -> pd.DataFrame:
    """Load a USV nav CSV. Expected columns: ts, lat, lon, heading (flexible).

    Missing file or unparseable rows return an empty DataFrame.
    """
    csv_path = Path(path)
    if not csv_path.exists():
        logger.warning("CSV nav file not found: %s", path)
        return pd.DataFrame()

    parsed: list[dict[str, Any]] = []
    with csv_path.open(newline="") as fh:
        for raw in csv.DictReader(fh):
            try:
                # Empty cells become None; everything else must parse as float.
                record = {
                    key: float(val) if val not in ("", None) else None
                    for key, val in raw.items()
                }
            except ValueError:
                continue  # drop rows containing non-numeric cells
            parsed.append(record)

    if not parsed:
        return pd.DataFrame()
    frame = pd.DataFrame(parsed)
    if "ts" in frame.columns:
        frame = frame.sort_values("ts").reset_index(drop=True)
    return frame
|
||||
148
src/cosma_log_analyzer/main.py
Normal file
148
src/cosma_log_analyzer/main.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""CLI + service entrypoint for cosma-log-analyzer."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import click
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from .bus import Publisher, StdoutPublisher, make_publisher
|
||||
from .ingest import (
|
||||
KNOWN_TOPICS,
|
||||
TOPIC_BATTERY,
|
||||
TOPIC_IMU,
|
||||
TOPIC_USBL,
|
||||
load_mcap,
|
||||
)
|
||||
from .models import Anomaly
|
||||
from .rules import all_rules
|
||||
from .rules.base import Rule
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# NOTE(review): identity mapping today — presumably an indirection point for
# the day NATS keys diverge from raw ROS topic names; confirm before removing.
TOPIC_TO_KEY = {
    TOPIC_IMU: TOPIC_IMU,
    TOPIC_USBL: TOPIC_USBL,
    TOPIC_BATTERY: TOPIC_BATTERY,
}
|
||||
|
||||
|
||||
def _setup_logging() -> None:
    """Configure root logging from the LOG_LEVEL env var (default INFO)."""
    requested = os.environ.get("LOG_LEVEL", "INFO").upper()
    # Unknown level names silently fall back to INFO via getattr's default.
    resolved = getattr(logging, requested, logging.INFO)
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        level=resolved,
    )
|
||||
|
||||
|
||||
def analyze_mcap(
    path: str | Path,
    subject: str,
    rules: Iterable[Rule] | None = None,
) -> list[Anomaly]:
    """Load an MCAP file, run all rules, return the anomaly list.

    *rules* defaults to the full registered rule set; pass a subset to
    restrict analysis. The result is sorted by anomaly timestamp.
    """
    frames = load_mcap(path)
    active_rules = all_rules() if rules is None else list(rules)
    found: list[Anomaly] = []
    for rule in active_rules:
        rule.bind(subject)
        frame = frames.get(rule.topic)
        if frame is None or frame.empty:
            continue  # no data recorded for this rule's topic
        found.extend(rule.detect(frame))
    # Chronological order regardless of which rule fired.
    return sorted(found, key=lambda item: item.timestamp)
|
||||
|
||||
|
||||
def emit(anomalies: Iterable[Anomaly], publisher: Publisher) -> int:
    """Publish every anomaly through *publisher*; return how many were sent."""
    sent = 0
    # enumerate(start=1) doubles as the running count; stays 0 when empty.
    for sent, anomaly in enumerate(anomalies, start=1):
        publisher.publish(anomaly)
    return sent
|
||||
|
||||
|
||||
@click.group()
def cli() -> None:
    """cosma-log-analyzer: deterministic anomaly detection on AUV logs."""
    # Group callback runs before any subcommand: pull in .env values first,
    # then configure logging (which reads LOG_LEVEL from the environment).
    load_dotenv()
    _setup_logging()
|
||||
|
||||
|
||||
@cli.command("ingest")
@click.argument("path", type=click.Path(exists=True, dir_okay=False))
@click.option("--subject", default="AUV000", help="AUV identifier for NATS subject.")
@click.option("--dry-run", is_flag=True, help="Force stdout publisher (ignore NATS_URL).")
def ingest_cmd(path: str, subject: str, dry_run: bool) -> None:
    """Analyze a single MCAP file and publish anomalies."""
    # --dry-run constructs StdoutPublisher directly, so NATS_URL is never read.
    publisher: Publisher = StdoutPublisher() if dry_run else make_publisher()
    try:
        anomalies = analyze_mcap(path, subject)
        n = emit(anomalies, publisher)
        logger.info("Processed %s -> %d anomalies", path, n)
    finally:
        # Always release the publisher (drains NATS / flushes stdout).
        publisher.close()
|
||||
|
||||
|
||||
@cli.command("serve")
@click.option(
    "--mcap-dir",
    default=None,
    help="Directory to watch (default: MCAP_DIR env).",
)
@click.option(
    "--subject",
    default="AUV000",
    help="AUV identifier for NATS subject.",
)
@click.option(
    "--poll-interval",
    default=None,
    type=float,
    help="Seconds between scans (default: POLL_INTERVAL_S env, else 30).",
)
def serve_cmd(mcap_dir: str | None, subject: str, poll_interval: float | None) -> None:
    """Watch a directory and process new MCAP files as they appear."""
    # Precedence for both settings: CLI flag > environment > hard default.
    mcap_dir = mcap_dir or os.environ.get("MCAP_DIR") or "/data/mcap"
    interval = poll_interval if poll_interval is not None else float(
        os.environ.get("POLL_INTERVAL_S", "30")
    )
    watch_dir = Path(mcap_dir)
    watch_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Watching %s (interval=%.1fs)", watch_dir, interval)

    publisher = make_publisher()
    # Paths already handled this run. In-memory only: a restart re-processes
    # every file. NOTE(review): a file still being copied in may be picked up
    # mid-write — confirm producers create *.mcap atomically (rename-into-place).
    seen: set[str] = set()
    try:
        while True:
            for mcap_path in sorted(watch_dir.glob("*.mcap")):
                key = str(mcap_path)
                if key in seen:
                    continue
                logger.info("New MCAP: %s", mcap_path)
                try:
                    anomalies = analyze_mcap(mcap_path, subject)
                    n = emit(anomalies, publisher)
                    logger.info("Emitted %d anomalies from %s", n, mcap_path.name)
                except Exception as exc:
                    # One bad file must not kill the watcher; log and move on.
                    logger.exception("Failed processing %s: %s", mcap_path, exc)
                # Marked seen even on failure, so a poison file is not retried.
                seen.add(key)
            time.sleep(interval)
    except KeyboardInterrupt:
        logger.info("Shutting down on SIGINT")
    finally:
        publisher.close()
|
||||
|
||||
|
||||
def main() -> None:  # pragma: no cover
    """Console-script entry point; delegates straight to the click group."""
    # standalone_mode=True lets click own argv parsing, exit codes, and usage
    # errors (this is click's default, spelled out here for clarity).
    cli(standalone_mode=True)


if __name__ == "__main__":  # pragma: no cover
    main()
|
||||
40
src/cosma_log_analyzer/models.py
Normal file
40
src/cosma_log_analyzer/models.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
SEVERITIES = ("info", "warn", "critical")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Anomaly:
|
||||
rule: str
|
||||
severity: str
|
||||
timestamp: float
|
||||
subject: str
|
||||
topic: str
|
||||
value: float | None = None
|
||||
context: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.severity not in SEVERITIES:
|
||||
raise ValueError(
|
||||
f"severity must be one of {SEVERITIES}, got {self.severity!r}"
|
||||
)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return asdict(self)
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(self.to_dict(), sort_keys=True, default=_json_default)
|
||||
|
||||
def nats_subject(self) -> str:
|
||||
return f"cosma.auv.{self.subject}.anomaly.{self.rule}"
|
||||
|
||||
|
||||
def _json_default(obj: Any) -> Any:
|
||||
if hasattr(obj, "isoformat"):
|
||||
return obj.isoformat()
|
||||
raise TypeError(f"Not JSON serializable: {type(obj).__name__}")
|
||||
Reference in New Issue
Block a user