class NtpMetadata2(Base):
    """Custom metadata table for the NTP2 vendor (one row per probe).

    Patch-01 shape of the table; later patches in this series rename
    observation_source -> observation_sources (JSONB) and drop the
    per-sample measurement columns.
    """

    __tablename__ = "ntp_metadata2"

    probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True)

    # Collection context
    mode = Column(Text)  # 'local' or 'remote'
    probe_name = Column(Text)
    target_host = Column(Text)
    target_port = Column(Integer)

    # Last observed NTP state
    sync_status = Column(Text)
    leap_status = Column(Text)
    stratum = Column(Integer)
    reachability = Column(Integer)
    offset_last_s = Column(Float)
    delay_s = Column(Float)
    jitter_s = Column(Float)
    dispersion_s = Column(Float)
    root_delay_s = Column(Float)
    root_dispersion_s = Column(Float)
    poll_interval_s = Column(Float)
    reference_id = Column(Text)

    # Provenance
    observation_source = Column(Text)
    collection_host = Column(Text)
    additional_metadata = Column(JSONB)

    # NOTE(review): back_populates requires ProbeMetadata to declare a
    # matching `ntp_metadata2` relationship — confirm it exists (patch 03
    # adds one under a different name).
    probe = relationship("ProbeMetadata", back_populates="ntp_metadata2")
# --- opensampl/vendors/constants.py: VENDORS registry entry ----------------
# Registers the NTP2 vendor so the loader can resolve the parser class,
# parser module and metadata ORM table by name.
NTP2 = VendorType(
    name='NTP2',
    parser_class='NtpProbe2',
    parser_module='ntp2',
    metadata_table='ntp_metadata2',
    metadata_orm='NtpMetadata2',
)


# --- opensampl/vendors/ntp2.py: patch-01 scaffolding -----------------------
"""Probe implementation for NTP2 vendor"""


class NtpProbe2(BaseProbe, CollectMixin):
    """Probe parser for NTP2 vendor data files (scaffolding: parsing is TODO)."""

    vendor = VENDORS.NTP2

    class CollectConfig(CollectMixin.CollectConfig):
        """
        Collection options for the NTP2 probe.

        Fields inherited from CollectMixin.CollectConfig (defaults shown):
            output_dir: Optional[Path] = None
            load: bool = False
            duration: int = 300
            ip_address: str = '127.0.0.1'
            probe_id: str = '1-1'

        Additional fields declared here are automatically exposed as collect
        CLI options and forwarded to collect().
        """

    def __init__(self, input_file: str, **kwargs):
        """Initialize NtpProbe2 from *input_file*; probe-key extraction is TODO."""
        super().__init__(input_file)
        # TODO: parse self.input_file to extract
        #   self.probe_key = ProbeKey(probe_id=..., ip_address=...)

    def process_metadata(self) -> dict:
        """Parse and return probe metadata from the input file.

        Expected keys: mode, probe_name, target_host, target_port,
        sync_status, leap_status, stratum, reachability, offset_last_s,
        delay_s, jitter_s, dispersion_s, root_delay_s, root_dispersion_s,
        poll_interval_s, reference_id, observation_source, collection_host,
        additional_metadata.
        """
        # TODO: implement metadata parsing and return the dict.
        raise NotImplementedError

    def process_time_data(self) -> pd.DataFrame:
        """Parse time-series data from self.input_file and push it.

        Use send_time_data (prefills METRICS.PHASE_OFFSET) or send_data with
        an explicit metric type.  Either way the frame needs columns:
            time  (datetime64[ns]): timestamp per measurement
            value (float64):        measured value per timestamp
        The on-disk format must match what save_to_file produces.
        """
        # TODO: df = pd.DataFrame({"time": [...], "value": [...]})
        #       self.send_time_data(df, reference_type=...)
        raise NotImplementedError

    @classmethod
    def collect(cls, collect_config: 'NtpProbe2.CollectConfig') -> CollectMixin.CollectArtifact:
        """Collect live data and return a CollectArtifact.

        When collect_config.load is set, the artifact's metadata and data are
        loaded into the database; save_to_file logic must also be defined.
        """
        # TODO: build and return the CollectArtifact.
        raise NotImplementedError

    @classmethod
    def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str:
        """Render *collected* into the text written to the output file.

        The filename is auto-generated as
        {ip_address}_{probe_id}_{vendor}_{timestamp}.txt in the CLI-provided
        directory; the content must stay readable by the parse methods above
        and carry the required metadata.
        """
        raise NotImplementedError
# Custom NTP metric types (all scalar floats so they plot uniformly).
NTP_DELAY = MetricType(
    name="NTP Delay",
    description="Round-trip delay (RTT) to the NTP server or observed path delay in seconds",
    unit="s",
    value_type=float,
)
NTP_JITTER = MetricType(
    name="NTP Jitter",
    description=(
        "Jitter or offset variation for NTP in seconds (true value from chrony/ntpq when available; "
        "remote single-packet collection may use a delay/dispersion bound estimate)"
    ),
    unit="s",
    value_type=float,
)
NTP_STRATUM = MetricType(
    name="NTP Stratum",
    description="NTP stratum level (distance from reference clock)",
    unit="level",
    value_type=float,
)
NTP_REACHABILITY = MetricType(
    name="NTP Reachability",
    description="NTP reachability register (0-255) as a scalar for plotting",
    unit="count",
    value_type=float,
)
NTP_DISPERSION = MetricType(
    name="NTP Dispersion",
    description="Combined error budget / dispersion in seconds",
    unit="s",
    value_type=float,
)
NTP_ROOT_DELAY = MetricType(
    name="NTP Root Delay",
    description="Root delay from NTP packet or local estimate in seconds",
    unit="s",
    value_type=float,
)
NTP_ROOT_DISPERSION = MetricType(
    name="NTP Root Dispersion",
    description="Root dispersion from NTP packet or local estimate in seconds",
    unit="s",
    value_type=float,
)
NTP_POLL_INTERVAL = MetricType(
    name="NTP Poll Interval",
    description="Poll interval in seconds",
    unit="s",
    value_type=float,
)
NTP_SYNC_HEALTH = MetricType(
    name="NTP Sync Health",
    description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)",
    unit="ratio",
    value_type=float,
)
import BaseProbe from opensampl.vendors.constants import ProbeKey, VENDORS from opensampl.references import REF_TYPES from opensampl.mixins.collect import CollectMixin +from typing import Literal, Optional, Any, TypeVar +from pydantic import model_validator, BaseModel, Field, field_serializer +from pydanclick import from_pydantic +import click +import shutil +import subprocess +from datetime import datetime, timezone +from loguru import logger +from opensampl.metrics import METRICS + + +T = TypeVar('T') +def _merge(a: T | None, b: T | None) -> T | None: + return a if a is not None else b + class NtpProbe2(BaseProbe, CollectMixin): """Probe parser for NTP2 vendor data files""" vendor = VENDORS.NTP2 - class CollectConfig(CollectMixin.CollectConfig): - """ - The following configuration fields are inherited from the Collect mixin. - Change the defaults by uncommenting and changing value + metric_map = { + "phase_offset_s": METRICS.PHASE_OFFSET, + "delay_s": METRICS.NTP_DELAY, + "jitter_s": METRICS.NTP_JITTER, + "stratum": METRICS.NTP_STRATUM, + "reachability": METRICS.NTP_REACHABILITY, + "dispersion_s": METRICS.NTP_DISPERSION, + "root_delay_s": METRICS.NTP_ROOT_DELAY, + "root_dispersion_s": METRICS.NTP_ROOT_DISPERSION, + "poll_interval_s": METRICS.NTP_POLL_INTERVAL, + "sync_health": METRICS.NTP_SYNC_HEALTH, + } - Add additional fields, which will automatically be added to the collect click options - and provided to calls to collect - output_dir: Optional[Path] = None - load: bool = False - duration: int = 300 + class NTPMetadata(BaseModel): + mode: Literal['remote', 'local'] - ip_address: str = '127.0.0.1' - probe_id: str = '1-1' + target_host: str = "" + target_port: int = 0 + + sync_status: str = Field("unknown", serialization_alias='sync_health') + leap_status: str = "unknown" + stratum: int | None = None + reachability: int | None = None + offset_last_s: float | None = Field(None, serialization_alias='phase_offset_s') + delay_s: float | None = None + jitter_s: 
# Method of NtpProbe2.NTPMetadata (patch 02); reconstructed at module level
# because the enclosing class is split across the collapsed patch stream.
def parse_chronyc_tracking(self, text: str) -> None:
    """Parse `chronyc tracking` key: value output into this record.

    Fills offset_last_s, jitter_s, stratum and reference_id (existing
    values win via _merge), sets sync_status, and stashes the raw
    key/value map in extras['chronyc_raw_tracking'].
    """
    out: dict[str, Any] = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or ":" not in line:
            continue
        key, _, rest = line.partition(":")
        key = key.strip().lower().replace(" ", "_")
        out[key] = rest.strip()

    # "Last offset     : +0.000000123 seconds"
    m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            # BUG FIX: the capture was stored without float() conversion, so
            # a str landed in the float-typed field and the ValueError guard
            # below could never fire.
            self.offset_last_s = _merge(self.offset_last_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            self.jitter_s = _merge(self.jitter_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"stratum\s*:\s*(\d+)", text, re.I)
    if m:
        try:
            self.stratum = _merge(self.stratum, int(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.I)
    if m:
        # Prefer the parenthesised hostname over the raw refid when present.
        self.reference_id = (m.group(2) or m.group(1)) or self.reference_id

    self.sync_status = "unsynchronized"
    if "normal" in text.lower() or self.offset_last_s is not None:
        self.sync_status = "tracking"
    self.extras['chronyc_raw_tracking'] = out
    self.observation_sources.append("chronyc_tracking")
# Method of NtpProbe2.NTPMetadata (patch 02); reconstructed at module level
# because the enclosing class is split across the collapsed patch stream.
def parse_ntpq(self, text: str) -> None:
    """Parse `ntpq -p` / `ntpq -pn` peer-table output.

    Takes the first tally-prefixed peer line (*, + or -) and fills offset,
    delay, jitter (converted ms -> s), stratum, reach and refid; values
    already set on this record take precedence.
    """
    offset_s: float | None = None
    delay_s: float | None = None
    jitter_s: float | None = None
    stratum: int | None = None
    reach: int | None = None
    ref = None
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("remote") or line.startswith("="):
            continue
        if line.startswith(("*", "+", "-")):
            parts = line.split()
            # remote refid st t when poll reach delay offset jitter
            if len(parts) >= 10:
                try:
                    stratum = int(parts[2])
                except ValueError:
                    pass
                try:
                    delay_s = float(parts[7]) / 1000.0  # ms -> s
                    offset_s = float(parts[8]) / 1000.0
                    jitter_s = float(parts[9]) / 1000.0
                except (ValueError, IndexError):
                    pass
                try:
                    # BUG FIX: ntpq prints the reach register in octal with
                    # no leading 0 ("377" == 255); the old code parsed it as
                    # decimal unless it happened to start with '0'.
                    reach = int(parts[6], 8)
                except ValueError:
                    try:
                        reach = int(parts[6])
                    except ValueError:
                        pass
                ref = parts[1]
            break

    # Values parsed from earlier sources take precedence.
    self.offset_last_s = self.offset_last_s or offset_s
    self.delay_s = self.delay_s or delay_s
    self.jitter_s = self.jitter_s or jitter_s
    self.stratum = self.stratum or stratum
    self.reachability = self.reachability or reach
    self.reference_id = self.reference_id or ref
    # BUG FIX: the old `self.sync_status = sync_status or self.sync_status`
    # always overwrote the existing status, because the fallback string
    # "unknown" is truthy.  Only claim "synced" when an offset was parsed.
    if offset_s is not None:
        self.sync_status = "synced"
    self.observation_sources.append("ntpq")
Filename will be automatically generated as ntp_{ip_address}_{probe_id}_{ts.strftime('%Y%m%dT%H%M%SZ')}.json + load: Whether to load collected data directly to the database + duration: Number of seconds to collect data for + mode: Collect remote or local NTP. Default is 'local'. + interval: Seconds between samples; 0 = single sample and exit + duration: Samples to collect when interval > 0 + timeout: UDP request timeout for remote mode(seconds) default: 3.0 """ + ip_address: str = '127.0.0.1' + port: Optional[int] = None + mode: Literal['remote', 'local'] = 'local' + interval: float = 0.0 + duration: int = 1 + timeout: float = 3.0 + @classmethod + def get_collect_cli_options(cls): + return [ + from_pydantic(cls.CollectConfig, rename={'ip_address': 'host'}), + click.pass_context, + ] def __init__(self, input_file: str, **kwargs): """Initialize NtpProbe2 from input file""" @@ -90,6 +324,88 @@ def process_time_data(self) -> pd.DataFrame: # Ensure the format it is reading in matches that in save_to_file raise NotImplementedError + @staticmethod + def _run(cmd: list[str], timeout: float=8.0) -> Optional[str]: + """Run command; return stdout or None if missing/failed.""" + bin0 = cmd[0] + if shutil.which(bin0) is None: + return None + try: + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + except (OSError, subprocess.SubprocessError) as e: + logger.debug(f"ntp local: command {cmd!r} failed: {e}") + return None + if proc.returncode != 0: + logger.debug(f"ntp local: {cmd!r} exit {proc.returncode}: {proc.stderr!r}") + return None + return proc.stdout or "" + + def collect_local(self, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: + merged = self.NTPMetadata(mode='local', probe_name=collect_config.probe_id) + t = self._run(["chronyc", "tracking"]) + if t: + merged.parse_chronyc_tracking(t) + + t = self._run(["chronyc", "sources", "-v"]) or self._run(["chronyc", "sources"]) + if t: + 
merged.parse_chronyc_sources(t) + + if merged.offset_last_s is None and merged.stratum is None: + t = self._run(["ntpq", "-pn"]) or self._run(["ntpq", "-p"]) + if t: + merged.parse_ntpq(t) + + t = self._run(["timedatectl", "show-timesync", "--all"]) or self._run(["timedatectl", "status"]) + if t: + merged.parse_timedatectl(t) + + t = self._run(["systemctl", "show", "systemd-timesyncd", "--property=ActiveState"]) + if not t: + t = self._run(["systemctl", "status", "systemd-timesyncd", "--no-pager"]) + + if t: + merged.parse_systemctl_show(t) + + if not merged.observation_sources: + merged.observation_source = ['none'] + + now = datetime.now(tz=timezone.utc) + + + + row = merged.model_dump( + include={'offset_last_s', 'delay_s', 'jitter_s', 'stratum', 'reachability', 'dispersion_s', + 'root_delay_s', 'root_dispersion_s', 'poll_interval_s'}) + row['sync_health'] = 1.0 if merged.sync_status in ("tracking", "synchronized", "synced") else 0.0 + meta = merged.model_dump(exclude_none=True) + if merged.reference_id: + reference_type = REF_TYPES.PROBE + compound_reference = merged.reference_id + else: + reference_type = REF_TYPES.UNKNOWN + compound_reference = None + + artifacts: list[CollectMixin.DataArtifact] = [] + for k, v in row.items(): + value = pd.DataFrame([(now, v)], columns=['time', 'value'], dtype={'time': datetime64_dtype}) + metric = self.metric_map.get(k, None) + if not metric: + logger.warning(f'No metric mapping found for {k}') + continue + artifacts.append(CollectMixin.DataArtifact(metric=metric, + reference_type=reference_type, + compound_reference=compound_reference, + value=value)) + + return CollectMixin.CollectArtifact(data=artifacts, metadata=meta) + + @classmethod def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: """ From 882569dc3ce2e0418490180b54155d1a444679b6 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Mon, 13 Apr 2026 17:51:37 -0400 Subject: [PATCH 03/22] flexible --- opensampl/db/orm.py | 5 +++-- 1 
# opensampl/db/orm.py — final state of the NTP2 metadata table after
# patches 03/04 (per-sample measurement columns moved to time-series tables).
#
# BUG FIX: patch 03 wired the ProbeMetadata side of this relationship as
#     ntp_metadata = relationship("NtpMetadata", back_populates="probe", uselist=False)
# but no "NtpMetadata" class exists and NtpMetadata2.probe back-populates
# "ntp_metadata2", so SQLAlchemy mapper configuration fails at first use.
# ProbeMetadata must instead declare:
#     ntp_metadata2 = relationship("NtpMetadata2", back_populates="probe", uselist=False)

class NtpMetadata2(Base):
    """Per-probe NTP metadata for the NTP2 vendor (one row per probe)."""

    __tablename__ = "ntp_metadata2"

    probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True)
    mode = Column(Text)  # 'local' or 'remote' collection
    target_host = Column(Text)
    target_port = Column(Integer)
    sync_status = Column(Text)
    leap_status = Column(Text)
    root_delay_s = Column(Float)
    root_dispersion_s = Column(Float)
    poll_interval_s = Column(Float)
    reference_id = Column(Text)
    observation_sources = Column(JSONB)  # list of commands/sources that contributed
    collection_host = Column(Text)
    additional_metadata = Column(JSONB)

    probe = relationship("ProbeMetadata", back_populates="ntp_metadata2")
METRICS.NTP_JITTER, @@ -43,196 +41,390 @@ class NtpProbe2(BaseProbe, CollectMixin): "sync_health": METRICS.NTP_SYNC_HEALTH, } - class NTPMetadata(BaseModel): - mode: Literal['remote', 'local'] - - target_host: str = "" - target_port: int = 0 - - sync_status: str = Field("unknown", serialization_alias='sync_health') - leap_status: str = "unknown" - stratum: int | None = None - reachability: int | None = None - offset_last_s: float | None = Field(None, serialization_alias='phase_offset_s') - delay_s: float | None = None - jitter_s: float | None = None - dispersion_s: float | None = None - root_delay_s: float | None = None - root_dispersion_s: float | None = None - poll_interval_s: float | None = None - reference_id: str | None = None - observation_sources: list[str] = Field(default_factory=list) - collection_host: str | None = None - - extras: dict = Field(default_factory=dict, serialization_alias='additional_metadata') - - def parse_chronyc_tracking(self, text: str) -> None: - """Parse `chronyc tracking` key: value output.""" - out: dict[str, Any] = {} - for line in text.splitlines(): - line = line.strip() - if not line or ":" not in line: - continue - key, _, rest = line.partition(":") - key = key.strip().lower().replace(" ", "_") - val = rest.strip() - out[key] = val - - # Last offset : +0.000000123 seconds - m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) - if m: - try: - self.offset_last_s = _merge(self.offset_last_s, (m.group(1))) - except ValueError: - pass - m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) - if m: - try: - self.jitter_s = _merge(self.jitter_s, float(m.group(1))) - except ValueError: - pass - m = re.search(r"stratum\s*:\s*(\d+)", text, re.I) + sync_status: str = Field("unknown") + sync_health: float | None = Field(None, json_schema_extra={'metric': True}) + + stratum: float | None = Field(None, json_schema_extra={'metric': True}) + reachability: int | None = Field(None, 
# Methods of NTPCollector (patch 04); reconstructed at module level because
# the enclosing class is split across the collapsed patch stream.

def collect(self):
    """Populate this collector from the environment; subclasses implement."""
    raise NotImplementedError()


def determine_reference(self) -> tuple:
    """Return (reference_type, compound_reference); base: unknown / None."""
    return REF_TYPES.UNKNOWN, None


def export_data(self) -> list:
    """Turn every populated metric-tagged field into a one-row DataArtifact.

    Fields opt in via Field(json_schema_extra={'metric': True}); unmapped
    keys get an auto-generated MetricType with a warning.
    NOTE(review): key matching relies on model_config serialize_by_alias=True
    (pydantic >= 2.11) so dumped keys line up with metric_map — confirm.
    """
    stamp = datetime.now(tz=timezone.utc)
    metric_fields = {
        name
        for name, info in type(self).model_fields.items()
        if info.json_schema_extra and info.json_schema_extra.get('metric', False)
    }
    ref_type, compound_ref = self.determine_reference()
    samples = self.model_dump(include=metric_fields, exclude_none=True)

    out = []
    for m, v in samples.items():
        metric = self.metric_map.get(m)
        if metric is None:
            metric = MetricType(
                name=m,
                description=f'Automatically generated metric type for {m}',
                value_type=object,
                unit="unknown",
            )
            logger.warning(f'Generated new metric type for {m}')
        frame = pd.DataFrame([(stamp, v)], columns=['time', 'value'])
        frame['time'] = pd.to_datetime(frame['time'])
        out.append(CollectMixin.DataArtifact(
            metric=metric,
            reference_type=ref_type,
            compound_reference=compound_ref,
            value=frame,
        ))
    return out
# Method of NTPLocalCollector (patch 04); reconstructed at module level
# because the enclosing class is split across the collapsed patch stream.
def _parse_chronyc_tracking(self, text: str) -> None:
    """Parse `chronyc tracking` key: value output into this collector.

    Fills offset_s, jitter_s, stratum and reference_id (existing values win
    via _merge), sets sync_status, and stashes the raw key/value map in
    extras['chronyc_raw_tracking'].
    """
    out: dict[str, Any] = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or ":" not in line:
            continue
        key, _, rest = line.partition(":")
        key = key.strip().lower().replace(" ", "_")
        out[key] = rest.strip()

    # "Last offset     : +0.000000123 seconds"
    m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            # BUG FIX (carried over from patch 02): the capture was stored
            # without float() conversion, putting a str into the float-typed
            # field and making the ValueError guard below dead code.
            self.offset_s = _merge(self.offset_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            self.jitter_s = _merge(self.jitter_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"stratum\s*:\s*(\d+)", text, re.I)
    if m:
        try:
            self.stratum = _merge(self.stratum, int(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.I)
    if m:
        # Prefer the parenthesised hostname over the raw refid when present.
        self.reference_id = (m.group(2) or m.group(1)) or self.reference_id

    self.sync_status = "unsynchronized"
    if "normal" in text.lower() or self.offset_s is not None:
        self.sync_status = "tracking"
    self.extras['chronyc_raw_tracking'] = out
    self.observation_sources.append("chronyc_tracking")
def parse_chronyc_sources(self, text: str) -> None: - """Parse `chronyc sources` for reach and selected source.""" - reach: Optional[int] = None - selected: Optional[str] = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("MS") or line.startswith("="): - continue - # ^* or ^+ prefix indicates selected/accepted - if line.startswith("*") or line.startswith("+"): - parts = line.split() - if len(parts) >= 7: - try: - reach = int(parts[5], 8) if parts[5].startswith("0") else int(parts[5]) - except ValueError: - try: - reach = int(parts[5]) - except ValueError: - pass - selected = parts[1] - break - # Fallback: last column often reach (octal) + + self.reachability = self.reachability or reach + self.reference_id = self.reference_id or selected + self.observation_sources.append( "chronyc_sources") + + def _parse_ntpq(self, text: str) -> None: + """Parse `ntpq -p` / `ntpq -pn` output.""" + offset_s: Optional[float] = None + delay_s: Optional[float] = None + jitter_s: Optional[float] = None + stratum: Optional[int] = None + reach: Optional[int] = None + ref = None + for line in text.splitlines(): + line = line.strip() + if not line or line.startswith("remote") or line.startswith("="): + continue + if line.startswith("*") or line.startswith("+") or line.startswith("-"): parts = line.split() - if len(parts) >= 7 and parts[0] in ("^*", "^+", "*", "+"): - # already handled - pass - if reach is None: - # Try any line with 377 octal style - m = re.search(r"\b([0-7]{3})\b", text) - if m: + # remote refid st t when poll reach delay offset jitter + if len(parts) >= 10: try: - reach = int(m.group(1), 8) + stratum = int(parts[2]) except ValueError: pass - - self.reachability = self.reachability or reach - self.reference_id = self.reference_id or selected - self.observation_sources.append( "chronyc_sources") - - def parse_ntpq(self, text: str) -> None: - """Parse `ntpq -p` / `ntpq -pn` output.""" - offset_s: Optional[float] = None - delay_s: 
Optional[float] = None - jitter_s: Optional[float] = None - stratum: Optional[int] = None - reach: Optional[int] = None - ref = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("remote") or line.startswith("="): - continue - if line.startswith("*") or line.startswith("+") or line.startswith("-"): - parts = line.split() - # remote refid st t when poll reach delay offset jitter - if len(parts) >= 10: + try: + delay_s = float(parts[7]) / 1000.0 # ms -> s + offset_s = float(parts[8]) / 1000.0 + jitter_s = float(parts[9]) / 1000.0 + except (ValueError, IndexError): + pass + try: + reach = int(parts[6], 8) if parts[6].startswith("0") else int(parts[6]) + except ValueError: try: - stratum = int(parts[2]) + reach = int(parts[6]) except ValueError: pass - try: - delay_s = float(parts[7]) / 1000.0 # ms -> s - offset_s = float(parts[8]) / 1000.0 - jitter_s = float(parts[9]) / 1000.0 - except (ValueError, IndexError): - pass - try: - reach = int(parts[6], 8) if parts[6].startswith("0") else int(parts[6]) - except ValueError: - try: - reach = int(parts[6]) - except ValueError: - pass - ref = parts[1] - break - sync_status = "synced" if offset_s is not None else "unknown" - - self.offset_last_s = self.offset_last_s or offset_s - self.delay_s = self.delay_s or delay_s - self.jitter_s = self.jitter_s or jitter_s - self.stratum = self.stratum or stratum - self.reachability = self.reachability or reach - self.reference_id = self.reference_id or ref + ref = parts[1] + break + sync_status = "synced" if offset_s is not None else "unknown" + + self.offset_s = self.offset_s or offset_s + self.delay_s = self.delay_s or delay_s + self.jitter_s = self.jitter_s or jitter_s + self.stratum = self.stratum or stratum + self.reachability = self.reachability or reach + self.reference_id = self.reference_id or ref + self.sync_status = sync_status or self.sync_status + self.observation_sources.append("ntpq") + + def _parse_timedatectl(self, text: str) -> 
None: + """Parse `timedatectl status` / `show-timesync --all`.""" + sync = None + for line in text.splitlines(): + low = line.lower() + if "system clock synchronized" in low or "ntp synchronized" in low: + if "yes" in low: + sync = True + elif "no" in low: + sync = False + sync_status = "unknown" + if sync is True: + sync_status = "synchronized" + elif sync is False: + sync_status = "unsynchronized" + + if self.sync_status == 'unknown': + self.sync_status = sync_status or self.sync_status + self.observation_sources.append("timedatectl") + self.extras['timedatectl'] = text[:2000] + + def _parse_systemctl_show(self, text: str) -> None: + """Parse `systemctl show` / `systemctl status` for systemd-timesyncd.""" + active = None + for line in text.splitlines(): + if line.strip().lower().startswith("activestate="): + active = line.split("=", 1)[1].strip().lower() == "active" + break + if active is None and "active (running)" in text.lower(): + active = True + sync_status = "unknown" + if active is True: + sync_status = "service_active" + elif active is False: + sync_status = "service_inactive" + + if self.sync_status == 'unknown': self.sync_status = sync_status or self.sync_status - self.observation_sources.append("ntpq") - - def parse_timedatectl(self, text: str) -> None: - """Parse `timedatectl status` / `show-timesync --all`.""" - sync = None - for line in text.splitlines(): - low = line.lower() - if "system clock synchronized" in low or "ntp synchronized" in low: - if "yes" in low: - sync = True - elif "no" in low: - sync = False - sync_status = "unknown" - if sync is True: - sync_status = "synchronized" - elif sync is False: - sync_status = "unsynchronized" - - if self.sync_status == 'unknown': - self.sync_status = sync_status or self.sync_status - self.observation_sources.append("timedatectl") - self.extras['timedatectl'] = text[:2000] - - def parse_systemctl_show(self, text: str) -> None: - """Parse `systemctl show` / `systemctl status` for systemd-timesyncd.""" - 
active = None - for line in text.splitlines(): - if line.strip().lower().startswith("activestate="): - active = line.split("=", 1)[1].strip().lower() == "active" - break - if active is None and "active (running)" in text.lower(): - active = True - sync_status = "unknown" - if active is True: - sync_status = "service_active" - elif active is False: - sync_status = "service_inactive" - - if self.sync_status == 'unknown': - self.sync_status = sync_status or self.sync_status - self.extras['systemctl'] = text[:2000] - self.observation_sources.append("systemctl_timesyncd") + self.extras['systemctl'] = text[:2000] + self.observation_sources.append("systemctl_timesyncd") + + def collect(self): + t = self._run(["chronyc", "tracking"]) + if t: + self._parse_chronyc_tracking(t) + + t = self._run(["chronyc", "sources", "-v"]) or self._run(["chronyc", "sources"]) + if t: + self._parse_chronyc_sources(t) + + if self.offset_s is None and self.stratum is None: + t = self._run(["ntpq", "-pn"]) or self._run(["ntpq", "-p"]) + if t: + self._parse_ntpq(t) + + t = self._run(["timedatectl", "show-timesync", "--all"]) or self._run(["timedatectl", "status"]) + if t: + self._parse_timedatectl(t) + + t = self._run(["systemctl", "show", "systemd-timesyncd", "--property=ActiveState"]) + if not t: + t = self._run(["systemctl", "status", "systemd-timesyncd", "--no-pager"]) + + if t: + self._parse_systemctl_show(t) + + if not self.observation_sources: + self.observation_sources = ['none'] + + self.sync_health = 1.0 if self.sync_status in ("tracking", "synchronized", "synced") else 0.0 + + def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: + if self.reference_id: + reference_type = REF_TYPES.PROBE + compound_reference = self.reference_id + else: + reference_type = REF_TYPES.UNKNOWN + compound_reference = None + return reference_type, compound_reference + +class NTPRemoteCollector(NTPCollector): + mode: ClassVar[Literal['remote', 'local']] = 'remote' + + target_host: 
str + target_port: int + timeout: float = 3.0 + + root_delay_s: float | None = Field(None, json_schema_extra={'metric': True}) + root_dispersion_s: float | None = Field(None, json_schema_extra={'metric': True}) + poll_interval_s: float | None = Field(None, json_schema_extra={'metric': True}) + leap_status: str = "unknown" + + def configure_failure(self, e): + self.sync_status = 'unreachable' + self.sync_health = 0 + self.extras['error'] = str(e) + self.observation_sources.append("ntplib") + self.observation_sources.append("error") + + def _estimate_jitter_s(self) -> None: + """ + Single NTP client response does not include RFC5905 peer jitter (that needs multiple samples). + + Emit a conservative positive bound from round-trip delay and root dispersion so downstream + ``NTP Jitter`` metrics and dashboards have a value; chrony/ntpq local paths still supply true jitter when available. + """ + if self.delay_s is None and self.root_dispersion_s is None: + return + d = float(self.delay_s) if self.delay_s is not None else 0.0 + r = float(self.root_dispersion_s) if self.root_dispersion_s is not None else 0.0 + est = 0.05 * d + 0.25 * r + if est > 0: + self.jitter_s = est + return + + def collect(self): + try: + import ntplib # type: ignore[import-untyped] + except ImportError as e: + raise ImportError( + "Remote NTP collection requires the 'ntplib' package (install opensampl[collect]).") from e + client = ntplib.NTPClient() + try: + resp = client.request(self.target_host, port=self.target_port, version=3, timeout=self.timeout) + except Exception as e: + logger.warning(f"NTP request to {self.target_host}:{self.target_port} failed: {e}") + self.configure_failure(e) + return + leap = int(resp.leap) + leap_map = {0: "no_warning", 1: "add_second", 2: "del_second", 3: "alarm"} + self.leap_status = leap_map.get(leap, str(leap)) + + stratum = int(resp.stratum) + + try: + self.poll_interval_s = float(2 ** int(resp.poll)) + except (TypeError, ValueError, OverflowError): + 
logger.debug(f'No poll interval determined') + + self.root_delay_s = float(resp.root_delay) if resp.root_delay is not None else None + self.root_dispersion_s = float(resp.root_dispersion) if resp.root_dispersion is not None else None + self.delay_s = float(resp.delay) if resp.delay is not None else None + self.offset_s = float(resp.offset) if resp.offset is not None else None + + ref_id = getattr(resp, "ref_id", None) + if hasattr(ref_id, "decode"): + try: + ref_id = ref_id.decode("ascii", errors="replace") + except Exception: + ref_id = str(ref_id) + self.reference_id = str(ref_id) if ref_id is not None else None + + sync_ok = stratum < 16 and self.offset_s is not None + self.observation_sources.append("ntplib") + self.sync_status = "synchronized" if sync_ok else "unsynchronized" + self.sync_health = 1.0 if sync_ok else 0.0 + self._estimate_jitter_s() + + self.extras['version'] = getattr(resp, 'version', None) + +class NtpProbe2(BaseProbe, CollectMixin): + """Probe parser for NTP2 vendor data files""" + + vendor = VENDORS.NTP2 class CollectConfig(CollectMixin.CollectConfig): """ @@ -281,7 +473,7 @@ def process_metadata(self) -> dict: 'leap_status', 'stratum', 'reachability', - 'offset_last_s', + 'offset_s', 'delay_s', 'jitter_s', 'dispersion_s', @@ -324,87 +516,6 @@ def process_time_data(self) -> pd.DataFrame: # Ensure the format it is reading in matches that in save_to_file raise NotImplementedError - @staticmethod - def _run(cmd: list[str], timeout: float=8.0) -> Optional[str]: - """Run command; return stdout or None if missing/failed.""" - bin0 = cmd[0] - if shutil.which(bin0) is None: - return None - try: - proc = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=timeout, - check=False, - ) - except (OSError, subprocess.SubprocessError) as e: - logger.debug(f"ntp local: command {cmd!r} failed: {e}") - return None - if proc.returncode != 0: - logger.debug(f"ntp local: {cmd!r} exit {proc.returncode}: {proc.stderr!r}") - return None - return 
proc.stdout or "" - - def collect_local(self, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: - merged = self.NTPMetadata(mode='local', probe_name=collect_config.probe_id) - t = self._run(["chronyc", "tracking"]) - if t: - merged.parse_chronyc_tracking(t) - - t = self._run(["chronyc", "sources", "-v"]) or self._run(["chronyc", "sources"]) - if t: - merged.parse_chronyc_sources(t) - - if merged.offset_last_s is None and merged.stratum is None: - t = self._run(["ntpq", "-pn"]) or self._run(["ntpq", "-p"]) - if t: - merged.parse_ntpq(t) - - t = self._run(["timedatectl", "show-timesync", "--all"]) or self._run(["timedatectl", "status"]) - if t: - merged.parse_timedatectl(t) - - t = self._run(["systemctl", "show", "systemd-timesyncd", "--property=ActiveState"]) - if not t: - t = self._run(["systemctl", "status", "systemd-timesyncd", "--no-pager"]) - - if t: - merged.parse_systemctl_show(t) - - if not merged.observation_sources: - merged.observation_source = ['none'] - - now = datetime.now(tz=timezone.utc) - - - - row = merged.model_dump( - include={'offset_last_s', 'delay_s', 'jitter_s', 'stratum', 'reachability', 'dispersion_s', - 'root_delay_s', 'root_dispersion_s', 'poll_interval_s'}) - row['sync_health'] = 1.0 if merged.sync_status in ("tracking", "synchronized", "synced") else 0.0 - meta = merged.model_dump(exclude_none=True) - if merged.reference_id: - reference_type = REF_TYPES.PROBE - compound_reference = merged.reference_id - else: - reference_type = REF_TYPES.UNKNOWN - compound_reference = None - - artifacts: list[CollectMixin.DataArtifact] = [] - for k, v in row.items(): - value = pd.DataFrame([(now, v)], columns=['time', 'value'], dtype={'time': datetime64_dtype}) - metric = self.metric_map.get(k, None) - if not metric: - logger.warning(f'No metric mapping found for {k}') - continue - artifacts.append(CollectMixin.DataArtifact(metric=metric, - reference_type=reference_type, - compound_reference=compound_reference, - value=value)) - - return 
CollectMixin.CollectArtifact(data=artifacts, metadata=meta) - @classmethod def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: @@ -422,17 +533,49 @@ class CollectArtifact(BaseModel): define logic for the save_to_file as well. """ - # TODO: implement the logic for creating a CollectArtifact, as above. - # - - raise NotImplementedError + collector = None + if collect_config.mode == 'local': + collector = NTPLocalCollector() + elif collect_config.mode == 'remote': + collector = NTPRemoteCollector(target_host=collect_config.ip_address, + target_port=collect_config.port, + timeout=collect_config.timeout) + if collector is None: + raise ValueError('Could not determine mode from collect_config') + collector.collect() + + return collector.export() @classmethod def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: - # TODO: Create the str content for an output file. Ensure readable by parse functions & that required metadata is available - # Filename will be automatically generated as {ip_address}_{probe_id}_{vendor}_{timestamp}.txt and saved to directory provided by cli - raise NotImplementedError - + single_reference = collected.single_reference + first_data = next(iter(collected.data or []), None) + if not single_reference: + collected.metadata['reference'] = 'varied' + elif first_data and first_data.compound_reference: + collected.metadata['reference'] = json.dumps(collected.single_reference) + + metric_names = NTPCollector.invert_metric_map() + dfs = [] + for d in collected.data or []: + df = d.value + df['metric'] = metric_names.get(d.metric.name, d.metric.name.lower().replace(' ', '_')) + if not single_reference: + df['reference'] = json.dumps(d.compound_reference) + dfs.append(df) + value_df = pd.concat(dfs) if dfs else None + + header = yaml.dump(collected.metadata, sort_keys=False) + header = textwrap.indent(header, prefix='# ') + buffer = StringIO() + buffer.write(header) + buffer.write('\n') + + if value_df is 
not None: + # write dataframe + value_df.to_csv(buffer, index=False) + + return buffer.getvalue() From 99f40334f81f08eac8c20d2f2c40e36772f980a8 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 15:34:30 -0400 Subject: [PATCH 05/22] ok loading works --- opensampl/db/orm.py | 10 +- opensampl/load_data.py | 6 +- opensampl/vendors/ntp2.py | 206 +++++++++++++++++++++++--------------- 3 files changed, 137 insertions(+), 85 deletions(-) diff --git a/opensampl/db/orm.py b/opensampl/db/orm.py index 9927c5c..6cc4631 100644 --- a/opensampl/db/orm.py +++ b/opensampl/db/orm.py @@ -181,7 +181,7 @@ class ProbeMetadata(Base): adva_metadata = relationship("AdvaMetadata", back_populates="probe", uselist=False) microchip_twst_metadata = relationship("MicrochipTWSTMetadata", back_populates="probe", uselist=False) microchip_tp4100_metadata = relationship("MicrochipTP4100Metadata", back_populates="probe", uselist=False) - ntp_metadata = relationship("NtpMetadata", back_populates="probe", uselist=False) + ntp_metadata2 = relationship("NtpMetadata2", back_populates="probe", uselist=False) # --- CUSTOM PROBE METADATA RELATIONSHIP --- @@ -440,16 +440,16 @@ class NtpMetadata2(Base): probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True) mode = Column(Text) + reference = Column(Boolean, comment="Is used as a reference for other probes") target_host = Column(Text) target_port = Column(Integer) sync_status = Column(Text) leap_status = Column(Text) - root_delay_s = Column(Float) - root_dispersion_s = Column(Float) - poll_interval_s = Column(Float) reference_id = Column(Text) observation_sources = Column(JSONB) - collection_host = Column(Text) + collection_id = Column(Text) + collection_ip = Column(Text) + timeout = Column(Float) additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="ntp_metadata2") diff --git a/opensampl/load_data.py b/opensampl/load_data.py index f427167..fce9864 100644 --- 
a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -125,9 +125,10 @@ def load_time_data( strict=strict, session=session, ) + probe = data_definition.probe # ty: ignore[possibly-unbound-attribute] probe_readable = ( - data_definition.probe.name # ty: ignore[possibly-unbound-attribute] - or f"{data_definition.probe.ip_address} ({data_definition.probe.probe_id})" # ty: ignore[possibly-unbound-attribute] + probe.name + or f"{probe.ip_address} ({probe.probe_id})" # ty: ignore[possibly-unbound-attribute] ) if any(x is None for x in [data_definition.probe, data_definition.metric, data_definition.reference]): @@ -227,6 +228,7 @@ def create_new_tables(*, _config: BaseConfig, create_schema: bool = True, sessio session.execute(text(f"CREATE SCHEMA IF NOT EXISTS {Base.metadata.schema}")) session.commit() Base.metadata.create_all(session.bind) + session.commit() except Exception as e: session.rollback() logger.error(f"Error writing to table: {e}") diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp2.py index a7e7cb9..b21e512 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp2.py @@ -3,13 +3,15 @@ import pandas as pd import re - +import time +import requests +from opensampl.load_data import load_probe_metadata from opensampl.vendors.base_probe import BaseProbe from opensampl.vendors.constants import ProbeKey, VENDORS from opensampl.references import REF_TYPES, ReferenceType from opensampl.mixins.collect import CollectMixin from typing import Literal, Optional, Any, TypeVar, ClassVar -from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict +from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict, field_validator from pydanclick import from_pydantic import click import shutil @@ -21,6 +23,10 @@ import yaml import textwrap from io import StringIO +import psycopg2.errors + +from sqlalchemy.exc import IntegrityError + T = TypeVar('T') def _merge(a: T | None, b: T | None) -> T | None: @@ -41,6 
+47,8 @@ class NTPCollector(BaseModel): "sync_health": METRICS.NTP_SYNC_HEALTH, } + target_host: str + sync_status: str = Field("unknown") sync_health: float | None = Field(None, json_schema_extra={'metric': True}) @@ -51,7 +59,9 @@ class NTPCollector(BaseModel): jitter_s: float | None = Field(None, json_schema_extra={'metric': True}) reference_id: str | None = None observation_sources: list[str] = Field(default_factory=list) - collection_host: str = Field(default_factory=socket.gethostname) + collection_id: str + collection_ip: str + probe_id: str | None = None extras: dict = Field(default_factory=dict, serialization_alias='additional_metadata') model_config = ConfigDict(serialize_by_alias=True) @@ -59,9 +69,6 @@ class NTPCollector(BaseModel): def collect(self): raise NotImplementedError() - def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: - return REF_TYPES.UNKNOWN, None - def export_data(self) -> list[CollectMixin.DataArtifact]: now = datetime.now(tz=timezone.utc) include_list = {f for f, field_info @@ -107,6 +114,9 @@ def export(self) -> CollectMixin.CollectArtifact: def invert_metric_map(cls): return {v.name: k for k, v in cls.metric_map.items()} + def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: + return REF_TYPES.PROBE, {'ip_address': self.collection_ip, 'probe_id': self.collection_id} + class NTPLocalCollector(NTPCollector): mode: ClassVar[Literal['remote', 'local']] = 'local' @@ -331,19 +341,12 @@ def collect(self): self.sync_health = 1.0 if self.sync_status in ("tracking", "synchronized", "synced") else 0.0 - def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: - if self.reference_id: - reference_type = REF_TYPES.PROBE - compound_reference = self.reference_id - else: - reference_type = REF_TYPES.UNKNOWN - compound_reference = None - return reference_type, compound_reference + if self.probe_id is None: + self.probe_id = 'ntp-local' class 
NTPRemoteCollector(NTPCollector): mode: ClassVar[Literal['remote', 'local']] = 'remote' - target_host: str target_port: int timeout: float = 3.0 @@ -388,7 +391,7 @@ def collect(self): logger.warning(f"NTP request to {self.target_host}:{self.target_port} failed: {e}") self.configure_failure(e) return - + from pprint import pformat leap = int(resp.leap) leap_map = {0: "no_warning", 1: "add_second", 2: "del_second", 3: "alarm"} self.leap_status = leap_map.get(leap, str(leap)) @@ -421,6 +424,25 @@ def collect(self): self.extras['version'] = getattr(resp, 'version', None) + if self.probe_id is None: + self.probe_id = f'remote:{self.target_port}' + +def collect_ip_factory(): + s = None + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(("8.8.8.8", 80)) # doesn't actually send data + v = s.getsockname()[0] + except: + v = '127.0.0.1' + finally: + if s: + s.close() + return v + +def collect_id_factory(): + return socket.gethostname() or 'collection-host' + class NtpProbe2(BaseProbe, CollectMixin): """Probe parser for NTP2 vendor data files""" @@ -439,6 +461,8 @@ class CollectConfig(CollectMixin.CollectConfig): interval: Seconds between samples; 0 = single sample and exit duration: Samples to collect when interval > 0 timeout: UDP request timeout for remote mode(seconds) default: 3.0 + collection_ip: Override for the IP address of device collecting readings. Will attempt to resolve a local network IP using socket and fall back to '127.0.0.1' + collection_id: Override for the Probe ID of the device collecting readings. 
Will attempt to resolve using socket.gethostname and fall back to 'collection-host' """ ip_address: str = '127.0.0.1' port: Optional[int] = None @@ -446,56 +470,50 @@ class CollectConfig(CollectMixin.CollectConfig): interval: float = 0.0 duration: int = 1 timeout: float = 3.0 + collection_ip: str = Field(default_factory=collect_ip_factory) + collection_id: str = Field(default_factory=collect_id_factory) @classmethod def get_collect_cli_options(cls): return [ - from_pydantic(cls.CollectConfig, rename={'ip_address': 'host'}), + from_pydantic(cls.CollectConfig, rename={'ip_address': 'host', 'duration': 'count'}), click.pass_context, ] def __init__(self, input_file: str, **kwargs): """Initialize NtpProbe2 from input file""" super().__init__(input_file) - # TODO: parse self.input_file to extract self.probe_key - # self.probe_key = ProbeKey(probe_id=..., ip_address=...) + self.collection_probe = None def process_metadata(self) -> dict: """ Parse and return probe metadata from input file. - Expected metadata fields: - ['mode', - 'probe_name', - 'target_host', - 'target_port', - 'sync_status', - 'leap_status', - 'stratum', - 'reachability', - 'offset_s', - 'delay_s', - 'jitter_s', - 'dispersion_s', - 'root_delay_s', - 'root_dispersion_s', - 'poll_interval_s', - 'reference_id', - 'observation_source', - 'collection_host', - 'additional_metadata'] - Returns: dict with metadata field names as keys """ - # TODO: implement metadata parsing - # return { - # "field_name": value, - # ... 
- # } - raise NotImplementedError - - def process_time_data(self) -> pd.DataFrame: + if not self.metadata_parsed: + header_lines = [] + with self.input_file.open() as f: + for line in f: + if line.startswith("#"): + header_lines.append(line[2:]) + else: + break + + header_str = "".join(header_lines) + self.metadata = yaml.safe_load(header_str) + self.collection_probe = ProbeKey(ip_address=self.metadata.get('collection_ip'), + probe_id=self.metadata.get('collection_id')) + load_probe_metadata(vendor=self.vendor, + probe_key=self.collection_probe, + data={'reference': True, }) + self.probe_key = ProbeKey(ip_address=self.metadata.get('target_host'), probe_id=self.metadata.get('probe_id')) + self.metadata_parsed = True + + return self.metadata + + def process_time_data(self) -> None: """ Parse and load time series data from self.input_file. @@ -506,16 +524,37 @@ def process_time_data(self) -> pd.DataFrame: - time (datetime64[ns]): timestamp for each measurement - value (float64): measured value at each timestamp - """ - # TODO: implement time data parsing and call self.send_time_data(df, reference_type) - # or self.send_data(df, metric_type, reference_type) - # df = pd.DataFrame({"time": [...], "value": [...]}) - # self.send_time_data(df, reference_type=...) - - # Ensure the format it is reading in matches that in save_to_file - raise NotImplementedError - + raw_df = pd.read_csv( + self.input_file, + comment="#", + ) + self.process_metadata() + + reference_type = REF_TYPES.PROBE + grouped_dfs: dict[str, pd.DataFrame] = {str(metric): group.reset_index(drop=True) for metric, group in raw_df.groupby('metric')} + for metr, df in grouped_dfs.items(): + metric = NTPCollector.metric_map.get(metr) + if not metric: + logger.warning(f"Metric {metr} is not supported for NTP. 
Will not ingest {len(df)} rows")
+                continue
+            try:
+                self.send_data(data=df,
+                               metric=metric,
+                               reference_type=reference_type,
+                               compound_reference=self.collection_probe.model_dump())
+            except requests.HTTPError as e:
+                resp = e.response
+                if resp is None:
+                    raise
+                status_code = resp.status_code
+                if status_code == 409:
+                    logger.info(f"{metr} against {self.collection_probe} already loaded for time frame, continuing..")
+                    continue
+                raise
+            except IntegrityError as e:
+                if isinstance(e.orig, psycopg2.errors.UniqueViolation):  # ty: ignore[unresolved-attribute]
+                    logger.info(f"{metr} against {self.collection_probe} already loaded for time frame, continuing..")
 
     @classmethod
     def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact:
@@ -533,35 +572,46 @@ class CollectArtifact(BaseModel):
 
         define logic for the save_to_file as well.
         """
-        collector = None
-        if collect_config.mode == 'local':
-            collector = NTPLocalCollector()
-        elif collect_config.mode == 'remote':
-            collector = NTPRemoteCollector(target_host=collect_config.ip_address,
-                                           target_port=collect_config.port,
-                                           timeout=collect_config.timeout)
-        if collector is None:
-            raise ValueError('Could not determine mode from collect_config')
-        collector.collect()
-
-        return collector.export()
+        collector_overrides = collect_config.model_dump(include=['collection_ip', 'collection_id', 'probe_id'], exclude_none=True)
+
+        def collect_once() -> CollectMixin.CollectArtifact:
+            collector = None
+            if collect_config.mode == 'local':
+                collector = NTPLocalCollector(target_host=collect_config.ip_address,
+                                              **collector_overrides)
+            elif collect_config.mode == 'remote':
+                collector = NTPRemoteCollector(target_host=collect_config.ip_address,
+                                               target_port=collect_config.port,
+                                               timeout=collect_config.timeout, **collector_overrides)
+            if collector is None:
+                raise ValueError('Could not determine mode from collect_config')
+            collector.collect()
+
+            return collector.export()
+
+        if 
collect_config.interval <= 0:
+            return collect_once()
+
+        artifact = None
+        for _ in range(max(collect_config.duration, 1)):
+            newer = collect_once()
+            if artifact is None:
+                artifact = newer
+            else:
+                artifact.data.extend(newer.data)
+                artifact.metadata |= newer.metadata
+
+            time.sleep(collect_config.interval)
+
+        return artifact
 
     @classmethod
     def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str:
-        single_reference = collected.single_reference
-        first_data = next(iter(collected.data or []), None)
-        if not single_reference:
-            collected.metadata['reference'] = 'varied'
-        elif first_data and first_data.compound_reference:
-            collected.metadata['reference'] = json.dumps(collected.single_reference)
-
         metric_names = NTPCollector.invert_metric_map()
         dfs = []
         for d in collected.data or []:
             df = d.value
             df['metric'] = metric_names.get(d.metric.name, d.metric.name.lower().replace(' ', '_'))
-            if not single_reference:
-                df['reference'] = json.dumps(d.compound_reference)
             dfs.append(df)
         value_df = pd.concat(dfs) if dfs else None
 

From d20bd097f6a8e15f986f45d107596be9a418dad7 Mon Sep 17 00:00:00 2001
From: "MacFarland, Midgie"
Date: Tue, 14 Apr 2026 15:39:45 -0400
Subject: [PATCH 06/22] doing some cleaning here

---
 opensampl/config/server.py      | 12 ++++++++++++
 opensampl/vendors/base_probe.py | 12 ++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/opensampl/config/server.py b/opensampl/config/server.py
index 6478145..0e302e1 100644
--- a/opensampl/config/server.py
+++ b/opensampl/config/server.py
@@ -35,6 +35,8 @@ class ServerConfig(BaseConfig):
 
     COMPOSE_FILE: str = Field(default="", description="Fully resolved path to the Docker Compose file.")
 
+    OVERRIDE_FILE: str | None = Field(default=None, description="Override for the compose file")
+
     DOCKER_ENV_FILE: str = Field(default="", description="Fully resolved path to the Docker .env file.")
 
     docker_env_values: dict[str, Any] = Field(default_factory=dict, init=False)
@@ -67,6 +69,14 @@ 
def resolve_compose_file(cls, v: Any) -> str: return get_resolved_resource_path(opensampl.server, "docker-compose.yaml") return str(Path(v).expanduser().resolve()) + @field_validator("OVERRIDE_FILE", mode="before") + @classmethod + def resolve_override_file(cls, v: Any) -> str: + """Resolve the provided compose file for docker to use, or default to the docker-compose.yaml provided""" + if v: + return str(Path(v).expanduser().resolve()) + return v + @field_validator("DOCKER_ENV_FILE", mode="before") @classmethod def resolve_docker_env_file(cls, v: Any) -> str: @@ -89,6 +99,8 @@ def build_docker_compose_base(self): compose_command = self.get_compose_command() command = shlex.split(compose_command) command.extend(["--env-file", self.DOCKER_ENV_FILE, "-f", self.COMPOSE_FILE]) + if self.OVERRIDE_FILE: + command.extend(["-f", self.OVERRIDE_FILE]) return command def set_by_name(self, name: str, value: Any): diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py index 1253844..020213b 100644 --- a/opensampl/vendors/base_probe.py +++ b/opensampl/vendors/base_probe.py @@ -461,16 +461,16 @@ def ip_address(self): return self.probe_key.ip_address @abstractmethod - def process_time_data(self) -> pd.DataFrame: + def process_time_data(self) -> None: """ - Process time series data. + Parse and load time series data from self.input_file. - Returns - ------- - pd.DataFrame: DataFrame with columns: + Use either send_time_data (which prefills METRICS.PHASE_OFFSET) + or send_data and provide alternative METRICS type. 
+ Both require a df as follows: + pd.DataFrame with columns: - time (datetime64[ns]): timestamp for each measurement - value (float64): measured value at each timestamp - """ @dualmethod From 06a0e1bc3ed35d24ee325f30af904022d61ff3e2 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 15:48:22 -0400 Subject: [PATCH 07/22] some funkiness from calling as a class --- opensampl/vendors/base_probe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py index 020213b..68ddd59 100644 --- a/opensampl/vendors/base_probe.py +++ b/opensampl/vendors/base_probe.py @@ -483,13 +483,13 @@ def send_data( probe_key: ProbeKey | None = None, ) -> None: """Ingests data into the database""" - if isinstance(self, BaseProbe): + if isinstance(self, BaseProbe) and probe_key is None: probe_key = self.probe_key if probe_key is None: raise ValueError("send data must be called with probe_key if used as class method") - if self.chunk_size: + if hasattr(self, 'chunk_size') and self.chunk_size: for chunk_start in range(0, len(data), self.chunk_size): chunk = data.iloc[chunk_start : chunk_start + self.chunk_size] load_time_data( From 2a5fba1ba0f7d9bf86ec2c607db2974e91523b35 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 16:11:32 -0400 Subject: [PATCH 08/22] missed the extra metadata when loading direct (no file) --- opensampl/vendors/ntp2.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp2.py index b21e512..b63a557 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp2.py @@ -513,6 +513,23 @@ def process_metadata(self) -> dict: return self.metadata + @classmethod + def load_metadata(cls, probe_key: ProbeKey, metadata: dict) -> None: + """ + Parse and return probe metadata from input file. 
+ + Returns: + dict with metadata field names as keys + """ + collection_probe = ProbeKey(ip_address=metadata.get('collection_ip'), + probe_id=metadata.get('collection_id')) + load_probe_metadata(vendor=cls.vendor, + probe_key=collection_probe, + data={'reference': True, }) + load_probe_metadata(vendor=cls.vendor, + probe_key=probe_key, + data=metadata) + def process_time_data(self) -> None: """ Parse and load time series data from self.input_file. From a5c2f2b7af7279fef961d26d2563f8b2bc49adac Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 17:02:21 -0400 Subject: [PATCH 09/22] adding random --- opensampl/vendors/ntp2.py | 95 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp2.py index b63a557..00e226f 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp2.py @@ -10,20 +10,23 @@ from opensampl.vendors.constants import ProbeKey, VENDORS from opensampl.references import REF_TYPES, ReferenceType from opensampl.mixins.collect import CollectMixin +from opensampl.mixins.random_data import RandomDataMixin from typing import Literal, Optional, Any, TypeVar, ClassVar from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict, field_validator from pydanclick import from_pydantic import click import shutil import subprocess -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from loguru import logger from opensampl.metrics import METRICS, MetricType import json +import random import yaml import textwrap from io import StringIO import psycopg2.errors +import numpy as np from sqlalchemy.exc import IntegrityError @@ -441,9 +444,12 @@ def collect_ip_factory(): return v def collect_id_factory(): - return socket.gethostname() or 'collection-host' + try: + return socket.gethostname() or 'collection-host' + except Exception: + return 'collection-host' -class NtpProbe2(BaseProbe, 
CollectMixin): +class NtpProbe2(BaseProbe, CollectMixin, RandomDataMixin): """Probe parser for NTP2 vendor data files""" vendor = VENDORS.NTP2 @@ -480,6 +486,22 @@ def get_collect_cli_options(cls): click.pass_context, ] + class RandomDataConfig(RandomDataMixin.RandomDataConfig): + """Random NTP-like test data.""" + + base_value: float = Field( + default_factory=lambda: random.uniform(-1e-4, 1e-4), + description="random.uniform(-1e-4, 1e-4)", + ) + noise_amplitude: float = Field( + default_factory=lambda: random.uniform(1e-9, 1e-7), + description="random.uniform(1e-9, 1e-7)", + ) + drift_rate: float = Field( + default_factory=lambda: random.uniform(-1e-12, 1e-12), + description="random.uniform(-1e-12, 1e-12)", + ) + def __init__(self, input_file: str, **kwargs): """Initialize NtpProbe2 from input file""" super().__init__(input_file) @@ -644,6 +666,73 @@ def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: return buffer.getvalue() + @classmethod + def generate_random_data( + cls, + config: RandomDataConfig, + probe_key: ProbeKey, + ) -> ProbeKey: + """Generate synthetic NTP-like metrics for testing.""" + cls._setup_random_seed(config.seed) + logger.info(f"Generating random NTP data for {probe_key}") + + meta = { + "mode": "random", + "name": f"Random NTP {probe_key}", + "target_host": "", + "target_port": 0, + "sync_status": "tracking", + "leap_status": "no_warning", + "observation_sources": ["random"], + "additional_metadata": {"test_data": True}, + } + cls._send_metadata_to_db(probe_key, meta) + + total_seconds = config.duration_hours * 3600 + num_samples = int(total_seconds / config.sample_interval) + times = [] + metric_maps = { + 'offset': {'metric': METRICS.PHASE_OFFSET, + 'values': []}, + 'delay_s': {'metric': METRICS.NTP_DELAY, + 'values': []}, + 'jitter_s': {'metric': METRICS.NTP_JITTER, + 'values': []}, + 'stratum': {'metric': METRICS.NTP_STRATUM, + 'values': []}, + 'sync_health': {'metric': METRICS.NTP_SYNC_HEALTH, + 'values': []}, 
+ } + + for i in range(num_samples): + sample_time = config.start_time + timedelta(seconds=i * config.sample_interval) + times.append(sample_time) + time_offset = i * config.sample_interval + drift_component = config.drift_rate * time_offset + noise = float(np.random.normal(0, config.noise_amplitude)) + offset = config.base_value + drift_component + noise + if random.random() < config.outlier_probability: + offset += float(np.random.normal(0, config.noise_amplitude * config.outlier_multiplier)) + + delay_s = 0.02 + abs(0.0001 * random.random()) + jitter_s = abs(float(config.noise_amplitude * 5)) + stratum = 2.0 + (1.0 if random.random() < 0.05 else 0.0) + sync_health = 1.0 + metric_maps['offset']['values'].append(offset) + metric_maps['delay_s']['values'].append(delay_s) + metric_maps['jitter_s']['values'].append(jitter_s) + metric_maps['stratum']['values'].append(stratum) + metric_maps['sync_health']['values'].append(sync_health) + + for metric in metric_maps.values(): + cls.send_data(probe_key=probe_key, + metric=metric.get('metric'), + reference_type=REF_TYPES.UNKNOWN, + data=pd.DataFrame({"time": times, "value": metric.get('values')})) + + logger.info(f"Finished random NTP generation for {probe_key}") + return probe_key + From 07f9190414f879929b0a1b7c8076c7aca74f5616 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 17:08:50 -0400 Subject: [PATCH 10/22] removing the 2 suffix --- .gitignore | 3 +++ opensampl/db/orm.py | 13 +++++++------ opensampl/metrics.py | 5 +++-- opensampl/vendors/base_probe.py | 1 + opensampl/vendors/constants.py | 12 ++++++------ opensampl/vendors/{ntp2.py => ntp.py} | 10 +++++----- 6 files changed, 25 insertions(+), 19 deletions(-) rename opensampl/vendors/{ntp2.py => ntp.py} (99%) diff --git a/.gitignore b/.gitignore index 41e29a2..8d8939a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# OpenSAMPL data paths +archive/ +ntp-snapshots/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff 
--git a/opensampl/db/orm.py b/opensampl/db/orm.py index 6cc4631..ca49647 100644 --- a/opensampl/db/orm.py +++ b/opensampl/db/orm.py @@ -181,7 +181,7 @@ class ProbeMetadata(Base): adva_metadata = relationship("AdvaMetadata", back_populates="probe", uselist=False) microchip_twst_metadata = relationship("MicrochipTWSTMetadata", back_populates="probe", uselist=False) microchip_tp4100_metadata = relationship("MicrochipTP4100Metadata", back_populates="probe", uselist=False) - ntp_metadata2 = relationship("NtpMetadata2", back_populates="probe", uselist=False) + ntp_metadata = relationship("NtpMetadata", back_populates="probe", uselist=False) # --- CUSTOM PROBE METADATA RELATIONSHIP --- @@ -433,10 +433,8 @@ class MicrochipTP4100Metadata(Base): additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="microchip_tp4100_metadata") - -# --- CUSTOM TABLES --- !! Do not remove line, used as reference when inserting metadata table -class NtpMetadata2(Base): - __tablename__ = "ntp_metadata2" +class NtpMetadata(Base): + __tablename__ = "ntp_metadata" probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True) mode = Column(Text) @@ -451,7 +449,10 @@ class NtpMetadata2(Base): collection_ip = Column(Text) timeout = Column(Float) additional_metadata = Column(JSONB) - probe = relationship("ProbeMetadata", back_populates="ntp_metadata2") + probe = relationship("ProbeMetadata", back_populates="ntp_metadata") + +# --- CUSTOM TABLES --- !! Do not remove line, used as reference when inserting metadata table + # --- TABLE FUNCTIONS --- diff --git a/opensampl/metrics.py b/opensampl/metrics.py index d859cb4..4900e37 100644 --- a/opensampl/metrics.py +++ b/opensampl/metrics.py @@ -62,8 +62,6 @@ class METRICS: unit="unknown", value_type=object, ) - - # --- CUSTOM METRICS --- !! 
Do not remove line, used as reference when inserting metric NTP_DELAY = MetricType( name="NTP Delay", description="Round-trip delay (RTT) to the NTP server or observed path delay in seconds", @@ -121,3 +119,6 @@ class METRICS: unit="ratio", value_type=float, ) + + # --- CUSTOM METRICS --- !! Do not remove line, used as reference when inserting metric + diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py index 68ddd59..82bccd1 100644 --- a/opensampl/vendors/base_probe.py +++ b/opensampl/vendors/base_probe.py @@ -538,6 +538,7 @@ def _send_metadata_to_db(cls, probe_key: ProbeKey, metadata: dict) -> None: load_probe_metadata(vendor=cls.vendor, probe_key=probe_key, data=metadata) logger.debug(f"Sent metadata for probe {probe_key}") + def send_metadata(self): """Send metadata to database""" metadata = self.process_metadata() diff --git a/opensampl/vendors/constants.py b/opensampl/vendors/constants.py index 9c4b27c..66b4ac5 100644 --- a/opensampl/vendors/constants.py +++ b/opensampl/vendors/constants.py @@ -71,12 +71,12 @@ class VENDORS: ) # --- CUSTOM VENDORS --- !! 
Do not remove line, used as reference when inserting vendor - NTP2 = VendorType( - name='NTP2', - parser_class='NtpProbe2', - parser_module='ntp2', - metadata_table='ntp_metadata2', - metadata_orm='NtpMetadata2' + NTP = VendorType( + name='NTP', + parser_class='NtpProbe', + parser_module='ntp', + metadata_table='ntp_metadata', + metadata_orm='NtpMetadata' ) diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp.py similarity index 99% rename from opensampl/vendors/ntp2.py rename to opensampl/vendors/ntp.py index 00e226f..38427e8 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp.py @@ -1,4 +1,4 @@ -"""Probe implementation for NTP2 vendor""" +"""Probe implementation for NTP vendor""" import socket import pandas as pd @@ -449,10 +449,10 @@ def collect_id_factory(): except Exception: return 'collection-host' -class NtpProbe2(BaseProbe, CollectMixin, RandomDataMixin): - """Probe parser for NTP2 vendor data files""" +class NtpProbe(BaseProbe, CollectMixin, RandomDataMixin): + """Probe parser for NTP vendor data files""" - vendor = VENDORS.NTP2 + vendor = VENDORS.NTP class CollectConfig(CollectMixin.CollectConfig): """ @@ -503,7 +503,7 @@ class RandomDataConfig(RandomDataMixin.RandomDataConfig): ) def __init__(self, input_file: str, **kwargs): - """Initialize NtpProbe2 from input file""" + """Initialize NtpProbe from input file""" super().__init__(input_file) self.collection_probe = None From 82fb461799abac2ec10a72b2df305419db398bab Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 18:09:02 -0400 Subject: [PATCH 11/22] adding the geolocator stuff --- opensampl/config/base.py | 2 + opensampl/helpers/geolocator.py | 117 ++++++++++++++++++++++++++++++++ opensampl/load_data.py | 11 +++ 3 files changed, 130 insertions(+) create mode 100644 opensampl/helpers/geolocator.py diff --git a/opensampl/config/base.py b/opensampl/config/base.py index aa2af28..8a39790 100644 --- a/opensampl/config/base.py +++ b/opensampl/config/base.py @@ 
-43,6 +43,8 @@ class BaseConfig(BaseSettings): False, description="Allow insecure requests to be made to the backend", alias="INSECURE_REQUESTS" ) + ENABLE_GEOLOCATE: bool = Field(False, description="Enable geolocate features which extract a location from ip addresses", alias="ENABLE_GEOLOCATE") + @field_serializer("ARCHIVE_PATH") def convert_to_str(self, v: Path) -> str: """Convert archive path to a string for serialization""" diff --git a/opensampl/helpers/geolocator.py b/opensampl/helpers/geolocator.py new file mode 100644 index 0000000..8ed8490 --- /dev/null +++ b/opensampl/helpers/geolocator.py @@ -0,0 +1,117 @@ +"""Associate NTP probes with ``castdb.locations`` for the geospatial Grafana dashboard.""" + +from __future__ import annotations + +import ipaddress +import json +import os +import socket +import urllib.request +from typing import TYPE_CHECKING, Any + +from loguru import logger + +from opensampl.load.table_factory import TableFactory + +if TYPE_CHECKING: + from sqlalchemy.orm import Session + + from opensampl.vendors.constants import ProbeKey + +_GEO_CACHE: dict[str, tuple[float, float, str]] = {} + + +def _env_bool(name: str, default: bool) -> bool: + v = os.getenv(name) + if v is None: + return default + return v.strip().lower() in ("1", "true", "yes", "on") + + +def _default_lab_coords() -> tuple[float, float]: + lat = float(os.getenv("DEFAULT_LAT", "37.4419")) + lon = float(os.getenv("DEFAULT_LON", "-122.1430")) + return lat, lon + + +def _is_private_or_loopback(ip: str) -> bool: + try: + addr = ipaddress.ip_address(ip) + except ValueError: + return True + return bool(addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_reserved) + + +def _lookup_geo_ipapi(ip: str) -> tuple[float, float, str] | None: + if ip in _GEO_CACHE: + return _GEO_CACHE[ip] + url = f"http://ip-api.com/json/{ip}?fields=status,lat,lon,city,country" + try: + with urllib.request.urlopen(url, timeout=4.0) as resp: # noqa: S310 + body = 
json.loads(resp.read().decode("utf-8")) + except Exception as e: + logger.warning("ip-api geolocation failed for {}: {}", ip, e) + return None + + if body.get("status") != "success" or body.get("lat") is None or body.get("lon") is None: + logger.warning("ip-api returned no coordinates for {}", ip) + return None + + city = body.get("city") or "" + country = body.get("country") or "" + label = ", ".join(x for x in (city, country) if x) + out = (float(body["lat"]), float(body["lon"]), label or ip) + _GEO_CACHE[ip] = out + return out + +def create_location(session: Session, geolocate_enabled: bool, ip_address: str, geo_override: dict) -> str | None: + """ + Set probe ``name``, ``public``, and ``location_uuid`` on NTP metadata before ``probe_metadata`` insert. + + Uses ``additional_metadata.geo_override`` when present (lat/lon/label). Otherwise resolves the remote + host, uses RFC1918/loopback defaults from env, or ip-api.com for public IPs (HTTP, no API key). + """ + + lat: float | None = None + lon: float | None = None + name: str | None = None + + if isinstance(geo_override, dict) and geo_override.get("lat") is not None and geo_override.get("lon") is not None: + lat = float(geo_override["lat"]) + lon = float(geo_override["lon"]) + + if isinstance(geo_override, dict) and geo_override.get("name") is not None: + name = geo_override["name"] + + if geolocate_enabled and lat is None and lon is None: + ip_for_geo = ip_address + try: + ip_for_geo = socket.gethostbyname(ip_address) + except OSError as e: + logger.debug("Could not resolve {}: {}", ip_address, e) + + if _is_private_or_loopback(ip_for_geo): + lat, lon = _default_lab_coords() + else: + geo = _lookup_geo_ipapi(ip_for_geo) + if geo: + lat, lon, _name = geo + name = name or _name + else: + lat, lon = _default_lab_coords() + + loc_factory = TableFactory("locations", session=session) + loc = None + if name: + loc = loc_factory.find_existing({"name": name}) + + if loc is None: + loc = loc_factory.write( + {"name": 
name, "lat": lat, "lon": lon, "public": True}, + if_exists="ignore", + ) + + if loc: + return loc.uuid + return None + diff --git a/opensampl/load_data.py b/opensampl/load_data.py index fce9864..e723851 100644 --- a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -15,6 +15,7 @@ from opensampl.metrics import MetricType from opensampl.references import ReferenceType from opensampl.vendors.constants import ProbeKey, VendorType +from opensampl.helpers.geolocator import create_location conflict_actions = Literal["error", "replace", "update", "ignore"] @@ -200,6 +201,16 @@ def load_probe_metadata( pm_cols = {col.name for col in pm_factory.inspector.columns} probe_info = {k: data.pop(k) for k in list(data.keys()) if k in pm_cols} + location_name = probe_info.pop('location_name', None) + geolocation = ({'name': location_name} if location_name else {}) | probe_info.pop('geolocation', {}) + + if geolocation or _config.ENABLE_GEOLOCATE: + location_uuid = create_location(session, + geolocate_enabled=_config.ENABLE_GEOLOCATE, + geo_override=geolocation, ip_address=probe_key.ip_address) + if location_uuid: + probe_info.update({'location_uuid': location_uuid}) + probe_info.update({"probe_id": probe_key.probe_id, "ip_address": probe_key.ip_address, "vendor": vendor.name}) probe = pm_factory.write(data=probe_info, if_exists="update") From e3618a1e51da42dfd7611a20f3c87a01b5598a21 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Wed, 15 Apr 2026 09:25:37 -0400 Subject: [PATCH 12/22] hmm i think this is it --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d297b5d..ce69916 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,10 @@ backend = [ "uvicorn", "prometheus-client", ] -collect = ["telnetlib3==2.0.4"] +collect = [ + "telnetlib3==2.0.4", + "ntplib>=0.4.0,<0.5" +] [project.scripts] opensampl = "opensampl.cli:cli" @@ -147,6 +150,7 @@ ignore = ["D203", "D212", "D400", "D415", 
"ANN401", "S101", "PLR2004", "COM812", "opensampl/vendors/**/*.py" = ['S311'] # we want to ignore the errors about random "opensampl/server/backend/main.py" = ['B008', 'ARG001'] #ignore complaints about calling functions in args "opensampl/mixins/random_data.py" = ['S311'] + [tool.ruff.lint.pylint] max-args = 10 From 2d31e9a14c8055c8d2ef54709ea1a532266fd9df Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Thu, 16 Apr 2026 17:01:37 -0400 Subject: [PATCH 13/22] so ruff out here --- opensampl/config/base.py | 6 +- opensampl/config/server.py | 12 +- opensampl/db/orm.py | 4 + opensampl/helpers/geolocator.py | 6 +- opensampl/load_data.py | 20 +- opensampl/metrics.py | 1 - opensampl/mixins/collect.py | 2 +- opensampl/vendors/base_probe.py | 3 +- opensampl/vendors/constants.py | 11 +- opensampl/vendors/ntp.py | 444 +++++++++++++++++--------------- pyproject.toml | 4 +- 11 files changed, 275 insertions(+), 238 deletions(-) diff --git a/opensampl/config/base.py b/opensampl/config/base.py index 8a39790..b304dfd 100644 --- a/opensampl/config/base.py +++ b/opensampl/config/base.py @@ -43,7 +43,11 @@ class BaseConfig(BaseSettings): False, description="Allow insecure requests to be made to the backend", alias="INSECURE_REQUESTS" ) - ENABLE_GEOLOCATE: bool = Field(False, description="Enable geolocate features which extract a location from ip addresses", alias="ENABLE_GEOLOCATE") + ENABLE_GEOLOCATE: bool = Field( + False, + description="Enable geolocate features which extract a location from ip addresses", + alias="ENABLE_GEOLOCATE", + ) @field_serializer("ARCHIVE_PATH") def convert_to_str(self, v: Path) -> str: diff --git a/opensampl/config/server.py b/opensampl/config/server.py index 0e302e1..a0bc6e6 100644 --- a/opensampl/config/server.py +++ b/opensampl/config/server.py @@ -5,11 +5,12 @@ configuration validation, and settings management. 
""" +from __future__ import annotations + import shlex from importlib.resources import as_file, files from pathlib import Path -from types import ModuleType -from typing import Any, Union +from typing import TYPE_CHECKING, Any from dotenv import dotenv_values, set_key from loguru import logger @@ -20,8 +21,11 @@ from opensampl.config.base import BaseConfig from opensampl.server import check_command +if TYPE_CHECKING: + from types import ModuleType + -def get_resolved_resource_path(pkg: Union[str, ModuleType], relative_path: str) -> str: +def get_resolved_resource_path(pkg: str | ModuleType, relative_path: str) -> str: """Retrieve the resolved path to a resource in a package.""" resource = files(pkg).joinpath(relative_path) with as_file(resource) as real_path: @@ -56,7 +60,7 @@ def _ignore_in_set(self) -> list[str]: return ignored @model_validator(mode="after") - def get_docker_values(self) -> "ServerConfig": + def get_docker_values(self) -> ServerConfig: """Get the values that the docker containers will use on startup""" self.docker_env_values = dotenv_values(self.DOCKER_ENV_FILE) return self diff --git a/opensampl/db/orm.py b/opensampl/db/orm.py index ca49647..7c214db 100644 --- a/opensampl/db/orm.py +++ b/opensampl/db/orm.py @@ -433,7 +433,10 @@ class MicrochipTP4100Metadata(Base): additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="microchip_tp4100_metadata") + class NtpMetadata(Base): + """NTP Clock Probe specific metadata""" + __tablename__ = "ntp_metadata" probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True) @@ -451,6 +454,7 @@ class NtpMetadata(Base): additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="ntp_metadata") + # --- CUSTOM TABLES --- !! 
Do not remove line, used as reference when inserting metadata table diff --git a/opensampl/helpers/geolocator.py b/opensampl/helpers/geolocator.py index 8ed8490..99e9858 100644 --- a/opensampl/helpers/geolocator.py +++ b/opensampl/helpers/geolocator.py @@ -7,7 +7,7 @@ import os import socket import urllib.request -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from loguru import logger @@ -16,7 +16,6 @@ if TYPE_CHECKING: from sqlalchemy.orm import Session - from opensampl.vendors.constants import ProbeKey _GEO_CACHE: dict[str, tuple[float, float, str]] = {} @@ -64,6 +63,7 @@ def _lookup_geo_ipapi(ip: str) -> tuple[float, float, str] | None: _GEO_CACHE[ip] = out return out + def create_location(session: Session, geolocate_enabled: bool, ip_address: str, geo_override: dict) -> str | None: """ Set probe ``name``, ``public``, and ``location_uuid`` on NTP metadata before ``probe_metadata`` insert. @@ -71,7 +71,6 @@ def create_location(session: Session, geolocate_enabled: bool, ip_address: str, Uses ``additional_metadata.geo_override`` when present (lat/lon/label). Otherwise resolves the remote host, uses RFC1918/loopback defaults from env, or ip-api.com for public IPs (HTTP, no API key). 
""" - lat: float | None = None lon: float | None = None name: str | None = None @@ -114,4 +113,3 @@ def create_location(session: Session, geolocate_enabled: bool, ip_address: str, if loc: return loc.uuid return None - diff --git a/opensampl/load_data.py b/opensampl/load_data.py index e723851..dc163e7 100644 --- a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -10,12 +10,12 @@ from opensampl.config.base import BaseConfig from opensampl.db.orm import Base, ProbeData +from opensampl.helpers.geolocator import create_location from opensampl.load.routing import route from opensampl.load.table_factory import TableFactory from opensampl.metrics import MetricType from opensampl.references import ReferenceType from opensampl.vendors.constants import ProbeKey, VendorType -from opensampl.helpers.geolocator import create_location conflict_actions = Literal["error", "replace", "update", "ignore"] @@ -128,8 +128,7 @@ def load_time_data( ) probe = data_definition.probe # ty: ignore[possibly-unbound-attribute] probe_readable = ( - probe.name - or f"{probe.ip_address} ({probe.probe_id})" # ty: ignore[possibly-unbound-attribute] + probe.name or f"{probe.ip_address} ({probe.probe_id})" # ty: ignore[possibly-unbound-attribute] ) if any(x is None for x in [data_definition.probe, data_definition.metric, data_definition.reference]): @@ -201,15 +200,18 @@ def load_probe_metadata( pm_cols = {col.name for col in pm_factory.inspector.columns} probe_info = {k: data.pop(k) for k in list(data.keys()) if k in pm_cols} - location_name = probe_info.pop('location_name', None) - geolocation = ({'name': location_name} if location_name else {}) | probe_info.pop('geolocation', {}) + location_name = probe_info.pop("location_name", None) + geolocation = ({"name": location_name} if location_name else {}) | probe_info.pop("geolocation", {}) if geolocation or _config.ENABLE_GEOLOCATE: - location_uuid = create_location(session, - geolocate_enabled=_config.ENABLE_GEOLOCATE, - geo_override=geolocation, 
ip_address=probe_key.ip_address) + location_uuid = create_location( + session, + geolocate_enabled=_config.ENABLE_GEOLOCATE, + geo_override=geolocation, + ip_address=probe_key.ip_address, + ) if location_uuid: - probe_info.update({'location_uuid': location_uuid}) + probe_info.update({"location_uuid": location_uuid}) probe_info.update({"probe_id": probe_key.probe_id, "ip_address": probe_key.ip_address, "vendor": vendor.name}) probe = pm_factory.write(data=probe_info, if_exists="update") diff --git a/opensampl/metrics.py b/opensampl/metrics.py index 4900e37..72979b8 100644 --- a/opensampl/metrics.py +++ b/opensampl/metrics.py @@ -121,4 +121,3 @@ class METRICS: ) # --- CUSTOM METRICS --- !! Do not remove line, used as reference when inserting metric - diff --git a/opensampl/mixins/collect.py b/opensampl/mixins/collect.py index 91da293..ed48d93 100644 --- a/opensampl/mixins/collect.py +++ b/opensampl/mixins/collect.py @@ -58,7 +58,7 @@ class CollectConfig(BaseModel): Attributes: output_dir: When provided, will save collected data as a file to provided directory. 
-            Filename will be automatically generated as {ip_address}_{probe_id}_{vendor}_{timestamp}.txt
+            Filename will be automatically generated as {vendor}_{ip_address}_{probe_id}_{timestamp}.txt
         load: Whether to load collected data directly to the database
         duration: Number of seconds to collect data for

diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py
index 82bccd1..388fdc8 100644
--- a/opensampl/vendors/base_probe.py
+++ b/opensampl/vendors/base_probe.py
@@ -489,7 +489,7 @@ def send_data(
         if probe_key is None:
             raise ValueError("send data must be called with probe_key if used as class method")

-        if hasattr(self, 'chunk_size') and self.chunk_size:
+        if hasattr(self, "chunk_size") and self.chunk_size:
             for chunk_start in range(0, len(data), self.chunk_size):
                 chunk = data.iloc[chunk_start : chunk_start + self.chunk_size]
                 load_time_data(
@@ -538,7 +538,6 @@ def _send_metadata_to_db(cls, probe_key: ProbeKey, metadata: dict) -> None:
         load_probe_metadata(vendor=cls.vendor, probe_key=probe_key, data=metadata)
         logger.debug(f"Sent metadata for probe {probe_key}")

-
     def send_metadata(self):
         """Send metadata to database"""
         metadata = self.process_metadata()
diff --git a/opensampl/vendors/constants.py b/opensampl/vendors/constants.py
index 66b4ac5..e2bc68a 100644
--- a/opensampl/vendors/constants.py
+++ b/opensampl/vendors/constants.py
@@ -72,14 +72,13 @@ class VENDORS:

     # --- CUSTOM VENDORS --- !!
Do not remove line, used as reference when inserting vendor NTP = VendorType( - name='NTP', - parser_class='NtpProbe', - parser_module='ntp', - metadata_table='ntp_metadata', - metadata_orm='NtpMetadata' + name="NTP", + parser_class="NtpProbe", + parser_module="ntp", + metadata_table="ntp_metadata", + metadata_orm="NtpMetadata", ) - # --- VENDOR FUNCTIONS --- @classmethod diff --git a/opensampl/vendors/ntp.py b/opensampl/vendors/ntp.py index 38427e8..8ecb065 100644 --- a/opensampl/vendors/ntp.py +++ b/opensampl/vendors/ntp.py @@ -1,42 +1,49 @@ """Probe implementation for NTP vendor""" -import socket -import pandas as pd +from __future__ import annotations + +import contextlib +import random import re -import time -import requests -from opensampl.load_data import load_probe_metadata -from opensampl.vendors.base_probe import BaseProbe -from opensampl.vendors.constants import ProbeKey, VENDORS -from opensampl.references import REF_TYPES, ReferenceType -from opensampl.mixins.collect import CollectMixin -from opensampl.mixins.random_data import RandomDataMixin -from typing import Literal, Optional, Any, TypeVar, ClassVar -from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict, field_validator -from pydanclick import from_pydantic -import click import shutil +import socket import subprocess -from datetime import datetime, timezone, timedelta -from loguru import logger -from opensampl.metrics import METRICS, MetricType -import json -import random -import yaml import textwrap +import time +from datetime import datetime, timedelta, timezone from io import StringIO -import psycopg2.errors -import numpy as np +from typing import Any, Callable, ClassVar, Literal, TypeVar +import click +import numpy as np +import pandas as pd +import psycopg2.errors +import requests +import yaml +from loguru import logger +from pydanclick import from_pydantic +from pydantic import BaseModel, ConfigDict, Field from sqlalchemy.exc import IntegrityError +from 
opensampl.load_data import load_probe_metadata +from opensampl.metrics import METRICS, MetricType +from opensampl.mixins.collect import CollectMixin +from opensampl.mixins.random_data import RandomDataMixin +from opensampl.references import REF_TYPES, ReferenceType +from opensampl.vendors.base_probe import BaseProbe +from opensampl.vendors.constants import VENDORS, ProbeKey + +T = TypeVar("T") + -T = TypeVar('T') def _merge(a: T | None, b: T | None) -> T | None: return a if a is not None else b + class NTPCollector(BaseModel): - mode: ClassVar[Literal['remote', 'local']] + """Base class for NTP Collector, for specific implementations to inherit.""" + + mode: ClassVar[Literal["remote", "local"]] metric_map: ClassVar[dict[str, MetricType]] = { "phase_offset_s": METRICS.PHASE_OFFSET, "delay_s": METRICS.NTP_DELAY, @@ -53,30 +60,38 @@ class NTPCollector(BaseModel): target_host: str sync_status: str = Field("unknown") - sync_health: float | None = Field(None, json_schema_extra={'metric': True}) + sync_health: float | None = Field(None, json_schema_extra={"metric": True}) - stratum: float | None = Field(None, json_schema_extra={'metric': True}) - reachability: int | None = Field(None, json_schema_extra={'metric': True}) - offset_s: float | None = Field(None, serialization_alias='phase_offset_s', json_schema_extra={'metric': True}) - delay_s: float | None = Field(None, json_schema_extra={'metric': True}) - jitter_s: float | None = Field(None, json_schema_extra={'metric': True}) + stratum: float | None = Field(None, json_schema_extra={"metric": True}) + reachability: int | None = Field(None, json_schema_extra={"metric": True}) + offset_s: float | None = Field(None, serialization_alias="phase_offset_s", json_schema_extra={"metric": True}) + delay_s: float | None = Field(None, json_schema_extra={"metric": True}) + jitter_s: float | None = Field(None, json_schema_extra={"metric": True}) reference_id: str | None = None observation_sources: list[str] = 
Field(default_factory=list) collection_id: str collection_ip: str probe_id: str | None = None - extras: dict = Field(default_factory=dict, serialization_alias='additional_metadata') + extras: dict = Field(default_factory=dict, serialization_alias="additional_metadata") model_config = ConfigDict(serialize_by_alias=True) def collect(self): - raise NotImplementedError() + """Collect a single NTP Reading""" + raise NotImplementedError def export_data(self) -> list[CollectMixin.DataArtifact]: + """ + Export the data from the NTP Collection to a list of DataArtifacts + + Each distinct metric type will get it's own data artifact + """ now = datetime.now(tz=timezone.utc) - include_list = {f for f, field_info - in type(self).model_fields.items() - if field_info.json_schema_extra and field_info.json_schema_extra.get('metric', False)} + include_list = { + f + for f, field_info in type(self).model_fields.items() + if field_info.json_schema_extra and field_info.json_schema_extra.get("metric", False) + } reference_type, compound_reference = self.determine_reference() metric_values = self.model_dump(include=include_list, exclude_none=True) @@ -84,29 +99,36 @@ def export_data(self) -> list[CollectMixin.DataArtifact]: for m, v in metric_values.items(): metric = self.metric_map.get(m, None) if metric is None: - metric = MetricType(name=m, - description=f'Automatically generated metric type for {m}', - value_type=object, - unit="unknown") - logger.warning(f'Generated new metric type for {m}') - value = pd.DataFrame([(now, v)], columns=['time', 'value']) - value['time'] = pd.to_datetime(value['time']) - - artifacts.append(CollectMixin.DataArtifact(metric=metric, - reference_type=reference_type, - compound_reference=compound_reference, - value=value)) + metric = MetricType( + name=m, + description=f"Automatically generated metric type for {m}", + value_type=object, + unit="unknown", + ) + logger.warning(f"Generated new metric type for {m}") + value = pd.DataFrame([(now, v)], 
columns=["time", "value"]) + value["time"] = pd.to_datetime(value["time"]) + + artifacts.append( + CollectMixin.DataArtifact( + metric=metric, reference_type=reference_type, compound_reference=compound_reference, value=value + ) + ) return artifacts def export_metadata(self) -> dict[str, Any]: - include_list = {f for f, field_info - in type(self).model_fields.items() - if not field_info.json_schema_extra or not field_info.json_schema_extra.get('metric', False)} + """Export the metadata from the NTP Collection to a dict""" + include_list = { + f + for f, field_info in type(self).model_fields.items() + if not field_info.json_schema_extra or not field_info.json_schema_extra.get("metric", False) + } meta = self.model_dump(include=include_list, exclude_none=True) - meta['mode'] = self.mode + meta["mode"] = self.mode return meta def export(self) -> CollectMixin.CollectArtifact: + """Export the data + metadata for the NTP Collection to a CollectArtifact""" meta = self.export_metadata() artifacts: list[CollectMixin.DataArtifact] = self.export_data() @@ -114,24 +136,29 @@ def export(self) -> CollectMixin.CollectArtifact: return CollectMixin.CollectArtifact(data=artifacts, metadata=meta) @classmethod - def invert_metric_map(cls): + def invert_metric_map(cls) -> dict[str, str]: + """Invert metric map to go from MetricType.name to string""" return {v.name: k for k, v in cls.metric_map.items()} - def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: - return REF_TYPES.PROBE, {'ip_address': self.collection_ip, 'probe_id': self.collection_id} + def determine_reference(self) -> tuple[ReferenceType, None | dict[str, Any]]: + """Get the reference type and compound reference details""" + return REF_TYPES.PROBE, {"ip_address": self.collection_ip, "probe_id": self.collection_id} + class NTPLocalCollector(NTPCollector): - mode: ClassVar[Literal['remote', 'local']] = 'local' + """Collector model for taking NTP readings from local device""" + + mode: 
ClassVar[Literal["remote", "local"]] = "local" @staticmethod - def _run(cmd: list[str], timeout: float = 8.0) -> Optional[str]: + def _run(cmd: list[str], timeout: float = 8.0) -> str | None: """Run command; return stdout or None if missing/failed.""" bin0 = cmd[0] if shutil.which(bin0) is None: logger.debug(f"ntp local: command {bin0!r} not found") return None try: - proc = subprocess.run( + proc = subprocess.run( # noqa: S603 cmd, capture_output=True, text=True, @@ -144,14 +171,14 @@ def _run(cmd: list[str], timeout: float = 8.0) -> Optional[str]: if proc.returncode != 0: logger.debug(f"ntp local: {cmd!r} exit {proc.returncode}: {proc.stderr!r}") return None - logger.debug(f'ntp local: {cmd!r} exit {proc.stdout}') + logger.debug(f"ntp local: {cmd!r} exit {proc.stdout}") return proc.stdout or "" def _parse_chronyc_tracking(self, text: str) -> None: """Parse `chronyc tracking` key: value output.""" out: dict[str, Any] = {} - for line in text.splitlines(): - line = line.strip() + for l in text.splitlines(): + line = l.strip() if not line or ":" not in line: continue key, _, rest = line.partition(":") @@ -160,53 +187,48 @@ def _parse_chronyc_tracking(self, text: str) -> None: out[key] = val # Last offset : +0.000000123 seconds - m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) + m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.IGNORECASE) if m: - try: + with contextlib.suppress(ValueError): self.offset_s = _merge(self.offset_s, (m.group(1))) - except ValueError: - pass - m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) + + m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.IGNORECASE) if m: - try: + with contextlib.suppress(ValueError): self.jitter_s = _merge(self.jitter_s, float(m.group(1))) - except ValueError: - pass - m = re.search(r"stratum\s*:\s*(\d+)", text, re.I) + + m = re.search(r"stratum\s*:\s*(\d+)", text, re.IGNORECASE) if m: - try: + with 
contextlib.suppress(ValueError): self.stratum = _merge(self.stratum, int(m.group(1))) - except ValueError: - pass - m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.I) + + m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.IGNORECASE) if m: self.reference_id = (m.group(2) or m.group(1)) or self.reference_id self.sync_status = "unsynchronized" if "normal" in text.lower() or self.offset_s is not None: self.sync_status = "tracking" - self.extras['chronyc_raw_tracking'] = out + self.extras["chronyc_raw_tracking"] = out self.observation_sources.append("chronyc_tracking") def _parse_chronyc_sources(self, text: str) -> None: """Parse `chronyc sources` for reach and selected source.""" - reach: Optional[int] = None - selected: Optional[str] = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("MS") or line.startswith("="): + reach: int | None = None + selected: str | None = None + for l in text.splitlines(): + line = l.strip() + if not line or line.startswith(("MS", "=")): continue # ^* or ^+ prefix indicates selected/accepted - if line.startswith("*") or line.startswith("+"): + if line.startswith(("*", "+")): parts = line.split() if len(parts) >= 7: try: reach = int(parts[5], 8) if parts[5].startswith("0") else int(parts[5]) except ValueError: - try: + with contextlib.suppress(ValueError): reach = int(parts[5]) - except ValueError: - pass selected = parts[1] break # Fallback: last column often reach (octal) @@ -218,35 +240,32 @@ def _parse_chronyc_sources(self, text: str) -> None: # Try any line with 377 octal style m = re.search(r"\b([0-7]{3})\b", text) if m: - try: + with contextlib.suppress(ValueError): reach = int(m.group(1), 8) - except ValueError: - pass self.reachability = self.reachability or reach self.reference_id = self.reference_id or selected - self.observation_sources.append( "chronyc_sources") + self.observation_sources.append("chronyc_sources") def _parse_ntpq(self, text: 
str) -> None: """Parse `ntpq -p` / `ntpq -pn` output.""" - offset_s: Optional[float] = None - delay_s: Optional[float] = None - jitter_s: Optional[float] = None - stratum: Optional[int] = None - reach: Optional[int] = None + offset_s: float | None = None + delay_s: float | None = None + jitter_s: float | None = None + stratum: int | None = None + reach: int | None = None ref = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("remote") or line.startswith("="): + for l in text.splitlines(): + line = l.strip() + if not line or line.startswith(("remote", "=")): continue - if line.startswith("*") or line.startswith("+") or line.startswith("-"): + if line.startswith(("*", "+", "-")): parts = line.split() # remote refid st t when poll reach delay offset jitter if len(parts) >= 10: - try: + with contextlib.suppress(ValueError): stratum = int(parts[2]) - except ValueError: - pass + try: delay_s = float(parts[7]) / 1000.0 # ms -> s offset_s = float(parts[8]) / 1000.0 @@ -256,10 +275,9 @@ def _parse_ntpq(self, text: str) -> None: try: reach = int(parts[6], 8) if parts[6].startswith("0") else int(parts[6]) except ValueError: - try: + with contextlib.suppress(ValueError): reach = int(parts[6]) - except ValueError: - pass + ref = parts[1] break sync_status = "synced" if offset_s is not None else "unknown" @@ -289,10 +307,10 @@ def _parse_timedatectl(self, text: str) -> None: elif sync is False: sync_status = "unsynchronized" - if self.sync_status == 'unknown': + if self.sync_status == "unknown": self.sync_status = sync_status or self.sync_status self.observation_sources.append("timedatectl") - self.extras['timedatectl'] = text[:2000] + self.extras["timedatectl"] = text[:2000] def _parse_systemctl_show(self, text: str) -> None: """Parse `systemctl show` / `systemctl status` for systemd-timesyncd.""" @@ -309,12 +327,13 @@ def _parse_systemctl_show(self, text: str) -> None: elif active is False: sync_status = "service_inactive" - if 
self.sync_status == 'unknown': + if self.sync_status == "unknown": self.sync_status = sync_status or self.sync_status - self.extras['systemctl'] = text[:2000] + self.extras["systemctl"] = text[:2000] self.observation_sources.append("systemctl_timesyncd") def collect(self): + """Collect local NTP readings using various tools""" t = self._run(["chronyc", "tracking"]) if t: self._parse_chronyc_tracking(t) @@ -340,28 +359,32 @@ def collect(self): self._parse_systemctl_show(t) if not self.observation_sources: - self.observation_sources = ['none'] + self.observation_sources = ["none"] self.sync_health = 1.0 if self.sync_status in ("tracking", "synchronized", "synced") else 0.0 if self.probe_id is None: - self.probe_id = 'ntp-local' + self.probe_id = "ntp-local" + class NTPRemoteCollector(NTPCollector): - mode: ClassVar[Literal['remote', 'local']] = 'remote' + """Collector model for taking readings from remote NTP Server.""" + + mode: ClassVar[Literal["remote", "local"]] = "remote" target_port: int timeout: float = 3.0 - root_delay_s: float | None = Field(None, json_schema_extra={'metric': True}) - root_dispersion_s: float | None = Field(None, json_schema_extra={'metric': True}) - poll_interval_s: float | None = Field(None, json_schema_extra={'metric': True}) + root_delay_s: float | None = Field(None, json_schema_extra={"metric": True}) + root_dispersion_s: float | None = Field(None, json_schema_extra={"metric": True}) + poll_interval_s: float | None = Field(None, json_schema_extra={"metric": True}) leap_status: str = "unknown" - def configure_failure(self, e): - self.sync_status = 'unreachable' + def configure_failure(self, e: Exception) -> None: + """Set all metric and metadata values to reflect failure to connect""" + self.sync_status = "unreachable" self.sync_health = 0 - self.extras['error'] = str(e) + self.extras["error"] = str(e) self.observation_sources.append("ntplib") self.observation_sources.append("error") @@ -370,7 +393,8 @@ def _estimate_jitter_s(self) -> 
None: Single NTP client response does not include RFC5905 peer jitter (that needs multiple samples). Emit a conservative positive bound from round-trip delay and root dispersion so downstream - ``NTP Jitter`` metrics and dashboards have a value; chrony/ntpq local paths still supply true jitter when available. + ``NTP Jitter`` metrics and dashboards have a value; chrony/ntpq local paths still supply + true jitter when available. """ if self.delay_s is None and self.root_dispersion_s is None: return @@ -382,11 +406,13 @@ def _estimate_jitter_s(self) -> None: return def collect(self): + """Collect readings from a single ping against a remote NTP server.""" try: import ntplib # type: ignore[import-untyped] except ImportError as e: raise ImportError( - "Remote NTP collection requires the 'ntplib' package (install opensampl[collect]).") from e + "Remote NTP collection requires the 'ntplib' package (install opensampl[collect])." + ) from e client = ntplib.NTPClient() try: resp = client.request(self.target_host, port=self.target_port, version=3, timeout=self.timeout) @@ -394,7 +420,6 @@ def collect(self): logger.warning(f"NTP request to {self.target_host}:{self.target_port} failed: {e}") self.configure_failure(e) return - from pprint import pformat leap = int(resp.leap) leap_map = {0: "no_warning", 1: "add_second", 2: "del_second", 3: "alarm"} self.leap_status = leap_map.get(leap, str(leap)) @@ -404,7 +429,7 @@ def collect(self): try: self.poll_interval_s = float(2 ** int(resp.poll)) except (TypeError, ValueError, OverflowError): - logger.debug(f'No poll interval determined') + logger.debug("No poll interval determined") self.root_delay_s = float(resp.root_delay) if resp.root_delay is not None else None self.root_dispersion_s = float(resp.root_dispersion) if resp.root_dispersion is not None else None @@ -425,29 +450,34 @@ def collect(self): self.sync_health = 1.0 if sync_ok else 0.0 self._estimate_jitter_s() - self.extras['version'] = getattr(resp, 'version', None) + 
self.extras["version"] = getattr(resp, "version", None) if self.probe_id is None: - self.probe_id = f'remote:{self.target_port}' + self.probe_id = f"remote:{self.target_port}" + -def collect_ip_factory(): +def collect_ip_factory() -> str: + """Get ip address for collection host using socket (default to 127.0.0.1)""" s = None try: s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.connect(("8.8.8.8", 80)) # doesn't actually send data v = s.getsockname()[0] - except: - v = '127.0.0.1' + except Exception: + v = "127.0.0.1" finally: if s: s.close() return v -def collect_id_factory(): + +def collect_id_factory() -> str: + """Get humanreadable host name for collection host using socket (default to collection-host)""" try: - return socket.gethostname() or 'collection-host' + return socket.gethostname() or "collection-host" except Exception: - return 'collection-host' + return "collection-host" + class NtpProbe(BaseProbe, CollectMixin, RandomDataMixin): """Probe parser for NTP vendor data files""" @@ -456,23 +486,30 @@ class NtpProbe(BaseProbe, CollectMixin, RandomDataMixin): class CollectConfig(CollectMixin.CollectConfig): """ + Configuration for Collecting NTP Readings + Attributes: probe_id: stable probe_id slug (e.g. local-chrony) ip_address: Host or IP address for Probe (default '127.0.0.1') port: UDP port for remote mode (use high ports for lab mocks) - output_dir: When provided, will save collected data as a file to provided directory. Filename will be automatically generated as ntp_{ip_address}_{probe_id}_{ts.strftime('%Y%m%dT%H%M%SZ')}.json + output_dir: When provided, will save collected data as a file to provided directory. Filename will be + automatically generated as NTP_{ip_address}_{probe_id}_{vendor}_{timestamp}.txt load: Whether to load collected data directly to the database duration: Number of seconds to collect data for mode: Collect remote or local NTP. Default is 'local'. 
interval: Seconds between samples; 0 = single sample and exit duration: Samples to collect when interval > 0 timeout: UDP request timeout for remote mode(seconds) default: 3.0 - collection_ip: Override for the IP address of device collecting readings. Will attempt to resolve a local network IP using socket and fall back to '127.0.0.1' - collection_id: Override for the Probe ID of the device collecting readings. Will attempt to resolve using socket.gethostname and fall back to 'collection-host' + collection_ip: Override for the IP address of device collecting readings. Will attempt to resolve a local + network IP using socket and fall back to '127.0.0.1' + collection_id: Override for the Probe ID of the device collecting readings. Will attempt to resolve using + socket.gethostname and fall back to 'collection-host' + """ - ip_address: str = '127.0.0.1' - port: Optional[int] = None - mode: Literal['remote', 'local'] = 'local' + + ip_address: str = "127.0.0.1" + port: int | None = None + mode: Literal["remote", "local"] = "local" interval: float = 0.0 duration: int = 1 timeout: float = 3.0 @@ -480,9 +517,10 @@ class CollectConfig(CollectMixin.CollectConfig): collection_id: str = Field(default_factory=collect_id_factory) @classmethod - def get_collect_cli_options(cls): + def get_collect_cli_options(cls) -> list[Callable]: + """Get the decorators to generate collection options for CLI""" return [ - from_pydantic(cls.CollectConfig, rename={'ip_address': 'host', 'duration': 'count'}), + from_pydantic(cls.CollectConfig, rename={"ip_address": "host", "duration": "count"}), click.pass_context, ] @@ -502,7 +540,7 @@ class RandomDataConfig(RandomDataMixin.RandomDataConfig): description="random.uniform(-1e-12, 1e-12)", ) - def __init__(self, input_file: str, **kwargs): + def __init__(self, input_file: str): """Initialize NtpProbe from input file""" super().__init__(input_file) self.collection_probe = None @@ -513,6 +551,7 @@ def process_metadata(self) -> dict: Returns: dict 
with metadata field names as keys + """ if not self.metadata_parsed: header_lines = [] @@ -525,12 +564,13 @@ def process_metadata(self) -> dict: header_str = "".join(header_lines) self.metadata = yaml.safe_load(header_str) - self.collection_probe = ProbeKey(ip_address=self.metadata.get('collection_ip'), - probe_id=self.metadata.get('collection_id')) - load_probe_metadata(vendor=self.vendor, - probe_key=self.collection_probe, - data={'reference': True, }) - self.probe_key = ProbeKey(ip_address=self.metadata.get('target_host'), probe_id=self.metadata.get('probe_id')) + self.collection_probe = ProbeKey( + ip_address=self.metadata.get("collection_ip"), probe_id=self.metadata.get("collection_id") + ) + load_probe_metadata(vendor=self.vendor, probe_key=self.collection_probe, data={"reference": True}) + self.probe_key = ProbeKey( + ip_address=self.metadata.get("target_host"), probe_id=self.metadata.get("probe_id") + ) self.metadata_parsed = True return self.metadata @@ -542,15 +582,11 @@ def load_metadata(cls, probe_key: ProbeKey, metadata: dict) -> None: Returns: dict with metadata field names as keys + """ - collection_probe = ProbeKey(ip_address=metadata.get('collection_ip'), - probe_id=metadata.get('collection_id')) - load_probe_metadata(vendor=cls.vendor, - probe_key=collection_probe, - data={'reference': True, }) - load_probe_metadata(vendor=cls.vendor, - probe_key=probe_key, - data=metadata) + collection_probe = ProbeKey(ip_address=metadata.get("collection_ip"), probe_id=metadata.get("collection_id")) + load_probe_metadata(vendor=cls.vendor, probe_key=collection_probe, data={"reference": True}) + load_probe_metadata(vendor=cls.vendor, probe_key=probe_key, data=metadata) def process_time_data(self) -> None: """ @@ -571,17 +607,21 @@ def process_time_data(self) -> None: self.process_metadata() reference_type = REF_TYPES.PROBE - grouped_dfs: dict[str, pd.DataFrame] = {str(metric): group.reset_index(drop=True) for metric, group in raw_df.groupby('metric')} + 
grouped_dfs: dict[str, pd.DataFrame] = { + str(metric): group.reset_index(drop=True) for metric, group in raw_df.groupby("metric") + } for metr, df in grouped_dfs.items(): metric = NTPCollector.metric_map.get(metr) if not metric: logger.warning(f"Metric {metr} is not supported for NTP. Will not ingest {len(df)} rows") continue try: - self.send_data(data=df, - metric=metric, - reference_type=reference_type, - compound_reference=self.collection_probe.model_dump()) + self.send_data( + data=df, + metric=metric, + reference_type=reference_type, + compound_reference=self.collection_probe.model_dump(), + ) except requests.HTTPError as e: resp = e.response if resp is None: @@ -593,37 +633,31 @@ def process_time_data(self) -> None: raise except IntegrityError as e: if isinstance(e.orig, psycopg2.errors.UniqueViolation): # ty: ignore[unresolved-attribute] - logger.info(f"{metr} against {self.collection_probe} already loaded for time frame already loaded for time frame, continuing..") + logger.info( + f"{metr} against {self.collection_probe} already loaded for time " + f"frame already loaded for time frame, continuing.." + ) @classmethod def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: - """ - Create a collect artifact defined as follows - class CollectArtifact(BaseModel): - data: pd.DataFrame - metric: MetricType = METRICS.UNKNOWN - reference_type: ReferenceType = REF_TYPES.UNKNOWN - compound_reference: Optional[dict[str, Any]] = None - probe_key: Optional[ProbeKey] = None - metadata: Optional[dict] = Field(default_factory=dict) - - on a collect_config.load, the metadata and data will be loaded into db. - - define logic for the save_to_file as well. 
- """ - collector_overrides = collect_config.model_dump(include=['collection_ip', 'collection_id', 'probe_id'], exclude_none=True) + """Collect readings for an NTP probe according to collect_config.""" + collector_overrides = collect_config.model_dump( + include=["collection_ip", "collection_id", "probe_id"], exclude_none=True + ) def collect_once() -> CollectMixin.CollectArtifact: collector = None - if collect_config.mode == 'local': - collector = NTPLocalCollector(target_host=collect_config.ip_address, - **collector_overrides) - elif collect_config.mode == 'remote': - collector = NTPRemoteCollector(target_host=collect_config.ip_address, - target_port=collect_config.port, - timeout=collect_config.timeout, **collector_overrides) + if collect_config.mode == "local": + collector = NTPLocalCollector(target_host=collect_config.ip_address, **collector_overrides) + elif collect_config.mode == "remote": + collector = NTPRemoteCollector( + target_host=collect_config.ip_address, + target_port=collect_config.port, + timeout=collect_config.timeout, + **collector_overrides, + ) if collector is None: - raise ValueError('Could not determine mode from collect_config') + raise ValueError("Could not determine mode from collect_config") collector.collect() return collector.export() @@ -646,19 +680,20 @@ def collect_once() -> CollectMixin.CollectArtifact: @classmethod def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: + """Create the content of a file from the CollectArtifacts""" metric_names = NTPCollector.invert_metric_map() dfs = [] for d in collected.data or []: df = d.value - df['metric'] = metric_names.get(d.metric.name, d.metric.name.lower().replace(' ', '_')) + df["metric"] = metric_names.get(d.metric.name, d.metric.name.lower().replace(" ", "_")) dfs.append(df) value_df = pd.concat(dfs) if dfs else None header = yaml.dump(collected.metadata, sort_keys=False) - header = textwrap.indent(header, prefix='# ') + header = textwrap.indent(header, 
prefix="# ") buffer = StringIO() buffer.write(header) - buffer.write('\n') + buffer.write("\n") if value_df is not None: # write dataframe @@ -668,9 +703,9 @@ def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: @classmethod def generate_random_data( - cls, - config: RandomDataConfig, - probe_key: ProbeKey, + cls, + config: RandomDataConfig, + probe_key: ProbeKey, ) -> ProbeKey: """Generate synthetic NTP-like metrics for testing.""" cls._setup_random_seed(config.seed) @@ -692,16 +727,11 @@ def generate_random_data( num_samples = int(total_seconds / config.sample_interval) times = [] metric_maps = { - 'offset': {'metric': METRICS.PHASE_OFFSET, - 'values': []}, - 'delay_s': {'metric': METRICS.NTP_DELAY, - 'values': []}, - 'jitter_s': {'metric': METRICS.NTP_JITTER, - 'values': []}, - 'stratum': {'metric': METRICS.NTP_STRATUM, - 'values': []}, - 'sync_health': {'metric': METRICS.NTP_SYNC_HEALTH, - 'values': []}, + "offset": {"metric": METRICS.PHASE_OFFSET, "values": []}, + "delay_s": {"metric": METRICS.NTP_DELAY, "values": []}, + "jitter_s": {"metric": METRICS.NTP_JITTER, "values": []}, + "stratum": {"metric": METRICS.NTP_STRATUM, "values": []}, + "sync_health": {"metric": METRICS.NTP_SYNC_HEALTH, "values": []}, } for i in range(num_samples): @@ -718,21 +748,19 @@ def generate_random_data( jitter_s = abs(float(config.noise_amplitude * 5)) stratum = 2.0 + (1.0 if random.random() < 0.05 else 0.0) sync_health = 1.0 - metric_maps['offset']['values'].append(offset) - metric_maps['delay_s']['values'].append(delay_s) - metric_maps['jitter_s']['values'].append(jitter_s) - metric_maps['stratum']['values'].append(stratum) - metric_maps['sync_health']['values'].append(sync_health) + metric_maps["offset"]["values"].append(offset) + metric_maps["delay_s"]["values"].append(delay_s) + metric_maps["jitter_s"]["values"].append(jitter_s) + metric_maps["stratum"]["values"].append(stratum) + metric_maps["sync_health"]["values"].append(sync_health) for metric in 
metric_maps.values(): - cls.send_data(probe_key=probe_key, - metric=metric.get('metric'), - reference_type=REF_TYPES.UNKNOWN, - data=pd.DataFrame({"time": times, "value": metric.get('values')})) + cls.send_data( + probe_key=probe_key, + metric=metric.get("metric"), + reference_type=REF_TYPES.UNKNOWN, + data=pd.DataFrame({"time": times, "value": metric.get("values")}), + ) logger.info(f"Finished random NTP generation for {probe_key}") return probe_key - - - - diff --git a/pyproject.toml b/pyproject.toml index ce69916..b5a4a63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -134,7 +134,7 @@ build-backend = "hatchling.build" [tool.ruff] line-length = 120 -exclude = [".git", "__pycache__", "venv", "env", ".venv", ".env", "build", "dist", "docs", "opensampl/server/migrations/**/*.py",] +exclude = [".git", "__pycache__", "venv", "env", ".venv", ".env", "build", "dist", "docs", "opensampl/server/migrations/**/*.py"] include = ["opensampl/**/*.py"] [tool.ruff.lint] @@ -144,7 +144,7 @@ select = ["F", "E", "W", "C", "I", "D", "N", "B", "ERA", "ANN", "S", "A", "COM", "FLY", "PERF", "PL", "UP", "FURB", "RUF", "TRY"] ignore = ["D203", "D212", "D400", "D415", "ANN401", "S101", "PLR2004", "COM812", "ANN201", "B011", "EM102", "TRY003", "ANN204", "FA100", "PIE790", "EM101", - "PLC0415"] + "PLC0415", 'E741'] [tool.ruff.lint.per-file-ignores] "opensampl/vendors/**/*.py" = ['S311'] # we want to ignore the errors about random From 0edf9e503ed554435b3e6b2262b06ce357c54a68 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 12:19:52 -0400 Subject: [PATCH 14/22] fixing ntp specific names --- opensampl/metrics.py | 60 +++++++++++++++++++--------------- opensampl/vendors/constants.py | 3 +- opensampl/vendors/ntp.py | 26 +++++++-------- 3 files changed, 49 insertions(+), 40 deletions(-) diff --git a/opensampl/metrics.py b/opensampl/metrics.py index 72979b8..27b6b8a 100644 --- a/opensampl/metrics.py +++ b/opensampl/metrics.py @@ -62,59 +62,67 @@ class METRICS: 
unit="unknown", value_type=object, ) - NTP_DELAY = MetricType( - name="NTP Delay", - description="Round-trip delay (RTT) to the NTP server or observed path delay in seconds", + DELAY = MetricType( + name="Delay", + description=( + "Round-trip delay (RTD) or Round-Trip Time (RTT). The time in seconds it takes for a data signal to " + "travel from a source to a destination and back, including acknowledgement." + ), unit="s", value_type=float, ) - NTP_JITTER = MetricType( - name="NTP Jitter", - description=( - "Jitter or offset variation for NTP in seconds (true value from chrony/ntpq when available; " - "remote single-packet collection may use a delay/dispersion bound estimate)" - ), + JITTER = MetricType( + name="Jitter", + description=("Jitter or offset variation in delay in seconds. Represents inconsistent response times."), unit="s", value_type=float, ) - NTP_STRATUM = MetricType( - name="NTP Stratum", - description="NTP stratum level (distance from reference clock)", + STRATUM = MetricType( + name="Stratum", + description=( + 'Stratum level. Hierarchical layer defining the distance (or "hops") between device and reference.' + ), unit="level", - value_type=float, + value_type=int, ) - NTP_REACHABILITY = MetricType( - name="NTP Reachability", - description="NTP reachability register (0-255) as a scalar for plotting", + REACHABILITY = MetricType( + name="Reachability", + description=( + "Reachability register (0-255) as a scalar for plotting. Ability of a source node to communicate " + "with a target node." 
+ ), unit="count", value_type=float, ) - NTP_DISPERSION = MetricType( - name="NTP Dispersion", - description="Combined error budget / dispersion in seconds", + DISPERSION = MetricType( + name="Dispersion", + description="Uncertainty in a clock's time relative to its reference source in seconds", unit="s", value_type=float, ) NTP_ROOT_DELAY = MetricType( name="NTP Root Delay", - description="Root delay from NTP packet or local estimate in seconds", + description=( + "Total round-trip network delay from the local system" + " all the way to the primary reference clock (stratum 0)" + ), unit="s", value_type=float, ) NTP_ROOT_DISPERSION = MetricType( name="NTP Root Dispersion", - description="Root dispersion from NTP packet or local estimate in seconds", + description="The total accumulated clock uncertainty from the local system back to the primary reference clock", unit="s", value_type=float, ) - NTP_POLL_INTERVAL = MetricType( - name="NTP Poll Interval", - description="Poll interval in seconds", + POLL_INTERVAL = MetricType( + name="Poll Interval", + description="Time between requests sent to a time server in seconds", unit="s", value_type=float, ) - NTP_SYNC_HEALTH = MetricType( - name="NTP Sync Health", + SYNC_HEALTH = MetricType( + name="Sync Health", description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)", unit="ratio", value_type=float, diff --git a/opensampl/vendors/constants.py b/opensampl/vendors/constants.py index e2bc68a..b5b7bdd 100644 --- a/opensampl/vendors/constants.py +++ b/opensampl/vendors/constants.py @@ -70,7 +70,6 @@ class VENDORS: metadata_orm="MicrochipTP4100Metadata", ) - # --- CUSTOM VENDORS --- !! Do not remove line, used as reference when inserting vendor NTP = VendorType( name="NTP", parser_class="NtpProbe", @@ -79,6 +78,8 @@ class VENDORS: metadata_orm="NtpMetadata", ) + # --- CUSTOM VENDORS --- !! 
Do not remove line, used as reference when inserting vendor + # --- VENDOR FUNCTIONS --- @classmethod diff --git a/opensampl/vendors/ntp.py b/opensampl/vendors/ntp.py index 8ecb065..57f835b 100644 --- a/opensampl/vendors/ntp.py +++ b/opensampl/vendors/ntp.py @@ -46,15 +46,15 @@ class NTPCollector(BaseModel): mode: ClassVar[Literal["remote", "local"]] metric_map: ClassVar[dict[str, MetricType]] = { "phase_offset_s": METRICS.PHASE_OFFSET, - "delay_s": METRICS.NTP_DELAY, - "jitter_s": METRICS.NTP_JITTER, - "stratum": METRICS.NTP_STRATUM, - "reachability": METRICS.NTP_REACHABILITY, - "dispersion_s": METRICS.NTP_DISPERSION, + "delay_s": METRICS.DELAY, + "jitter_s": METRICS.JITTER, + "stratum": METRICS.STRATUM, + "reachability": METRICS.REACHABILITY, + "dispersion_s": METRICS.DISPERSION, "root_delay_s": METRICS.NTP_ROOT_DELAY, "root_dispersion_s": METRICS.NTP_ROOT_DISPERSION, - "poll_interval_s": METRICS.NTP_POLL_INTERVAL, - "sync_health": METRICS.NTP_SYNC_HEALTH, + "poll_interval_s": METRICS.POLL_INTERVAL, + "sync_health": METRICS.SYNC_HEALTH, } target_host: str @@ -62,7 +62,7 @@ class NTPCollector(BaseModel): sync_status: str = Field("unknown") sync_health: float | None = Field(None, json_schema_extra={"metric": True}) - stratum: float | None = Field(None, json_schema_extra={"metric": True}) + stratum: int | None = Field(None, json_schema_extra={"metric": True}) reachability: int | None = Field(None, json_schema_extra={"metric": True}) offset_s: float | None = Field(None, serialization_alias="phase_offset_s", json_schema_extra={"metric": True}) delay_s: float | None = Field(None, json_schema_extra={"metric": True}) @@ -728,10 +728,10 @@ def generate_random_data( times = [] metric_maps = { "offset": {"metric": METRICS.PHASE_OFFSET, "values": []}, - "delay_s": {"metric": METRICS.NTP_DELAY, "values": []}, - "jitter_s": {"metric": METRICS.NTP_JITTER, "values": []}, - "stratum": {"metric": METRICS.NTP_STRATUM, "values": []}, - "sync_health": {"metric": 
METRICS.NTP_SYNC_HEALTH, "values": []}, + "delay_s": {"metric": METRICS.DELAY, "values": []}, + "jitter_s": {"metric": METRICS.JITTER, "values": []}, + "stratum": {"metric": METRICS.STRATUM, "values": []}, + "sync_health": {"metric": METRICS.SYNC_HEALTH, "values": []}, } for i in range(num_samples): @@ -746,7 +746,7 @@ def generate_random_data( delay_s = 0.02 + abs(0.0001 * random.random()) jitter_s = abs(float(config.noise_amplitude * 5)) - stratum = 2.0 + (1.0 if random.random() < 0.05 else 0.0) + stratum = 2 + (1 if random.random() < 0.05 else 0) sync_health = 1.0 metric_maps["offset"]["values"].append(offset) metric_maps["delay_s"]["values"].append(delay_s) From 1bedb8b1da2409dbac79bd720edd43b082f2d611 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 12:55:13 -0400 Subject: [PATCH 15/22] adding migration stuff --- opensampl/helpers/geolocator.py | 4 +- .../2026_04_17_1243_add_ntp_values.py | 159 ++++++++++++++++++ 2 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py diff --git a/opensampl/helpers/geolocator.py b/opensampl/helpers/geolocator.py index 99e9858..b92c890 100644 --- a/opensampl/helpers/geolocator.py +++ b/opensampl/helpers/geolocator.py @@ -28,8 +28,8 @@ def _env_bool(name: str, default: bool) -> bool: def _default_lab_coords() -> tuple[float, float]: - lat = float(os.getenv("DEFAULT_LAT", "37.4419")) - lon = float(os.getenv("DEFAULT_LON", "-122.1430")) + lat = float(os.getenv("DEFAULT_LAT", "35.9312")) + lon = float(os.getenv("DEFAULT_LON", "-84.3101")) return lat, lon diff --git a/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py new file mode 100644 index 0000000..ebaa5a8 --- /dev/null +++ b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py @@ -0,0 +1,159 @@ +"""add ntp 
values + +Revision ID: 5665e5902905 +Revises: d419cac01df2 +Create Date: 2026-04-17 12:43:23.711453 + +""" +from typing import Sequence, Union +import uuid +from sqlalchemy.dialects import postgresql +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '5665e5902905' +down_revision: Union[str, None] = 'd419cac01df2' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +SCHEMA = 'castdb' + +def upgrade() -> None: + op.create_table( + "ntp_metadata", + sa.Column( + "probe_uuid", + sa.String(), + sa.ForeignKey("probe_metadata.uuid"), + primary_key=True, + nullable=False, + ), + sa.Column("mode", sa.Text(), nullable=True), + sa.Column( + "reference", + sa.Boolean(), + nullable=True, + comment="Is used as a reference for other probes", + ), + sa.Column("target_host", sa.Text(), nullable=True), + sa.Column("target_port", sa.Integer(), nullable=True), + sa.Column("sync_status", sa.Text(), nullable=True), + sa.Column("leap_status", sa.Text(), nullable=True), + sa.Column("reference_id", sa.Text(), nullable=True), + sa.Column("observation_sources", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("collection_id", sa.Text(), nullable=True), + sa.Column("collection_ip", sa.Text(), nullable=True), + sa.Column("timeout", sa.Float(), nullable=True), + sa.Column("additional_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + schema=SCHEMA, + if_not_exists=True, + comment="NTP Clock Probe specific metadata" + ) + + metric_type_table = sa.table('metric_type', + sa.column('uuid', sa.String), + sa.column('name', sa.String), + sa.column('description', sa.Text), + sa.column('unit', sa.String), + sa.column('value_type', sa.String), + schema=SCHEMA + ) + new_metrics = [ + dict(uuid=str(uuid.uuid4()), + name="Delay", + description=( + "Round-trip delay (RTD) or Round-Trip Time (RTT). 
The time in seconds it takes for a data signal to " + "travel from a source to a destination and back, including acknowledgement." + ), + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Jitter", + description=("Jitter or offset variation in delay in seconds. Represents inconsistent response times."), + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Stratum", + description=( + 'Stratum level. Hierarchical layer defining the distance (or "hops") between device and reference.' + ), + unit="level", + value_type='int', + ), + dict(uuid=str(uuid.uuid4()), + name="Reachability", + description=( + "Reachability register (0-255) as a scalar for plotting. Ability of a source node to communicate " + "with a target node." + ), + unit="count", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Dispersion", + description="Uncertainty in a clock's time relative to its reference source in seconds", + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="NTP Root Delay", + description=( + "Total round-trip network delay from the local system" + " all the way to the primary reference clock (stratum 0)" + ), + unit="s", + value_type='float' + ), + dict(uuid=str(uuid.uuid4()), + name="NTP Root Dispersion", + description="The total accumulated clock uncertainty from the local system back to the primary reference clock", + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Poll Interval", + description="Time between requests sent to a time server in seconds", + unit="s", + value_type=float, + ), + dict(uuid=str(uuid.uuid4()), + name="Sync Health", + description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)", + unit="ratio", + value_type=float, + ) + ] + op.bulk_insert(metric_type_table, new_metrics) + + + + +def downgrade() -> None: + op.drop_table('ntp_metadata', schema=SCHEMA, if_exists=True) + metric_type = sa.sql.table( + "metric_type", + 
sa.column("name", sa.String), + schema=SCHEMA, + ) + + op.execute( + metric_type.delete().where( + metric_type.c.name.in_( + [ + "Delay", + "Jitter", + "Stratum", + "Reachability", + "Dispersion", + "NTP Root Delay", + "NTP Root Dispersion", + "Poll Interval", + "Sync Health", + ] + ) + ) + ) From 9d0d1472d8e8c677e86df652674f11fedb4b125f Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 14:15:37 -0400 Subject: [PATCH 16/22] adding reference_probe view --- .../2026_04_17_1254_add_reference_view.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py diff --git a/opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py b/opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py new file mode 100644 index 0000000..8cbb326 --- /dev/null +++ b/opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py @@ -0,0 +1,56 @@ +"""add reference view + +Revision ID: c95e49e551be +Revises: 5665e5902905 +Create Date: 2026-04-17 12:54:27.037125 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'c95e49e551be' +down_revision: Union[str, None] = '5665e5902905' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +SCHEMA = 'castdb' + +CREATE_VIEW_SQL = f""" +CREATE VIEW {SCHEMA}.reference_probe_metadata +AS WITH probe_references AS ( + SELECT r.uuid, + r.reference_type_uuid, + r.compound_reference_uuid + FROM {SCHEMA}.reference r + JOIN {SCHEMA}.reference_type rt ON r.reference_type_uuid::text = rt.uuid::text + WHERE rt.name::text = 'PROBE'::text + ) + SELECT pm.uuid, + pm.probe_id, + pm.ip_address, + pm.vendor, + pm.model, + pm.name, + pm.public, + pm.location_uuid, + pm.test_uuid, + pr.uuid AS reference_uuid + FROM probe_references pr + JOIN {SCHEMA}.probe_metadata pm ON pr.compound_reference_uuid::text = pm.uuid::text; +""" + +DROP_VIEW_SQL = f""" +DROP VIEW IF EXISTS {SCHEMA}.reference_probe_metadata""" + +def upgrade() -> None: + # Drop the view first, just to be extra safe. + op.execute(DROP_VIEW_SQL) + op.execute(CREATE_VIEW_SQL) + + +def downgrade() -> None: + op.execute(DROP_VIEW_SQL) From 45798ee4cf9686abc9df50e6a9c18506c353cd55 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 14:32:47 -0400 Subject: [PATCH 17/22] typosss --- opensampl/config/server.py | 2 +- .../_migrations/versions/2026_04_17_1243_add_ntp_values.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensampl/config/server.py b/opensampl/config/server.py index a0bc6e6..16d061b 100644 --- a/opensampl/config/server.py +++ b/opensampl/config/server.py @@ -39,7 +39,7 @@ class ServerConfig(BaseConfig): COMPOSE_FILE: str = Field(default="", description="Fully resolved path to the Docker Compose file.") - OVERRIDE_FILE: str | None = Field(defualt=None, description="Override for the compose file") + OVERRIDE_FILE: str | None = Field(default=None, description="Override for the compose file") DOCKER_ENV_FILE: str = Field(default="", description="Fully resolved path to the 
Docker .env file.") diff --git a/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py index ebaa5a8..4cd6b14 100644 --- a/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py +++ b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py @@ -26,7 +26,7 @@ def upgrade() -> None: sa.Column( "probe_uuid", sa.String(), - sa.ForeignKey("probe_metadata.uuid"), + sa.ForeignKey(f"{SCHEMA}.probe_metadata.uuid"), primary_key=True, nullable=False, ), @@ -118,13 +118,13 @@ def upgrade() -> None: name="Poll Interval", description="Time between requests sent to a time server in seconds", unit="s", - value_type=float, + value_type='float', ), dict(uuid=str(uuid.uuid4()), name="Sync Health", description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)", unit="ratio", - value_type=float, + value_type='float', ) ] op.bulk_insert(metric_type_table, new_metrics) From 73b48f3aa134edc4a80f2d3bba2f56ba7ac60e31 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 14:35:56 -0400 Subject: [PATCH 18/22] warning should only be on collision --- opensampl/load_data.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/opensampl/load_data.py b/opensampl/load_data.py index dc163e7..7b04f7e 100644 --- a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -157,11 +157,13 @@ def load_time_data( total_rows = len(records) inserted = result.rowcount # ty: ignore[unresolved-attribute] excluded = total_rows - inserted - - logger.warning( - f"Inserted {inserted}/{total_rows} rows for {probe_readable}; " - f"{excluded}/{total_rows} rejected due to conflicts" - ) + if excluded > 0: + logger.warning( + f"Inserted {inserted}/{total_rows} rows for {probe_readable}; " + f"{excluded}/{total_rows} rejected due to conflicts" + ) + else: + logger.info(f"Inserted 
{inserted}/{total_rows} rows for {probe_readable}") except Exception as e: # In case of an error, roll back the session From 1f617e44abfe7f65e219ebd68b2650840dcc4cd7 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 16:44:26 -0400 Subject: [PATCH 19/22] ok, got that looking swanky --- .../grafana/grafana-dashboards/ntp_dash.json | 1412 +++++++++++++++++ 1 file changed, 1412 insertions(+) create mode 100644 opensampl/server/grafana/grafana-dashboards/ntp_dash.json diff --git a/opensampl/server/grafana/grafana-dashboards/ntp_dash.json b/opensampl/server/grafana/grafana-dashboards/ntp_dash.json new file mode 100644 index 0000000..345fd8c --- /dev/null +++ b/opensampl/server/grafana/grafana-dashboards/ntp_dash.json @@ -0,0 +1,1412 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "NTP reference path: measurements are relative to OpenSAMPL’s configured default reference (UNKNOWN type) unless you add GNSS-backed probes; timing vs GNSS is not implied for these series.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 0, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 52, + "panels": [], + "title": "All Probes", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, 
+ "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP phase offset (Phase Offset metric)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "metric_type_uuid": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": { + "probe_name": "" + } + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "phase_offset" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "Remote single-packet paths use a conservative jitter estimate from delay and root dispersion when peer RMS jitter is unavailable; local chrony/ntpq snapshots may supply measured jitter.", + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP jitter (delay/dispersion estimate or measured)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "metric_type_uuid": true, + "phase_offset": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "emptyValue": "null", + "rowField": "time", + "valueField": "jitter" + } + }, + { + 
"id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP stratum", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_uuid": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "stratum" + } + }, + { + "id": "prepareTimeSeries", + 
"options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP sync health (1=healthy)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_name": false, + "probe_uuid": true, + "stratum": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "sync_health" + } + }, + { + "id": 
"prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 53, + "panels": [], + "repeat": "ntp_reference", + "title": "Reference: $ntp_reference", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 54, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP phase offset (Phase Offset metric)", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": 
"reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "metric_type_uuid": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": { + "probe_name": "" + } + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "phase_offset" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "Remote single-packet paths use a conservative jitter estimate from delay and root dispersion when peer RMS jitter is unavailable; local chrony/ntpq snapshots may supply measured jitter.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + 
"y": 19 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP jitter (delay/dispersion estimate or measured)", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": "reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "metric_type_uuid": true, + "phase_offset": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "emptyValue": "null", + "rowField": "time", + "valueField": "jitter" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP stratum", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": "reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_uuid": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "stratum" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP sync health (1=healthy)", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": "reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_name": false, + "probe_uuid": true, + "stratum": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "sync_health" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 50, + "panels": [ + { + "datasource": { + "type": 
"grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "description": "Phase metrics use OpenSAMPL’s default reference row (UNKNOWN reference type). NTP **observation** context is the configured server in `ntp_metadata` (not GNSS unless a GNSS-backed probe is present).", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 87 + }, + "id": 5, + "options": { + "cellHeight": "sm", + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "probe" + } + ] + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS probe,\n pm.vendor,\n COALESCE(rt.name, '') AS reference_type,\n COALESCE(nm.target_host::text, '') AS ntp_server,\n COALESCE(nm.mode::text, '') AS ntp_mode,\n COALESCE(nm.reference_id::text, '') AS ntp_ref_id,\n COALESCE(l.name, '') AS location,\n COALESCE(pm.public::text, '') AS public\nFROM castdb.probe_metadata pm\nLEFT JOIN castdb.ntp_metadata nm ON nm.probe_uuid = pm.uuid\nLEFT JOIN castdb.locations l ON l.uuid = pm.location_uuid\nLEFT JOIN LATERAL (\n SELECT pd.reference_uuid FROM castdb.probe_data pd WHERE pd.probe_uuid = pm.uuid LIMIT 1\n) rp ON true\nLEFT JOIN castdb.reference r ON r.uuid = rp.reference_uuid\nLEFT JOIN castdb.reference_type rt ON rt.uuid = r.reference_type_uuid\nWHERE pm.vendor = 'NTP'\n AND (trim('${ntp_probe:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${ntp_probe:csv}'), ',')))\nORDER BY 1;", + "refId": "A" + } 
+ ], + "title": "Probe reference & source (stored metadata)", + "type": "table" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "P55EB97F79F5EB88E" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "hideFrom": { + "viz": false + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 51, + "options": { + "cellHeight": "sm", + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "dataset": "castdb", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "WITH probe_ref AS (\n SELECT\n uuid,\n COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS probe_name\n FROM castdb.probe_metadata pm\n)\nSELECT\n time_bucket('1 minute'::interval, pd.time AT TIME ZONE 'UTC') AS time,\n pd.probe_uuid,\n pr.probe_name,\n pd.reference_uuid,\n AVG(pd.value::float * 1e9) FILTER (WHERE lower(m.name) = 'phase offset') AS phase_offset,\n AVG(pd.value::float * 1e9) FILTER (WHERE lower(m.name) = 'jitter') AS jitter,\n AVG((pd.value)::float) FILTER (WHERE lower(m.name) = 'stratum') AS stratum,\n AVG((pd.value)::float) FILTER (WHERE lower(m.name) = 'sync health') AS sync_health\nFROM castdb.probe_data pd\nJOIN probe_ref pr\n ON pd.probe_uuid = pr.uuid\nJOIN castdb.metric_type m\n ON pd.metric_type_uuid = m.uuid\nWHERE pd.probe_uuid = ANY(ARRAY[${ntp_probe:sqlstring}]::text[]) AND $__timeFilter(pd.time)\nGROUP BY 1, 2, 3, 4\nORDER BY\n 1, 3;", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + 
"limit": 50 + } + } + ], + "title": "source_panel", + "type": "table" + } + ], + "title": "Reference & source metadata", + "type": "row" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 42, + "tags": [ + "ntp", + "opensampl", + "reference" + ], + "templating": { + "list": [ + { + "current": { + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "definition": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor = 'NTP' ORDER BY 2", + "includeAll": true, + "multi": true, + "name": "ntp_probe", + "options": [], + "query": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor = 'NTP' ORDER BY 2", + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "definition": "SELECT pm.reference_uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.reference_probe_metadata pm WHERE pm.vendor = 'NTP' \nUNION ALL\nSELECT r.uuid::text AS __value, rt.\"name\" AS __text FROM castdb.reference r JOIN castdb.reference_type rt ON r.reference_type_uuid = rt.\"uuid\" WHERE rt.\"name\" = 'UNKNOWN';", + "includeAll": true, + "multi": true, + "name": "ntp_reference", + "options": [], + "query": "SELECT pm.reference_uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.reference_probe_metadata pm WHERE pm.vendor = 'NTP' \nUNION ALL\nSELECT r.uuid::text AS __value, rt.\"name\" AS __text FROM castdb.reference r JOIN castdb.reference_type rt ON r.reference_type_uuid = rt.\"uuid\" WHERE rt.\"name\" = 'UNKNOWN';", + 
"refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "utc", + "title": "NTP probes (NTP server reference path)", + "uid": "ntp-opensampl", + "version": 17 +} \ No newline at end of file From cf9dc3227375f796268f12e17721ea7748c600bc Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Mon, 20 Apr 2026 15:41:29 -0400 Subject: [PATCH 20/22] adding initial pytest-postgresql functionality --- pyproject.toml | 4 +- tests/integration/__init__.py | 0 tests/integration/conftest.py | 332 +++++++++++++++++++++++++++++ tests/integration/test_db_setup.py | 106 +++++++++ 4 files changed, 441 insertions(+), 1 deletion(-) create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/conftest.py create mode 100644 tests/integration/test_db_setup.py diff --git a/pyproject.toml b/pyproject.toml index b5a4a63..ef33b97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,6 +100,8 @@ dev = [ "mkdocs-gen-files", "mkdocs-material", "mkdocs-click", + "psycopg[binary]", + "pytest-postgresql" ] [tool.hatch.build.targets.sdist] @@ -158,4 +160,4 @@ max-args = 10 quote-style = "double" indent-style = "space" skip-magic-trailing-comma = false -docstring-code-format = true +docstring-code-format = true \ No newline at end of file diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..1ccf890 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,332 @@ +""" +tests/conftest.py + +Shared pytest fixtures for openSAMPL integration tests. 
+ +Prerequisites +------------- +- PostgreSQL with PostGIS extension available +- pytest-postgresql installed: + uv add --group dev pytest-postgresql +- On macOS (Homebrew): + brew install postgresql postgis + +pytest-postgresql will locate pg_ctl automatically from your PATH. If you +have multiple Postgres versions installed, point it at the right one via +pytest.ini or pyproject.toml: + + [tool.pytest.ini_options] + postgresql_exec = "/opt/homebrew/opt/postgresql@16/bin/pg_ctl" +""" + +import pytest +from pytest_postgresql import factories as pg_factories +from sqlalchemy import create_engine, text +from sqlalchemy.orm import Session, sessionmaker + +from opensampl.db.orm import Base +from opensampl.db.orm import Defaults as DBDefaults +from opensampl.db.orm import MetricType as DBMetricType +from opensampl.db.orm import Reference as DBReference +from opensampl.db.orm import ReferenceType as DBReferenceType +from opensampl.metrics import METRICS, MetricType +from opensampl.references import REF_TYPES, ReferenceType + + +# --------------------------------------------------------------------------- +# pytest-postgresql process fixture +# +# postgresql_proc manages the Postgres server lifetime (session-scoped). +# We deliberately avoid the postgresql connection fixture so we have no +# dependency on a specific psycopg version — the project already has +# psycopg2-binary, and SQLAlchemy handles the connection from here. 
+# --------------------------------------------------------------------------- + +postgresql_proc = pg_factories.postgresql_proc() + + +# --------------------------------------------------------------------------- +# Helpers: introspect METRICS / REF_TYPES the same way VENDORS.all() does +# --------------------------------------------------------------------------- + +def _all_metrics() -> list[MetricType]: + """All MetricType instances defined on the METRICS class.""" + return [v for v in METRICS.__dict__.values() if isinstance(v, MetricType)] + + +def _all_ref_types() -> list[ReferenceType]: + """All ReferenceType instances defined on REF_TYPES (includes CompoundReferenceType).""" + return [v for v in REF_TYPES.__dict__.values() if isinstance(v, ReferenceType)] + + +# --------------------------------------------------------------------------- +# Session-scoped engine +# Schema, tables, seed data, and the get_default_uuid_for stub are all +# created once per test session. +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def db_engine(postgresql_proc): + """ + Session-scoped SQLAlchemy engine pointed at the pytest-postgresql instance. + + Connects using psycopg2-binary (already a project dependency) so there is + no dependency on psycopg3. Creates the opensampl_test database on first + run via a temporary autocommit connection to the default 'postgres' database. + + Lifecycle: + 1. Create the opensampl_test database. + 2. Install PostGIS and create the castdb schema. + 3. Create all ORM tables via Base.metadata.create_all(). + 4. Seed metric_type, reference_type, reference, and defaults tables. + 5. Install the get_default_uuid_for() PL/pgSQL stub. 
+ """ + # postgresql_proc exposes plain attributes — no psycopg version dependency + host = postgresql_proc.host + port = postgresql_proc.port + user = postgresql_proc.user + test_dbname = "opensampl_test" + + # Connect to the default 'postgres' db to create our test database. + # Must use isolation_level=AUTOCOMMIT because CREATE DATABASE cannot run + # inside a transaction block. + bootstrap_url = f"postgresql+psycopg2://{user}@{host}:{port}/postgres" + bootstrap_engine = create_engine(bootstrap_url, isolation_level="AUTOCOMMIT") + with bootstrap_engine.connect() as conn: + exists = conn.execute( + text("SELECT 1 FROM pg_database WHERE datname = :dbname"), + {"dbname": test_dbname}, + ).fetchone() + if not exists: + conn.execute(text(f'CREATE DATABASE "{test_dbname}"')) + bootstrap_engine.dispose() + + db_url = f"postgresql+psycopg2://{user}@{host}:{port}/{test_dbname}" + engine = create_engine(db_url, echo=False) + + with engine.begin() as conn: + # PostGIS is required by the Locations.geom column (GeoAlchemy2) + conn.execute(text("CREATE EXTENSION IF NOT EXISTS postgis")) + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {Base.metadata.schema}")) + + Base.metadata.create_all(engine) + _seed_lookup_tables(engine) + _install_default_uuid_stub(engine) + + yield engine + + engine.dispose() + + +# --------------------------------------------------------------------------- +# Seeding helpers (called once from db_engine, not exposed as fixtures) +# --------------------------------------------------------------------------- + +def _seed_lookup_tables(engine) -> None: + """ + Populate metric_type, reference_type, reference, and defaults tables. + + Reads directly from the METRICS and REF_TYPES Python definitions so the + test DB always matches what the application expects — no hardcoded values. + After inserting the lookup rows, seeds the defaults table with the UUIDs + of the UNKNOWN rows, mirroring how production initialises get_default_uuid_for(). 
+ """ + SessionLocal = sessionmaker(bind=engine) + session = SessionLocal() + + try: + # --- metric_type --- + for metric in _all_metrics(): + data = metric.model_dump() # value_type serialised to str by field_serializer + if not session.query(DBMetricType).filter_by(name=data["name"]).first(): + session.add(DBMetricType(**data)) + + # --- reference_type --- + # CompoundReferenceType.model_dump() includes reference_table; the column is nullable so plain + # ReferenceType rows (no reference_table) are stored with NULL, which is correct. + for ref_type in _all_ref_types(): + data = ref_type.model_dump() + if not session.query(DBReferenceType).filter_by(name=data["name"]).first(): + session.add(DBReferenceType(**data)) + + session.flush() + + # --- reference: one default UNKNOWN row -------------------------- + # get_default_uuid_for('reference') needs at least one reference row to + # point at. We use the UNKNOWN reference type with no compound target. + unknown_ref_type = session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + default_reference = session.query(DBReference).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).first() + if not default_reference: + default_reference = DBReference( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ) + session.add(default_reference) + + session.flush() + + # --- defaults table --------------------------------------------- + # Maps table/category names to the UUID that get_default_uuid_for() + # should return. Mirrors what the production TimescaleDB init does. 
+ unknown_metric = session.query(DBMetricType).filter_by(name=METRICS.UNKNOWN.name).one() + + for table_name, uuid_value in [ + ("metric_type", unknown_metric.uuid), + ("reference", default_reference.uuid), + ]: + if not session.query(DBDefaults).filter_by(table_name=table_name).first(): + session.add(DBDefaults(table_name=table_name, uuid=uuid_value)) + + session.commit() + + except Exception: + session.rollback() + raise + finally: + session.close() + + +def _install_default_uuid_stub(engine) -> None: + """ + Install the get_default_uuid_for() PL/pgSQL stub. + + Rather than hardcoding UUIDs, the stub queries the defaults table — the + same approach the production TimescaleDB function uses. This means it + automatically returns whatever was seeded above. + """ + schema = Base.metadata.schema + + with engine.begin() as conn: + conn.execute(text(f""" + CREATE OR REPLACE FUNCTION get_default_uuid_for(entity_type TEXT) + RETURNS TEXT AS $$ + DECLARE + result_uuid TEXT; + BEGIN + SELECT uuid + INTO result_uuid + FROM {schema}.defaults + WHERE table_name = entity_type; + + RETURN result_uuid; + END; + $$ LANGUAGE plpgsql; + """)) + + +# --------------------------------------------------------------------------- +# Per-test session — savepoint rollback keeps tests isolated +# --------------------------------------------------------------------------- + +@pytest.fixture +def db_session(db_engine) -> Session: + """ + Function-scoped database session backed by a savepoint. + + Every test starts with the full seeded dataset intact. Any rows inserted + or updated during the test are rolled back when the test ends without + disturbing the seeded rows, and without the cost of recreating the schema. 
+ + Usage:: + + def test_something(db_session): + factory = TableFactory("locations", db_session) + factory.write({"name": "test-loc", "lat": 35.9, "lon": -84.3}) + result = db_session.query(Locations).filter_by(name="test-loc").one() + assert result.name == "test-loc" + # row is gone after the test + """ + connection = db_engine.connect() + outer_transaction = connection.begin() + session = Session(bind=connection) + session.begin_nested() # SAVEPOINT — inner rollback target + + yield session + + session.close() + outer_transaction.rollback() # wipes everything written during the test + connection.close() + + +# --------------------------------------------------------------------------- +# Seeded UUIDs — expose the canonical lookup UUIDs tests may need directly +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def seeded_uuids(db_engine) -> dict: + """ + Session-scoped dict of UUIDs inserted during seed, keyed by logical name. + + Useful when a test needs to construct ORM objects by hand (e.g. ProbeData) + and must reference real FK values. + + Keys + ---- + metric_type. — UUID from the metric_type table + reference_type. 
— UUID from the reference_type table + reference.unknown — UUID of the default UNKNOWN reference row + default.metric_type — what get_default_uuid_for('metric_type') returns + default.reference — what get_default_uuid_for('reference') returns + + Example:: + + def test_probe_data(db_session, seeded_uuids): + phase_offset_uuid = seeded_uuids["metric_type.Phase Offset"] + """ + SessionLocal = sessionmaker(bind=db_engine) + session = SessionLocal() + + try: + uuids: dict = {} + + for row in session.query(DBMetricType).all(): + uuids[f"metric_type.{row.name}"] = row.uuid + + for row in session.query(DBReferenceType).all(): + uuids[f"reference_type.{row.name}"] = row.uuid + + unknown_ref_type = session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + unknown_ref = session.query(DBReference).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).one() + uuids["reference.unknown"] = unknown_ref.uuid + + for row in session.query(DBDefaults).all(): + uuids[f"default.{row.table_name}"] = row.uuid + + return uuids + + finally: + session.close() + + +# --------------------------------------------------------------------------- +# Routing environment — patch BaseConfig for @route-decorated functions +# --------------------------------------------------------------------------- + +@pytest.fixture +def db_env(db_engine, monkeypatch) -> None: + """ + Set env vars so that BaseConfig routes directly to the test DB. + + Any test that calls a @route-decorated function (write_to_table, + load_time_data, load_probe_metadata, create_new_tables) must include + this fixture. Pass session=db_session explicitly so the route wrapper + uses your test session rather than opening a new one. 
+ + Usage:: + + def test_write_to_table(db_env, db_session): + write_to_table( + "locations", + {"name": "test-loc", "lat": 35.9, "lon": -84.3}, + session=db_session, + ) + """ + monkeypatch.setenv("ROUTE_TO_BACKEND", "false") + monkeypatch.setenv("DATABASE_URL", str(db_engine.url)) + monkeypatch.delenv("BACKEND_URL", raising=False) \ No newline at end of file diff --git a/tests/integration/test_db_setup.py b/tests/integration/test_db_setup.py new file mode 100644 index 0000000..6f1f542 --- /dev/null +++ b/tests/integration/test_db_setup.py @@ -0,0 +1,106 @@ +""" +tests/integration/test_db_setup.py + +Smoke tests to verify the test database spun up and seeded correctly. +""" + +import pytest +from sqlalchemy import text + +from opensampl.db.orm import Defaults as DBDefaults +from opensampl.db.orm import MetricType as DBMetricType +from opensampl.db.orm import Reference as DBReference +from opensampl.db.orm import ReferenceType as DBReferenceType +from opensampl.metrics import METRICS, MetricType +from opensampl.references import REF_TYPES, ReferenceType + + +def test_schema_exists(db_session): + """castdb schema was created.""" + result = db_session.execute( + text("SELECT schema_name FROM information_schema.schemata WHERE schema_name = 'castdb'") + ).scalar() + assert result == "castdb" + + +def test_postgis_installed(db_session): + """PostGIS extension is available (required for Locations.geom).""" + result = db_session.execute(text("SELECT extname FROM pg_extension WHERE extname = 'postgis'")).scalar() + assert result == "postgis" + + +def test_all_metrics_seeded(db_session): + """Every MetricType defined on METRICS is present in the metric_type table.""" + expected = {v.name for v in METRICS.__dict__.values() if isinstance(v, MetricType)} + seeded = {row.name for row in db_session.query(DBMetricType).all()} + assert expected == seeded + + +def test_all_reference_types_seeded(db_session): + """Every ReferenceType defined on REF_TYPES is present in the 
reference_type table.""" + expected = {v.name for v in REF_TYPES.__dict__.values() if isinstance(v, ReferenceType)} + seeded = {row.name for row in db_session.query(DBReferenceType).all()} + assert expected == seeded + + +def test_default_reference_row_exists(db_session): + """A default UNKNOWN reference row exists for get_default_uuid_for('reference').""" + unknown_ref_type = db_session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + ref = db_session.query(DBReference).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).first() + assert ref is not None + + +def test_defaults_table_seeded(db_session): + """defaults table has entries for both metric_type and reference.""" + rows = {row.table_name for row in db_session.query(DBDefaults).all()} + assert "metric_type" in rows + assert "reference" in rows + + +def test_get_default_uuid_for_metric_type(db_session): + """Stub function returns the UUID of the UNKNOWN metric type.""" + result = db_session.execute(text("SELECT get_default_uuid_for('metric_type')")).scalar() + expected = db_session.query(DBMetricType.uuid).filter_by(name=METRICS.UNKNOWN.name).scalar() + assert result == expected + + +def test_get_default_uuid_for_reference(db_session): + """Stub function returns the UUID of the default UNKNOWN reference row.""" + result = db_session.execute(text("SELECT get_default_uuid_for('reference')")).scalar() + unknown_ref_type = db_session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + expected = db_session.query(DBReference.uuid).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).scalar() + assert result == expected + + +def test_seeded_uuids_fixture(seeded_uuids): + """seeded_uuids convenience fixture has the expected keys.""" + assert f"metric_type.{METRICS.UNKNOWN.name}" in seeded_uuids + assert f"metric_type.{METRICS.PHASE_OFFSET.name}" in seeded_uuids + assert 
f"reference_type.{REF_TYPES.UNKNOWN.name}" in seeded_uuids + assert "reference.unknown" in seeded_uuids + assert "default.metric_type" in seeded_uuids + assert "default.reference" in seeded_uuids + + +def test_session_rollback_isolation(db_session, db_engine): + """Writes in one session do not leak — the savepoint rolls back cleanly.""" + from opensampl.db.orm import TestMetadata + + db_session.add(TestMetadata(name="rollback-canary")) + db_session.flush() + + # Row is visible within this session + assert db_session.query(TestMetadata).filter_by(name="rollback-canary").one() + + # After the test ends the fixture rolls back, but we can verify the + # mechanism works by checking a fresh session sees nothing yet + from sqlalchemy.orm import Session + with Session(bind=db_engine) as fresh: + result = fresh.query(TestMetadata).filter_by(name="rollback-canary").first() + assert result is None, "Savepoint did not isolate the write from other sessions" \ No newline at end of file From 3ba95cb509f8a6dc96f89032e18c46a2fc769840 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Mon, 20 Apr 2026 16:01:41 -0400 Subject: [PATCH 21/22] adding the other dash stuff --- .../public-timing-dashboard.json | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json b/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json index 687ceae..24b7a6f 100644 --- a/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json +++ b/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json @@ -338,7 +338,7 @@ "group": [], "metricColumn": "none", "rawQuery": true, - "rawSql": "select\n pm.name as \"Clock\",\n l.name as \"Location Name\",\n l.latitude,\n l.longitude,\n l.campus\nfrom castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\nwhere pm.uuid in (${clock_name:sqlstring});", + "rawSql": "select\n pm.name 
as \"Clock\",\n l.name as \"Location Name\",\n l.latitude,\n l.longitude,\n l.campus\nfrom castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\nwhere (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')));", "refId": "ClockProbes", "select": [ [ @@ -379,13 +379,13 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "P55EB97F79F5EB88E" + "uid": "castdb-datasource" }, "editorMode": "code", "format": "table", "hide": false, "rawQuery": true, - "rawSql": "SELECT\n l.latitude,\n l.longitude,\n l.campus,\n sum(\n CASE\n when pm.public = True and pm.vendor in ('ADVA', 'MicrochipTP4100') then 1 else 0\n end \n ) as visible_clocks,\n sum(\n CASE\n when pm.uuid in (${clock_name:sqlstring}) then 1 else 0\n end \n ) as selected_clocks\n from castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\n where l.public = True\n group by\n l.latitude, l.longitude, l.campus;", + "rawSql": "SELECT\n l.latitude,\n l.longitude,\n l.campus,\n sum(\n CASE\n when pm.public = True and pm.vendor in ('ADVA', 'MicrochipTP4100', 'NTP') then 1 else 0\n end \n ) as visible_clocks,\n sum(\n CASE\n when (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ','))) then 1 else 0\n end \n ) as selected_clocks\n from castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\n where l.public = True\n group by\n l.latitude, l.longitude, l.campus;", "refId": "A", "sql": { "columns": [ @@ -465,7 +465,7 @@ }, "format": "table", "rawQuery": true, - "rawSql": "SELECT COUNT(*) as \"Total Clock Probes\" FROM castdb.probe_metadata where uuid in ($clock_name)", + "rawSql": "SELECT COUNT(*)::bigint AS \"Total Clock Probes\" FROM castdb.probe_metadata pm WHERE pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP') AND coalesce(pm.public, true) AND (trim('${clock_name:csv}') = '' OR pm.uuid = 
ANY(string_to_array(trim('${clock_name:csv}'), ',')))", "refId": "A" } ], @@ -539,7 +539,7 @@ "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT \n coalesce(pm.name, concat(pm.ip_address, 'Inteface', pm.probe_id)) as \"Clock Probe\", \n COUNT(*) as \"Total Records\" \nFROM castdb.probe_data pd\njoin castdb.probe_metadata pm on pd.probe_uuid = pm.uuid \nwhere pm.uuid in (${clock_name:sqlstring})\nAND pd.\"time\" >= $__timeFrom()\nAND pd.\"time\" <= $__timeTo()\ngroup by pm.uuid, pm.name, pm.ip_address, pm.probe_id;", + "rawSql": "SELECT \n coalesce(pm.name, concat(pm.ip_address, ' Interface ', pm.probe_id)) AS \"Clock Probe\", \n COUNT(*)::bigint AS \"Total Records\" \nFROM castdb.probe_data pd\nJOIN castdb.probe_metadata pm ON pd.probe_uuid = pm.uuid \nWHERE pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\n AND pd.\"time\" >= $__timeFrom()\n AND pd.\"time\" <= $__timeTo()\nGROUP BY pm.uuid, pm.name, pm.ip_address, pm.probe_id;", "refId": "A", "sql": { "columns": [ @@ -568,7 +568,7 @@ "type": "grafana-postgresql-datasource", "uid": "castdb-datasource" }, - "description": "Average time error \n(averaged on selected resolution)", + "description": "Average time error vs stored reference (resolution as selected). 
GNSS-specific labeling applies only when the probe/reference model is GNSS-backed.", "fieldConfig": { "defaults": { "color": { @@ -650,7 +650,7 @@ "editorMode": "code", "format": "time_series", "rawQuery": true, - "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n coalesce(pm.name, concat(pm.ip_address, ' Interface ', pm.probe_id)),\n AVG(pd.value::FLOAT) * 1e9 AS value\nFROM castdb.probe_data pd join castdb.probe_metadata pm on pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pd.probe_uuid IN (${clock_name:sqlstring})\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", + "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n coalesce(pm.name, concat(pm.ip_address, ' Interface ', pm.probe_id)),\n AVG(pd.value::FLOAT) * 1e9 AS value\nFROM castdb.probe_data pd JOIN castdb.probe_metadata pm ON pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pd.probe_uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", "refId": "A", "sql": { "columns": [ @@ -671,7 +671,7 @@ } } ], - "title": "Time Error - Clock Time vs GNSS", + "title": "Time Error - Clock Time vs Reference", "transformations": [ { "id": "prepareTimeSeries", @@ -776,7 +776,7 @@ "editorMode": "code", "format": "time_series", "rawQuery": true, - "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n COALESCE(name, CONCAT(ip_address, ' Interface ', 
probe_id)),\n (MAX(pd.value::FLOAT) - MIN(pd.value::FLOAT)) * 1e9 AS value\nFROM castdb.probe_data pd join castdb.probe_metadata pm on pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pd.probe_uuid in (${clock_name:sqlstring})\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", + "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n COALESCE(pm.name, CONCAT(pm.ip_address, ' Interface ', pm.probe_id)),\n (MAX(pd.value::FLOAT) - MIN(pd.value::FLOAT)) * 1e9 AS value\nFROM castdb.probe_data pd JOIN castdb.probe_metadata pm ON pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pd.probe_uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", "refId": "A", "sql": { "columns": [ @@ -797,7 +797,7 @@ } } ], - "title": "Maximum Time Interval Error VS GNSS", + "title": "Maximum Time Interval Error vs Reference", "transformations": [ { "id": "prepareTimeSeries", @@ -834,7 +834,7 @@ "type": "grafana-postgresql-datasource", "uid": "castdb-datasource" }, - "description": "Average time error \n(averaged on selected resolution)", + "description": "Average time error vs stored reference (resolution as selected). 
GNSS-specific labeling applies only when the probe/reference model is GNSS-backed.", "fieldConfig": { "defaults": { "color": { @@ -937,7 +937,7 @@ } } ], - "title": "Time Error - Clock Time vs GNSS", + "title": "Time Error - Clock Time vs Reference", "type": "timeseries" }, { @@ -1048,14 +1048,44 @@ } } ], - "title": "Maximum Time Interval Error", + "title": "Maximum Time Interval Error vs Reference", "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 29}, + "id": 101, + "panels": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "castdb-datasource"}, + "description": "Rows reflect stored `probe_metadata`, `ntp_metadata` (when vendor is NTP), `locations`, and one sample `reference`/`reference_type` from `probe_data` per probe.", + "fieldConfig": {"defaults": {}, "overrides": []}, + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 0}, + "id": 100, + "options": {"cellHeight": "sm", "showHeader": true}, + "pluginVersion": "12.0.0", + "targets": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "castdb-datasource"}, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS probe,\n pm.vendor,\n COALESCE(rt.name, '') AS reference_type,\n COALESCE(nm.target_host::text, '') AS ntp_server,\n COALESCE(nm.mode::text, '') AS ntp_mode,\n COALESCE(nm.reference_id::text, '') AS ntp_ref_id,\n COALESCE(l.name, '') AS location,\n COALESCE(pm.public::text, '') AS public\nFROM castdb.probe_metadata pm\nLEFT JOIN castdb.ntp_metadata nm ON nm.probe_uuid = pm.uuid\nLEFT JOIN castdb.locations l ON l.uuid = pm.location_uuid\nLEFT JOIN LATERAL (\n SELECT pd.reference_uuid FROM castdb.probe_data pd WHERE pd.probe_uuid = pm.uuid LIMIT 1\n) rp ON true\nLEFT JOIN castdb.reference r ON r.uuid = rp.reference_uuid\nLEFT JOIN castdb.reference_type rt ON rt.uuid = r.reference_type_uuid\nWHERE pm.vendor IN ('ADVA', 
'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\nORDER BY 1;", + "refId": "A" + } + ], + "title": "Probe reference & source (stored metadata)", + "type": "table" + } + ], + "title": "Reference & source metadata", + "type": "row" } ], "preload": false, "refresh": "", "schemaVersion": 41, - "tags": [], + "tags": ["opensampl", "reference", "geospatial"], "templating": { "list": [ { @@ -1067,12 +1097,12 @@ "type": "grafana-postgresql-datasource", "uid": "castdb-datasource" }, - "definition": "SELECT uuid AS __value, COALESCE(name, CONCAT(ip_address, ' Interface ', probe_id)) AS __text FROM castdb.probe_metadata WHERE vendor in ('ADVA', 'MicrochipTP4100') and public;", + "definition": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' Interface ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor in ('ADVA', 'MicrochipTP4100', 'NTP') AND coalesce(pm.public, true) ORDER BY 2;", "includeAll": true, "multi": true, "name": "clock_name", "options": [], - "query": "SELECT uuid AS __value, COALESCE(name, CONCAT(ip_address, ' Interface ', probe_id)) AS __text FROM castdb.probe_metadata WHERE vendor in ('ADVA', 'MicrochipTP4100') and public;", + "query": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' Interface ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor in ('ADVA', 'MicrochipTP4100', 'NTP') AND coalesce(pm.public, true) ORDER BY 2;", "refresh": 1, "regex": "", "type": "query" @@ -1130,7 +1160,8 @@ }, "timepicker": {}, "timezone": "utc", - "title": "Public Geospatial and Timing Combined Dashboard", + "description": "Geospatial views use stored `locations` geometry. 
Timing series are relative to each probe\u2019s stored reference (OpenSAMPL `reference` / `reference_type`) and are **not** GNSS-truth unless a GNSS-backed probe supplies that semantics.", + "title": "Public Geospatial and Timing (Reference)", "uid": "public-geospatial-dashboard", "version": 10 } \ No newline at end of file From 03ca8a095d9530088ca4446e257acf5e08f6b51f Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Wed, 22 Apr 2026 08:33:56 -0400 Subject: [PATCH 22/22] small tweak --- opensampl/mixins/collect.py | 2 +- opensampl/vendors/ntp.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensampl/mixins/collect.py b/opensampl/mixins/collect.py index ed48d93..9606b9e 100644 --- a/opensampl/mixins/collect.py +++ b/opensampl/mixins/collect.py @@ -144,7 +144,7 @@ def _collect_and_save(cls, collect_config: CollectConfig) -> None: @classmethod def filter_files(cls, files: list[Path]) -> list[Path]: """Filter the files found in the input directory when loading this vendor's data files""" - return [f for f in files if f.name.startswith(f"{cls.vendor.parser_class}_") and f.stem == ".txt"] + return [f for f in files if f.name.startswith(f"{cls.vendor.parser_class}_") and f.suffix == ".txt"] @classmethod def load_metadata(cls, probe_key: ProbeKey, metadata: dict) -> None: diff --git a/opensampl/vendors/ntp.py b/opensampl/vendors/ntp.py index 57f835b..0b28fca 100644 --- a/opensampl/vendors/ntp.py +++ b/opensampl/vendors/ntp.py @@ -510,8 +510,8 @@ class CollectConfig(CollectMixin.CollectConfig): ip_address: str = "127.0.0.1" port: int | None = None mode: Literal["remote", "local"] = "local" - interval: float = 0.0 - duration: int = 1 + interval: float = Field(0.0, ge=0.0) + duration: int = Field(1, ge=1) timeout: float = 3.0 collection_ip: str = Field(default_factory=collect_ip_factory) collection_id: str = Field(default_factory=collect_id_factory) @@ -540,7 +540,7 @@ class RandomDataConfig(RandomDataMixin.RandomDataConfig): 
             description="random.uniform(-1e-12, 1e-12)",
         )
 
-    def __init__(self, input_file: str):
+    def __init__(self, input_file: str):
         """Initialize NtpProbe from input file"""
         super().__init__(input_file)
         self.collection_probe = None