class NtpMetadata2(Base):
    """Custom metadata table for the NTP2 vendor (one row per probe).

    Patch-01 shape of the table; later patches in this series rename
    observation_source -> observation_sources (JSONB) and drop the
    per-sample measurement columns.
    """

    __tablename__ = "ntp_metadata2"

    probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True)

    # Collection context
    mode = Column(Text)  # 'local' or 'remote'
    probe_name = Column(Text)
    target_host = Column(Text)
    target_port = Column(Integer)

    # Last observed NTP state
    sync_status = Column(Text)
    leap_status = Column(Text)
    stratum = Column(Integer)
    reachability = Column(Integer)
    offset_last_s = Column(Float)
    delay_s = Column(Float)
    jitter_s = Column(Float)
    dispersion_s = Column(Float)
    root_delay_s = Column(Float)
    root_dispersion_s = Column(Float)
    poll_interval_s = Column(Float)
    reference_id = Column(Text)

    # Provenance
    observation_source = Column(Text)
    collection_host = Column(Text)
    additional_metadata = Column(JSONB)

    # NOTE(review): back_populates requires ProbeMetadata to declare a
    # matching `ntp_metadata2` relationship — confirm it exists (patch 03
    # adds one under a different name).
    probe = relationship("ProbeMetadata", back_populates="ntp_metadata2")
# --- opensampl/vendors/constants.py: VENDORS registry entry ----------------
# Registers the NTP2 vendor so the loader can resolve the parser class,
# parser module and metadata ORM table by name.
NTP2 = VendorType(
    name='NTP2',
    parser_class='NtpProbe2',
    parser_module='ntp2',
    metadata_table='ntp_metadata2',
    metadata_orm='NtpMetadata2',
)


# --- opensampl/vendors/ntp2.py: patch-01 scaffolding -----------------------
"""Probe implementation for NTP2 vendor"""


class NtpProbe2(BaseProbe, CollectMixin):
    """Probe parser for NTP2 vendor data files (scaffolding: parsing is TODO)."""

    vendor = VENDORS.NTP2

    class CollectConfig(CollectMixin.CollectConfig):
        """
        Collection options for the NTP2 probe.

        Fields inherited from CollectMixin.CollectConfig (defaults shown):
            output_dir: Optional[Path] = None
            load: bool = False
            duration: int = 300
            ip_address: str = '127.0.0.1'
            probe_id: str = '1-1'

        Additional fields declared here are automatically exposed as collect
        CLI options and forwarded to collect().
        """

    def __init__(self, input_file: str, **kwargs):
        """Initialize NtpProbe2 from *input_file*; probe-key extraction is TODO."""
        super().__init__(input_file)
        # TODO: parse self.input_file to extract
        #   self.probe_key = ProbeKey(probe_id=..., ip_address=...)

    def process_metadata(self) -> dict:
        """Parse and return probe metadata from the input file.

        Expected keys: mode, probe_name, target_host, target_port,
        sync_status, leap_status, stratum, reachability, offset_last_s,
        delay_s, jitter_s, dispersion_s, root_delay_s, root_dispersion_s,
        poll_interval_s, reference_id, observation_source, collection_host,
        additional_metadata.
        """
        # TODO: implement metadata parsing and return the dict.
        raise NotImplementedError

    def process_time_data(self) -> pd.DataFrame:
        """Parse time-series data from self.input_file and push it.

        Use send_time_data (prefills METRICS.PHASE_OFFSET) or send_data with
        an explicit metric type.  Either way the frame needs columns:
            time  (datetime64[ns]): timestamp per measurement
            value (float64):        measured value per timestamp
        The on-disk format must match what save_to_file produces.
        """
        # TODO: df = pd.DataFrame({"time": [...], "value": [...]})
        #       self.send_time_data(df, reference_type=...)
        raise NotImplementedError

    @classmethod
    def collect(cls, collect_config: 'NtpProbe2.CollectConfig') -> CollectMixin.CollectArtifact:
        """Collect live data and return a CollectArtifact.

        When collect_config.load is set, the artifact's metadata and data are
        loaded into the database; save_to_file logic must also be defined.
        """
        # TODO: build and return the CollectArtifact.
        raise NotImplementedError

    @classmethod
    def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str:
        """Render *collected* into the text written to the output file.

        The filename is auto-generated as
        {ip_address}_{probe_id}_{vendor}_{timestamp}.txt in the CLI-provided
        directory; the content must stay readable by the parse methods above
        and carry the required metadata.
        """
        raise NotImplementedError
# Custom NTP metric types (all scalar floats so they plot uniformly).
NTP_DELAY = MetricType(
    name="NTP Delay",
    description="Round-trip delay (RTT) to the NTP server or observed path delay in seconds",
    unit="s",
    value_type=float,
)
NTP_JITTER = MetricType(
    name="NTP Jitter",
    description=(
        "Jitter or offset variation for NTP in seconds (true value from chrony/ntpq when available; "
        "remote single-packet collection may use a delay/dispersion bound estimate)"
    ),
    unit="s",
    value_type=float,
)
NTP_STRATUM = MetricType(
    name="NTP Stratum",
    description="NTP stratum level (distance from reference clock)",
    unit="level",
    value_type=float,
)
NTP_REACHABILITY = MetricType(
    name="NTP Reachability",
    description="NTP reachability register (0-255) as a scalar for plotting",
    unit="count",
    value_type=float,
)
NTP_DISPERSION = MetricType(
    name="NTP Dispersion",
    description="Combined error budget / dispersion in seconds",
    unit="s",
    value_type=float,
)
NTP_ROOT_DELAY = MetricType(
    name="NTP Root Delay",
    description="Root delay from NTP packet or local estimate in seconds",
    unit="s",
    value_type=float,
)
NTP_ROOT_DISPERSION = MetricType(
    name="NTP Root Dispersion",
    description="Root dispersion from NTP packet or local estimate in seconds",
    unit="s",
    value_type=float,
)
NTP_POLL_INTERVAL = MetricType(
    name="NTP Poll Interval",
    description="Poll interval in seconds",
    unit="s",
    value_type=float,
)
NTP_SYNC_HEALTH = MetricType(
    name="NTP Sync Health",
    description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)",
    unit="ratio",
    value_type=float,
)
import BaseProbe from opensampl.vendors.constants import ProbeKey, VENDORS from opensampl.references import REF_TYPES from opensampl.mixins.collect import CollectMixin +from typing import Literal, Optional, Any, TypeVar +from pydantic import model_validator, BaseModel, Field, field_serializer +from pydanclick import from_pydantic +import click +import shutil +import subprocess +from datetime import datetime, timezone +from loguru import logger +from opensampl.metrics import METRICS + + +T = TypeVar('T') +def _merge(a: T | None, b: T | None) -> T | None: + return a if a is not None else b + class NtpProbe2(BaseProbe, CollectMixin): """Probe parser for NTP2 vendor data files""" vendor = VENDORS.NTP2 - class CollectConfig(CollectMixin.CollectConfig): - """ - The following configuration fields are inherited from the Collect mixin. - Change the defaults by uncommenting and changing value + metric_map = { + "phase_offset_s": METRICS.PHASE_OFFSET, + "delay_s": METRICS.NTP_DELAY, + "jitter_s": METRICS.NTP_JITTER, + "stratum": METRICS.NTP_STRATUM, + "reachability": METRICS.NTP_REACHABILITY, + "dispersion_s": METRICS.NTP_DISPERSION, + "root_delay_s": METRICS.NTP_ROOT_DELAY, + "root_dispersion_s": METRICS.NTP_ROOT_DISPERSION, + "poll_interval_s": METRICS.NTP_POLL_INTERVAL, + "sync_health": METRICS.NTP_SYNC_HEALTH, + } - Add additional fields, which will automatically be added to the collect click options - and provided to calls to collect - output_dir: Optional[Path] = None - load: bool = False - duration: int = 300 + class NTPMetadata(BaseModel): + mode: Literal['remote', 'local'] - ip_address: str = '127.0.0.1' - probe_id: str = '1-1' + target_host: str = "" + target_port: int = 0 + + sync_status: str = Field("unknown", serialization_alias='sync_health') + leap_status: str = "unknown" + stratum: int | None = None + reachability: int | None = None + offset_last_s: float | None = Field(None, serialization_alias='phase_offset_s') + delay_s: float | None = None + jitter_s: 
# Method of NtpProbe2.NTPMetadata (patch 02); reconstructed at module level
# because the enclosing class is split across the collapsed patch stream.
def parse_chronyc_tracking(self, text: str) -> None:
    """Parse `chronyc tracking` key: value output into this record.

    Fills offset_last_s, jitter_s, stratum and reference_id (existing
    values win via _merge), sets sync_status, and stashes the raw
    key/value map in extras['chronyc_raw_tracking'].
    """
    out: dict[str, Any] = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or ":" not in line:
            continue
        key, _, rest = line.partition(":")
        key = key.strip().lower().replace(" ", "_")
        out[key] = rest.strip()

    # "Last offset     : +0.000000123 seconds"
    m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            # BUG FIX: the capture was stored without float() conversion, so
            # a str landed in the float-typed field and the ValueError guard
            # below could never fire.
            self.offset_last_s = _merge(self.offset_last_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            self.jitter_s = _merge(self.jitter_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"stratum\s*:\s*(\d+)", text, re.I)
    if m:
        try:
            self.stratum = _merge(self.stratum, int(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.I)
    if m:
        # Prefer the parenthesised hostname over the raw refid when present.
        self.reference_id = (m.group(2) or m.group(1)) or self.reference_id

    self.sync_status = "unsynchronized"
    if "normal" in text.lower() or self.offset_last_s is not None:
        self.sync_status = "tracking"
    self.extras['chronyc_raw_tracking'] = out
    self.observation_sources.append("chronyc_tracking")
# Method of NtpProbe2.NTPMetadata (patch 02); reconstructed at module level
# because the enclosing class is split across the collapsed patch stream.
def parse_ntpq(self, text: str) -> None:
    """Parse `ntpq -p` / `ntpq -pn` peer-table output.

    Takes the first tally-prefixed peer line (*, + or -) and fills offset,
    delay, jitter (converted ms -> s), stratum, reach and refid; values
    already set on this record take precedence.
    """
    offset_s: float | None = None
    delay_s: float | None = None
    jitter_s: float | None = None
    stratum: int | None = None
    reach: int | None = None
    ref = None
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("remote") or line.startswith("="):
            continue
        if line.startswith(("*", "+", "-")):
            parts = line.split()
            # remote refid st t when poll reach delay offset jitter
            if len(parts) >= 10:
                try:
                    stratum = int(parts[2])
                except ValueError:
                    pass
                try:
                    delay_s = float(parts[7]) / 1000.0  # ms -> s
                    offset_s = float(parts[8]) / 1000.0
                    jitter_s = float(parts[9]) / 1000.0
                except (ValueError, IndexError):
                    pass
                try:
                    # BUG FIX: ntpq prints the reach register in octal with
                    # no leading 0 ("377" == 255); the old code parsed it as
                    # decimal unless it happened to start with '0'.
                    reach = int(parts[6], 8)
                except ValueError:
                    try:
                        reach = int(parts[6])
                    except ValueError:
                        pass
                ref = parts[1]
            break

    # Values parsed from earlier sources take precedence.
    self.offset_last_s = self.offset_last_s or offset_s
    self.delay_s = self.delay_s or delay_s
    self.jitter_s = self.jitter_s or jitter_s
    self.stratum = self.stratum or stratum
    self.reachability = self.reachability or reach
    self.reference_id = self.reference_id or ref
    # BUG FIX: the old `self.sync_status = sync_status or self.sync_status`
    # always overwrote the existing status, because the fallback string
    # "unknown" is truthy.  Only claim "synced" when an offset was parsed.
    if offset_s is not None:
        self.sync_status = "synced"
    self.observation_sources.append("ntpq")
Filename will be automatically generated as ntp_{ip_address}_{probe_id}_{ts.strftime('%Y%m%dT%H%M%SZ')}.json + load: Whether to load collected data directly to the database + duration: Number of seconds to collect data for + mode: Collect remote or local NTP. Default is 'local'. + interval: Seconds between samples; 0 = single sample and exit + duration: Samples to collect when interval > 0 + timeout: UDP request timeout for remote mode(seconds) default: 3.0 """ + ip_address: str = '127.0.0.1' + port: Optional[int] = None + mode: Literal['remote', 'local'] = 'local' + interval: float = 0.0 + duration: int = 1 + timeout: float = 3.0 + @classmethod + def get_collect_cli_options(cls): + return [ + from_pydantic(cls.CollectConfig, rename={'ip_address': 'host'}), + click.pass_context, + ] def __init__(self, input_file: str, **kwargs): """Initialize NtpProbe2 from input file""" @@ -90,6 +324,88 @@ def process_time_data(self) -> pd.DataFrame: # Ensure the format it is reading in matches that in save_to_file raise NotImplementedError + @staticmethod + def _run(cmd: list[str], timeout: float=8.0) -> Optional[str]: + """Run command; return stdout or None if missing/failed.""" + bin0 = cmd[0] + if shutil.which(bin0) is None: + return None + try: + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + except (OSError, subprocess.SubprocessError) as e: + logger.debug(f"ntp local: command {cmd!r} failed: {e}") + return None + if proc.returncode != 0: + logger.debug(f"ntp local: {cmd!r} exit {proc.returncode}: {proc.stderr!r}") + return None + return proc.stdout or "" + + def collect_local(self, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: + merged = self.NTPMetadata(mode='local', probe_name=collect_config.probe_id) + t = self._run(["chronyc", "tracking"]) + if t: + merged.parse_chronyc_tracking(t) + + t = self._run(["chronyc", "sources", "-v"]) or self._run(["chronyc", "sources"]) + if t: + 
merged.parse_chronyc_sources(t) + + if merged.offset_last_s is None and merged.stratum is None: + t = self._run(["ntpq", "-pn"]) or self._run(["ntpq", "-p"]) + if t: + merged.parse_ntpq(t) + + t = self._run(["timedatectl", "show-timesync", "--all"]) or self._run(["timedatectl", "status"]) + if t: + merged.parse_timedatectl(t) + + t = self._run(["systemctl", "show", "systemd-timesyncd", "--property=ActiveState"]) + if not t: + t = self._run(["systemctl", "status", "systemd-timesyncd", "--no-pager"]) + + if t: + merged.parse_systemctl_show(t) + + if not merged.observation_sources: + merged.observation_source = ['none'] + + now = datetime.now(tz=timezone.utc) + + + + row = merged.model_dump( + include={'offset_last_s', 'delay_s', 'jitter_s', 'stratum', 'reachability', 'dispersion_s', + 'root_delay_s', 'root_dispersion_s', 'poll_interval_s'}) + row['sync_health'] = 1.0 if merged.sync_status in ("tracking", "synchronized", "synced") else 0.0 + meta = merged.model_dump(exclude_none=True) + if merged.reference_id: + reference_type = REF_TYPES.PROBE + compound_reference = merged.reference_id + else: + reference_type = REF_TYPES.UNKNOWN + compound_reference = None + + artifacts: list[CollectMixin.DataArtifact] = [] + for k, v in row.items(): + value = pd.DataFrame([(now, v)], columns=['time', 'value'], dtype={'time': datetime64_dtype}) + metric = self.metric_map.get(k, None) + if not metric: + logger.warning(f'No metric mapping found for {k}') + continue + artifacts.append(CollectMixin.DataArtifact(metric=metric, + reference_type=reference_type, + compound_reference=compound_reference, + value=value)) + + return CollectMixin.CollectArtifact(data=artifacts, metadata=meta) + + @classmethod def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: """ From 882569dc3ce2e0418490180b54155d1a444679b6 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Mon, 13 Apr 2026 17:51:37 -0400 Subject: [PATCH 03/22] flexible --- opensampl/db/orm.py | 5 +++-- 1 
# opensampl/db/orm.py — final state of the NTP2 metadata table after
# patches 03/04 (per-sample measurement columns moved to time-series tables).
#
# BUG FIX: patch 03 wired the ProbeMetadata side of this relationship as
#     ntp_metadata = relationship("NtpMetadata", back_populates="probe", uselist=False)
# but no "NtpMetadata" class exists and NtpMetadata2.probe back-populates
# "ntp_metadata2", so SQLAlchemy mapper configuration fails at first use.
# ProbeMetadata must instead declare:
#     ntp_metadata2 = relationship("NtpMetadata2", back_populates="probe", uselist=False)

class NtpMetadata2(Base):
    """Per-probe NTP metadata for the NTP2 vendor (one row per probe)."""

    __tablename__ = "ntp_metadata2"

    probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True)
    mode = Column(Text)  # 'local' or 'remote' collection
    target_host = Column(Text)
    target_port = Column(Integer)
    sync_status = Column(Text)
    leap_status = Column(Text)
    root_delay_s = Column(Float)
    root_dispersion_s = Column(Float)
    poll_interval_s = Column(Float)
    reference_id = Column(Text)
    observation_sources = Column(JSONB)  # list of commands/sources that contributed
    collection_host = Column(Text)
    additional_metadata = Column(JSONB)

    probe = relationship("ProbeMetadata", back_populates="ntp_metadata2")
METRICS.NTP_JITTER, @@ -43,196 +41,390 @@ class NtpProbe2(BaseProbe, CollectMixin): "sync_health": METRICS.NTP_SYNC_HEALTH, } - class NTPMetadata(BaseModel): - mode: Literal['remote', 'local'] - - target_host: str = "" - target_port: int = 0 - - sync_status: str = Field("unknown", serialization_alias='sync_health') - leap_status: str = "unknown" - stratum: int | None = None - reachability: int | None = None - offset_last_s: float | None = Field(None, serialization_alias='phase_offset_s') - delay_s: float | None = None - jitter_s: float | None = None - dispersion_s: float | None = None - root_delay_s: float | None = None - root_dispersion_s: float | None = None - poll_interval_s: float | None = None - reference_id: str | None = None - observation_sources: list[str] = Field(default_factory=list) - collection_host: str | None = None - - extras: dict = Field(default_factory=dict, serialization_alias='additional_metadata') - - def parse_chronyc_tracking(self, text: str) -> None: - """Parse `chronyc tracking` key: value output.""" - out: dict[str, Any] = {} - for line in text.splitlines(): - line = line.strip() - if not line or ":" not in line: - continue - key, _, rest = line.partition(":") - key = key.strip().lower().replace(" ", "_") - val = rest.strip() - out[key] = val - - # Last offset : +0.000000123 seconds - m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) - if m: - try: - self.offset_last_s = _merge(self.offset_last_s, (m.group(1))) - except ValueError: - pass - m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) - if m: - try: - self.jitter_s = _merge(self.jitter_s, float(m.group(1))) - except ValueError: - pass - m = re.search(r"stratum\s*:\s*(\d+)", text, re.I) + sync_status: str = Field("unknown") + sync_health: float | None = Field(None, json_schema_extra={'metric': True}) + + stratum: float | None = Field(None, json_schema_extra={'metric': True}) + reachability: int | None = Field(None, 
# Methods of NTPCollector (patch 04); reconstructed at module level because
# the enclosing class is split across the collapsed patch stream.

def collect(self):
    """Populate this collector from the environment; subclasses implement."""
    raise NotImplementedError()


def determine_reference(self) -> tuple:
    """Return (reference_type, compound_reference); base: unknown / None."""
    return REF_TYPES.UNKNOWN, None


def export_data(self) -> list:
    """Turn every populated metric-tagged field into a one-row DataArtifact.

    Fields opt in via Field(json_schema_extra={'metric': True}); unmapped
    keys get an auto-generated MetricType with a warning.
    NOTE(review): key matching relies on model_config serialize_by_alias=True
    (pydantic >= 2.11) so dumped keys line up with metric_map — confirm.
    """
    stamp = datetime.now(tz=timezone.utc)
    metric_fields = {
        name
        for name, info in type(self).model_fields.items()
        if info.json_schema_extra and info.json_schema_extra.get('metric', False)
    }
    ref_type, compound_ref = self.determine_reference()
    samples = self.model_dump(include=metric_fields, exclude_none=True)

    out = []
    for m, v in samples.items():
        metric = self.metric_map.get(m)
        if metric is None:
            metric = MetricType(
                name=m,
                description=f'Automatically generated metric type for {m}',
                value_type=object,
                unit="unknown",
            )
            logger.warning(f'Generated new metric type for {m}')
        frame = pd.DataFrame([(stamp, v)], columns=['time', 'value'])
        frame['time'] = pd.to_datetime(frame['time'])
        out.append(CollectMixin.DataArtifact(
            metric=metric,
            reference_type=ref_type,
            compound_reference=compound_ref,
            value=frame,
        ))
    return out
# Method of NTPLocalCollector (patch 04); reconstructed at module level
# because the enclosing class is split across the collapsed patch stream.
def _parse_chronyc_tracking(self, text: str) -> None:
    """Parse `chronyc tracking` key: value output into this collector.

    Fills offset_s, jitter_s, stratum and reference_id (existing values win
    via _merge), sets sync_status, and stashes the raw key/value map in
    extras['chronyc_raw_tracking'].
    """
    out: dict[str, Any] = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or ":" not in line:
            continue
        key, _, rest = line.partition(":")
        key = key.strip().lower().replace(" ", "_")
        out[key] = rest.strip()

    # "Last offset     : +0.000000123 seconds"
    m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            # BUG FIX (carried over from patch 02): the capture was stored
            # without float() conversion, putting a str into the float-typed
            # field and making the ValueError guard below dead code.
            self.offset_s = _merge(self.offset_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I)
    if m:
        try:
            self.jitter_s = _merge(self.jitter_s, float(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"stratum\s*:\s*(\d+)", text, re.I)
    if m:
        try:
            self.stratum = _merge(self.stratum, int(m.group(1)))
        except ValueError:
            pass
    m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.I)
    if m:
        # Prefer the parenthesised hostname over the raw refid when present.
        self.reference_id = (m.group(2) or m.group(1)) or self.reference_id

    self.sync_status = "unsynchronized"
    if "normal" in text.lower() or self.offset_s is not None:
        self.sync_status = "tracking"
    self.extras['chronyc_raw_tracking'] = out
    self.observation_sources.append("chronyc_tracking")
def parse_chronyc_sources(self, text: str) -> None: - """Parse `chronyc sources` for reach and selected source.""" - reach: Optional[int] = None - selected: Optional[str] = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("MS") or line.startswith("="): - continue - # ^* or ^+ prefix indicates selected/accepted - if line.startswith("*") or line.startswith("+"): - parts = line.split() - if len(parts) >= 7: - try: - reach = int(parts[5], 8) if parts[5].startswith("0") else int(parts[5]) - except ValueError: - try: - reach = int(parts[5]) - except ValueError: - pass - selected = parts[1] - break - # Fallback: last column often reach (octal) + + self.reachability = self.reachability or reach + self.reference_id = self.reference_id or selected + self.observation_sources.append( "chronyc_sources") + + def _parse_ntpq(self, text: str) -> None: + """Parse `ntpq -p` / `ntpq -pn` output.""" + offset_s: Optional[float] = None + delay_s: Optional[float] = None + jitter_s: Optional[float] = None + stratum: Optional[int] = None + reach: Optional[int] = None + ref = None + for line in text.splitlines(): + line = line.strip() + if not line or line.startswith("remote") or line.startswith("="): + continue + if line.startswith("*") or line.startswith("+") or line.startswith("-"): parts = line.split() - if len(parts) >= 7 and parts[0] in ("^*", "^+", "*", "+"): - # already handled - pass - if reach is None: - # Try any line with 377 octal style - m = re.search(r"\b([0-7]{3})\b", text) - if m: + # remote refid st t when poll reach delay offset jitter + if len(parts) >= 10: try: - reach = int(m.group(1), 8) + stratum = int(parts[2]) except ValueError: pass - - self.reachability = self.reachability or reach - self.reference_id = self.reference_id or selected - self.observation_sources.append( "chronyc_sources") - - def parse_ntpq(self, text: str) -> None: - """Parse `ntpq -p` / `ntpq -pn` output.""" - offset_s: Optional[float] = None - delay_s: 
Optional[float] = None - jitter_s: Optional[float] = None - stratum: Optional[int] = None - reach: Optional[int] = None - ref = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("remote") or line.startswith("="): - continue - if line.startswith("*") or line.startswith("+") or line.startswith("-"): - parts = line.split() - # remote refid st t when poll reach delay offset jitter - if len(parts) >= 10: + try: + delay_s = float(parts[7]) / 1000.0 # ms -> s + offset_s = float(parts[8]) / 1000.0 + jitter_s = float(parts[9]) / 1000.0 + except (ValueError, IndexError): + pass + try: + reach = int(parts[6], 8) if parts[6].startswith("0") else int(parts[6]) + except ValueError: try: - stratum = int(parts[2]) + reach = int(parts[6]) except ValueError: pass - try: - delay_s = float(parts[7]) / 1000.0 # ms -> s - offset_s = float(parts[8]) / 1000.0 - jitter_s = float(parts[9]) / 1000.0 - except (ValueError, IndexError): - pass - try: - reach = int(parts[6], 8) if parts[6].startswith("0") else int(parts[6]) - except ValueError: - try: - reach = int(parts[6]) - except ValueError: - pass - ref = parts[1] - break - sync_status = "synced" if offset_s is not None else "unknown" - - self.offset_last_s = self.offset_last_s or offset_s - self.delay_s = self.delay_s or delay_s - self.jitter_s = self.jitter_s or jitter_s - self.stratum = self.stratum or stratum - self.reachability = self.reachability or reach - self.reference_id = self.reference_id or ref + ref = parts[1] + break + sync_status = "synced" if offset_s is not None else "unknown" + + self.offset_s = self.offset_s or offset_s + self.delay_s = self.delay_s or delay_s + self.jitter_s = self.jitter_s or jitter_s + self.stratum = self.stratum or stratum + self.reachability = self.reachability or reach + self.reference_id = self.reference_id or ref + self.sync_status = sync_status or self.sync_status + self.observation_sources.append("ntpq") + + def _parse_timedatectl(self, text: str) -> 
None: + """Parse `timedatectl status` / `show-timesync --all`.""" + sync = None + for line in text.splitlines(): + low = line.lower() + if "system clock synchronized" in low or "ntp synchronized" in low: + if "yes" in low: + sync = True + elif "no" in low: + sync = False + sync_status = "unknown" + if sync is True: + sync_status = "synchronized" + elif sync is False: + sync_status = "unsynchronized" + + if self.sync_status == 'unknown': + self.sync_status = sync_status or self.sync_status + self.observation_sources.append("timedatectl") + self.extras['timedatectl'] = text[:2000] + + def _parse_systemctl_show(self, text: str) -> None: + """Parse `systemctl show` / `systemctl status` for systemd-timesyncd.""" + active = None + for line in text.splitlines(): + if line.strip().lower().startswith("activestate="): + active = line.split("=", 1)[1].strip().lower() == "active" + break + if active is None and "active (running)" in text.lower(): + active = True + sync_status = "unknown" + if active is True: + sync_status = "service_active" + elif active is False: + sync_status = "service_inactive" + + if self.sync_status == 'unknown': self.sync_status = sync_status or self.sync_status - self.observation_sources.append("ntpq") - - def parse_timedatectl(self, text: str) -> None: - """Parse `timedatectl status` / `show-timesync --all`.""" - sync = None - for line in text.splitlines(): - low = line.lower() - if "system clock synchronized" in low or "ntp synchronized" in low: - if "yes" in low: - sync = True - elif "no" in low: - sync = False - sync_status = "unknown" - if sync is True: - sync_status = "synchronized" - elif sync is False: - sync_status = "unsynchronized" - - if self.sync_status == 'unknown': - self.sync_status = sync_status or self.sync_status - self.observation_sources.append("timedatectl") - self.extras['timedatectl'] = text[:2000] - - def parse_systemctl_show(self, text: str) -> None: - """Parse `systemctl show` / `systemctl status` for systemd-timesyncd.""" - 
active = None - for line in text.splitlines(): - if line.strip().lower().startswith("activestate="): - active = line.split("=", 1)[1].strip().lower() == "active" - break - if active is None and "active (running)" in text.lower(): - active = True - sync_status = "unknown" - if active is True: - sync_status = "service_active" - elif active is False: - sync_status = "service_inactive" - - if self.sync_status == 'unknown': - self.sync_status = sync_status or self.sync_status - self.extras['systemctl'] = text[:2000] - self.observation_sources.append("systemctl_timesyncd") + self.extras['systemctl'] = text[:2000] + self.observation_sources.append("systemctl_timesyncd") + + def collect(self): + t = self._run(["chronyc", "tracking"]) + if t: + self._parse_chronyc_tracking(t) + + t = self._run(["chronyc", "sources", "-v"]) or self._run(["chronyc", "sources"]) + if t: + self._parse_chronyc_sources(t) + + if self.offset_s is None and self.stratum is None: + t = self._run(["ntpq", "-pn"]) or self._run(["ntpq", "-p"]) + if t: + self._parse_ntpq(t) + + t = self._run(["timedatectl", "show-timesync", "--all"]) or self._run(["timedatectl", "status"]) + if t: + self._parse_timedatectl(t) + + t = self._run(["systemctl", "show", "systemd-timesyncd", "--property=ActiveState"]) + if not t: + t = self._run(["systemctl", "status", "systemd-timesyncd", "--no-pager"]) + + if t: + self._parse_systemctl_show(t) + + if not self.observation_sources: + self.observation_sources = ['none'] + + self.sync_health = 1.0 if self.sync_status in ("tracking", "synchronized", "synced") else 0.0 + + def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: + if self.reference_id: + reference_type = REF_TYPES.PROBE + compound_reference = self.reference_id + else: + reference_type = REF_TYPES.UNKNOWN + compound_reference = None + return reference_type, compound_reference + +class NTPRemoteCollector(NTPCollector): + mode: ClassVar[Literal['remote', 'local']] = 'remote' + + target_host: 
str + target_port: int + timeout: float = 3.0 + + root_delay_s: float | None = Field(None, json_schema_extra={'metric': True}) + root_dispersion_s: float | None = Field(None, json_schema_extra={'metric': True}) + poll_interval_s: float | None = Field(None, json_schema_extra={'metric': True}) + leap_status: str = "unknown" + + def configure_failure(self, e): + self.sync_status = 'unreachable' + self.sync_health = 0 + self.extras['error'] = str(e) + self.observation_sources.append("ntplib") + self.observation_sources.append("error") + + def _estimate_jitter_s(self) -> None: + """ + Single NTP client response does not include RFC5905 peer jitter (that needs multiple samples). + + Emit a conservative positive bound from round-trip delay and root dispersion so downstream + ``NTP Jitter`` metrics and dashboards have a value; chrony/ntpq local paths still supply true jitter when available. + """ + if self.delay_s is None and self.root_dispersion_s is None: + return + d = float(self.delay_s) if self.delay_s is not None else 0.0 + r = float(self.root_dispersion_s) if self.root_dispersion_s is not None else 0.0 + est = 0.05 * d + 0.25 * r + if est > 0: + self.jitter_s = est + return + + def collect(self): + try: + import ntplib # type: ignore[import-untyped] + except ImportError as e: + raise ImportError( + "Remote NTP collection requires the 'ntplib' package (install opensampl[collect]).") from e + client = ntplib.NTPClient() + try: + resp = client.request(self.target_host, port=self.target_port, version=3, timeout=self.timeout) + except Exception as e: + logger.warning(f"NTP request to {self.target_host}:{self.target_port} failed: {e}") + self.configure_failure(e) + return + leap = int(resp.leap) + leap_map = {0: "no_warning", 1: "add_second", 2: "del_second", 3: "alarm"} + self.leap_status = leap_map.get(leap, str(leap)) + + stratum = int(resp.stratum) + + try: + self.poll_interval_s = float(2 ** int(resp.poll)) + except (TypeError, ValueError, OverflowError): + 
logger.debug(f'No poll interval determined') + + self.root_delay_s = float(resp.root_delay) if resp.root_delay is not None else None + self.root_dispersion_s = float(resp.root_dispersion) if resp.root_dispersion is not None else None + self.delay_s = float(resp.delay) if resp.delay is not None else None + self.offset_s = float(resp.offset) if resp.offset is not None else None + + ref_id = getattr(resp, "ref_id", None) + if hasattr(ref_id, "decode"): + try: + ref_id = ref_id.decode("ascii", errors="replace") + except Exception: + ref_id = str(ref_id) + self.reference_id = str(ref_id) if ref_id is not None else None + + sync_ok = stratum < 16 and self.offset_s is not None + self.observation_sources.append("ntplib") + self.sync_status = "synchronized" if sync_ok else "unsynchronized" + self.sync_health = 1.0 if sync_ok else 0.0 + self._estimate_jitter_s() + + self.extras['version'] = getattr(resp, 'version', None) + +class NtpProbe2(BaseProbe, CollectMixin): + """Probe parser for NTP2 vendor data files""" + + vendor = VENDORS.NTP2 class CollectConfig(CollectMixin.CollectConfig): """ @@ -281,7 +473,7 @@ def process_metadata(self) -> dict: 'leap_status', 'stratum', 'reachability', - 'offset_last_s', + 'offset_s', 'delay_s', 'jitter_s', 'dispersion_s', @@ -324,87 +516,6 @@ def process_time_data(self) -> pd.DataFrame: # Ensure the format it is reading in matches that in save_to_file raise NotImplementedError - @staticmethod - def _run(cmd: list[str], timeout: float=8.0) -> Optional[str]: - """Run command; return stdout or None if missing/failed.""" - bin0 = cmd[0] - if shutil.which(bin0) is None: - return None - try: - proc = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=timeout, - check=False, - ) - except (OSError, subprocess.SubprocessError) as e: - logger.debug(f"ntp local: command {cmd!r} failed: {e}") - return None - if proc.returncode != 0: - logger.debug(f"ntp local: {cmd!r} exit {proc.returncode}: {proc.stderr!r}") - return None - return 
proc.stdout or "" - - def collect_local(self, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: - merged = self.NTPMetadata(mode='local', probe_name=collect_config.probe_id) - t = self._run(["chronyc", "tracking"]) - if t: - merged.parse_chronyc_tracking(t) - - t = self._run(["chronyc", "sources", "-v"]) or self._run(["chronyc", "sources"]) - if t: - merged.parse_chronyc_sources(t) - - if merged.offset_last_s is None and merged.stratum is None: - t = self._run(["ntpq", "-pn"]) or self._run(["ntpq", "-p"]) - if t: - merged.parse_ntpq(t) - - t = self._run(["timedatectl", "show-timesync", "--all"]) or self._run(["timedatectl", "status"]) - if t: - merged.parse_timedatectl(t) - - t = self._run(["systemctl", "show", "systemd-timesyncd", "--property=ActiveState"]) - if not t: - t = self._run(["systemctl", "status", "systemd-timesyncd", "--no-pager"]) - - if t: - merged.parse_systemctl_show(t) - - if not merged.observation_sources: - merged.observation_source = ['none'] - - now = datetime.now(tz=timezone.utc) - - - - row = merged.model_dump( - include={'offset_last_s', 'delay_s', 'jitter_s', 'stratum', 'reachability', 'dispersion_s', - 'root_delay_s', 'root_dispersion_s', 'poll_interval_s'}) - row['sync_health'] = 1.0 if merged.sync_status in ("tracking", "synchronized", "synced") else 0.0 - meta = merged.model_dump(exclude_none=True) - if merged.reference_id: - reference_type = REF_TYPES.PROBE - compound_reference = merged.reference_id - else: - reference_type = REF_TYPES.UNKNOWN - compound_reference = None - - artifacts: list[CollectMixin.DataArtifact] = [] - for k, v in row.items(): - value = pd.DataFrame([(now, v)], columns=['time', 'value'], dtype={'time': datetime64_dtype}) - metric = self.metric_map.get(k, None) - if not metric: - logger.warning(f'No metric mapping found for {k}') - continue - artifacts.append(CollectMixin.DataArtifact(metric=metric, - reference_type=reference_type, - compound_reference=compound_reference, - value=value)) - - return 
CollectMixin.CollectArtifact(data=artifacts, metadata=meta) - @classmethod def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: @@ -422,17 +533,49 @@ class CollectArtifact(BaseModel): define logic for the save_to_file as well. """ - # TODO: implement the logic for creating a CollectArtifact, as above. - # - - raise NotImplementedError + collector = None + if collect_config.mode == 'local': + collector = NTPLocalCollector() + elif collect_config.mode == 'remote': + collector = NTPRemoteCollector(target_host=collect_config.ip_address, + target_port=collect_config.port, + timeout=collect_config.timeout) + if collector is None: + raise ValueError('Could not determine mode from collect_config') + collector.collect() + + return collector.export() @classmethod def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: - # TODO: Create the str content for an output file. Ensure readable by parse functions & that required metadata is available - # Filename will be automatically generated as {ip_address}_{probe_id}_{vendor}_{timestamp}.txt and saved to directory provided by cli - raise NotImplementedError - + single_reference = collected.single_reference + first_data = next(iter(collected.data or []), None) + if not single_reference: + collected.metadata['reference'] = 'varied' + elif first_data and first_data.compound_reference: + collected.metadata['reference'] = json.dumps(collected.single_reference) + + metric_names = NTPCollector.invert_metric_map() + dfs = [] + for d in collected.data or []: + df = d.value + df['metric'] = metric_names.get(d.metric.name, d.metric.name.lower().replace(' ', '_')) + if not single_reference: + df['reference'] = json.dumps(d.compound_reference) + dfs.append(df) + value_df = pd.concat(dfs) if dfs else None + + header = yaml.dump(collected.metadata, sort_keys=False) + header = textwrap.indent(header, prefix='# ') + buffer = StringIO() + buffer.write(header) + buffer.write('\n') + + if value_df is 
not None: + # write dataframe + value_df.to_csv(buffer, index=False) + + return buffer.getvalue() From 99f40334f81f08eac8c20d2f2c40e36772f980a8 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 15:34:30 -0400 Subject: [PATCH 05/22] ok loading works --- opensampl/db/orm.py | 10 +- opensampl/load_data.py | 6 +- opensampl/vendors/ntp2.py | 206 +++++++++++++++++++++++--------------- 3 files changed, 137 insertions(+), 85 deletions(-) diff --git a/opensampl/db/orm.py b/opensampl/db/orm.py index 9927c5c..6cc4631 100644 --- a/opensampl/db/orm.py +++ b/opensampl/db/orm.py @@ -181,7 +181,7 @@ class ProbeMetadata(Base): adva_metadata = relationship("AdvaMetadata", back_populates="probe", uselist=False) microchip_twst_metadata = relationship("MicrochipTWSTMetadata", back_populates="probe", uselist=False) microchip_tp4100_metadata = relationship("MicrochipTP4100Metadata", back_populates="probe", uselist=False) - ntp_metadata = relationship("NtpMetadata", back_populates="probe", uselist=False) + ntp_metadata2 = relationship("NtpMetadata2", back_populates="probe", uselist=False) # --- CUSTOM PROBE METADATA RELATIONSHIP --- @@ -440,16 +440,16 @@ class NtpMetadata2(Base): probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True) mode = Column(Text) + reference = Column(Boolean, comment="Is used as a reference for other probes") target_host = Column(Text) target_port = Column(Integer) sync_status = Column(Text) leap_status = Column(Text) - root_delay_s = Column(Float) - root_dispersion_s = Column(Float) - poll_interval_s = Column(Float) reference_id = Column(Text) observation_sources = Column(JSONB) - collection_host = Column(Text) + collection_id = Column(Text) + collection_ip = Column(Text) + timeout = Column(Float) additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="ntp_metadata2") diff --git a/opensampl/load_data.py b/opensampl/load_data.py index f427167..fce9864 100644 --- 
a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -125,9 +125,10 @@ def load_time_data( strict=strict, session=session, ) + probe = data_definition.probe # ty: ignore[possibly-unbound-attribute] probe_readable = ( - data_definition.probe.name # ty: ignore[possibly-unbound-attribute] - or f"{data_definition.probe.ip_address} ({data_definition.probe.probe_id})" # ty: ignore[possibly-unbound-attribute] + probe.name + or f"{probe.ip_address} ({probe.probe_id})" # ty: ignore[possibly-unbound-attribute] ) if any(x is None for x in [data_definition.probe, data_definition.metric, data_definition.reference]): @@ -227,6 +228,7 @@ def create_new_tables(*, _config: BaseConfig, create_schema: bool = True, sessio session.execute(text(f"CREATE SCHEMA IF NOT EXISTS {Base.metadata.schema}")) session.commit() Base.metadata.create_all(session.bind) + session.commit() except Exception as e: session.rollback() logger.error(f"Error writing to table: {e}") diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp2.py index a7e7cb9..b21e512 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp2.py @@ -3,13 +3,15 @@ import pandas as pd import re - +import time +import requests +from opensampl.load_data import load_probe_metadata from opensampl.vendors.base_probe import BaseProbe from opensampl.vendors.constants import ProbeKey, VENDORS from opensampl.references import REF_TYPES, ReferenceType from opensampl.mixins.collect import CollectMixin from typing import Literal, Optional, Any, TypeVar, ClassVar -from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict +from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict, field_validator from pydanclick import from_pydantic import click import shutil @@ -21,6 +23,10 @@ import yaml import textwrap from io import StringIO +import psycopg2.errors + +from sqlalchemy.exc import IntegrityError + T = TypeVar('T') def _merge(a: T | None, b: T | None) -> T | None: @@ -41,6 
+47,8 @@ class NTPCollector(BaseModel): "sync_health": METRICS.NTP_SYNC_HEALTH, } + target_host: str + sync_status: str = Field("unknown") sync_health: float | None = Field(None, json_schema_extra={'metric': True}) @@ -51,7 +59,9 @@ class NTPCollector(BaseModel): jitter_s: float | None = Field(None, json_schema_extra={'metric': True}) reference_id: str | None = None observation_sources: list[str] = Field(default_factory=list) - collection_host: str = Field(default_factory=socket.gethostname) + collection_id: str + collection_ip: str + probe_id: str | None = None extras: dict = Field(default_factory=dict, serialization_alias='additional_metadata') model_config = ConfigDict(serialize_by_alias=True) @@ -59,9 +69,6 @@ class NTPCollector(BaseModel): def collect(self): raise NotImplementedError() - def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: - return REF_TYPES.UNKNOWN, None - def export_data(self) -> list[CollectMixin.DataArtifact]: now = datetime.now(tz=timezone.utc) include_list = {f for f, field_info @@ -107,6 +114,9 @@ def export(self) -> CollectMixin.CollectArtifact: def invert_metric_map(cls): return {v.name: k for k, v in cls.metric_map.items()} + def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: + return REF_TYPES.PROBE, {'ip_address': self.collection_ip, 'probe_id': self.collection_id} + class NTPLocalCollector(NTPCollector): mode: ClassVar[Literal['remote', 'local']] = 'local' @@ -331,19 +341,12 @@ def collect(self): self.sync_health = 1.0 if self.sync_status in ("tracking", "synchronized", "synced") else 0.0 - def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: - if self.reference_id: - reference_type = REF_TYPES.PROBE - compound_reference = self.reference_id - else: - reference_type = REF_TYPES.UNKNOWN - compound_reference = None - return reference_type, compound_reference + if self.probe_id is None: + self.probe_id = 'ntp-local' class 
NTPRemoteCollector(NTPCollector): mode: ClassVar[Literal['remote', 'local']] = 'remote' - target_host: str target_port: int timeout: float = 3.0 @@ -388,7 +391,7 @@ def collect(self): logger.warning(f"NTP request to {self.target_host}:{self.target_port} failed: {e}") self.configure_failure(e) return - + from pprint import pformat leap = int(resp.leap) leap_map = {0: "no_warning", 1: "add_second", 2: "del_second", 3: "alarm"} self.leap_status = leap_map.get(leap, str(leap)) @@ -421,6 +424,25 @@ def collect(self): self.extras['version'] = getattr(resp, 'version', None) + if self.probe_id is None: + self.probe_id = f'remote:{self.target_port}' + +def collect_ip_factory(): + s = None + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(("8.8.8.8", 80)) # doesn't actually send data + v = s.getsockname()[0] + except: + v = '127.0.0.1' + finally: + if s: + s.close() + return v + +def collect_id_factory(): + return socket.gethostname() or 'collection-host' + class NtpProbe2(BaseProbe, CollectMixin): """Probe parser for NTP2 vendor data files""" @@ -439,6 +461,8 @@ class CollectConfig(CollectMixin.CollectConfig): interval: Seconds between samples; 0 = single sample and exit duration: Samples to collect when interval > 0 timeout: UDP request timeout for remote mode(seconds) default: 3.0 + collection_ip: Override for the IP address of device collecting readings. Will attempt to resolve a local network IP using socket and fall back to '127.0.0.1' + collection_id: Override for the Probe ID of the device collecting readings. 
Will attempt to resolve using socket.gethostname and fall back to 'collection-host' """ ip_address: str = '127.0.0.1' port: Optional[int] = None @@ -446,56 +470,50 @@ class CollectConfig(CollectMixin.CollectConfig): interval: float = 0.0 duration: int = 1 timeout: float = 3.0 + collection_ip: str = Field(default_factory=collect_ip_factory) + collection_id: str = Field(default_factory=collect_id_factory) @classmethod def get_collect_cli_options(cls): return [ - from_pydantic(cls.CollectConfig, rename={'ip_address': 'host'}), + from_pydantic(cls.CollectConfig, rename={'ip_address': 'host', 'duration': 'count'}), click.pass_context, ] def __init__(self, input_file: str, **kwargs): """Initialize NtpProbe2 from input file""" super().__init__(input_file) - # TODO: parse self.input_file to extract self.probe_key - # self.probe_key = ProbeKey(probe_id=..., ip_address=...) + self.collection_probe = None def process_metadata(self) -> dict: """ Parse and return probe metadata from input file. - Expected metadata fields: - ['mode', - 'probe_name', - 'target_host', - 'target_port', - 'sync_status', - 'leap_status', - 'stratum', - 'reachability', - 'offset_s', - 'delay_s', - 'jitter_s', - 'dispersion_s', - 'root_delay_s', - 'root_dispersion_s', - 'poll_interval_s', - 'reference_id', - 'observation_source', - 'collection_host', - 'additional_metadata'] - Returns: dict with metadata field names as keys """ - # TODO: implement metadata parsing - # return { - # "field_name": value, - # ... 
- # } - raise NotImplementedError - - def process_time_data(self) -> pd.DataFrame: + if not self.metadata_parsed: + header_lines = [] + with self.input_file.open() as f: + for line in f: + if line.startswith("#"): + header_lines.append(line[2:]) + else: + break + + header_str = "".join(header_lines) + self.metadata = yaml.safe_load(header_str) + self.collection_probe = ProbeKey(ip_address=self.metadata.get('collection_ip'), + probe_id=self.metadata.get('collection_id')) + load_probe_metadata(vendor=self.vendor, + probe_key=self.collection_probe, + data={'reference': True, }) + self.probe_key = ProbeKey(ip_address=self.metadata.get('target_host'), probe_id=self.metadata.get('probe_id')) + self.metadata_parsed = True + + return self.metadata + + def process_time_data(self) -> None: """ Parse and load time series data from self.input_file. @@ -506,16 +524,37 @@ def process_time_data(self) -> pd.DataFrame: - time (datetime64[ns]): timestamp for each measurement - value (float64): measured value at each timestamp - """ - # TODO: implement time data parsing and call self.send_time_data(df, reference_type) - # or self.send_data(df, metric_type, reference_type) - # df = pd.DataFrame({"time": [...], "value": [...]}) - # self.send_time_data(df, reference_type=...) - - # Ensure the format it is reading in matches that in save_to_file - raise NotImplementedError - + raw_df = pd.read_csv( + self.input_file, + comment="#", + ) + self.process_metadata() + + reference_type = REF_TYPES.PROBE + grouped_dfs: dict[str, pd.DataFrame] = {str(metric): group.reset_index(drop=True) for metric, group in raw_df.groupby('metric')} + for metr, df in grouped_dfs.items(): + metric = NTPCollector.metric_map.get(metr) + if not metric: + logger.warning(f"Metric {metr} is not supported for NTP. 
Will not ingest {len(df)} rows")
+                continue
+            try:
+                self.send_data(data=df,
+                               metric=metric,
+                               reference_type=reference_type,
+                               compound_reference=self.collection_probe.model_dump())
+            except requests.HTTPError as e:
+                resp = e.response
+                if resp is None:
+                    raise
+                status_code = resp.status_code
+                if status_code == 409:
+                    logger.info(f"{metr} against {self.collection_probe} already loaded for time frame, continuing..")
+                    continue
+                raise
+            except IntegrityError as e:
+                if isinstance(e.orig, psycopg2.errors.UniqueViolation):  # ty: ignore[unresolved-attribute]
+                    logger.info(f"{metr} against {self.collection_probe} already loaded for time frame, continuing..")
 
     @classmethod
     def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact:
@@ -533,35 +572,46 @@ class CollectArtifact(BaseModel):
 
         define logic for the save_to_file as well.
         """
-        collector = None
-        if collect_config.mode == 'local':
-            collector = NTPLocalCollector()
-        elif collect_config.mode == 'remote':
-            collector = NTPRemoteCollector(target_host=collect_config.ip_address,
-                                           target_port=collect_config.port,
-                                           timeout=collect_config.timeout)
-        if collector is None:
-            raise ValueError('Could not determine mode from collect_config')
-        collector.collect()
-
-        return collector.export()
+        collector_overrides = collect_config.model_dump(include=['collection_ip', 'collection_id', 'probe_id'], exclude_none=True)
+
+        def collect_once() -> CollectMixin.CollectArtifact:
+            collector = None
+            if collect_config.mode == 'local':
+                collector = NTPLocalCollector(target_host=collect_config.ip_address,
+                                              **collector_overrides)
+            elif collect_config.mode == 'remote':
+                collector = NTPRemoteCollector(target_host=collect_config.ip_address,
+                                               target_port=collect_config.port,
+                                               timeout=collect_config.timeout, **collector_overrides)
+            if collector is None:
+                raise ValueError('Could not determine mode from collect_config')
+            collector.collect()
+
+            return collector.export()
+
+        if 
collect_config.interval <= 0:
+            return collect_once()
+
+        artifact = None
+        for _ in range(max(collect_config.duration, 1)):
+            newer = collect_once()
+            if artifact is None:
+                artifact = newer
+            else:
+                artifact.data.extend(newer.data)
+                artifact.metadata |= newer.metadata
+
+            time.sleep(collect_config.interval)
+
+        return artifact
 
     @classmethod
     def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str:
-        single_reference = collected.single_reference
-        first_data = next(iter(collected.data or []), None)
-        if not single_reference:
-            collected.metadata['reference'] = 'varied'
-        elif first_data and first_data.compound_reference:
-            collected.metadata['reference'] = json.dumps(collected.single_reference)
-
         metric_names = NTPCollector.invert_metric_map()
         dfs = []
         for d in collected.data or []:
             df = d.value
             df['metric'] = metric_names.get(d.metric.name, d.metric.name.lower().replace(' ', '_'))
-            if not single_reference:
-                df['reference'] = json.dumps(d.compound_reference)
             dfs.append(df)
         value_df = pd.concat(dfs) if dfs else None
 

From d20bd097f6a8e15f986f45d107596be9a418dad7 Mon Sep 17 00:00:00 2001
From: "MacFarland, Midgie"
Date: Tue, 14 Apr 2026 15:39:45 -0400
Subject: [PATCH 06/22] doing some cleaning here

---
 opensampl/config/server.py      | 12 ++++++++++++
 opensampl/vendors/base_probe.py | 12 ++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/opensampl/config/server.py b/opensampl/config/server.py
index 6478145..0e302e1 100644
--- a/opensampl/config/server.py
+++ b/opensampl/config/server.py
@@ -35,6 +35,8 @@ class ServerConfig(BaseConfig):
 
     COMPOSE_FILE: str = Field(default="", description="Fully resolved path to the Docker Compose file.")
 
+    OVERRIDE_FILE: str | None = Field(default=None, description="Override for the compose file")
+
     DOCKER_ENV_FILE: str = Field(default="", description="Fully resolved path to the Docker .env file.")
 
     docker_env_values: dict[str, Any] = Field(default_factory=dict, init=False)
@@ -67,6 +69,14 @@ 
def resolve_compose_file(cls, v: Any) -> str: return get_resolved_resource_path(opensampl.server, "docker-compose.yaml") return str(Path(v).expanduser().resolve()) + @field_validator("OVERRIDE_FILE", mode="before") + @classmethod + def resolve_override_file(cls, v: Any) -> str: + """Resolve the provided compose file for docker to use, or default to the docker-compose.yaml provided""" + if v: + return str(Path(v).expanduser().resolve()) + return v + @field_validator("DOCKER_ENV_FILE", mode="before") @classmethod def resolve_docker_env_file(cls, v: Any) -> str: @@ -89,6 +99,8 @@ def build_docker_compose_base(self): compose_command = self.get_compose_command() command = shlex.split(compose_command) command.extend(["--env-file", self.DOCKER_ENV_FILE, "-f", self.COMPOSE_FILE]) + if self.OVERRIDE_FILE: + command.extend(["-f", self.OVERRIDE_FILE]) return command def set_by_name(self, name: str, value: Any): diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py index 1253844..020213b 100644 --- a/opensampl/vendors/base_probe.py +++ b/opensampl/vendors/base_probe.py @@ -461,16 +461,16 @@ def ip_address(self): return self.probe_key.ip_address @abstractmethod - def process_time_data(self) -> pd.DataFrame: + def process_time_data(self) -> None: """ - Process time series data. + Parse and load time series data from self.input_file. - Returns - ------- - pd.DataFrame: DataFrame with columns: + Use either send_time_data (which prefills METRICS.PHASE_OFFSET) + or send_data and provide alternative METRICS type. 
+ Both require a df as follows: + pd.DataFrame with columns: - time (datetime64[ns]): timestamp for each measurement - value (float64): measured value at each timestamp - """ @dualmethod From 06a0e1bc3ed35d24ee325f30af904022d61ff3e2 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 15:48:22 -0400 Subject: [PATCH 07/22] some funkiness from calling as a class --- opensampl/vendors/base_probe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py index 020213b..68ddd59 100644 --- a/opensampl/vendors/base_probe.py +++ b/opensampl/vendors/base_probe.py @@ -483,13 +483,13 @@ def send_data( probe_key: ProbeKey | None = None, ) -> None: """Ingests data into the database""" - if isinstance(self, BaseProbe): + if isinstance(self, BaseProbe) and probe_key is None: probe_key = self.probe_key if probe_key is None: raise ValueError("send data must be called with probe_key if used as class method") - if self.chunk_size: + if hasattr(self, 'chunk_size') and self.chunk_size: for chunk_start in range(0, len(data), self.chunk_size): chunk = data.iloc[chunk_start : chunk_start + self.chunk_size] load_time_data( From 2a5fba1ba0f7d9bf86ec2c607db2974e91523b35 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 16:11:32 -0400 Subject: [PATCH 08/22] missed the extra metadata when loading direct (no file) --- opensampl/vendors/ntp2.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp2.py index b21e512..b63a557 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp2.py @@ -513,6 +513,23 @@ def process_metadata(self) -> dict: return self.metadata + @classmethod + def load_metadata(cls, probe_key: ProbeKey, metadata: dict) -> None: + """ + Parse and return probe metadata from input file. 
+ + Returns: + dict with metadata field names as keys + """ + collection_probe = ProbeKey(ip_address=metadata.get('collection_ip'), + probe_id=metadata.get('collection_id')) + load_probe_metadata(vendor=cls.vendor, + probe_key=collection_probe, + data={'reference': True, }) + load_probe_metadata(vendor=cls.vendor, + probe_key=probe_key, + data=metadata) + def process_time_data(self) -> None: """ Parse and load time series data from self.input_file. From a5c2f2b7af7279fef961d26d2563f8b2bc49adac Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 17:02:21 -0400 Subject: [PATCH 09/22] adding random --- opensampl/vendors/ntp2.py | 95 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp2.py index b63a557..00e226f 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp2.py @@ -10,20 +10,23 @@ from opensampl.vendors.constants import ProbeKey, VENDORS from opensampl.references import REF_TYPES, ReferenceType from opensampl.mixins.collect import CollectMixin +from opensampl.mixins.random_data import RandomDataMixin from typing import Literal, Optional, Any, TypeVar, ClassVar from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict, field_validator from pydanclick import from_pydantic import click import shutil import subprocess -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from loguru import logger from opensampl.metrics import METRICS, MetricType import json +import random import yaml import textwrap from io import StringIO import psycopg2.errors +import numpy as np from sqlalchemy.exc import IntegrityError @@ -441,9 +444,12 @@ def collect_ip_factory(): return v def collect_id_factory(): - return socket.gethostname() or 'collection-host' + try: + return socket.gethostname() or 'collection-host' + except Exception: + return 'collection-host' -class NtpProbe2(BaseProbe, 
CollectMixin): +class NtpProbe2(BaseProbe, CollectMixin, RandomDataMixin): """Probe parser for NTP2 vendor data files""" vendor = VENDORS.NTP2 @@ -480,6 +486,22 @@ def get_collect_cli_options(cls): click.pass_context, ] + class RandomDataConfig(RandomDataMixin.RandomDataConfig): + """Random NTP-like test data.""" + + base_value: float = Field( + default_factory=lambda: random.uniform(-1e-4, 1e-4), + description="random.uniform(-1e-4, 1e-4)", + ) + noise_amplitude: float = Field( + default_factory=lambda: random.uniform(1e-9, 1e-7), + description="random.uniform(1e-9, 1e-7)", + ) + drift_rate: float = Field( + default_factory=lambda: random.uniform(-1e-12, 1e-12), + description="random.uniform(-1e-12, 1e-12)", + ) + def __init__(self, input_file: str, **kwargs): """Initialize NtpProbe2 from input file""" super().__init__(input_file) @@ -644,6 +666,73 @@ def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: return buffer.getvalue() + @classmethod + def generate_random_data( + cls, + config: RandomDataConfig, + probe_key: ProbeKey, + ) -> ProbeKey: + """Generate synthetic NTP-like metrics for testing.""" + cls._setup_random_seed(config.seed) + logger.info(f"Generating random NTP data for {probe_key}") + + meta = { + "mode": "random", + "name": f"Random NTP {probe_key}", + "target_host": "", + "target_port": 0, + "sync_status": "tracking", + "leap_status": "no_warning", + "observation_sources": ["random"], + "additional_metadata": {"test_data": True}, + } + cls._send_metadata_to_db(probe_key, meta) + + total_seconds = config.duration_hours * 3600 + num_samples = int(total_seconds / config.sample_interval) + times = [] + metric_maps = { + 'offset': {'metric': METRICS.PHASE_OFFSET, + 'values': []}, + 'delay_s': {'metric': METRICS.NTP_DELAY, + 'values': []}, + 'jitter_s': {'metric': METRICS.NTP_JITTER, + 'values': []}, + 'stratum': {'metric': METRICS.NTP_STRATUM, + 'values': []}, + 'sync_health': {'metric': METRICS.NTP_SYNC_HEALTH, + 'values': []}, 
+ } + + for i in range(num_samples): + sample_time = config.start_time + timedelta(seconds=i * config.sample_interval) + times.append(sample_time) + time_offset = i * config.sample_interval + drift_component = config.drift_rate * time_offset + noise = float(np.random.normal(0, config.noise_amplitude)) + offset = config.base_value + drift_component + noise + if random.random() < config.outlier_probability: + offset += float(np.random.normal(0, config.noise_amplitude * config.outlier_multiplier)) + + delay_s = 0.02 + abs(0.0001 * random.random()) + jitter_s = abs(float(config.noise_amplitude * 5)) + stratum = 2.0 + (1.0 if random.random() < 0.05 else 0.0) + sync_health = 1.0 + metric_maps['offset']['values'].append(offset) + metric_maps['delay_s']['values'].append(delay_s) + metric_maps['jitter_s']['values'].append(jitter_s) + metric_maps['stratum']['values'].append(stratum) + metric_maps['sync_health']['values'].append(sync_health) + + for metric in metric_maps.values(): + cls.send_data(probe_key=probe_key, + metric=metric.get('metric'), + reference_type=REF_TYPES.UNKNOWN, + data=pd.DataFrame({"time": times, "value": metric.get('values')})) + + logger.info(f"Finished random NTP generation for {probe_key}") + return probe_key + From 07f9190414f879929b0a1b7c8076c7aca74f5616 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 17:08:50 -0400 Subject: [PATCH 10/22] removing the 2 suffix --- .gitignore | 3 +++ opensampl/db/orm.py | 13 +++++++------ opensampl/metrics.py | 5 +++-- opensampl/vendors/base_probe.py | 1 + opensampl/vendors/constants.py | 12 ++++++------ opensampl/vendors/{ntp2.py => ntp.py} | 10 +++++----- 6 files changed, 25 insertions(+), 19 deletions(-) rename opensampl/vendors/{ntp2.py => ntp.py} (99%) diff --git a/.gitignore b/.gitignore index 41e29a2..8d8939a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# OpenSAMPL data paths +archive/ +ntp-snapshots/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff 
--git a/opensampl/db/orm.py b/opensampl/db/orm.py index 6cc4631..ca49647 100644 --- a/opensampl/db/orm.py +++ b/opensampl/db/orm.py @@ -181,7 +181,7 @@ class ProbeMetadata(Base): adva_metadata = relationship("AdvaMetadata", back_populates="probe", uselist=False) microchip_twst_metadata = relationship("MicrochipTWSTMetadata", back_populates="probe", uselist=False) microchip_tp4100_metadata = relationship("MicrochipTP4100Metadata", back_populates="probe", uselist=False) - ntp_metadata2 = relationship("NtpMetadata2", back_populates="probe", uselist=False) + ntp_metadata = relationship("NtpMetadata", back_populates="probe", uselist=False) # --- CUSTOM PROBE METADATA RELATIONSHIP --- @@ -433,10 +433,8 @@ class MicrochipTP4100Metadata(Base): additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="microchip_tp4100_metadata") - -# --- CUSTOM TABLES --- !! Do not remove line, used as reference when inserting metadata table -class NtpMetadata2(Base): - __tablename__ = "ntp_metadata2" +class NtpMetadata(Base): + __tablename__ = "ntp_metadata" probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True) mode = Column(Text) @@ -451,7 +449,10 @@ class NtpMetadata2(Base): collection_ip = Column(Text) timeout = Column(Float) additional_metadata = Column(JSONB) - probe = relationship("ProbeMetadata", back_populates="ntp_metadata2") + probe = relationship("ProbeMetadata", back_populates="ntp_metadata") + +# --- CUSTOM TABLES --- !! Do not remove line, used as reference when inserting metadata table + # --- TABLE FUNCTIONS --- diff --git a/opensampl/metrics.py b/opensampl/metrics.py index d859cb4..4900e37 100644 --- a/opensampl/metrics.py +++ b/opensampl/metrics.py @@ -62,8 +62,6 @@ class METRICS: unit="unknown", value_type=object, ) - - # --- CUSTOM METRICS --- !! 
Do not remove line, used as reference when inserting metric NTP_DELAY = MetricType( name="NTP Delay", description="Round-trip delay (RTT) to the NTP server or observed path delay in seconds", @@ -121,3 +119,6 @@ class METRICS: unit="ratio", value_type=float, ) + + # --- CUSTOM METRICS --- !! Do not remove line, used as reference when inserting metric + diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py index 68ddd59..82bccd1 100644 --- a/opensampl/vendors/base_probe.py +++ b/opensampl/vendors/base_probe.py @@ -538,6 +538,7 @@ def _send_metadata_to_db(cls, probe_key: ProbeKey, metadata: dict) -> None: load_probe_metadata(vendor=cls.vendor, probe_key=probe_key, data=metadata) logger.debug(f"Sent metadata for probe {probe_key}") + def send_metadata(self): """Send metadata to database""" metadata = self.process_metadata() diff --git a/opensampl/vendors/constants.py b/opensampl/vendors/constants.py index 9c4b27c..66b4ac5 100644 --- a/opensampl/vendors/constants.py +++ b/opensampl/vendors/constants.py @@ -71,12 +71,12 @@ class VENDORS: ) # --- CUSTOM VENDORS --- !! 
Do not remove line, used as reference when inserting vendor - NTP2 = VendorType( - name='NTP2', - parser_class='NtpProbe2', - parser_module='ntp2', - metadata_table='ntp_metadata2', - metadata_orm='NtpMetadata2' + NTP = VendorType( + name='NTP', + parser_class='NtpProbe', + parser_module='ntp', + metadata_table='ntp_metadata', + metadata_orm='NtpMetadata' ) diff --git a/opensampl/vendors/ntp2.py b/opensampl/vendors/ntp.py similarity index 99% rename from opensampl/vendors/ntp2.py rename to opensampl/vendors/ntp.py index 00e226f..38427e8 100644 --- a/opensampl/vendors/ntp2.py +++ b/opensampl/vendors/ntp.py @@ -1,4 +1,4 @@ -"""Probe implementation for NTP2 vendor""" +"""Probe implementation for NTP vendor""" import socket import pandas as pd @@ -449,10 +449,10 @@ def collect_id_factory(): except Exception: return 'collection-host' -class NtpProbe2(BaseProbe, CollectMixin, RandomDataMixin): - """Probe parser for NTP2 vendor data files""" +class NtpProbe(BaseProbe, CollectMixin, RandomDataMixin): + """Probe parser for NTP vendor data files""" - vendor = VENDORS.NTP2 + vendor = VENDORS.NTP class CollectConfig(CollectMixin.CollectConfig): """ @@ -503,7 +503,7 @@ class RandomDataConfig(RandomDataMixin.RandomDataConfig): ) def __init__(self, input_file: str, **kwargs): - """Initialize NtpProbe2 from input file""" + """Initialize NtpProbe from input file""" super().__init__(input_file) self.collection_probe = None From 82fb461799abac2ec10a72b2df305419db398bab Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Tue, 14 Apr 2026 18:09:02 -0400 Subject: [PATCH 11/22] adding the geolocator stuff --- opensampl/config/base.py | 2 + opensampl/helpers/geolocator.py | 117 ++++++++++++++++++++++++++++++++ opensampl/load_data.py | 11 +++ 3 files changed, 130 insertions(+) create mode 100644 opensampl/helpers/geolocator.py diff --git a/opensampl/config/base.py b/opensampl/config/base.py index aa2af28..8a39790 100644 --- a/opensampl/config/base.py +++ b/opensampl/config/base.py @@ 
-43,6 +43,8 @@ class BaseConfig(BaseSettings): False, description="Allow insecure requests to be made to the backend", alias="INSECURE_REQUESTS" ) + ENABLE_GEOLOCATE: bool = Field(False, description="Enable geolocate features which extract a location from ip addresses", alias="ENABLE_GEOLOCATE") + @field_serializer("ARCHIVE_PATH") def convert_to_str(self, v: Path) -> str: """Convert archive path to a string for serialization""" diff --git a/opensampl/helpers/geolocator.py b/opensampl/helpers/geolocator.py new file mode 100644 index 0000000..8ed8490 --- /dev/null +++ b/opensampl/helpers/geolocator.py @@ -0,0 +1,117 @@ +"""Associate NTP probes with ``castdb.locations`` for the geospatial Grafana dashboard.""" + +from __future__ import annotations + +import ipaddress +import json +import os +import socket +import urllib.request +from typing import TYPE_CHECKING, Any + +from loguru import logger + +from opensampl.load.table_factory import TableFactory + +if TYPE_CHECKING: + from sqlalchemy.orm import Session + + from opensampl.vendors.constants import ProbeKey + +_GEO_CACHE: dict[str, tuple[float, float, str]] = {} + + +def _env_bool(name: str, default: bool) -> bool: + v = os.getenv(name) + if v is None: + return default + return v.strip().lower() in ("1", "true", "yes", "on") + + +def _default_lab_coords() -> tuple[float, float]: + lat = float(os.getenv("DEFAULT_LAT", "37.4419")) + lon = float(os.getenv("DEFAULT_LON", "-122.1430")) + return lat, lon + + +def _is_private_or_loopback(ip: str) -> bool: + try: + addr = ipaddress.ip_address(ip) + except ValueError: + return True + return bool(addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_reserved) + + +def _lookup_geo_ipapi(ip: str) -> tuple[float, float, str] | None: + if ip in _GEO_CACHE: + return _GEO_CACHE[ip] + url = f"http://ip-api.com/json/{ip}?fields=status,lat,lon,city,country" + try: + with urllib.request.urlopen(url, timeout=4.0) as resp: # noqa: S310 + body = 
json.loads(resp.read().decode("utf-8")) + except Exception as e: + logger.warning("ip-api geolocation failed for {}: {}", ip, e) + return None + + if body.get("status") != "success" or body.get("lat") is None or body.get("lon") is None: + logger.warning("ip-api returned no coordinates for {}", ip) + return None + + city = body.get("city") or "" + country = body.get("country") or "" + label = ", ".join(x for x in (city, country) if x) + out = (float(body["lat"]), float(body["lon"]), label or ip) + _GEO_CACHE[ip] = out + return out + +def create_location(session: Session, geolocate_enabled: bool, ip_address: str, geo_override: dict) -> str | None: + """ + Set probe ``name``, ``public``, and ``location_uuid`` on NTP metadata before ``probe_metadata`` insert. + + Uses ``additional_metadata.geo_override`` when present (lat/lon/label). Otherwise resolves the remote + host, uses RFC1918/loopback defaults from env, or ip-api.com for public IPs (HTTP, no API key). + """ + + lat: float | None = None + lon: float | None = None + name: str | None = None + + if isinstance(geo_override, dict) and geo_override.get("lat") is not None and geo_override.get("lon") is not None: + lat = float(geo_override["lat"]) + lon = float(geo_override["lon"]) + + if isinstance(geo_override, dict) and geo_override.get("name") is not None: + name = geo_override["name"] + + if geolocate_enabled and lat is None and lon is None: + ip_for_geo = ip_address + try: + ip_for_geo = socket.gethostbyname(ip_address) + except OSError as e: + logger.debug("Could not resolve {}: {}", ip_address, e) + + if _is_private_or_loopback(ip_for_geo): + lat, lon = _default_lab_coords() + else: + geo = _lookup_geo_ipapi(ip_for_geo) + if geo: + lat, lon, _name = geo + name = name or _name + else: + lat, lon = _default_lab_coords() + + loc_factory = TableFactory("locations", session=session) + loc = None + if name: + loc = loc_factory.find_existing({"name": name}) + + if loc is None: + loc = loc_factory.write( + {"name": 
name, "lat": lat, "lon": lon, "public": True}, + if_exists="ignore", + ) + + if loc: + return loc.uuid + return None + diff --git a/opensampl/load_data.py b/opensampl/load_data.py index fce9864..e723851 100644 --- a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -15,6 +15,7 @@ from opensampl.metrics import MetricType from opensampl.references import ReferenceType from opensampl.vendors.constants import ProbeKey, VendorType +from opensampl.helpers.geolocator import create_location conflict_actions = Literal["error", "replace", "update", "ignore"] @@ -200,6 +201,16 @@ def load_probe_metadata( pm_cols = {col.name for col in pm_factory.inspector.columns} probe_info = {k: data.pop(k) for k in list(data.keys()) if k in pm_cols} + location_name = probe_info.pop('location_name', None) + geolocation = ({'name': location_name} if location_name else {}) | probe_info.pop('geolocation', {}) + + if geolocation or _config.ENABLE_GEOLOCATE: + location_uuid = create_location(session, + geolocate_enabled=_config.ENABLE_GEOLOCATE, + geo_override=geolocation, ip_address=probe_key.ip_address) + if location_uuid: + probe_info.update({'location_uuid': location_uuid}) + probe_info.update({"probe_id": probe_key.probe_id, "ip_address": probe_key.ip_address, "vendor": vendor.name}) probe = pm_factory.write(data=probe_info, if_exists="update") From e3618a1e51da42dfd7611a20f3c87a01b5598a21 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Wed, 15 Apr 2026 09:25:37 -0400 Subject: [PATCH 12/22] hmm i think this is it --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d297b5d..ce69916 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,10 @@ backend = [ "uvicorn", "prometheus-client", ] -collect = ["telnetlib3==2.0.4"] +collect = [ + "telnetlib3==2.0.4", + "ntplib>=0.4.0,<0.5" +] [project.scripts] opensampl = "opensampl.cli:cli" @@ -147,6 +150,7 @@ ignore = ["D203", "D212", "D400", "D415", 
"ANN401", "S101", "PLR2004", "COM812", "opensampl/vendors/**/*.py" = ['S311'] # we want to ignore the errors about random "opensampl/server/backend/main.py" = ['B008', 'ARG001'] #ignore complaints about calling functions in args "opensampl/mixins/random_data.py" = ['S311'] + [tool.ruff.lint.pylint] max-args = 10 From 2d31e9a14c8055c8d2ef54709ea1a532266fd9df Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Thu, 16 Apr 2026 17:01:37 -0400 Subject: [PATCH 13/22] so ruff out here --- opensampl/config/base.py | 6 +- opensampl/config/server.py | 12 +- opensampl/db/orm.py | 4 + opensampl/helpers/geolocator.py | 6 +- opensampl/load_data.py | 20 +- opensampl/metrics.py | 1 - opensampl/mixins/collect.py | 2 +- opensampl/vendors/base_probe.py | 3 +- opensampl/vendors/constants.py | 11 +- opensampl/vendors/ntp.py | 444 +++++++++++++++++--------------- pyproject.toml | 4 +- 11 files changed, 275 insertions(+), 238 deletions(-) diff --git a/opensampl/config/base.py b/opensampl/config/base.py index 8a39790..b304dfd 100644 --- a/opensampl/config/base.py +++ b/opensampl/config/base.py @@ -43,7 +43,11 @@ class BaseConfig(BaseSettings): False, description="Allow insecure requests to be made to the backend", alias="INSECURE_REQUESTS" ) - ENABLE_GEOLOCATE: bool = Field(False, description="Enable geolocate features which extract a location from ip addresses", alias="ENABLE_GEOLOCATE") + ENABLE_GEOLOCATE: bool = Field( + False, + description="Enable geolocate features which extract a location from ip addresses", + alias="ENABLE_GEOLOCATE", + ) @field_serializer("ARCHIVE_PATH") def convert_to_str(self, v: Path) -> str: diff --git a/opensampl/config/server.py b/opensampl/config/server.py index 0e302e1..a0bc6e6 100644 --- a/opensampl/config/server.py +++ b/opensampl/config/server.py @@ -5,11 +5,12 @@ configuration validation, and settings management. 
""" +from __future__ import annotations + import shlex from importlib.resources import as_file, files from pathlib import Path -from types import ModuleType -from typing import Any, Union +from typing import TYPE_CHECKING, Any from dotenv import dotenv_values, set_key from loguru import logger @@ -20,8 +21,11 @@ from opensampl.config.base import BaseConfig from opensampl.server import check_command +if TYPE_CHECKING: + from types import ModuleType + -def get_resolved_resource_path(pkg: Union[str, ModuleType], relative_path: str) -> str: +def get_resolved_resource_path(pkg: str | ModuleType, relative_path: str) -> str: """Retrieve the resolved path to a resource in a package.""" resource = files(pkg).joinpath(relative_path) with as_file(resource) as real_path: @@ -56,7 +60,7 @@ def _ignore_in_set(self) -> list[str]: return ignored @model_validator(mode="after") - def get_docker_values(self) -> "ServerConfig": + def get_docker_values(self) -> ServerConfig: """Get the values that the docker containers will use on startup""" self.docker_env_values = dotenv_values(self.DOCKER_ENV_FILE) return self diff --git a/opensampl/db/orm.py b/opensampl/db/orm.py index ca49647..7c214db 100644 --- a/opensampl/db/orm.py +++ b/opensampl/db/orm.py @@ -433,7 +433,10 @@ class MicrochipTP4100Metadata(Base): additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="microchip_tp4100_metadata") + class NtpMetadata(Base): + """NTP Clock Probe specific metadata""" + __tablename__ = "ntp_metadata" probe_uuid = Column(String, ForeignKey("probe_metadata.uuid"), primary_key=True) @@ -451,6 +454,7 @@ class NtpMetadata(Base): additional_metadata = Column(JSONB) probe = relationship("ProbeMetadata", back_populates="ntp_metadata") + # --- CUSTOM TABLES --- !! 
Do not remove line, used as reference when inserting metadata table diff --git a/opensampl/helpers/geolocator.py b/opensampl/helpers/geolocator.py index 8ed8490..99e9858 100644 --- a/opensampl/helpers/geolocator.py +++ b/opensampl/helpers/geolocator.py @@ -7,7 +7,7 @@ import os import socket import urllib.request -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from loguru import logger @@ -16,7 +16,6 @@ if TYPE_CHECKING: from sqlalchemy.orm import Session - from opensampl.vendors.constants import ProbeKey _GEO_CACHE: dict[str, tuple[float, float, str]] = {} @@ -64,6 +63,7 @@ def _lookup_geo_ipapi(ip: str) -> tuple[float, float, str] | None: _GEO_CACHE[ip] = out return out + def create_location(session: Session, geolocate_enabled: bool, ip_address: str, geo_override: dict) -> str | None: """ Set probe ``name``, ``public``, and ``location_uuid`` on NTP metadata before ``probe_metadata`` insert. @@ -71,7 +71,6 @@ def create_location(session: Session, geolocate_enabled: bool, ip_address: str, Uses ``additional_metadata.geo_override`` when present (lat/lon/label). Otherwise resolves the remote host, uses RFC1918/loopback defaults from env, or ip-api.com for public IPs (HTTP, no API key). 
""" - lat: float | None = None lon: float | None = None name: str | None = None @@ -114,4 +113,3 @@ def create_location(session: Session, geolocate_enabled: bool, ip_address: str, if loc: return loc.uuid return None - diff --git a/opensampl/load_data.py b/opensampl/load_data.py index e723851..dc163e7 100644 --- a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -10,12 +10,12 @@ from opensampl.config.base import BaseConfig from opensampl.db.orm import Base, ProbeData +from opensampl.helpers.geolocator import create_location from opensampl.load.routing import route from opensampl.load.table_factory import TableFactory from opensampl.metrics import MetricType from opensampl.references import ReferenceType from opensampl.vendors.constants import ProbeKey, VendorType -from opensampl.helpers.geolocator import create_location conflict_actions = Literal["error", "replace", "update", "ignore"] @@ -128,8 +128,7 @@ def load_time_data( ) probe = data_definition.probe # ty: ignore[possibly-unbound-attribute] probe_readable = ( - probe.name - or f"{probe.ip_address} ({probe.probe_id})" # ty: ignore[possibly-unbound-attribute] + probe.name or f"{probe.ip_address} ({probe.probe_id})" # ty: ignore[possibly-unbound-attribute] ) if any(x is None for x in [data_definition.probe, data_definition.metric, data_definition.reference]): @@ -201,15 +200,18 @@ def load_probe_metadata( pm_cols = {col.name for col in pm_factory.inspector.columns} probe_info = {k: data.pop(k) for k in list(data.keys()) if k in pm_cols} - location_name = probe_info.pop('location_name', None) - geolocation = ({'name': location_name} if location_name else {}) | probe_info.pop('geolocation', {}) + location_name = probe_info.pop("location_name", None) + geolocation = ({"name": location_name} if location_name else {}) | probe_info.pop("geolocation", {}) if geolocation or _config.ENABLE_GEOLOCATE: - location_uuid = create_location(session, - geolocate_enabled=_config.ENABLE_GEOLOCATE, - geo_override=geolocation, 
ip_address=probe_key.ip_address) + location_uuid = create_location( + session, + geolocate_enabled=_config.ENABLE_GEOLOCATE, + geo_override=geolocation, + ip_address=probe_key.ip_address, + ) if location_uuid: - probe_info.update({'location_uuid': location_uuid}) + probe_info.update({"location_uuid": location_uuid}) probe_info.update({"probe_id": probe_key.probe_id, "ip_address": probe_key.ip_address, "vendor": vendor.name}) probe = pm_factory.write(data=probe_info, if_exists="update") diff --git a/opensampl/metrics.py b/opensampl/metrics.py index 4900e37..72979b8 100644 --- a/opensampl/metrics.py +++ b/opensampl/metrics.py @@ -121,4 +121,3 @@ class METRICS: ) # --- CUSTOM METRICS --- !! Do not remove line, used as reference when inserting metric - diff --git a/opensampl/mixins/collect.py b/opensampl/mixins/collect.py index 91da293..ed48d93 100644 --- a/opensampl/mixins/collect.py +++ b/opensampl/mixins/collect.py @@ -58,7 +58,7 @@ class CollectConfig(BaseModel): Attributes: output_dir: When provided, will save collected data as a file to provided directory. 
-            Filename will be automatically generated as {ip_address}_{probe_id}_{vendor}_{timestamp}.txt
+            Filename will be automatically generated as {vendor}_{ip_address}_{probe_id}_{timestamp}.txt
         load: Whether to load collected data directly to the database
         duration: Number of seconds to collect data for

diff --git a/opensampl/vendors/base_probe.py b/opensampl/vendors/base_probe.py
index 82bccd1..388fdc8 100644
--- a/opensampl/vendors/base_probe.py
+++ b/opensampl/vendors/base_probe.py
@@ -489,7 +489,7 @@ def send_data(
         if probe_key is None:
             raise ValueError("send data must be called with probe_key if used as class method")

-        if hasattr(self, 'chunk_size') and self.chunk_size:
+        if hasattr(self, "chunk_size") and self.chunk_size:
             for chunk_start in range(0, len(data), self.chunk_size):
                 chunk = data.iloc[chunk_start : chunk_start + self.chunk_size]
                 load_time_data(
@@ -538,7 +538,6 @@ def _send_metadata_to_db(cls, probe_key: ProbeKey, metadata: dict) -> None:
         load_probe_metadata(vendor=cls.vendor, probe_key=probe_key, data=metadata)
         logger.debug(f"Sent metadata for probe {probe_key}")

-
     def send_metadata(self):
         """Send metadata to database"""
         metadata = self.process_metadata()
diff --git a/opensampl/vendors/constants.py b/opensampl/vendors/constants.py
index 66b4ac5..e2bc68a 100644
--- a/opensampl/vendors/constants.py
+++ b/opensampl/vendors/constants.py
@@ -72,14 +72,13 @@ class VENDORS:

     # --- CUSTOM VENDORS --- !!
Do not remove line, used as reference when inserting vendor NTP = VendorType( - name='NTP', - parser_class='NtpProbe', - parser_module='ntp', - metadata_table='ntp_metadata', - metadata_orm='NtpMetadata' + name="NTP", + parser_class="NtpProbe", + parser_module="ntp", + metadata_table="ntp_metadata", + metadata_orm="NtpMetadata", ) - # --- VENDOR FUNCTIONS --- @classmethod diff --git a/opensampl/vendors/ntp.py b/opensampl/vendors/ntp.py index 38427e8..8ecb065 100644 --- a/opensampl/vendors/ntp.py +++ b/opensampl/vendors/ntp.py @@ -1,42 +1,49 @@ """Probe implementation for NTP vendor""" -import socket -import pandas as pd +from __future__ import annotations + +import contextlib +import random import re -import time -import requests -from opensampl.load_data import load_probe_metadata -from opensampl.vendors.base_probe import BaseProbe -from opensampl.vendors.constants import ProbeKey, VENDORS -from opensampl.references import REF_TYPES, ReferenceType -from opensampl.mixins.collect import CollectMixin -from opensampl.mixins.random_data import RandomDataMixin -from typing import Literal, Optional, Any, TypeVar, ClassVar -from pydantic import model_validator, BaseModel, Field, field_serializer, ConfigDict, field_validator -from pydanclick import from_pydantic -import click import shutil +import socket import subprocess -from datetime import datetime, timezone, timedelta -from loguru import logger -from opensampl.metrics import METRICS, MetricType -import json -import random -import yaml import textwrap +import time +from datetime import datetime, timedelta, timezone from io import StringIO -import psycopg2.errors -import numpy as np +from typing import Any, Callable, ClassVar, Literal, TypeVar +import click +import numpy as np +import pandas as pd +import psycopg2.errors +import requests +import yaml +from loguru import logger +from pydanclick import from_pydantic +from pydantic import BaseModel, ConfigDict, Field from sqlalchemy.exc import IntegrityError +from 
opensampl.load_data import load_probe_metadata +from opensampl.metrics import METRICS, MetricType +from opensampl.mixins.collect import CollectMixin +from opensampl.mixins.random_data import RandomDataMixin +from opensampl.references import REF_TYPES, ReferenceType +from opensampl.vendors.base_probe import BaseProbe +from opensampl.vendors.constants import VENDORS, ProbeKey + +T = TypeVar("T") + -T = TypeVar('T') def _merge(a: T | None, b: T | None) -> T | None: return a if a is not None else b + class NTPCollector(BaseModel): - mode: ClassVar[Literal['remote', 'local']] + """Base class for NTP Collector, for specific implementations to inherit.""" + + mode: ClassVar[Literal["remote", "local"]] metric_map: ClassVar[dict[str, MetricType]] = { "phase_offset_s": METRICS.PHASE_OFFSET, "delay_s": METRICS.NTP_DELAY, @@ -53,30 +60,38 @@ class NTPCollector(BaseModel): target_host: str sync_status: str = Field("unknown") - sync_health: float | None = Field(None, json_schema_extra={'metric': True}) + sync_health: float | None = Field(None, json_schema_extra={"metric": True}) - stratum: float | None = Field(None, json_schema_extra={'metric': True}) - reachability: int | None = Field(None, json_schema_extra={'metric': True}) - offset_s: float | None = Field(None, serialization_alias='phase_offset_s', json_schema_extra={'metric': True}) - delay_s: float | None = Field(None, json_schema_extra={'metric': True}) - jitter_s: float | None = Field(None, json_schema_extra={'metric': True}) + stratum: float | None = Field(None, json_schema_extra={"metric": True}) + reachability: int | None = Field(None, json_schema_extra={"metric": True}) + offset_s: float | None = Field(None, serialization_alias="phase_offset_s", json_schema_extra={"metric": True}) + delay_s: float | None = Field(None, json_schema_extra={"metric": True}) + jitter_s: float | None = Field(None, json_schema_extra={"metric": True}) reference_id: str | None = None observation_sources: list[str] = 
Field(default_factory=list) collection_id: str collection_ip: str probe_id: str | None = None - extras: dict = Field(default_factory=dict, serialization_alias='additional_metadata') + extras: dict = Field(default_factory=dict, serialization_alias="additional_metadata") model_config = ConfigDict(serialize_by_alias=True) def collect(self): - raise NotImplementedError() + """Collect a single NTP Reading""" + raise NotImplementedError def export_data(self) -> list[CollectMixin.DataArtifact]: + """ + Export the data from the NTP Collection to a list of DataArtifacts + + Each distinct metric type will get it's own data artifact + """ now = datetime.now(tz=timezone.utc) - include_list = {f for f, field_info - in type(self).model_fields.items() - if field_info.json_schema_extra and field_info.json_schema_extra.get('metric', False)} + include_list = { + f + for f, field_info in type(self).model_fields.items() + if field_info.json_schema_extra and field_info.json_schema_extra.get("metric", False) + } reference_type, compound_reference = self.determine_reference() metric_values = self.model_dump(include=include_list, exclude_none=True) @@ -84,29 +99,36 @@ def export_data(self) -> list[CollectMixin.DataArtifact]: for m, v in metric_values.items(): metric = self.metric_map.get(m, None) if metric is None: - metric = MetricType(name=m, - description=f'Automatically generated metric type for {m}', - value_type=object, - unit="unknown") - logger.warning(f'Generated new metric type for {m}') - value = pd.DataFrame([(now, v)], columns=['time', 'value']) - value['time'] = pd.to_datetime(value['time']) - - artifacts.append(CollectMixin.DataArtifact(metric=metric, - reference_type=reference_type, - compound_reference=compound_reference, - value=value)) + metric = MetricType( + name=m, + description=f"Automatically generated metric type for {m}", + value_type=object, + unit="unknown", + ) + logger.warning(f"Generated new metric type for {m}") + value = pd.DataFrame([(now, v)], 
columns=["time", "value"]) + value["time"] = pd.to_datetime(value["time"]) + + artifacts.append( + CollectMixin.DataArtifact( + metric=metric, reference_type=reference_type, compound_reference=compound_reference, value=value + ) + ) return artifacts def export_metadata(self) -> dict[str, Any]: - include_list = {f for f, field_info - in type(self).model_fields.items() - if not field_info.json_schema_extra or not field_info.json_schema_extra.get('metric', False)} + """Export the metadata from the NTP Collection to a dict""" + include_list = { + f + for f, field_info in type(self).model_fields.items() + if not field_info.json_schema_extra or not field_info.json_schema_extra.get("metric", False) + } meta = self.model_dump(include=include_list, exclude_none=True) - meta['mode'] = self.mode + meta["mode"] = self.mode return meta def export(self) -> CollectMixin.CollectArtifact: + """Export the data + metadata for the NTP Collection to a CollectArtifact""" meta = self.export_metadata() artifacts: list[CollectMixin.DataArtifact] = self.export_data() @@ -114,24 +136,29 @@ def export(self) -> CollectMixin.CollectArtifact: return CollectMixin.CollectArtifact(data=artifacts, metadata=meta) @classmethod - def invert_metric_map(cls): + def invert_metric_map(cls) -> dict[str, str]: + """Invert metric map to go from MetricType.name to string""" return {v.name: k for k, v in cls.metric_map.items()} - def determine_reference(self) -> tuple[ReferenceType, Optional[dict[str, Any]]]: - return REF_TYPES.PROBE, {'ip_address': self.collection_ip, 'probe_id': self.collection_id} + def determine_reference(self) -> tuple[ReferenceType, None | dict[str, Any]]: + """Get the reference type and compound reference details""" + return REF_TYPES.PROBE, {"ip_address": self.collection_ip, "probe_id": self.collection_id} + class NTPLocalCollector(NTPCollector): - mode: ClassVar[Literal['remote', 'local']] = 'local' + """Collector model for taking NTP readings from local device""" + + mode: 
ClassVar[Literal["remote", "local"]] = "local" @staticmethod - def _run(cmd: list[str], timeout: float = 8.0) -> Optional[str]: + def _run(cmd: list[str], timeout: float = 8.0) -> str | None: """Run command; return stdout or None if missing/failed.""" bin0 = cmd[0] if shutil.which(bin0) is None: logger.debug(f"ntp local: command {bin0!r} not found") return None try: - proc = subprocess.run( + proc = subprocess.run( # noqa: S603 cmd, capture_output=True, text=True, @@ -144,14 +171,14 @@ def _run(cmd: list[str], timeout: float = 8.0) -> Optional[str]: if proc.returncode != 0: logger.debug(f"ntp local: {cmd!r} exit {proc.returncode}: {proc.stderr!r}") return None - logger.debug(f'ntp local: {cmd!r} exit {proc.stdout}') + logger.debug(f"ntp local: {cmd!r} exit {proc.stdout}") return proc.stdout or "" def _parse_chronyc_tracking(self, text: str) -> None: """Parse `chronyc tracking` key: value output.""" out: dict[str, Any] = {} - for line in text.splitlines(): - line = line.strip() + for l in text.splitlines(): + line = l.strip() if not line or ":" not in line: continue key, _, rest = line.partition(":") @@ -160,53 +187,48 @@ def _parse_chronyc_tracking(self, text: str) -> None: out[key] = val # Last offset : +0.000000123 seconds - m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) + m = re.search(r"last offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.IGNORECASE) if m: - try: + with contextlib.suppress(ValueError): self.offset_s = _merge(self.offset_s, (m.group(1))) - except ValueError: - pass - m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.I) + + m = re.search(r"rms offset\s*:\s*([+-]?[\d.eE+-]+)\s*seconds?", text, re.IGNORECASE) if m: - try: + with contextlib.suppress(ValueError): self.jitter_s = _merge(self.jitter_s, float(m.group(1))) - except ValueError: - pass - m = re.search(r"stratum\s*:\s*(\d+)", text, re.I) + + m = re.search(r"stratum\s*:\s*(\d+)", text, re.IGNORECASE) if m: - try: + with 
contextlib.suppress(ValueError): self.stratum = _merge(self.stratum, int(m.group(1))) - except ValueError: - pass - m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.I) + + m = re.search(r"reference id\s*:\s*(\S+)(?:\s*\(([^)]+)\))?", text, re.IGNORECASE) if m: self.reference_id = (m.group(2) or m.group(1)) or self.reference_id self.sync_status = "unsynchronized" if "normal" in text.lower() or self.offset_s is not None: self.sync_status = "tracking" - self.extras['chronyc_raw_tracking'] = out + self.extras["chronyc_raw_tracking"] = out self.observation_sources.append("chronyc_tracking") def _parse_chronyc_sources(self, text: str) -> None: """Parse `chronyc sources` for reach and selected source.""" - reach: Optional[int] = None - selected: Optional[str] = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("MS") or line.startswith("="): + reach: int | None = None + selected: str | None = None + for l in text.splitlines(): + line = l.strip() + if not line or line.startswith(("MS", "=")): continue # ^* or ^+ prefix indicates selected/accepted - if line.startswith("*") or line.startswith("+"): + if line.startswith(("*", "+")): parts = line.split() if len(parts) >= 7: try: reach = int(parts[5], 8) if parts[5].startswith("0") else int(parts[5]) except ValueError: - try: + with contextlib.suppress(ValueError): reach = int(parts[5]) - except ValueError: - pass selected = parts[1] break # Fallback: last column often reach (octal) @@ -218,35 +240,32 @@ def _parse_chronyc_sources(self, text: str) -> None: # Try any line with 377 octal style m = re.search(r"\b([0-7]{3})\b", text) if m: - try: + with contextlib.suppress(ValueError): reach = int(m.group(1), 8) - except ValueError: - pass self.reachability = self.reachability or reach self.reference_id = self.reference_id or selected - self.observation_sources.append( "chronyc_sources") + self.observation_sources.append("chronyc_sources") def _parse_ntpq(self, text: 
str) -> None: """Parse `ntpq -p` / `ntpq -pn` output.""" - offset_s: Optional[float] = None - delay_s: Optional[float] = None - jitter_s: Optional[float] = None - stratum: Optional[int] = None - reach: Optional[int] = None + offset_s: float | None = None + delay_s: float | None = None + jitter_s: float | None = None + stratum: int | None = None + reach: int | None = None ref = None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith("remote") or line.startswith("="): + for l in text.splitlines(): + line = l.strip() + if not line or line.startswith(("remote", "=")): continue - if line.startswith("*") or line.startswith("+") or line.startswith("-"): + if line.startswith(("*", "+", "-")): parts = line.split() # remote refid st t when poll reach delay offset jitter if len(parts) >= 10: - try: + with contextlib.suppress(ValueError): stratum = int(parts[2]) - except ValueError: - pass + try: delay_s = float(parts[7]) / 1000.0 # ms -> s offset_s = float(parts[8]) / 1000.0 @@ -256,10 +275,9 @@ def _parse_ntpq(self, text: str) -> None: try: reach = int(parts[6], 8) if parts[6].startswith("0") else int(parts[6]) except ValueError: - try: + with contextlib.suppress(ValueError): reach = int(parts[6]) - except ValueError: - pass + ref = parts[1] break sync_status = "synced" if offset_s is not None else "unknown" @@ -289,10 +307,10 @@ def _parse_timedatectl(self, text: str) -> None: elif sync is False: sync_status = "unsynchronized" - if self.sync_status == 'unknown': + if self.sync_status == "unknown": self.sync_status = sync_status or self.sync_status self.observation_sources.append("timedatectl") - self.extras['timedatectl'] = text[:2000] + self.extras["timedatectl"] = text[:2000] def _parse_systemctl_show(self, text: str) -> None: """Parse `systemctl show` / `systemctl status` for systemd-timesyncd.""" @@ -309,12 +327,13 @@ def _parse_systemctl_show(self, text: str) -> None: elif active is False: sync_status = "service_inactive" - if 
self.sync_status == 'unknown': + if self.sync_status == "unknown": self.sync_status = sync_status or self.sync_status - self.extras['systemctl'] = text[:2000] + self.extras["systemctl"] = text[:2000] self.observation_sources.append("systemctl_timesyncd") def collect(self): + """Collect local NTP readings using various tools""" t = self._run(["chronyc", "tracking"]) if t: self._parse_chronyc_tracking(t) @@ -340,28 +359,32 @@ def collect(self): self._parse_systemctl_show(t) if not self.observation_sources: - self.observation_sources = ['none'] + self.observation_sources = ["none"] self.sync_health = 1.0 if self.sync_status in ("tracking", "synchronized", "synced") else 0.0 if self.probe_id is None: - self.probe_id = 'ntp-local' + self.probe_id = "ntp-local" + class NTPRemoteCollector(NTPCollector): - mode: ClassVar[Literal['remote', 'local']] = 'remote' + """Collector model for taking readings from remote NTP Server.""" + + mode: ClassVar[Literal["remote", "local"]] = "remote" target_port: int timeout: float = 3.0 - root_delay_s: float | None = Field(None, json_schema_extra={'metric': True}) - root_dispersion_s: float | None = Field(None, json_schema_extra={'metric': True}) - poll_interval_s: float | None = Field(None, json_schema_extra={'metric': True}) + root_delay_s: float | None = Field(None, json_schema_extra={"metric": True}) + root_dispersion_s: float | None = Field(None, json_schema_extra={"metric": True}) + poll_interval_s: float | None = Field(None, json_schema_extra={"metric": True}) leap_status: str = "unknown" - def configure_failure(self, e): - self.sync_status = 'unreachable' + def configure_failure(self, e: Exception) -> None: + """Set all metric and metadata values to reflect failure to connect""" + self.sync_status = "unreachable" self.sync_health = 0 - self.extras['error'] = str(e) + self.extras["error"] = str(e) self.observation_sources.append("ntplib") self.observation_sources.append("error") @@ -370,7 +393,8 @@ def _estimate_jitter_s(self) -> 
None: Single NTP client response does not include RFC5905 peer jitter (that needs multiple samples). Emit a conservative positive bound from round-trip delay and root dispersion so downstream - ``NTP Jitter`` metrics and dashboards have a value; chrony/ntpq local paths still supply true jitter when available. + ``NTP Jitter`` metrics and dashboards have a value; chrony/ntpq local paths still supply + true jitter when available. """ if self.delay_s is None and self.root_dispersion_s is None: return @@ -382,11 +406,13 @@ def _estimate_jitter_s(self) -> None: return def collect(self): + """Collect readings from a single ping against a remote NTP server.""" try: import ntplib # type: ignore[import-untyped] except ImportError as e: raise ImportError( - "Remote NTP collection requires the 'ntplib' package (install opensampl[collect]).") from e + "Remote NTP collection requires the 'ntplib' package (install opensampl[collect])." + ) from e client = ntplib.NTPClient() try: resp = client.request(self.target_host, port=self.target_port, version=3, timeout=self.timeout) @@ -394,7 +420,6 @@ def collect(self): logger.warning(f"NTP request to {self.target_host}:{self.target_port} failed: {e}") self.configure_failure(e) return - from pprint import pformat leap = int(resp.leap) leap_map = {0: "no_warning", 1: "add_second", 2: "del_second", 3: "alarm"} self.leap_status = leap_map.get(leap, str(leap)) @@ -404,7 +429,7 @@ def collect(self): try: self.poll_interval_s = float(2 ** int(resp.poll)) except (TypeError, ValueError, OverflowError): - logger.debug(f'No poll interval determined') + logger.debug("No poll interval determined") self.root_delay_s = float(resp.root_delay) if resp.root_delay is not None else None self.root_dispersion_s = float(resp.root_dispersion) if resp.root_dispersion is not None else None @@ -425,29 +450,34 @@ def collect(self): self.sync_health = 1.0 if sync_ok else 0.0 self._estimate_jitter_s() - self.extras['version'] = getattr(resp, 'version', None) + 
self.extras["version"] = getattr(resp, "version", None) if self.probe_id is None: - self.probe_id = f'remote:{self.target_port}' + self.probe_id = f"remote:{self.target_port}" + -def collect_ip_factory(): +def collect_ip_factory() -> str: + """Get ip address for collection host using socket (default to 127.0.0.1)""" s = None try: s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.connect(("8.8.8.8", 80)) # doesn't actually send data v = s.getsockname()[0] - except: - v = '127.0.0.1' + except Exception: + v = "127.0.0.1" finally: if s: s.close() return v -def collect_id_factory(): + +def collect_id_factory() -> str: + """Get humanreadable host name for collection host using socket (default to collection-host)""" try: - return socket.gethostname() or 'collection-host' + return socket.gethostname() or "collection-host" except Exception: - return 'collection-host' + return "collection-host" + class NtpProbe(BaseProbe, CollectMixin, RandomDataMixin): """Probe parser for NTP vendor data files""" @@ -456,23 +486,30 @@ class NtpProbe(BaseProbe, CollectMixin, RandomDataMixin): class CollectConfig(CollectMixin.CollectConfig): """ + Configuration for Collecting NTP Readings + Attributes: probe_id: stable probe_id slug (e.g. local-chrony) ip_address: Host or IP address for Probe (default '127.0.0.1') port: UDP port for remote mode (use high ports for lab mocks) - output_dir: When provided, will save collected data as a file to provided directory. Filename will be automatically generated as ntp_{ip_address}_{probe_id}_{ts.strftime('%Y%m%dT%H%M%SZ')}.json + output_dir: When provided, will save collected data as a file to provided directory. Filename will be + automatically generated as NTP_{ip_address}_{probe_id}_{vendor}_{timestamp}.txt load: Whether to load collected data directly to the database duration: Number of seconds to collect data for mode: Collect remote or local NTP. Default is 'local'. 
interval: Seconds between samples; 0 = single sample and exit duration: Samples to collect when interval > 0 timeout: UDP request timeout for remote mode(seconds) default: 3.0 - collection_ip: Override for the IP address of device collecting readings. Will attempt to resolve a local network IP using socket and fall back to '127.0.0.1' - collection_id: Override for the Probe ID of the device collecting readings. Will attempt to resolve using socket.gethostname and fall back to 'collection-host' + collection_ip: Override for the IP address of device collecting readings. Will attempt to resolve a local + network IP using socket and fall back to '127.0.0.1' + collection_id: Override for the Probe ID of the device collecting readings. Will attempt to resolve using + socket.gethostname and fall back to 'collection-host' + """ - ip_address: str = '127.0.0.1' - port: Optional[int] = None - mode: Literal['remote', 'local'] = 'local' + + ip_address: str = "127.0.0.1" + port: int | None = None + mode: Literal["remote", "local"] = "local" interval: float = 0.0 duration: int = 1 timeout: float = 3.0 @@ -480,9 +517,10 @@ class CollectConfig(CollectMixin.CollectConfig): collection_id: str = Field(default_factory=collect_id_factory) @classmethod - def get_collect_cli_options(cls): + def get_collect_cli_options(cls) -> list[Callable]: + """Get the decorators to generate collection options for CLI""" return [ - from_pydantic(cls.CollectConfig, rename={'ip_address': 'host', 'duration': 'count'}), + from_pydantic(cls.CollectConfig, rename={"ip_address": "host", "duration": "count"}), click.pass_context, ] @@ -502,7 +540,7 @@ class RandomDataConfig(RandomDataMixin.RandomDataConfig): description="random.uniform(-1e-12, 1e-12)", ) - def __init__(self, input_file: str, **kwargs): + def __init__(self, input_file: str): """Initialize NtpProbe from input file""" super().__init__(input_file) self.collection_probe = None @@ -513,6 +551,7 @@ def process_metadata(self) -> dict: Returns: dict 
with metadata field names as keys + """ if not self.metadata_parsed: header_lines = [] @@ -525,12 +564,13 @@ def process_metadata(self) -> dict: header_str = "".join(header_lines) self.metadata = yaml.safe_load(header_str) - self.collection_probe = ProbeKey(ip_address=self.metadata.get('collection_ip'), - probe_id=self.metadata.get('collection_id')) - load_probe_metadata(vendor=self.vendor, - probe_key=self.collection_probe, - data={'reference': True, }) - self.probe_key = ProbeKey(ip_address=self.metadata.get('target_host'), probe_id=self.metadata.get('probe_id')) + self.collection_probe = ProbeKey( + ip_address=self.metadata.get("collection_ip"), probe_id=self.metadata.get("collection_id") + ) + load_probe_metadata(vendor=self.vendor, probe_key=self.collection_probe, data={"reference": True}) + self.probe_key = ProbeKey( + ip_address=self.metadata.get("target_host"), probe_id=self.metadata.get("probe_id") + ) self.metadata_parsed = True return self.metadata @@ -542,15 +582,11 @@ def load_metadata(cls, probe_key: ProbeKey, metadata: dict) -> None: Returns: dict with metadata field names as keys + """ - collection_probe = ProbeKey(ip_address=metadata.get('collection_ip'), - probe_id=metadata.get('collection_id')) - load_probe_metadata(vendor=cls.vendor, - probe_key=collection_probe, - data={'reference': True, }) - load_probe_metadata(vendor=cls.vendor, - probe_key=probe_key, - data=metadata) + collection_probe = ProbeKey(ip_address=metadata.get("collection_ip"), probe_id=metadata.get("collection_id")) + load_probe_metadata(vendor=cls.vendor, probe_key=collection_probe, data={"reference": True}) + load_probe_metadata(vendor=cls.vendor, probe_key=probe_key, data=metadata) def process_time_data(self) -> None: """ @@ -571,17 +607,21 @@ def process_time_data(self) -> None: self.process_metadata() reference_type = REF_TYPES.PROBE - grouped_dfs: dict[str, pd.DataFrame] = {str(metric): group.reset_index(drop=True) for metric, group in raw_df.groupby('metric')} + 
grouped_dfs: dict[str, pd.DataFrame] = { + str(metric): group.reset_index(drop=True) for metric, group in raw_df.groupby("metric") + } for metr, df in grouped_dfs.items(): metric = NTPCollector.metric_map.get(metr) if not metric: logger.warning(f"Metric {metr} is not supported for NTP. Will not ingest {len(df)} rows") continue try: - self.send_data(data=df, - metric=metric, - reference_type=reference_type, - compound_reference=self.collection_probe.model_dump()) + self.send_data( + data=df, + metric=metric, + reference_type=reference_type, + compound_reference=self.collection_probe.model_dump(), + ) except requests.HTTPError as e: resp = e.response if resp is None: @@ -593,37 +633,31 @@ def process_time_data(self) -> None: raise except IntegrityError as e: if isinstance(e.orig, psycopg2.errors.UniqueViolation): # ty: ignore[unresolved-attribute] - logger.info(f"{metr} against {self.collection_probe} already loaded for time frame already loaded for time frame, continuing..") + logger.info( + f"{metr} against {self.collection_probe} already loaded for time " + f"frame already loaded for time frame, continuing.." + ) @classmethod def collect(cls, collect_config: CollectConfig) -> CollectMixin.CollectArtifact: - """ - Create a collect artifact defined as follows - class CollectArtifact(BaseModel): - data: pd.DataFrame - metric: MetricType = METRICS.UNKNOWN - reference_type: ReferenceType = REF_TYPES.UNKNOWN - compound_reference: Optional[dict[str, Any]] = None - probe_key: Optional[ProbeKey] = None - metadata: Optional[dict] = Field(default_factory=dict) - - on a collect_config.load, the metadata and data will be loaded into db. - - define logic for the save_to_file as well. 
- """ - collector_overrides = collect_config.model_dump(include=['collection_ip', 'collection_id', 'probe_id'], exclude_none=True) + """Collect readings for an NTP probe according to collect_config.""" + collector_overrides = collect_config.model_dump( + include=["collection_ip", "collection_id", "probe_id"], exclude_none=True + ) def collect_once() -> CollectMixin.CollectArtifact: collector = None - if collect_config.mode == 'local': - collector = NTPLocalCollector(target_host=collect_config.ip_address, - **collector_overrides) - elif collect_config.mode == 'remote': - collector = NTPRemoteCollector(target_host=collect_config.ip_address, - target_port=collect_config.port, - timeout=collect_config.timeout, **collector_overrides) + if collect_config.mode == "local": + collector = NTPLocalCollector(target_host=collect_config.ip_address, **collector_overrides) + elif collect_config.mode == "remote": + collector = NTPRemoteCollector( + target_host=collect_config.ip_address, + target_port=collect_config.port, + timeout=collect_config.timeout, + **collector_overrides, + ) if collector is None: - raise ValueError('Could not determine mode from collect_config') + raise ValueError("Could not determine mode from collect_config") collector.collect() return collector.export() @@ -646,19 +680,20 @@ def collect_once() -> CollectMixin.CollectArtifact: @classmethod def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: + """Create the content of a file from the CollectArtifacts""" metric_names = NTPCollector.invert_metric_map() dfs = [] for d in collected.data or []: df = d.value - df['metric'] = metric_names.get(d.metric.name, d.metric.name.lower().replace(' ', '_')) + df["metric"] = metric_names.get(d.metric.name, d.metric.name.lower().replace(" ", "_")) dfs.append(df) value_df = pd.concat(dfs) if dfs else None header = yaml.dump(collected.metadata, sort_keys=False) - header = textwrap.indent(header, prefix='# ') + header = textwrap.indent(header, 
prefix="# ") buffer = StringIO() buffer.write(header) - buffer.write('\n') + buffer.write("\n") if value_df is not None: # write dataframe @@ -668,9 +703,9 @@ def create_file_content(cls, collected: CollectMixin.CollectArtifact) -> str: @classmethod def generate_random_data( - cls, - config: RandomDataConfig, - probe_key: ProbeKey, + cls, + config: RandomDataConfig, + probe_key: ProbeKey, ) -> ProbeKey: """Generate synthetic NTP-like metrics for testing.""" cls._setup_random_seed(config.seed) @@ -692,16 +727,11 @@ def generate_random_data( num_samples = int(total_seconds / config.sample_interval) times = [] metric_maps = { - 'offset': {'metric': METRICS.PHASE_OFFSET, - 'values': []}, - 'delay_s': {'metric': METRICS.NTP_DELAY, - 'values': []}, - 'jitter_s': {'metric': METRICS.NTP_JITTER, - 'values': []}, - 'stratum': {'metric': METRICS.NTP_STRATUM, - 'values': []}, - 'sync_health': {'metric': METRICS.NTP_SYNC_HEALTH, - 'values': []}, + "offset": {"metric": METRICS.PHASE_OFFSET, "values": []}, + "delay_s": {"metric": METRICS.NTP_DELAY, "values": []}, + "jitter_s": {"metric": METRICS.NTP_JITTER, "values": []}, + "stratum": {"metric": METRICS.NTP_STRATUM, "values": []}, + "sync_health": {"metric": METRICS.NTP_SYNC_HEALTH, "values": []}, } for i in range(num_samples): @@ -718,21 +748,19 @@ def generate_random_data( jitter_s = abs(float(config.noise_amplitude * 5)) stratum = 2.0 + (1.0 if random.random() < 0.05 else 0.0) sync_health = 1.0 - metric_maps['offset']['values'].append(offset) - metric_maps['delay_s']['values'].append(delay_s) - metric_maps['jitter_s']['values'].append(jitter_s) - metric_maps['stratum']['values'].append(stratum) - metric_maps['sync_health']['values'].append(sync_health) + metric_maps["offset"]["values"].append(offset) + metric_maps["delay_s"]["values"].append(delay_s) + metric_maps["jitter_s"]["values"].append(jitter_s) + metric_maps["stratum"]["values"].append(stratum) + metric_maps["sync_health"]["values"].append(sync_health) for metric in 
metric_maps.values(): - cls.send_data(probe_key=probe_key, - metric=metric.get('metric'), - reference_type=REF_TYPES.UNKNOWN, - data=pd.DataFrame({"time": times, "value": metric.get('values')})) + cls.send_data( + probe_key=probe_key, + metric=metric.get("metric"), + reference_type=REF_TYPES.UNKNOWN, + data=pd.DataFrame({"time": times, "value": metric.get("values")}), + ) logger.info(f"Finished random NTP generation for {probe_key}") return probe_key - - - - diff --git a/pyproject.toml b/pyproject.toml index ce69916..b5a4a63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -134,7 +134,7 @@ build-backend = "hatchling.build" [tool.ruff] line-length = 120 -exclude = [".git", "__pycache__", "venv", "env", ".venv", ".env", "build", "dist", "docs", "opensampl/server/migrations/**/*.py",] +exclude = [".git", "__pycache__", "venv", "env", ".venv", ".env", "build", "dist", "docs", "opensampl/server/migrations/**/*.py"] include = ["opensampl/**/*.py"] [tool.ruff.lint] @@ -144,7 +144,7 @@ select = ["F", "E", "W", "C", "I", "D", "N", "B", "ERA", "ANN", "S", "A", "COM", "FLY", "PERF", "PL", "UP", "FURB", "RUF", "TRY"] ignore = ["D203", "D212", "D400", "D415", "ANN401", "S101", "PLR2004", "COM812", "ANN201", "B011", "EM102", "TRY003", "ANN204", "FA100", "PIE790", "EM101", - "PLC0415"] + "PLC0415", 'E741'] [tool.ruff.lint.per-file-ignores] "opensampl/vendors/**/*.py" = ['S311'] # we want to ignore the errors about random From 0edf9e503ed554435b3e6b2262b06ce357c54a68 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 12:19:52 -0400 Subject: [PATCH 14/22] fixing ntp specific names --- opensampl/metrics.py | 60 +++++++++++++++++++--------------- opensampl/vendors/constants.py | 3 +- opensampl/vendors/ntp.py | 26 +++++++-------- 3 files changed, 49 insertions(+), 40 deletions(-) diff --git a/opensampl/metrics.py b/opensampl/metrics.py index 72979b8..27b6b8a 100644 --- a/opensampl/metrics.py +++ b/opensampl/metrics.py @@ -62,59 +62,67 @@ class METRICS: 
unit="unknown", value_type=object, ) - NTP_DELAY = MetricType( - name="NTP Delay", - description="Round-trip delay (RTT) to the NTP server or observed path delay in seconds", + DELAY = MetricType( + name="Delay", + description=( + "Round-trip delay (RTD) or Round-Trip Time (RTT). The time in seconds it takes for a data signal to " + "travel from a source to a destination and back, including acknowledgement." + ), unit="s", value_type=float, ) - NTP_JITTER = MetricType( - name="NTP Jitter", - description=( - "Jitter or offset variation for NTP in seconds (true value from chrony/ntpq when available; " - "remote single-packet collection may use a delay/dispersion bound estimate)" - ), + JITTER = MetricType( + name="Jitter", + description=("Jitter or offset variation in delay in seconds. Represents inconsistent response times."), unit="s", value_type=float, ) - NTP_STRATUM = MetricType( - name="NTP Stratum", - description="NTP stratum level (distance from reference clock)", + STRATUM = MetricType( + name="Stratum", + description=( + 'Stratum level. Hierarchical layer defining the distance (or "hops") between device and reference.' + ), unit="level", - value_type=float, + value_type=int, ) - NTP_REACHABILITY = MetricType( - name="NTP Reachability", - description="NTP reachability register (0-255) as a scalar for plotting", + REACHABILITY = MetricType( + name="Reachability", + description=( + "Reachability register (0-255) as a scalar for plotting. Ability of a source node to communicate " + "with a target node." 
+ ), unit="count", value_type=float, ) - NTP_DISPERSION = MetricType( - name="NTP Dispersion", - description="Combined error budget / dispersion in seconds", + DISPERSION = MetricType( + name="Dispersion", + description="Uncertainty in a clock's time relative to its reference source in seconds", unit="s", value_type=float, ) NTP_ROOT_DELAY = MetricType( name="NTP Root Delay", - description="Root delay from NTP packet or local estimate in seconds", + description=( + "Total round-trip network delay from the local system" + " all the way to the primary reference clock (stratum 0)" + ), unit="s", value_type=float, ) NTP_ROOT_DISPERSION = MetricType( name="NTP Root Dispersion", - description="Root dispersion from NTP packet or local estimate in seconds", + description="The total accumulated clock uncertainty from the local system back to the primary reference clock", unit="s", value_type=float, ) - NTP_POLL_INTERVAL = MetricType( - name="NTP Poll Interval", - description="Poll interval in seconds", + POLL_INTERVAL = MetricType( + name="Poll Interval", + description="Time between requests sent to a time server in seconds", unit="s", value_type=float, ) - NTP_SYNC_HEALTH = MetricType( - name="NTP Sync Health", + SYNC_HEALTH = MetricType( + name="Sync Health", description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)", unit="ratio", value_type=float, diff --git a/opensampl/vendors/constants.py b/opensampl/vendors/constants.py index e2bc68a..b5b7bdd 100644 --- a/opensampl/vendors/constants.py +++ b/opensampl/vendors/constants.py @@ -70,7 +70,6 @@ class VENDORS: metadata_orm="MicrochipTP4100Metadata", ) - # --- CUSTOM VENDORS --- !! Do not remove line, used as reference when inserting vendor NTP = VendorType( name="NTP", parser_class="NtpProbe", @@ -79,6 +78,8 @@ class VENDORS: metadata_orm="NtpMetadata", ) + # --- CUSTOM VENDORS --- !! 
Do not remove line, used as reference when inserting vendor + # --- VENDOR FUNCTIONS --- @classmethod diff --git a/opensampl/vendors/ntp.py b/opensampl/vendors/ntp.py index 8ecb065..57f835b 100644 --- a/opensampl/vendors/ntp.py +++ b/opensampl/vendors/ntp.py @@ -46,15 +46,15 @@ class NTPCollector(BaseModel): mode: ClassVar[Literal["remote", "local"]] metric_map: ClassVar[dict[str, MetricType]] = { "phase_offset_s": METRICS.PHASE_OFFSET, - "delay_s": METRICS.NTP_DELAY, - "jitter_s": METRICS.NTP_JITTER, - "stratum": METRICS.NTP_STRATUM, - "reachability": METRICS.NTP_REACHABILITY, - "dispersion_s": METRICS.NTP_DISPERSION, + "delay_s": METRICS.DELAY, + "jitter_s": METRICS.JITTER, + "stratum": METRICS.STRATUM, + "reachability": METRICS.REACHABILITY, + "dispersion_s": METRICS.DISPERSION, "root_delay_s": METRICS.NTP_ROOT_DELAY, "root_dispersion_s": METRICS.NTP_ROOT_DISPERSION, - "poll_interval_s": METRICS.NTP_POLL_INTERVAL, - "sync_health": METRICS.NTP_SYNC_HEALTH, + "poll_interval_s": METRICS.POLL_INTERVAL, + "sync_health": METRICS.SYNC_HEALTH, } target_host: str @@ -62,7 +62,7 @@ class NTPCollector(BaseModel): sync_status: str = Field("unknown") sync_health: float | None = Field(None, json_schema_extra={"metric": True}) - stratum: float | None = Field(None, json_schema_extra={"metric": True}) + stratum: int | None = Field(None, json_schema_extra={"metric": True}) reachability: int | None = Field(None, json_schema_extra={"metric": True}) offset_s: float | None = Field(None, serialization_alias="phase_offset_s", json_schema_extra={"metric": True}) delay_s: float | None = Field(None, json_schema_extra={"metric": True}) @@ -728,10 +728,10 @@ def generate_random_data( times = [] metric_maps = { "offset": {"metric": METRICS.PHASE_OFFSET, "values": []}, - "delay_s": {"metric": METRICS.NTP_DELAY, "values": []}, - "jitter_s": {"metric": METRICS.NTP_JITTER, "values": []}, - "stratum": {"metric": METRICS.NTP_STRATUM, "values": []}, - "sync_health": {"metric": 
METRICS.NTP_SYNC_HEALTH, "values": []}, + "delay_s": {"metric": METRICS.DELAY, "values": []}, + "jitter_s": {"metric": METRICS.JITTER, "values": []}, + "stratum": {"metric": METRICS.STRATUM, "values": []}, + "sync_health": {"metric": METRICS.SYNC_HEALTH, "values": []}, } for i in range(num_samples): @@ -746,7 +746,7 @@ def generate_random_data( delay_s = 0.02 + abs(0.0001 * random.random()) jitter_s = abs(float(config.noise_amplitude * 5)) - stratum = 2.0 + (1.0 if random.random() < 0.05 else 0.0) + stratum = 2 + (1 if random.random() < 0.05 else 0) sync_health = 1.0 metric_maps["offset"]["values"].append(offset) metric_maps["delay_s"]["values"].append(delay_s) From 1bedb8b1da2409dbac79bd720edd43b082f2d611 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 12:55:13 -0400 Subject: [PATCH 15/22] adding migration stuff --- opensampl/helpers/geolocator.py | 4 +- .../2026_04_17_1243_add_ntp_values.py | 159 ++++++++++++++++++ 2 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py diff --git a/opensampl/helpers/geolocator.py b/opensampl/helpers/geolocator.py index 99e9858..b92c890 100644 --- a/opensampl/helpers/geolocator.py +++ b/opensampl/helpers/geolocator.py @@ -28,8 +28,8 @@ def _env_bool(name: str, default: bool) -> bool: def _default_lab_coords() -> tuple[float, float]: - lat = float(os.getenv("DEFAULT_LAT", "37.4419")) - lon = float(os.getenv("DEFAULT_LON", "-122.1430")) + lat = float(os.getenv("DEFAULT_LAT", "35.9312")) + lon = float(os.getenv("DEFAULT_LON", "-84.3101")) return lat, lon diff --git a/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py new file mode 100644 index 0000000..ebaa5a8 --- /dev/null +++ b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py @@ -0,0 +1,159 @@ +"""add ntp 
values + +Revision ID: 5665e5902905 +Revises: d419cac01df2 +Create Date: 2026-04-17 12:43:23.711453 + +""" +from typing import Sequence, Union +import uuid +from sqlalchemy.dialects import postgresql +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '5665e5902905' +down_revision: Union[str, None] = 'd419cac01df2' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +SCHEMA = 'castdb' + +def upgrade() -> None: + op.create_table( + "ntp_metadata", + sa.Column( + "probe_uuid", + sa.String(), + sa.ForeignKey("probe_metadata.uuid"), + primary_key=True, + nullable=False, + ), + sa.Column("mode", sa.Text(), nullable=True), + sa.Column( + "reference", + sa.Boolean(), + nullable=True, + comment="Is used as a reference for other probes", + ), + sa.Column("target_host", sa.Text(), nullable=True), + sa.Column("target_port", sa.Integer(), nullable=True), + sa.Column("sync_status", sa.Text(), nullable=True), + sa.Column("leap_status", sa.Text(), nullable=True), + sa.Column("reference_id", sa.Text(), nullable=True), + sa.Column("observation_sources", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("collection_id", sa.Text(), nullable=True), + sa.Column("collection_ip", sa.Text(), nullable=True), + sa.Column("timeout", sa.Float(), nullable=True), + sa.Column("additional_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + schema=SCHEMA, + if_not_exists=True, + comment="NTP Clock Probe specific metadata" + ) + + metric_type_table = sa.table('metric_type', + sa.column('uuid', sa.String), + sa.column('name', sa.String), + sa.column('description', sa.Text), + sa.column('unit', sa.String), + sa.column('value_type', sa.String), + schema=SCHEMA + ) + new_metrics = [ + dict(uuid=str(uuid.uuid4()), + name="Delay", + description=( + "Round-trip delay (RTD) or Round-Trip Time (RTT). 
The time in seconds it takes for a data signal to " + "travel from a source to a destination and back, including acknowledgement." + ), + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Jitter", + description=("Jitter or offset variation in delay in seconds. Represents inconsistent response times."), + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Stratum", + description=( + 'Stratum level. Hierarchical layer defining the distance (or "hops") between device and reference.' + ), + unit="level", + value_type='int', + ), + dict(uuid=str(uuid.uuid4()), + name="Reachability", + description=( + "Reachability register (0-255) as a scalar for plotting. Ability of a source node to communicate " + "with a target node." + ), + unit="count", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Dispersion", + description="Uncertainty in a clock's time relative to its reference source in seconds", + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="NTP Root Delay", + description=( + "Total round-trip network delay from the local system" + " all the way to the primary reference clock (stratum 0)" + ), + unit="s", + value_type='float' + ), + dict(uuid=str(uuid.uuid4()), + name="NTP Root Dispersion", + description="The total accumulated clock uncertainty from the local system back to the primary reference clock", + unit="s", + value_type='float', + ), + dict(uuid=str(uuid.uuid4()), + name="Poll Interval", + description="Time between requests sent to a time server in seconds", + unit="s", + value_type=float, + ), + dict(uuid=str(uuid.uuid4()), + name="Sync Health", + description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)", + unit="ratio", + value_type=float, + ) + ] + op.bulk_insert(metric_type_table, new_metrics) + + + + +def downgrade() -> None: + op.drop_table('ntp_metadata', schema=SCHEMA, if_exists=True) + metric_type = sa.sql.table( + "metric_type", + 
sa.column("name", sa.String), + schema=SCHEMA, + ) + + op.execute( + metric_type.delete().where( + metric_type.c.name.in_( + [ + "Delay", + "Jitter", + "Stratum", + "Reachability", + "Dispersion", + "NTP Root Delay", + "NTP Root Dispersion", + "Poll Interval", + "Sync Health", + ] + ) + ) + ) From 9d0d1472d8e8c677e86df652674f11fedb4b125f Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 14:15:37 -0400 Subject: [PATCH 16/22] adding reference_probe view --- .../2026_04_17_1254_add_reference_view.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py diff --git a/opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py b/opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py new file mode 100644 index 0000000..8cbb326 --- /dev/null +++ b/opensampl/server/migrations/_migrations/versions/2026_04_17_1254_add_reference_view.py @@ -0,0 +1,56 @@ +"""add reference view + +Revision ID: c95e49e551be +Revises: 5665e5902905 +Create Date: 2026-04-17 12:54:27.037125 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'c95e49e551be' +down_revision: Union[str, None] = '5665e5902905' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +SCHEMA = 'castdb' + +CREATE_VIEW_SQL = f""" +CREATE VIEW {SCHEMA}.reference_probe_metadata +AS WITH probe_references AS ( + SELECT r.uuid, + r.reference_type_uuid, + r.compound_reference_uuid + FROM {SCHEMA}.reference r + JOIN {SCHEMA}.reference_type rt ON r.reference_type_uuid::text = rt.uuid::text + WHERE rt.name::text = 'PROBE'::text + ) + SELECT pm.uuid, + pm.probe_id, + pm.ip_address, + pm.vendor, + pm.model, + pm.name, + pm.public, + pm.location_uuid, + pm.test_uuid, + pr.uuid AS reference_uuid + FROM probe_references pr + JOIN {SCHEMA}.probe_metadata pm ON pr.compound_reference_uuid::text = pm.uuid::text; +""" + +DROP_VIEW_SQL = f""" +DROP VIEW IF EXISTS {SCHEMA}.reference_probe_metadata""" + +def upgrade() -> None: + # Drop the view first, just to be extra safe. + op.execute(DROP_VIEW_SQL) + op.execute(CREATE_VIEW_SQL) + + +def downgrade() -> None: + op.execute(DROP_VIEW_SQL) From 45798ee4cf9686abc9df50e6a9c18506c353cd55 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 14:32:47 -0400 Subject: [PATCH 17/22] typosss --- opensampl/config/server.py | 2 +- .../_migrations/versions/2026_04_17_1243_add_ntp_values.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensampl/config/server.py b/opensampl/config/server.py index a0bc6e6..16d061b 100644 --- a/opensampl/config/server.py +++ b/opensampl/config/server.py @@ -39,7 +39,7 @@ class ServerConfig(BaseConfig): COMPOSE_FILE: str = Field(default="", description="Fully resolved path to the Docker Compose file.") - OVERRIDE_FILE: str | None = Field(defualt=None, description="Override for the compose file") + OVERRIDE_FILE: str | None = Field(default=None, description="Override for the compose file") DOCKER_ENV_FILE: str = Field(default="", description="Fully resolved path to the 
Docker .env file.") diff --git a/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py index ebaa5a8..4cd6b14 100644 --- a/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py +++ b/opensampl/server/migrations/_migrations/versions/2026_04_17_1243_add_ntp_values.py @@ -26,7 +26,7 @@ def upgrade() -> None: sa.Column( "probe_uuid", sa.String(), - sa.ForeignKey("probe_metadata.uuid"), + sa.ForeignKey(f"{SCHEMA}.probe_metadata.uuid"), primary_key=True, nullable=False, ), @@ -118,13 +118,13 @@ def upgrade() -> None: name="Poll Interval", description="Time between requests sent to a time server in seconds", unit="s", - value_type=float, + value_type='float', ), dict(uuid=str(uuid.uuid4()), name="Sync Health", description="1.0 if synchronized/healthy, 0.0 otherwise (probe-defined)", unit="ratio", - value_type=float, + value_type='float', ) ] op.bulk_insert(metric_type_table, new_metrics) From 73b48f3aa134edc4a80f2d3bba2f56ba7ac60e31 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 14:35:56 -0400 Subject: [PATCH 18/22] warning should only be on collision --- opensampl/load_data.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/opensampl/load_data.py b/opensampl/load_data.py index dc163e7..7b04f7e 100644 --- a/opensampl/load_data.py +++ b/opensampl/load_data.py @@ -157,11 +157,13 @@ def load_time_data( total_rows = len(records) inserted = result.rowcount # ty: ignore[unresolved-attribute] excluded = total_rows - inserted - - logger.warning( - f"Inserted {inserted}/{total_rows} rows for {probe_readable}; " - f"{excluded}/{total_rows} rejected due to conflicts" - ) + if excluded > 0: + logger.warning( + f"Inserted {inserted}/{total_rows} rows for {probe_readable}; " + f"{excluded}/{total_rows} rejected due to conflicts" + ) + else: + logger.info(f"Inserted 
{inserted}/{total_rows} rows for {probe_readable}") except Exception as e: # In case of an error, roll back the session From 1f617e44abfe7f65e219ebd68b2650840dcc4cd7 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Fri, 17 Apr 2026 16:44:26 -0400 Subject: [PATCH 19/22] ok, got that looking swanky --- .../grafana/grafana-dashboards/ntp_dash.json | 1412 +++++++++++++++++ 1 file changed, 1412 insertions(+) create mode 100644 opensampl/server/grafana/grafana-dashboards/ntp_dash.json diff --git a/opensampl/server/grafana/grafana-dashboards/ntp_dash.json b/opensampl/server/grafana/grafana-dashboards/ntp_dash.json new file mode 100644 index 0000000..345fd8c --- /dev/null +++ b/opensampl/server/grafana/grafana-dashboards/ntp_dash.json @@ -0,0 +1,1412 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "NTP reference path: measurements are relative to OpenSAMPL’s configured default reference (UNKNOWN type) unless you add GNSS-backed probes; timing vs GNSS is not implied for these series.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 0, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 52, + "panels": [], + "title": "All Probes", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, 
+ "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP phase offset (Phase Offset metric)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "metric_type_uuid": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": { + "probe_name": "" + } + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "phase_offset" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "Remote single-packet paths use a conservative jitter estimate from delay and root dispersion when peer RMS jitter is unavailable; local chrony/ntpq snapshots may supply measured jitter.", + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP jitter (delay/dispersion estimate or measured)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "metric_type_uuid": true, + "phase_offset": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "emptyValue": "null", + "rowField": "time", + "valueField": "jitter" + } + }, + { + 
"id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP stratum", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_uuid": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "stratum" + } + }, + { + "id": "prepareTimeSeries", + 
"options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP sync health (1=healthy)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_name": false, + "probe_uuid": true, + "stratum": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "sync_health" + } + }, + { + "id": 
"prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 53, + "panels": [], + "repeat": "ntp_reference", + "title": "Reference: $ntp_reference", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 54, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP phase offset (Phase Offset metric)", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": 
"reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "metric_type_uuid": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": { + "probe_name": "" + } + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "phase_offset" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "description": "Remote single-packet paths use a conservative jitter estimate from delay and root dispersion when peer RMS jitter is unavailable; local chrony/ntpq snapshots may supply measured jitter.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + 
"y": 19 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP jitter (delay/dispersion estimate or measured)", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": "reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "metric_type_uuid": true, + "phase_offset": true, + "probe_uuid": true, + "reference_uuid": true, + "stratum": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": { + "metric_type_uuid": 5, + "probe_name": 0, + "probe_uuid": 3, + "reference_uuid": 4, + "time": 1, + "value": 2 + }, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "emptyValue": "null", + "rowField": "time", + "valueField": "jitter" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP stratum", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": "reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_uuid": true, + "sync_health": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "stratum" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 51, + "refId": "A" + } + ], + "title": "NTP sync health (1=healthy)", + "transformations": [ + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "${ntp_reference}" + } + }, + "fieldName": "reference_uuid" + } + ], + "match": "all", + "type": "include" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "jitter": true, + "phase_offset": true, + "probe_name": false, + "probe_uuid": true, + "stratum": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupingToMatrix", + "options": { + "columnField": "probe_name", + "rowField": "time", + "valueField": "sync_health" + } + }, + { + "id": "prepareTimeSeries", + "options": { + "format": "multi" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 50, + "panels": [ + { + "datasource": { + "type": 
"grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "description": "Phase metrics use OpenSAMPL’s default reference row (UNKNOWN reference type). NTP **observation** context is the configured server in `ntp_metadata` (not GNSS unless a GNSS-backed probe is present).", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 87 + }, + "id": 5, + "options": { + "cellHeight": "sm", + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "probe" + } + ] + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS probe,\n pm.vendor,\n COALESCE(rt.name, '') AS reference_type,\n COALESCE(nm.target_host::text, '') AS ntp_server,\n COALESCE(nm.mode::text, '') AS ntp_mode,\n COALESCE(nm.reference_id::text, '') AS ntp_ref_id,\n COALESCE(l.name, '') AS location,\n COALESCE(pm.public::text, '') AS public\nFROM castdb.probe_metadata pm\nLEFT JOIN castdb.ntp_metadata nm ON nm.probe_uuid = pm.uuid\nLEFT JOIN castdb.locations l ON l.uuid = pm.location_uuid\nLEFT JOIN LATERAL (\n SELECT pd.reference_uuid FROM castdb.probe_data pd WHERE pd.probe_uuid = pm.uuid LIMIT 1\n) rp ON true\nLEFT JOIN castdb.reference r ON r.uuid = rp.reference_uuid\nLEFT JOIN castdb.reference_type rt ON rt.uuid = r.reference_type_uuid\nWHERE pm.vendor = 'NTP'\n AND (trim('${ntp_probe:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${ntp_probe:csv}'), ',')))\nORDER BY 1;", + "refId": "A" + } 
+ ], + "title": "Probe reference & source (stored metadata)", + "type": "table" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "P55EB97F79F5EB88E" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "hideFrom": { + "viz": false + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 51, + "options": { + "cellHeight": "sm", + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "dataset": "castdb", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "WITH probe_ref AS (\n SELECT\n uuid,\n COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS probe_name\n FROM castdb.probe_metadata pm\n)\nSELECT\n time_bucket('1 minute'::interval, pd.time AT TIME ZONE 'UTC') AS time,\n pd.probe_uuid,\n pr.probe_name,\n pd.reference_uuid,\n AVG(pd.value::float * 1e9) FILTER (WHERE lower(m.name) = 'phase offset') AS phase_offset,\n AVG(pd.value::float * 1e9) FILTER (WHERE lower(m.name) = 'jitter') AS jitter,\n AVG((pd.value)::float) FILTER (WHERE lower(m.name) = 'stratum') AS stratum,\n AVG((pd.value)::float) FILTER (WHERE lower(m.name) = 'sync health') AS sync_health\nFROM castdb.probe_data pd\nJOIN probe_ref pr\n ON pd.probe_uuid = pr.uuid\nJOIN castdb.metric_type m\n ON pd.metric_type_uuid = m.uuid\nWHERE pd.probe_uuid = ANY(ARRAY[${ntp_probe:sqlstring}]::text[]) AND $__timeFilter(pd.time)\nGROUP BY 1, 2, 3, 4\nORDER BY\n 1, 3;", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + 
"limit": 50 + } + } + ], + "title": "source_panel", + "type": "table" + } + ], + "title": "Reference & source metadata", + "type": "row" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 42, + "tags": [ + "ntp", + "opensampl", + "reference" + ], + "templating": { + "list": [ + { + "current": { + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "definition": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor = 'NTP' ORDER BY 2", + "includeAll": true, + "multi": true, + "name": "ntp_probe", + "options": [], + "query": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor = 'NTP' ORDER BY 2", + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "castdb-datasource" + }, + "definition": "SELECT pm.reference_uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.reference_probe_metadata pm WHERE pm.vendor = 'NTP' \nUNION ALL\nSELECT r.uuid::text AS __value, rt.\"name\" AS __text FROM castdb.reference r JOIN castdb.reference_type rt ON r.reference_type_uuid = rt.\"uuid\" WHERE rt.\"name\" = 'UNKNOWN';", + "includeAll": true, + "multi": true, + "name": "ntp_reference", + "options": [], + "query": "SELECT pm.reference_uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS __text FROM castdb.reference_probe_metadata pm WHERE pm.vendor = 'NTP' \nUNION ALL\nSELECT r.uuid::text AS __value, rt.\"name\" AS __text FROM castdb.reference r JOIN castdb.reference_type rt ON r.reference_type_uuid = rt.\"uuid\" WHERE rt.\"name\" = 'UNKNOWN';", + 
"refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "utc", + "title": "NTP probes (NTP server reference path)", + "uid": "ntp-opensampl", + "version": 17 +} \ No newline at end of file From cf9dc3227375f796268f12e17721ea7748c600bc Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Mon, 20 Apr 2026 15:41:29 -0400 Subject: [PATCH 20/22] adding initial pytest-postgresql functionality --- pyproject.toml | 4 +- tests/integration/__init__.py | 0 tests/integration/conftest.py | 332 +++++++++++++++++++++++++++++ tests/integration/test_db_setup.py | 106 +++++++++ 4 files changed, 441 insertions(+), 1 deletion(-) create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/conftest.py create mode 100644 tests/integration/test_db_setup.py diff --git a/pyproject.toml b/pyproject.toml index b5a4a63..ef33b97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,6 +100,8 @@ dev = [ "mkdocs-gen-files", "mkdocs-material", "mkdocs-click", + "psycopg[binary]", + "pytest-postgresql" ] [tool.hatch.build.targets.sdist] @@ -158,4 +160,4 @@ max-args = 10 quote-style = "double" indent-style = "space" skip-magic-trailing-comma = false -docstring-code-format = true +docstring-code-format = true \ No newline at end of file diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..1ccf890 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,332 @@ +""" +tests/conftest.py + +Shared pytest fixtures for openSAMPL integration tests. 
+ +Prerequisites +------------- +- PostgreSQL with PostGIS extension available +- pytest-postgresql installed: + uv add --group dev pytest-postgresql +- On macOS (Homebrew): + brew install postgresql postgis + +pytest-postgresql will locate pg_ctl automatically from your PATH. If you +have multiple Postgres versions installed, point it at the right one via +pytest.ini or pyproject.toml: + + [tool.pytest.ini_options] + postgresql_exec = "/opt/homebrew/opt/postgresql@16/bin/pg_ctl" +""" + +import pytest +from pytest_postgresql import factories as pg_factories +from sqlalchemy import create_engine, text +from sqlalchemy.orm import Session, sessionmaker + +from opensampl.db.orm import Base +from opensampl.db.orm import Defaults as DBDefaults +from opensampl.db.orm import MetricType as DBMetricType +from opensampl.db.orm import Reference as DBReference +from opensampl.db.orm import ReferenceType as DBReferenceType +from opensampl.metrics import METRICS, MetricType +from opensampl.references import REF_TYPES, ReferenceType + + +# --------------------------------------------------------------------------- +# pytest-postgresql process fixture +# +# postgresql_proc manages the Postgres server lifetime (session-scoped). +# We deliberately avoid the postgresql connection fixture so we have no +# dependency on a specific psycopg version — the project already has +# psycopg2-binary, and SQLAlchemy handles the connection from here. 
+# --------------------------------------------------------------------------- + +postgresql_proc = pg_factories.postgresql_proc() + + +# --------------------------------------------------------------------------- +# Helpers: introspect METRICS / REF_TYPES the same way VENDORS.all() does +# --------------------------------------------------------------------------- + +def _all_metrics() -> list[MetricType]: + """All MetricType instances defined on the METRICS class.""" + return [v for v in METRICS.__dict__.values() if isinstance(v, MetricType)] + + +def _all_ref_types() -> list[ReferenceType]: + """All ReferenceType instances defined on REF_TYPES (includes CompoundReferenceType).""" + return [v for v in REF_TYPES.__dict__.values() if isinstance(v, ReferenceType)] + + +# --------------------------------------------------------------------------- +# Session-scoped engine +# Schema, tables, seed data, and the get_default_uuid_for stub are all +# created once per test session. +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def db_engine(postgresql_proc): + """ + Session-scoped SQLAlchemy engine pointed at the pytest-postgresql instance. + + Connects using psycopg2-binary (already a project dependency) so there is + no dependency on psycopg3. Creates the opensampl_test database on first + run via a temporary autocommit connection to the default 'postgres' database. + + Lifecycle: + 1. Create the opensampl_test database. + 2. Install PostGIS and create the castdb schema. + 3. Create all ORM tables via Base.metadata.create_all(). + 4. Seed metric_type, reference_type, reference, and defaults tables. + 5. Install the get_default_uuid_for() PL/pgSQL stub. 
+ """ + # postgresql_proc exposes plain attributes — no psycopg version dependency + host = postgresql_proc.host + port = postgresql_proc.port + user = postgresql_proc.user + test_dbname = "opensampl_test" + + # Connect to the default 'postgres' db to create our test database. + # Must use isolation_level=AUTOCOMMIT because CREATE DATABASE cannot run + # inside a transaction block. + bootstrap_url = f"postgresql+psycopg2://{user}@{host}:{port}/postgres" + bootstrap_engine = create_engine(bootstrap_url, isolation_level="AUTOCOMMIT") + with bootstrap_engine.connect() as conn: + exists = conn.execute( + text("SELECT 1 FROM pg_database WHERE datname = :dbname"), + {"dbname": test_dbname}, + ).fetchone() + if not exists: + conn.execute(text(f'CREATE DATABASE "{test_dbname}"')) + bootstrap_engine.dispose() + + db_url = f"postgresql+psycopg2://{user}@{host}:{port}/{test_dbname}" + engine = create_engine(db_url, echo=False) + + with engine.begin() as conn: + # PostGIS is required by the Locations.geom column (GeoAlchemy2) + conn.execute(text("CREATE EXTENSION IF NOT EXISTS postgis")) + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {Base.metadata.schema}")) + + Base.metadata.create_all(engine) + _seed_lookup_tables(engine) + _install_default_uuid_stub(engine) + + yield engine + + engine.dispose() + + +# --------------------------------------------------------------------------- +# Seeding helpers (called once from db_engine, not exposed as fixtures) +# --------------------------------------------------------------------------- + +def _seed_lookup_tables(engine) -> None: + """ + Populate metric_type, reference_type, reference, and defaults tables. + + Reads directly from the METRICS and REF_TYPES Python definitions so the + test DB always matches what the application expects — no hardcoded values. + After inserting the lookup rows, seeds the defaults table with the UUIDs + of the UNKNOWN rows, mirroring how production initialises get_default_uuid_for(). 
+ """ + SessionLocal = sessionmaker(bind=engine) + session = SessionLocal() + + try: + # --- metric_type --- + for metric in _all_metrics(): + data = metric.model_dump() # value_type serialised to str by field_serializer + if not session.query(DBMetricType).filter_by(name=data["name"]).first(): + session.add(DBMetricType(**data)) + + # --- reference_type --- + # CompoundReferenceType.model_dump() includes reference_table; the column is nullable so plain + # ReferenceType rows (no reference_table) are stored with NULL, which is correct. + for ref_type in _all_ref_types(): + data = ref_type.model_dump() + if not session.query(DBReferenceType).filter_by(name=data["name"]).first(): + session.add(DBReferenceType(**data)) + + session.flush() + + # --- reference: one default UNKNOWN row -------------------------- + # get_default_uuid_for('reference') needs at least one reference row to + # point at. We use the UNKNOWN reference type with no compound target. + unknown_ref_type = session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + default_reference = session.query(DBReference).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).first() + if not default_reference: + default_reference = DBReference( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ) + session.add(default_reference) + + session.flush() + + # --- defaults table --------------------------------------------- + # Maps table/category names to the UUID that get_default_uuid_for() + # should return. Mirrors what the production TimescaleDB init does. 
+ unknown_metric = session.query(DBMetricType).filter_by(name=METRICS.UNKNOWN.name).one() + + for table_name, uuid_value in [ + ("metric_type", unknown_metric.uuid), + ("reference", default_reference.uuid), + ]: + if not session.query(DBDefaults).filter_by(table_name=table_name).first(): + session.add(DBDefaults(table_name=table_name, uuid=uuid_value)) + + session.commit() + + except Exception: + session.rollback() + raise + finally: + session.close() + + +def _install_default_uuid_stub(engine) -> None: + """ + Install the get_default_uuid_for() PL/pgSQL stub. + + Rather than hardcoding UUIDs, the stub queries the defaults table — the + same approach the production TimescaleDB function uses. This means it + automatically returns whatever was seeded above. + """ + schema = Base.metadata.schema + + with engine.begin() as conn: + conn.execute(text(f""" + CREATE OR REPLACE FUNCTION get_default_uuid_for(entity_type TEXT) + RETURNS TEXT AS $$ + DECLARE + result_uuid TEXT; + BEGIN + SELECT uuid + INTO result_uuid + FROM {schema}.defaults + WHERE table_name = entity_type; + + RETURN result_uuid; + END; + $$ LANGUAGE plpgsql; + """)) + + +# --------------------------------------------------------------------------- +# Per-test session — savepoint rollback keeps tests isolated +# --------------------------------------------------------------------------- + +@pytest.fixture +def db_session(db_engine) -> Session: + """ + Function-scoped database session backed by a savepoint. + + Every test starts with the full seeded dataset intact. Any rows inserted + or updated during the test are rolled back when the test ends without + disturbing the seeded rows, and without the cost of recreating the schema. 
+ + Usage:: + + def test_something(db_session): + factory = TableFactory("locations", db_session) + factory.write({"name": "test-loc", "lat": 35.9, "lon": -84.3}) + result = db_session.query(Locations).filter_by(name="test-loc").one() + assert result.name == "test-loc" + # row is gone after the test + """ + connection = db_engine.connect() + outer_transaction = connection.begin() + session = Session(bind=connection) + session.begin_nested() # SAVEPOINT — inner rollback target + + yield session + + session.close() + outer_transaction.rollback() # wipes everything written during the test + connection.close() + + +# --------------------------------------------------------------------------- +# Seeded UUIDs — expose the canonical lookup UUIDs tests may need directly +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def seeded_uuids(db_engine) -> dict: + """ + Session-scoped dict of UUIDs inserted during seed, keyed by logical name. + + Useful when a test needs to construct ORM objects by hand (e.g. ProbeData) + and must reference real FK values. + + Keys + ---- + metric_type. — UUID from the metric_type table + reference_type. 
— UUID from the reference_type table + reference.unknown — UUID of the default UNKNOWN reference row + default.metric_type — what get_default_uuid_for('metric_type') returns + default.reference — what get_default_uuid_for('reference') returns + + Example:: + + def test_probe_data(db_session, seeded_uuids): + phase_offset_uuid = seeded_uuids["metric_type.Phase Offset"] + """ + SessionLocal = sessionmaker(bind=db_engine) + session = SessionLocal() + + try: + uuids: dict = {} + + for row in session.query(DBMetricType).all(): + uuids[f"metric_type.{row.name}"] = row.uuid + + for row in session.query(DBReferenceType).all(): + uuids[f"reference_type.{row.name}"] = row.uuid + + unknown_ref_type = session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + unknown_ref = session.query(DBReference).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).one() + uuids["reference.unknown"] = unknown_ref.uuid + + for row in session.query(DBDefaults).all(): + uuids[f"default.{row.table_name}"] = row.uuid + + return uuids + + finally: + session.close() + + +# --------------------------------------------------------------------------- +# Routing environment — patch BaseConfig for @route-decorated functions +# --------------------------------------------------------------------------- + +@pytest.fixture +def db_env(db_engine, monkeypatch) -> None: + """ + Set env vars so that BaseConfig routes directly to the test DB. + + Any test that calls a @route-decorated function (write_to_table, + load_time_data, load_probe_metadata, create_new_tables) must include + this fixture. Pass session=db_session explicitly so the route wrapper + uses your test session rather than opening a new one. 
+ + Usage:: + + def test_write_to_table(db_env, db_session): + write_to_table( + "locations", + {"name": "test-loc", "lat": 35.9, "lon": -84.3}, + session=db_session, + ) + """ + monkeypatch.setenv("ROUTE_TO_BACKEND", "false") + monkeypatch.setenv("DATABASE_URL", str(db_engine.url)) + monkeypatch.delenv("BACKEND_URL", raising=False) \ No newline at end of file diff --git a/tests/integration/test_db_setup.py b/tests/integration/test_db_setup.py new file mode 100644 index 0000000..6f1f542 --- /dev/null +++ b/tests/integration/test_db_setup.py @@ -0,0 +1,106 @@ +""" +tests/integration/test_db_setup.py + +Smoke tests to verify the test database spun up and seeded correctly. +""" + +import pytest +from sqlalchemy import text + +from opensampl.db.orm import Defaults as DBDefaults +from opensampl.db.orm import MetricType as DBMetricType +from opensampl.db.orm import Reference as DBReference +from opensampl.db.orm import ReferenceType as DBReferenceType +from opensampl.metrics import METRICS, MetricType +from opensampl.references import REF_TYPES, ReferenceType + + +def test_schema_exists(db_session): + """castdb schema was created.""" + result = db_session.execute( + text("SELECT schema_name FROM information_schema.schemata WHERE schema_name = 'castdb'") + ).scalar() + assert result == "castdb" + + +def test_postgis_installed(db_session): + """PostGIS extension is available (required for Locations.geom).""" + result = db_session.execute(text("SELECT extname FROM pg_extension WHERE extname = 'postgis'")).scalar() + assert result == "postgis" + + +def test_all_metrics_seeded(db_session): + """Every MetricType defined on METRICS is present in the metric_type table.""" + expected = {v.name for v in METRICS.__dict__.values() if isinstance(v, MetricType)} + seeded = {row.name for row in db_session.query(DBMetricType).all()} + assert expected == seeded + + +def test_all_reference_types_seeded(db_session): + """Every ReferenceType defined on REF_TYPES is present in the 
reference_type table.""" + expected = {v.name for v in REF_TYPES.__dict__.values() if isinstance(v, ReferenceType)} + seeded = {row.name for row in db_session.query(DBReferenceType).all()} + assert expected == seeded + + +def test_default_reference_row_exists(db_session): + """A default UNKNOWN reference row exists for get_default_uuid_for('reference').""" + unknown_ref_type = db_session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + ref = db_session.query(DBReference).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).first() + assert ref is not None + + +def test_defaults_table_seeded(db_session): + """defaults table has entries for both metric_type and reference.""" + rows = {row.table_name for row in db_session.query(DBDefaults).all()} + assert "metric_type" in rows + assert "reference" in rows + + +def test_get_default_uuid_for_metric_type(db_session): + """Stub function returns the UUID of the UNKNOWN metric type.""" + result = db_session.execute(text("SELECT get_default_uuid_for('metric_type')")).scalar() + expected = db_session.query(DBMetricType.uuid).filter_by(name=METRICS.UNKNOWN.name).scalar() + assert result == expected + + +def test_get_default_uuid_for_reference(db_session): + """Stub function returns the UUID of the default UNKNOWN reference row.""" + result = db_session.execute(text("SELECT get_default_uuid_for('reference')")).scalar() + unknown_ref_type = db_session.query(DBReferenceType).filter_by(name=REF_TYPES.UNKNOWN.name).one() + expected = db_session.query(DBReference.uuid).filter_by( + reference_type_uuid=unknown_ref_type.uuid, + compound_reference_uuid=None, + ).scalar() + assert result == expected + + +def test_seeded_uuids_fixture(seeded_uuids): + """seeded_uuids convenience fixture has the expected keys.""" + assert f"metric_type.{METRICS.UNKNOWN.name}" in seeded_uuids + assert f"metric_type.{METRICS.PHASE_OFFSET.name}" in seeded_uuids + assert 
f"reference_type.{REF_TYPES.UNKNOWN.name}" in seeded_uuids + assert "reference.unknown" in seeded_uuids + assert "default.metric_type" in seeded_uuids + assert "default.reference" in seeded_uuids + + +def test_session_rollback_isolation(db_session, db_engine): + """Writes in one session do not leak — the savepoint rolls back cleanly.""" + from opensampl.db.orm import TestMetadata + + db_session.add(TestMetadata(name="rollback-canary")) + db_session.flush() + + # Row is visible within this session + assert db_session.query(TestMetadata).filter_by(name="rollback-canary").one() + + # After the test ends the fixture rolls back, but we can verify the + # mechanism works by checking a fresh session sees nothing yet + from sqlalchemy.orm import Session + with Session(bind=db_engine) as fresh: + result = fresh.query(TestMetadata).filter_by(name="rollback-canary").first() + assert result is None, "Savepoint did not isolate the write from other sessions" \ No newline at end of file From 3ba95cb509f8a6dc96f89032e18c46a2fc769840 Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Mon, 20 Apr 2026 16:01:41 -0400 Subject: [PATCH 21/22] adding the other dash stuff --- .../public-timing-dashboard.json | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json b/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json index 687ceae..24b7a6f 100644 --- a/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json +++ b/opensampl/server/grafana/grafana-dashboards/public-timing-dashboard.json @@ -338,7 +338,7 @@ "group": [], "metricColumn": "none", "rawQuery": true, - "rawSql": "select\n pm.name as \"Clock\",\n l.name as \"Location Name\",\n l.latitude,\n l.longitude,\n l.campus\nfrom castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\nwhere pm.uuid in (${clock_name:sqlstring});", + "rawSql": "select\n pm.name 
as \"Clock\",\n l.name as \"Location Name\",\n l.latitude,\n l.longitude,\n l.campus\nfrom castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\nwhere (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')));", "refId": "ClockProbes", "select": [ [ @@ -379,13 +379,13 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "P55EB97F79F5EB88E" + "uid": "castdb-datasource" }, "editorMode": "code", "format": "table", "hide": false, "rawQuery": true, - "rawSql": "SELECT\n l.latitude,\n l.longitude,\n l.campus,\n sum(\n CASE\n when pm.public = True and pm.vendor in ('ADVA', 'MicrochipTP4100') then 1 else 0\n end \n ) as visible_clocks,\n sum(\n CASE\n when pm.uuid in (${clock_name:sqlstring}) then 1 else 0\n end \n ) as selected_clocks\n from castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\n where l.public = True\n group by\n l.latitude, l.longitude, l.campus;", + "rawSql": "SELECT\n l.latitude,\n l.longitude,\n l.campus,\n sum(\n CASE\n when pm.public = True and pm.vendor in ('ADVA', 'MicrochipTP4100', 'NTP') then 1 else 0\n end \n ) as visible_clocks,\n sum(\n CASE\n when (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ','))) then 1 else 0\n end \n ) as selected_clocks\n from castdb.campus_locations l left join castdb.probe_metadata pm on l.uuid = pm.location_uuid\n where l.public = True\n group by\n l.latitude, l.longitude, l.campus;", "refId": "A", "sql": { "columns": [ @@ -465,7 +465,7 @@ }, "format": "table", "rawQuery": true, - "rawSql": "SELECT COUNT(*) as \"Total Clock Probes\" FROM castdb.probe_metadata where uuid in ($clock_name)", + "rawSql": "SELECT COUNT(*)::bigint AS \"Total Clock Probes\" FROM castdb.probe_metadata pm WHERE pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP') AND coalesce(pm.public, true) AND (trim('${clock_name:csv}') = '' OR pm.uuid = 
ANY(string_to_array(trim('${clock_name:csv}'), ',')))", "refId": "A" } ], @@ -539,7 +539,7 @@ "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT \n coalesce(pm.name, concat(pm.ip_address, 'Inteface', pm.probe_id)) as \"Clock Probe\", \n COUNT(*) as \"Total Records\" \nFROM castdb.probe_data pd\njoin castdb.probe_metadata pm on pd.probe_uuid = pm.uuid \nwhere pm.uuid in (${clock_name:sqlstring})\nAND pd.\"time\" >= $__timeFrom()\nAND pd.\"time\" <= $__timeTo()\ngroup by pm.uuid, pm.name, pm.ip_address, pm.probe_id;", + "rawSql": "SELECT \n coalesce(pm.name, concat(pm.ip_address, ' Interface ', pm.probe_id)) AS \"Clock Probe\", \n COUNT(*)::bigint AS \"Total Records\" \nFROM castdb.probe_data pd\nJOIN castdb.probe_metadata pm ON pd.probe_uuid = pm.uuid \nWHERE pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\n AND pd.\"time\" >= $__timeFrom()\n AND pd.\"time\" <= $__timeTo()\nGROUP BY pm.uuid, pm.name, pm.ip_address, pm.probe_id;", "refId": "A", "sql": { "columns": [ @@ -568,7 +568,7 @@ "type": "grafana-postgresql-datasource", "uid": "castdb-datasource" }, - "description": "Average time error \n(averaged on selected resolution)", + "description": "Average time error vs stored reference (resolution as selected). 
GNSS-specific labeling applies only when the probe/reference model is GNSS-backed.", "fieldConfig": { "defaults": { "color": { @@ -650,7 +650,7 @@ "editorMode": "code", "format": "time_series", "rawQuery": true, - "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n coalesce(pm.name, concat(pm.ip_address, ' Interface ', pm.probe_id)),\n AVG(pd.value::FLOAT) * 1e9 AS value\nFROM castdb.probe_data pd join castdb.probe_metadata pm on pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pd.probe_uuid IN (${clock_name:sqlstring})\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", + "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n coalesce(pm.name, concat(pm.ip_address, ' Interface ', pm.probe_id)),\n AVG(pd.value::FLOAT) * 1e9 AS value\nFROM castdb.probe_data pd JOIN castdb.probe_metadata pm ON pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pd.probe_uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", "refId": "A", "sql": { "columns": [ @@ -671,7 +671,7 @@ } } ], - "title": "Time Error - Clock Time vs GNSS", + "title": "Time Error - Clock Time vs Reference", "transformations": [ { "id": "prepareTimeSeries", @@ -776,7 +776,7 @@ "editorMode": "code", "format": "time_series", "rawQuery": true, - "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n COALESCE(name, CONCAT(ip_address, ' Interface ', 
probe_id)),\n (MAX(pd.value::FLOAT) - MIN(pd.value::FLOAT)) * 1e9 AS value\nFROM castdb.probe_data pd join castdb.probe_metadata pm on pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pd.probe_uuid in (${clock_name:sqlstring})\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", + "rawSql": "SELECT \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC') AS time,\n COALESCE(pm.name, CONCAT(pm.ip_address, ' Interface ', pm.probe_id)),\n (MAX(pd.value::FLOAT) - MIN(pd.value::FLOAT)) * 1e9 AS value\nFROM castdb.probe_data pd JOIN castdb.probe_metadata pm ON pd.probe_uuid = pm.uuid\nWHERE\n $__timeFilter(pd.time)\n AND pm.vendor IN ('ADVA', 'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pd.probe_uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\nGROUP BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC'),\n pd.probe_uuid,\n pm.name,\n pm.ip_address,\n pm.probe_id\nORDER BY \n time_bucket(${resolution:sqlstring}, pd.time AT TIME ZONE 'UTC')\n", "refId": "A", "sql": { "columns": [ @@ -797,7 +797,7 @@ } } ], - "title": "Maximum Time Interval Error VS GNSS", + "title": "Maximum Time Interval Error vs Reference", "transformations": [ { "id": "prepareTimeSeries", @@ -834,7 +834,7 @@ "type": "grafana-postgresql-datasource", "uid": "castdb-datasource" }, - "description": "Average time error \n(averaged on selected resolution)", + "description": "Average time error vs stored reference (resolution as selected). 
GNSS-specific labeling applies only when the probe/reference model is GNSS-backed.", "fieldConfig": { "defaults": { "color": { @@ -937,7 +937,7 @@ } } ], - "title": "Time Error - Clock Time vs GNSS", + "title": "Time Error - Clock Time vs Reference", "type": "timeseries" }, { @@ -1048,14 +1048,44 @@ } } ], - "title": "Maximum Time Interval Error", + "title": "Maximum Time Interval Error vs Reference", "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 29}, + "id": 101, + "panels": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "castdb-datasource"}, + "description": "Rows reflect stored `probe_metadata`, `ntp_metadata` (when vendor is NTP), `locations`, and one sample `reference`/`reference_type` from `probe_data` per probe.", + "fieldConfig": {"defaults": {}, "overrides": []}, + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 0}, + "id": 100, + "options": {"cellHeight": "sm", "showHeader": true}, + "pluginVersion": "12.0.0", + "targets": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "castdb-datasource"}, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n COALESCE(pm.name, CONCAT(pm.ip_address, ' ', pm.probe_id)) AS probe,\n pm.vendor,\n COALESCE(rt.name, '') AS reference_type,\n COALESCE(nm.target_host::text, '') AS ntp_server,\n COALESCE(nm.mode::text, '') AS ntp_mode,\n COALESCE(nm.reference_id::text, '') AS ntp_ref_id,\n COALESCE(l.name, '') AS location,\n COALESCE(pm.public::text, '') AS public\nFROM castdb.probe_metadata pm\nLEFT JOIN castdb.ntp_metadata nm ON nm.probe_uuid = pm.uuid\nLEFT JOIN castdb.locations l ON l.uuid = pm.location_uuid\nLEFT JOIN LATERAL (\n SELECT pd.reference_uuid FROM castdb.probe_data pd WHERE pd.probe_uuid = pm.uuid LIMIT 1\n) rp ON true\nLEFT JOIN castdb.reference r ON r.uuid = rp.reference_uuid\nLEFT JOIN castdb.reference_type rt ON rt.uuid = r.reference_type_uuid\nWHERE pm.vendor IN ('ADVA', 
'MicrochipTP4100', 'NTP')\n AND coalesce(pm.public, true)\n AND (trim('${clock_name:csv}') = '' OR pm.uuid = ANY(string_to_array(trim('${clock_name:csv}'), ',')))\nORDER BY 1;", + "refId": "A" + } + ], + "title": "Probe reference & source (stored metadata)", + "type": "table" + } + ], + "title": "Reference & source metadata", + "type": "row" } ], "preload": false, "refresh": "", "schemaVersion": 41, - "tags": [], + "tags": ["opensampl", "reference", "geospatial"], "templating": { "list": [ { @@ -1067,12 +1097,12 @@ "type": "grafana-postgresql-datasource", "uid": "castdb-datasource" }, - "definition": "SELECT uuid AS __value, COALESCE(name, CONCAT(ip_address, ' Interface ', probe_id)) AS __text FROM castdb.probe_metadata WHERE vendor in ('ADVA', 'MicrochipTP4100') and public;", + "definition": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' Interface ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor in ('ADVA', 'MicrochipTP4100', 'NTP') AND coalesce(pm.public, true) ORDER BY 2;", "includeAll": true, "multi": true, "name": "clock_name", "options": [], - "query": "SELECT uuid AS __value, COALESCE(name, CONCAT(ip_address, ' Interface ', probe_id)) AS __text FROM castdb.probe_metadata WHERE vendor in ('ADVA', 'MicrochipTP4100') and public;", + "query": "SELECT pm.uuid::text AS __value, COALESCE(pm.name, CONCAT(pm.ip_address, ' Interface ', pm.probe_id)) AS __text FROM castdb.probe_metadata pm WHERE pm.vendor in ('ADVA', 'MicrochipTP4100', 'NTP') AND coalesce(pm.public, true) ORDER BY 2;", "refresh": 1, "regex": "", "type": "query" @@ -1130,7 +1160,8 @@ }, "timepicker": {}, "timezone": "utc", - "title": "Public Geospatial and Timing Combined Dashboard", + "description": "Geospatial views use stored `locations` geometry. 
Timing series are relative to each probe\u2019s stored reference (OpenSAMPL `reference` / `reference_type`) and are **not** GNSS-truth unless a GNSS-backed probe supplies that semantics.", + "title": "Public Geospatial and Timing (Reference)", "uid": "public-geospatial-dashboard", "version": 10 } \ No newline at end of file From 03ca8a095d9530088ca4446e257acf5e08f6b51f Mon Sep 17 00:00:00 2001 From: "MacFarland, Midgie" Date: Wed, 22 Apr 2026 08:33:56 -0400 Subject: [PATCH 22/22] small tweak --- opensampl/mixins/collect.py | 2 +- opensampl/vendors/ntp.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensampl/mixins/collect.py b/opensampl/mixins/collect.py index ed48d93..9606b9e 100644 --- a/opensampl/mixins/collect.py +++ b/opensampl/mixins/collect.py @@ -144,7 +144,7 @@ def _collect_and_save(cls, collect_config: CollectConfig) -> None: @classmethod def filter_files(cls, files: list[Path]) -> list[Path]: """Filter the files found in the input directory when loading this vendor's data files""" - return [f for f in files if f.name.startswith(f"{cls.vendor.parser_class}_") and f.stem == ".txt"] + return [f for f in files if f.name.startswith(f"{cls.vendor.parser_class}_") and f.suffix == ".txt"] @classmethod def load_metadata(cls, probe_key: ProbeKey, metadata: dict) -> None: diff --git a/opensampl/vendors/ntp.py b/opensampl/vendors/ntp.py index 57f835b..0b28fca 100644 --- a/opensampl/vendors/ntp.py +++ b/opensampl/vendors/ntp.py @@ -510,8 +510,8 @@ class CollectConfig(CollectMixin.CollectConfig): ip_address: str = "127.0.0.1" port: int | None = None mode: Literal["remote", "local"] = "local" - interval: float = 0.0 - duration: int = 1 + interval: float = Field(0.0, ge=0.0) + duration: int = Field(1, ge=1) timeout: float = 3.0 collection_ip: str = Field(default_factory=collect_ip_factory) collection_id: str = Field(default_factory=collect_id_factory) @@ -540,7 +540,7 @@ class RandomDataConfig(RandomDataMixin.RandomDataConfig): 
             description="random.uniform(-1e-12, 1e-12)",
         )
 
-    def __init__(self, input_file: str):
+    def __init__(self, input_file: str):
         """Initialize NtpProbe from input file"""
         super().__init__(input_file)
         self.collection_probe = None