diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java index 23c6211a38d..255930d5b90 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java @@ -62,6 +62,7 @@ import org.apache.accumulo.monitor.next.SystemInformation.CompactionTableSummary; import org.apache.accumulo.monitor.next.SystemInformation.FateTransaction; import org.apache.accumulo.monitor.next.SystemInformation.FetchCycleTimes; +import org.apache.accumulo.monitor.next.SystemInformation.InstanceOverview; import org.apache.accumulo.monitor.next.SystemInformation.RecoveryInformation; import org.apache.accumulo.monitor.next.SystemInformation.Scan; import org.apache.accumulo.monitor.next.SystemInformation.TableSummary; @@ -188,7 +189,7 @@ public MonitorStatus getStatus() { } @GET - @Path("instance") + @Path("instance/info") @Produces(MediaType.APPLICATION_JSON) @Description("Returns the instance name, instance id, version, zookeepers, and volumes") public InstanceSummary getInstanceSummary() { @@ -200,6 +201,14 @@ public InstanceSummary getInstanceSummary() { Constants.VERSION); } + @GET + @Path("instance/overview") + @Produces(MediaType.APPLICATION_JSON) + @Description("Returns an overview of instance level activity") + public InstanceOverview getInstanceOverview() { + return monitor.getInformationFetcher().getSummaryForEndpoint().getInstanceOverview(); + } + @GET @Path("compactors/detail/{" + GROUP_PARAM_KEY + "}") @Produces(MediaType.APPLICATION_JSON) diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java index efd7ab05908..0f2006fd6c0 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java @@ -55,6 +55,8 @@ import java.util.stream.Stream; import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.admin.TabletAvailability; import org.apache.accumulo.core.client.admin.TabletInformation; @@ -111,6 +113,159 @@ public class SystemInformation { + public static class InstanceOverview { + private final AtomicLong numKVs = new AtomicLong(0); + private final AtomicLong numFiles = new AtomicLong(0); + private final AtomicLong numTables = new AtomicLong(0); + private final AtomicLong numNamespaces = new AtomicLong(0); + private final AtomicLong numTablets = new AtomicLong(0); + private final AtomicLong totalFileSize = new AtomicLong(0); + private final AtomicLong tabletsAssignedToDeadTServers = new AtomicLong(0); + private final AtomicLong totalSuspendedTablets = new AtomicLong(0); + private final AtomicLong tabletsNeedingRecovery = new AtomicLong(0); + private final AtomicLong compactionsQueued = new AtomicLong(0); + private final AtomicLong compactionsDequeued = new AtomicLong(0); + private final AtomicLong compactionsRunning = new AtomicLong(0); + private final AtomicLong compactionsFailed = new AtomicLong(0); + private final AtomicLong scansTotalInProgress = new AtomicLong(0); + private final AtomicLong scanTotalOpenFiles = new AtomicLong(0); + private final AtomicLong scansTotalKvScanned = new AtomicLong(0); + private final AtomicLong scansTotalKvReturned = new AtomicLong(0); + private final AtomicLong scansTotalKvReturnedBytes = new AtomicLong(0); + private final AtomicLong ingestTotalEntries = new AtomicLong(0); + private final AtomicLong ingestTotalEntriesBytes = new AtomicLong(0); + private final AtomicLong ingestNumTServersHolding = new AtomicLong(0); + private final AtomicLong ingestTotalEntriesInMem = new AtomicLong(0); + private final AtomicLong ingestBulkImportQueued = new AtomicLong(0); + private final AtomicLong ingestBulkImportRunning = new AtomicLong(0); + private final AtomicLong totalMinCQueued = new AtomicLong(0); + private final AtomicLong totalMinCRunning = new AtomicLong(0); + private final AtomicLong totalMinCCompleted = new AtomicLong(0); + private final AtomicLong totalFateSubmitted = new AtomicLong(0); + private final AtomicLong totalFateRunning = new AtomicLong(0); + private final AtomicLong totalServersLowMem = new AtomicLong(0); + + public AtomicLong getNumKVs() { + return numKVs; + } + + public AtomicLong getNumFiles() { + return numFiles; + } + + public AtomicLong getNumTablets() { + return numTablets; + } + + public AtomicLong getNumTables() { + return numTables; + } + + public AtomicLong getNumNamespaces() { + return numNamespaces; + } + + public AtomicLong getTotalFileSize() { + return totalFileSize; + } + + public AtomicLong getTabletsAssignedToDeadTServers() { + return tabletsAssignedToDeadTServers; + } + + public AtomicLong getTotalSuspendedTablets() { + return totalSuspendedTablets; + } + + public AtomicLong getTabletsNeedingRecovery() { + return tabletsNeedingRecovery; + } + + public AtomicLong getCompactionsQueued() { + return compactionsQueued; + } + + public AtomicLong getCompactionsDequeued() { + return compactionsDequeued; + } + + public AtomicLong getCompactionsRunning() { + return compactionsRunning; + } + + public AtomicLong getCompactionsFailed() { + return compactionsFailed; + } + + public AtomicLong getScansTotalInProgress() { + return scansTotalInProgress; + } + + public AtomicLong getScanTotalOpenFiles() { + return scanTotalOpenFiles; + } + + public AtomicLong getScansTotalKvScanned() { + return scansTotalKvScanned; + } + + public AtomicLong getScansTotalKvReturned() { + return scansTotalKvReturned; + } + + public AtomicLong getScansTotalKvReturnedBytes() { + return scansTotalKvReturnedBytes; + } + + public AtomicLong getIngestTotalEntries() { + return ingestTotalEntries; + } + + public AtomicLong getIngestTotalEntriesBytes() { + return ingestTotalEntriesBytes; + } + + public AtomicLong getIngestNumTServersHolding() { + return ingestNumTServersHolding; + } + + public AtomicLong getIngestTotalEntriesInMem() { + return ingestTotalEntriesInMem; + } + + public AtomicLong getIngestBulkImportQueued() { + return ingestBulkImportQueued; + } + + public AtomicLong getIngestBulkImportRunning() { + return ingestBulkImportRunning; + } + + public AtomicLong getTotalMinCQueued() { + return totalMinCQueued; + } + + public AtomicLong getTotalMinCRunning() { + return totalMinCRunning; + } + + public AtomicLong getTotalMinCCompleted() { + return totalMinCCompleted; + } + + public AtomicLong getTotalFateSubmitted() { + return totalFateSubmitted; + } + + public AtomicLong getTotalFateRunning() { + return totalFateRunning; + } + + public AtomicLong getTotalServersLowMem() { + return totalServersLowMem; + } + } + public static class ObfuscatedTabletId extends TabletIdImpl { public ObfuscatedTabletId(KeyExtent ke) { @@ -261,12 +416,16 @@ public String getTableName() { return tableName; } - public void addTablet(TabletInformation info) { + public void addTablet(TabletInformation info, InstanceOverview io) { totalEntries.addAndGet(info.getEstimatedEntries()); + io.getNumKVs().addAndGet(info.getEstimatedEntries()); totalSizeOnDisk.addAndGet(info.getEstimatedSize()); + io.getTotalFileSize().addAndGet(info.getEstimatedSize()); totalFiles.addAndGet(info.getNumFiles()); + io.getNumFiles().addAndGet(info.getNumFiles()); totalWals.addAndGet(info.getNumWalLogs()); totalTablets.addAndGet(1); + io.getNumTablets().incrementAndGet(); switch (info.getTabletAvailability()) { case HOSTED: availableAlways.addAndGet(1); @@ -288,12 +447,14 @@ public void addTablet(TabletInformation info) { break; case ASSIGNED_TO_DEAD_SERVER: totalAssignedToDeadServerTablets.addAndGet(1); + io.getTabletsAssignedToDeadTServers().incrementAndGet(); break; case HOSTED: totalHostedTablets.addAndGet(1); break; case SUSPENDED: totalSuspendedTablets.addAndGet(1); + io.getTotalSuspendedTablets().incrementAndGet(); break; case UNASSIGNED: totalUnassignedTablets.addAndGet(1); @@ -559,6 +720,8 @@ public record FetchCycleTimes(long durationMs, long finishTime) { private final int rgLongRunningCompactionSize; private FetchCycleTimes timing = null; + private final InstanceOverview instanceOverview = new InstanceOverview(); + public SystemInformation(Cache allMetrics, ServerContext ctx) { this.allMetrics = allMetrics; this.ctx = ctx; @@ -827,13 +990,23 @@ public void processResponse(final ServerId server, final MetricResponse response for (ByteBuffer binary : response.getMetrics()) { flatbuffer = FMetric.getRootAsFMetric(binary, flatbuffer); final String metricName = flatbuffer.name(); - if (metricName.equals(Metric.COMPACTOR_MAJC_FAILURES_CONSECUTIVE.getName())) { + if (metricName.equals(Metric.COMPACTOR_MAJC_IN_PROGRESS.getName())) { + long running = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getCompactionsRunning().addAndGet(running); + } else if (metricName.equals(Metric.COMPACTOR_MAJC_FAILED.getName())) { + long failed = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getCompactionsFailed().addAndGet(failed); + } else if (metricName.equals(Metric.LOW_MEMORY.getName())) { + long lowMem = getMetricValue(flatbuffer).longValue(); + if (lowMem > 0) { + this.instanceOverview.getTotalServersLowMem().incrementAndGet(); + } + } else if (metricName.equals(Metric.COMPACTOR_MAJC_FAILURES_CONSECUTIVE.getName())) { boolean failures = getMetricValue(flatbuffer).longValue() > 0; if (failures) { addAlert(Info, Resource, "Compactor has had " + failures + " consecutive failures: " + server.toString()); } - break; } } break; @@ -841,6 +1014,17 @@ public void processResponse(final ServerId server, final MetricResponse response if (gc.get() == null || !gc.get().equals(server)) { gc.set(server); } + for (ByteBuffer binary : response.getMetrics()) { + flatbuffer = FMetric.getRootAsFMetric(binary, flatbuffer); + final String metricName = flatbuffer.name(); + if (metricName.equals(Metric.LOW_MEMORY.getName())) { + long lowMem = getMetricValue(flatbuffer).longValue(); + if (lowMem > 0) { + this.instanceOverview.getTotalServersLowMem().incrementAndGet(); + } + break; + } + } break; case MANAGER: managers.add(server); @@ -851,11 +1035,13 @@ public void processResponse(final ServerId server, final MetricResponse response boolean recovering = getMetricValue(flatbuffer).longValue() > 0; this.recoveries.getOverview().setRootTabletRecovering(recovering); if (recovering) { + this.instanceOverview.getTabletsNeedingRecovery().incrementAndGet(); addAlert(Critical, Table, "The root table requires recovery"); } } else if (metricName.equals(Metric.MANAGER_META_TGW_RECOVERY.getName())) { long tablets = getMetricValue(flatbuffer).longValue(); this.recoveries.getOverview().setMetadataTabletsRecovering(tablets); + this.instanceOverview.getTabletsNeedingRecovery().addAndGet(tablets); if (tablets > 0) { addAlert(Critical, Table, "At least " + tablets + " metadata table tablets require recovery"); @@ -863,6 +1049,7 @@ public void processResponse(final ServerId server, final MetricResponse response } else if (metricName.equals(Metric.MANAGER_USER_TGW_RECOVERY.getName())) { long tablets = getMetricValue(flatbuffer).longValue(); this.recoveries.getOverview().setUserTabletsRecovering(tablets); + this.instanceOverview.getTabletsNeedingRecovery().addAndGet(tablets); if (tablets > 0) { addAlert(High, Table, "At least " + tablets + " user table tablets require recovery"); } @@ -874,6 +1061,18 @@ public void processResponse(final ServerId server, final MetricResponse response addAlert(Critical, Configuration, "A compaction service configuration is invalid. Check the Manager log."); } + } else if (metricName.equals(Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED.getName())) { + long queued = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getCompactionsQueued().addAndGet(queued); + } else if (metricName + .equals(Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_DEQUEUED.getName())) { + long dequeued = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getCompactionsDequeued().addAndGet(dequeued); + } else if (metricName.equals(Metric.LOW_MEMORY.getName())) { + long lowMem = getMetricValue(flatbuffer).longValue(); + if (lowMem > 0) { + this.instanceOverview.getTotalServersLowMem().incrementAndGet(); + } } } break; @@ -881,11 +1080,78 @@ public void processResponse(final ServerId server, final MetricResponse response sservers.computeIfAbsent(response.getResourceGroup(), (rg) -> ConcurrentHashMap.newKeySet()) .add(server); updateAggregates(response, totalSServerMetrics, rgSServerMetrics); + for (ByteBuffer binary : response.getMetrics()) { + flatbuffer = FMetric.getRootAsFMetric(binary, flatbuffer); + final String metricName = flatbuffer.name(); + if (metricName.equals(Metric.SCAN_OPEN_FILES.getName())) { + long files = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScanTotalOpenFiles().addAndGet(files); + } else if (metricName.equals(Metric.SCAN_SCANNED_ENTRIES.getName())) { + long scanned = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScansTotalKvScanned().addAndGet(scanned); + } else if (metricName.equals(Metric.SCAN_QUERY_SCAN_RESULTS.getName())) { + long results = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScansTotalKvReturned().addAndGet(results); + } else if (metricName.equals(Metric.SCAN_QUERY_SCAN_RESULTS_BYTES.getName())) { + long resultSize = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScansTotalKvReturnedBytes().addAndGet(resultSize); + } else if (metricName.equals(Metric.LOW_MEMORY.getName())) { + long lowMem = getMetricValue(flatbuffer).longValue(); + if (lowMem > 0) { + this.instanceOverview.getTotalServersLowMem().incrementAndGet(); + } + } + } break; case TABLET_SERVER: tservers.computeIfAbsent(response.getResourceGroup(), (rg) -> ConcurrentHashMap.newKeySet()) .add(server); updateAggregates(response, totalTServerMetrics, rgTServerMetrics); + for (ByteBuffer binary : response.getMetrics()) { + flatbuffer = FMetric.getRootAsFMetric(binary, flatbuffer); + final String metricName = flatbuffer.name(); + if (metricName.equals(Metric.SCAN_OPEN_FILES.getName())) { + long files = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScanTotalOpenFiles().addAndGet(files); + } else if (metricName.equals(Metric.SCAN_SCANNED_ENTRIES.getName())) { + long scanned = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScansTotalKvScanned().addAndGet(scanned); + } else if (metricName.equals(Metric.SCAN_QUERY_SCAN_RESULTS.getName())) { + long results = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScansTotalKvReturned().addAndGet(results); + } else if (metricName.equals(Metric.SCAN_QUERY_SCAN_RESULTS_BYTES.getName())) { + long resultSize = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getScansTotalKvReturnedBytes().addAndGet(resultSize); + } else if (metricName.equals(Metric.TSERVER_HOLD.getName())) { + long held = getMetricValue(flatbuffer).longValue(); + if (held > 0) { + this.instanceOverview.ingestNumTServersHolding.incrementAndGet(); + } + } else if (metricName.equals(Metric.TSERVER_INGEST_ENTRIES.getName())) { + long ingestEntries = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getIngestTotalEntries().addAndGet(ingestEntries); + } else if (metricName.equals(Metric.TSERVER_INGEST_BYTES.getName())) { + long ingestEntriesBytes = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getIngestTotalEntriesBytes().addAndGet(ingestEntriesBytes); + } else if (metricName.equals(Metric.TSERVER_MEM_ENTRIES.getName())) { + long entriesInMem = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getIngestTotalEntriesInMem().addAndGet(entriesInMem); + } else if (metricName.equals(Metric.TSERVER_MINC_QUEUED.getName())) { + long mincQueued = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getTotalMinCQueued().addAndGet(mincQueued); + } else if (metricName.equals(Metric.TSERVER_MINC_RUNNING.getName())) { + long mincRunning = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getTotalMinCRunning().addAndGet(mincRunning); + } else if (metricName.equals(Metric.TSERVER_MINC_TOTAL.getName())) { + long mincCompleted = getMetricValue(flatbuffer).longValue(); + this.instanceOverview.getTotalMinCCompleted().addAndGet(mincCompleted); + } else if (metricName.equals(Metric.LOW_MEMORY.getName())) { + long lowMem = getMetricValue(flatbuffer).longValue(); + if (lowMem > 0) { + this.instanceOverview.getTotalServersLowMem().incrementAndGet(); + } + } + } break; default: LOG.error("Unhandled server type in fetch metric response: {}", response.serverType); @@ -917,7 +1183,8 @@ public void processTabletInformation(TableId tableId, String tableName, TabletIn final SanitizedTabletInformation sti = new SanitizedTabletInformation(info); tablets.computeIfAbsent(tableId, (t) -> Collections.synchronizedList(new ArrayList<>())) .add(sti); - tables.computeIfAbsent(tableId, (t) -> new TableSummary(tableName)).addTablet(sti); + tables.computeIfAbsent(tableId, (t) -> new TableSummary(tableName)).addTablet(sti, + instanceOverview); if (sti.getNumWalLogs() > 0) { String loc = sti.getLocation().orElse(""); int idx = loc.indexOf(':'); @@ -959,9 +1226,22 @@ public void processTabletInformation(TableId tableId, String tableName, TabletIn public void processFateTransactions(List transactions) { transactions.forEach(t -> { + FateOperation op = t.getFateOp(); + TStatus status = t.getStatus(); + if (status == TStatus.SUBMITTED) { + instanceOverview.getTotalFateSubmitted().incrementAndGet(); + if (op == FateOperation.TABLE_BULK_IMPORT2) { + instanceOverview.getIngestBulkImportQueued().incrementAndGet(); + } + } else if (status == TStatus.IN_PROGRESS) { + instanceOverview.getTotalFateRunning().incrementAndGet(); + if (op == FateOperation.TABLE_BULK_IMPORT2) { + instanceOverview.getIngestBulkImportRunning().incrementAndGet(); + } + } fateTransactions - .add(new FateTransaction(t.getInstanceType(), t.getFateOp(), t.getFateId().getTxUUIDStr(), - t.getStatus(), t.getTimeCreated(), t.getHeldLocks(), t.getWaitingLocks(), + .add(new FateTransaction(t.getInstanceType(), op, t.getFateId().getTxUUIDStr(), status, + t.getTimeCreated(), t.getHeldLocks(), t.getWaitingLocks(), t.getLockRange().isInfinite() ? LockRangeType.FULL : LockRangeType.PARTIAL)); }); } @@ -981,6 +1261,7 @@ public void processActiveScans(ServerId server, List scans) { activeScans.add(new Scan(server.toHostPortString(), server.getType().name(), server.getResourceGroup().canonical(), s.getTableId(), s.getScanId(), s.getClient(), s.getUser(), s.getState().name(), s.getType().name(), s.getAge(), s.getIdleTime())); + instanceOverview.getScansTotalInProgress().incrementAndGet(); }); } @@ -1266,6 +1547,12 @@ public void finish(final List failures, final List' + - 'No deployment data is currently available.'); - } else { - $('#deploymentWarning').empty(); - } + var data = JSON.parse(sessionStorage.deployment); + var breakdown = Array.isArray(data.breakdown) ? data.breakdown : []; + deploymentBreakdown = breakdown; - renderDeploymentMatrix(breakdown); - }); + if (breakdown.length === 0) { + $('#deploymentWarning').html(''); + } else { + $('#deploymentWarning').empty(); + } + + renderDeploymentMatrix(breakdown); } function renderDeploymentMatrix(breakdown) { diff --git a/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/overview.ftl b/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/overview.ftl index ada148dddfb..a06db211050 100644 --- a/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/overview.ftl +++ b/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/overview.ftl @@ -24,13 +24,75 @@
-
-
-
-
- - <#include "table_loading.ftl" > -
+
+
+
+
+
+
Server Deployment
+
+
+
+ + <#include "table_loading.ftl" > +
+
+
+
+
+
+
+ +
+
+
+
Instance
+
    +
  • +
    + Loading... +
    + Loading... +
  • +
+
+
+
+
+
Ingest
+
    +
  • +
    + Loading... +
    + Loading... +
  • +
+
+
+
+
+
Scan
+
    +
  • +
    + Loading... +
    + Loading... +
  • +
+
+
+
Compaction
+
    +
  • +
    + Loading... +
    + Loading... +
  • +
+