Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: "Support 'missing' stats count in rollup function for streaming expressions"
type: added
authors:
- name: khushjain
links:
- name: SOLR-18198
url: https://issues.apache.org/jira/browse/SOLR-18198
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ For faster aggregation over low to moderate cardinality fields, the `facet` func
* `StreamExpression` (Mandatory)
* `over`: (Mandatory) A list of fields to group by.
* `metrics`: (Mandatory) The list of metrics to compute.
Currently supported metrics are `sum(col)`, `avg(col)`, `min(col)`, `max(col)`, `count(*)`.
Currently supported metrics are `sum(col)`, `avg(col)`, `min(col)`, `max(col)`, `count(*)`, `missing(col)`.

=== rollup Syntax

Expand All @@ -1465,7 +1465,8 @@ rollup(
max(a_f),
avg(a_i),
avg(a_f),
count(*)
count(*),
missing(a_i)
)
----

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@
import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MeanMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MinMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MissingMetric;
import org.apache.solr.client.solrj.io.stream.metrics.PercentileMetric;
import org.apache.solr.client.solrj.io.stream.metrics.StdMetric;
import org.apache.solr.client.solrj.io.stream.metrics.SumMetric;
Expand Down Expand Up @@ -406,6 +407,7 @@ public static void register(StreamFactory streamFactory) {
.withFunctionName("std", StdMetric.class)
.withFunctionName("count", CountMetric.class)
.withFunctionName("countDist", CountDistinctMetric.class)
.withFunctionName("missing", MissingMetric.class)

// tuple manipulation operations
.withFunctionName("replace", ReplaceOperation.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.solr.client.solrj.io.stream.metrics.MeanMetric;
import org.apache.solr.client.solrj.io.stream.metrics.Metric;
import org.apache.solr.client.solrj.io.stream.metrics.MinMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MissingMetric;
import org.apache.solr.client.solrj.io.stream.metrics.SumMetric;
import org.apache.solr.client.solrj.io.stream.metrics.WeightedSumMetric;

Expand Down Expand Up @@ -133,6 +134,10 @@ default Optional<Metric[]> getRollupMetrics(Metric[] metrics) {
// can't properly rollup mean metrics w/o a count (reqd by WeightedSumMetric)
return Optional.empty();
}
} else if (next instanceof MissingMetric) {
// sum of missing counts
nextRollup = new SumMetric(next.getIdentifier());
nextRollup.outputLong = next.outputLong;
} else if (next instanceof CountDistinctMetric) {
// rollup of count distinct is the max across the tiers
nextRollup = new MaxMetric(next.getIdentifier());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.stream.metrics;

import java.io.IOException;
import java.util.Locale;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;

public class MissingMetric extends Metric {
private String columnName;
private long count;

public MissingMetric(String columnName) {
init("missing", columnName);
}

public MissingMetric(StreamExpression expression, StreamFactory factory) throws IOException {
String functionName = expression.getFunctionName();
String columnName = factory.getValueOperand(expression, 0);

if (null == columnName) {
throw new IOException(
String.format(
Locale.ROOT,
"Invalid expression %s - expected %s(columnName)",
expression,
functionName));
}
if (1 != expression.getParameters().size()) {
throw new IOException(
String.format(Locale.ROOT, "Invalid expression %s - unknown operands found", expression));
}

init(functionName, columnName);
}

private void init(String functionName, String columnName) {
this.columnName = columnName;
this.outputLong = true;
setFunctionName(functionName);
setIdentifier(functionName, "(", columnName, ")");
}

@Override
public String[] getColumns() {
return new String[] {columnName};
}

@Override
public void update(Tuple tuple) {
if (tuple.get(columnName) == null) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could "" ever be a value?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"" is a valid value for a field. tuple.get(columnName) would only be null when the field doesn't exist. Missing counts the number of documents that doesn't have the field itself. The behavior is consistent with how stats component computes missing.

++count;
}
}

@Override
public Long getValue() {
return count;
}

@Override
public Metric newInstance() {
return new MissingMetric(columnName);
}

@Override
public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
return new StreamExpression(getFunctionName()).withParameter(columnName);
}
}
Loading