Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
CalciteMultisearchCommandIT.class,
CalciteMultiValueStatsIT.class,
CalciteNewAddedCommandsIT.class,
CalciteNotLikeNullIT.class,
CalciteNowLikeFunctionIT.class,
CalciteObjectFieldOperateIT.class,
CalciteOperatorIT.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.remote;

import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.client.Request;
import org.opensearch.client.ResponseException;
import org.opensearch.sql.ppl.PPLIntegTestCase;

/**
 * Integration tests for negated predicates over null/missing field values. Covers the fix for issue
 * #5169: {@code NOT LIKE} (and negated comparisons) must exclude rows where the field is null or
 * missing.
 *
 * <p>The fixture index holds four documents with concrete values, one document whose fields are
 * explicitly {@code null}, and one document that omits both fields entirely, so that both the
 * "null" and the "missing" case are exercised by every test.
 */
public class CalciteNotLikeNullIT extends PPLIntegTestCase {

  private static final String TEST_INDEX = "issue5169_not_like_null";

  /** HTTP status returned by the DELETE index API when the index does not exist. */
  private static final int HTTP_NOT_FOUND = 404;

  @Override
  public void init() throws Exception {
    super.init();
    enableCalcite();
    createTestIndex();
  }

  /**
   * Recreates {@link #TEST_INDEX} from scratch and bulk-loads the fixture documents.
   *
   * @throws IOException if any request to the cluster fails
   */
  private void createTestIndex() throws IOException {
    deleteTestIndexIfExists();

    Request createIndex = new Request("PUT", "/" + TEST_INDEX);
    createIndex.setJsonEntity(
        "{\n"
            + "  \"settings\": {\"number_of_shards\": 1, \"number_of_replicas\": 0},\n"
            + "  \"mappings\": {\n"
            + "    \"properties\": {\n"
            + "      \"keyword_field\": {\"type\": \"keyword\"},\n"
            + "      \"int_field\": {\"type\": \"integer\"}\n"
            + "    }\n"
            + "  }\n"
            + "}");
    client().performRequest(createIndex);

    // refresh=true makes the documents searchable before the first query runs.
    Request bulkRequest = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true");
    bulkRequest.setJsonEntity(
        "{\"index\":{\"_id\":\"1\"}}\n"
            + "{\"keyword_field\": \"hello\", \"int_field\": 1}\n"
            + "{\"index\":{\"_id\":\"2\"}}\n"
            + "{\"keyword_field\": \"world\", \"int_field\": 2}\n"
            + "{\"index\":{\"_id\":\"3\"}}\n"
            + "{\"keyword_field\": \"\", \"int_field\": 3}\n"
            + "{\"index\":{\"_id\":\"4\"}}\n"
            + "{\"keyword_field\": \"special chars...\", \"int_field\": 4}\n"
            // Explicit nulls.
            + "{\"index\":{\"_id\":\"5\"}}\n"
            + "{\"keyword_field\": null, \"int_field\": null}\n"
            // Fields absent from the document altogether ("missing").
            + "{\"index\":{\"_id\":\"6\"}}\n"
            + "{}\n");
    client().performRequest(bulkRequest);
  }

  /**
   * Deletes {@link #TEST_INDEX}, tolerating only the "index does not exist" response. Any other
   * error status is rethrown so that a broken cluster is not mistaken for a clean slate.
   *
   * @throws IOException if the delete request fails for a reason other than a 404
   */
  private void deleteTestIndexIfExists() throws IOException {
    try {
      client().performRequest(new Request("DELETE", "/" + TEST_INDEX));
    } catch (ResponseException e) {
      if (e.getResponse().getStatusLine().getStatusCode() != HTTP_NOT_FOUND) {
        throw e;
      }
      // 404: the index was never created (or already removed), which is fine.
    }
  }

  @Test
  public void testNotLikeExcludesNull() throws IOException {
    // NOT LIKE '%ello%' should match 'world', '', 'special chars...' but NOT null/missing
    JSONObject result =
        executeQuery(
            "source="
                + TEST_INDEX
                + " | where NOT keyword_field LIKE '%ello%'"
                + " | sort keyword_field"
                + " | fields keyword_field");
    verifyDataRowsInOrder(result, rows(""), rows("special chars..."), rows("world"));
  }

  @Test
  public void testNotLikeWithNoMatch() throws IOException {
    // NOT LIKE '%zzz%' should return every row that has a value (4 rows), never null/missing
    JSONObject result =
        executeQuery(
            "source="
                + TEST_INDEX
                + " | where NOT keyword_field LIKE '%zzz%'"
                + " | sort keyword_field"
                + " | fields keyword_field");
    verifyDataRowsInOrder(result, rows(""), rows("hello"), rows("special chars..."), rows("world"));
  }

  @Test
  public void testNotGreaterThanExcludesNull() throws IOException {
    // NOT int_field > 2 should return rows with int_field 1, 2 but NOT null/missing
    JSONObject result =
        executeQuery(
            "source="
                + TEST_INDEX
                + " | where NOT int_field > 2"
                + " | sort int_field"
                + " | fields int_field");
    verifyDataRowsInOrder(result, rows(1), rows(2));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ calcite:
LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))])
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Fixture for issue 5169: creates a single-shard, zero-replica index with one keyword field
# and loads five documents into it.
setup:
- do:
indices.create:
index: issue5169_keyword
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
keyword_field:
type: keyword

# Bulk-load the documents; refresh: true makes them searchable before the test runs.
- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "issue5169_keyword", "_id": "1"}}'
- '{"keyword_field": "hello"}'
- '{"index": {"_index": "issue5169_keyword", "_id": "2"}}'
- '{"keyword_field": "world"}'
# Empty string is a real value (not null).
- '{"index": {"_index": "issue5169_keyword", "_id": "3"}}'
- '{"keyword_field": ""}'
- '{"index": {"_index": "issue5169_keyword", "_id": "4"}}'
- '{"keyword_field": "special chars..."}'
# Explicit null value for the field under test.
- '{"index": {"_index": "issue5169_keyword", "_id": "5"}}'
- '{"keyword_field": null}'

---
# Removes the fixture index; ignore_unavailable keeps the delete from failing if it is absent.
teardown:
- do:
indices.delete:
index: issue5169_keyword
ignore_unavailable: true

---
# Runs a PPL query with NOT ... LIKE '%ello%' against the fixture index and checks that exactly
# three rows come back.
"Issue 5169: NOT LIKE should exclude null/missing field rows":
# Skipped on runners that do not support the "headers" feature used below.
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=issue5169_keyword | where NOT keyword_field LIKE '%ello%' | fields keyword_field

# Both the reported total and the number of returned data rows must be 3.
- match: { total: 3 }
- length: { datarows: 3 }
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,8 @@ private QueryExpression prefix(RexCall call) {
throw new PredicateAnalyzerException(message);
}

Expression operandExpr = call.getOperands().get(0).accept(this);
RexNode innerOperand = call.getOperands().get(0);
Expression operandExpr = innerOperand.accept(this);
// Handle NOT(boolean_field) - Calcite simplifies "field = false" to NOT($field).
// In PPL semantics, "field = false" should only match documents where the field is
// explicitly false (not null or missing). This is achieved via term query {value: false}.
Expand All @@ -586,9 +587,36 @@ private QueryExpression prefix(RexCall call) {
return QueryExpression.create(namedField).isFalse();
}
QueryExpression expr = (QueryExpression) operandExpr;
// For null-intolerant predicates (LIKE, comparisons, equality, etc.),
// negation must also exclude documents where the field is NULL/missing.
// Truth-test operators (IS_TRUE, IS_NULL, etc.) already encode null
// semantics and must NOT get an additional exists filter.
if (isNullIntolerantPredicate(innerOperand) && expr instanceof SimpleQueryExpression sqe) {
return sqe.notWithExistsFilter();
}
return expr.not();
}

/**
 * Tells whether {@code node} is a call whose kind is one of the value-comparison predicates
 * (LIKE, equality/inequality, the four ordering comparisons, BETWEEN, SEARCH) that are treated
 * as null-intolerant.
 *
 * @param node expression to classify
 * @return {@code true} when {@code node} is a {@link RexCall} of one of the listed kinds;
 *     {@code false} for any other call kind or when {@code node} is not a {@link RexCall}
 */
private static boolean isNullIntolerantPredicate(RexNode node) {
  if (node instanceof RexCall innerCall) {
    switch (innerCall.getKind()) {
      case LIKE:
      case EQUALS:
      case NOT_EQUALS:
      case GREATER_THAN:
      case GREATER_THAN_OR_EQUAL:
      case LESS_THAN:
      case LESS_THAN_OR_EQUAL:
      case BETWEEN:
      case SEARCH:
        return true;
      default:
        return false;
    }
  }
  // Anything that is not a call (e.g. a bare reference or literal) is not a predicate.
  return false;
}

private QueryExpression postfix(RexCall call) {
checkArgument(
call.getKind() == SqlKind.IS_TRUE
Expand Down Expand Up @@ -1309,6 +1337,12 @@ public QueryExpression not() {
return this;
}

/**
 * Negates this expression while also requiring the referenced field to exist, so that
 * null/missing documents are not matched by the negation.
 *
 * <p>The current builder is replaced by a bool query with an {@code exists} clause under
 * {@code must} and the previous builder under {@code must_not}.
 *
 * @return this expression, now holding the combined bool query
 */
QueryExpression notWithExistsFilter() {
  var fieldExists = existsQuery(getFieldReference());
  var negated = builder();
  builder = boolQuery().must(fieldExists).mustNot(negated);
  return this;
}

@Override
public QueryExpression exists() {
builder = existsQuery(getFieldReference());
Expand Down
Loading
Loading