Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions oak-core/src/main/java/org/apache/jackrabbit/oak/Oak.java
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,10 @@ public Oak with(@NotNull Whiteboard whiteboard) {
LOG.info("Registered optimize XPath union feature: " + QueryEngineSettings.FT_OPTIMIZE_XPATH_UNION);
closer.register(optimizeXPathUnion);
queryEngineSettings.setOptimizeXPathUnion(optimizeXPathUnion);
Feature ignoreLimitInIndexSelection = newFeature(QueryEngineSettings.FT_IGNORE_LIMIT_IN_INDEX_SELECTION, whiteboard);
LOG.info("Registered ignore limit in index selection feature: " + QueryEngineSettings.FT_IGNORE_LIMIT_IN_INDEX_SELECTION);
closer.register(ignoreLimitInIndexSelection);
queryEngineSettings.setIgnoreLimitInIndexSelectionFeature(ignoreLimitInIndexSelection);
}

return this;
Expand Down Expand Up @@ -1021,6 +1025,10 @@ public void setOptimizeXPathUnion(@Nullable Feature feature) {
settings.setOptimizeXPathUnion(feature);
}

/**
 * Sets the feature toggle that controls whether LIMIT is ignored during
 * index selection (OAK-12057); delegates to the underlying
 * {@code QueryEngineSettings}.
 *
 * @param feature the feature toggle, or {@code null} if the toggle is not used
 */
public void setIgnoreLimitInIndexSelectionFeature(@Nullable Feature feature) {
settings.setIgnoreLimitInIndexSelectionFeature(feature);
}

@Override
public void setQueryValidatorPattern(String key, String pattern, String comment, boolean failQuery) {
settings.getQueryValidator().setPattern(key, pattern, comment, failQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ public class QueryEngineSettings implements QueryEngineSettingsMBean, QueryLimit

public static final String FT_OPTIMIZE_XPATH_UNION = "FT_OAK-12007";

public static final String FT_IGNORE_LIMIT_IN_INDEX_SELECTION = "FT_OAK-12057";

public static final int DEFAULT_PREFETCH_COUNT = Integer.getInteger(OAK_QUERY_PREFETCH_COUNT, -1);

public static final String OAK_QUERY_FAIL_TRAVERSAL = "oak.queryFailTraversal";
Expand Down Expand Up @@ -130,6 +132,7 @@ public class QueryEngineSettings implements QueryEngineSettingsMBean, QueryLimit
private Feature optimizeInRestrictionsForFunctions;
private Feature sortUnionQueryByScoreFeature;
private Feature optimizeXPathUnion;
private Feature ignoreLimitInIndexSelectionFeature;

private String autoOptionsMappingJson = "{}";
private QueryOptions.AutomaticQueryOptionsMapping autoOptionsMapping = new QueryOptions.AutomaticQueryOptionsMapping(autoOptionsMappingJson);
Expand Down Expand Up @@ -272,6 +275,16 @@ public boolean isOptimizeXPathUnionEnabled() {
return optimizeXPathUnion != null && optimizeXPathUnion.isEnabled();
}

/**
 * Sets the feature toggle controlling whether LIMIT is ignored during
 * index selection (OAK-12057). When no toggle is set ({@code null}),
 * the behavior defaults to enabled (see {@code isIgnoreLimitInIndexSelection}).
 *
 * @param feature the feature toggle, or {@code null} if the toggle is not used
 */
public void setIgnoreLimitInIndexSelectionFeature(@Nullable Feature feature) {
this.ignoreLimitInIndexSelectionFeature = feature;
}

/**
 * Whether the query engine should ignore LIMIT when selecting an index.
 * Enabled by default: returns {@code true} when no feature toggle was
 * registered, otherwise follows the toggle's current state.
 */
@Override
public boolean isIgnoreLimitInIndexSelection() {
    Feature toggle = ignoreLimitInIndexSelectionFeature;
    // Enabled unless a registered feature toggle explicitly disables it.
    if (toggle == null) {
        return true;
    }
    return toggle.isEnabled();
}

public String getStrictPathRestriction() {
return strictPathRestriction.name();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,11 @@
double almostBestCost = Double.POSITIVE_INFINITY;
IndexPlan almostBestPlan = null;

long maxEntryCount = saturatedAdd(offset.orElse(0L), limit.orElse(Long.MAX_VALUE));
// Legacy behavior: cap entry count by offset+limit for index cost calculation
Long maxEntryCount = null;
if (!settings.isIgnoreLimitInIndexSelection()) {
maxEntryCount = saturatedAdd(offset.orElse(0L), limit.orElse(Long.MAX_VALUE));
}

// Sort the indexes according to their minimum cost to be able to skip the remaining indexes if the cost of the
// current index is below the minimum cost of the next index.
Expand Down Expand Up @@ -1117,14 +1121,16 @@
if (p.getSupportsPathRestriction()) {
entryCount = scaleEntryCount(rootState, filter, entryCount);
}
if (sortOrder == null || p.getSortOrder() != null) {
// if the query is unordered, or
// if the query contains "order by" and the index can sort on that,
// then we don't need to read all entries from the index
entryCount = Math.min(maxEntryCount, entryCount);
if (maxEntryCount != null) {
if (sortOrder == null || p.getSortOrder() != null) {
// if the query is unordered, or
// if the query contains "order by" and the index can sort on that,
// then we don't need to read all entries from the index
entryCount = Math.min(maxEntryCount, entryCount);
}
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bhabegger , I think we should not just remove this condition. Take the following example:
Let's say we have a query with a sort, but only one index supports the sorted filter and the others don't. The cost estimation will now be based on index size, but in fact the index which supports sorting may be better.

The addition of the limit brings all indexes to the same level because the limit is used to compute the max entry count; maybe what we need is to bring the index's entry count into the picture when finalising the best index.
long entryCount = p.getEstimatedEntryCount();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue with the limit is that the same index will not be chosen with or without the limit, which in certain cases is surprising.

This said, I completely understand taking a sorting-capable index over one which doesn't support it. So here the challenge is that we have cases which favor one and cases which favor the other. But maybe, couldn't we always favor indices which support sorting systematically, rather than only when there is a limit?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO yes; sorting is something that needs the complete result set before we can return results. If we already have an index solving the sorting, it saves all the JCR calls we would otherwise have to make to fetch all the data and then sort it in memory.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But maybe, couldn't we always favor indices which support sorting systematically rather than only if their is a limit ?

Most relational databases account for this by adding a "cost to sort". In our case, the indexes return the same cost (which is fine), and report whether they can sort or not. So the query engine's task is to add a "cost to sort" (on top of the cost returned by the indexes) for indexes that can not sort. This almost exactly matches the description "favor indices which support sorting systematically", but not 100%. What it means is that an index that can not sort can still be cheaper in total than an index that can sort. It just depends on the cost of sorting in the query engine.

(In Jackrabbit Oak, the Lucene and Elastic indexes often report wildly inaccurate numbers (orders of magnitude wrong), because we do not have accurate statistics currently. So such improvements will likely not have a big impact currently.)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should split the discussion:

  • General discussion on sorting and limit
  • What we do in this particular PR ?

For this PR I see the following options:

  • We keep it as is to favor the same index selection with or without limit (original ticket)
  • We skip it to favor an speed in case of limit (and accept that a different index maybe selected with or without limit)
  • We change it to add some sorting cost

If we go for the last, what should we add ? What would be the formula ?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What we do in this particular PR ?

I think we agree we don't want to add more to the PR... Yes it's not perfect, but perfection is the enemy of the good...

On the other hand, if we don't have an agreement on whether this PR might be risky or not, I think we could introduce a feature toggle. @tihom88 WDYT?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can go ahead with this approach. The limit checks were also added quite recently.
But we should add cost for sorting logic in a separate jira.

double c = p.getCostPerExecution() + entryCount * p.getCostPerEntry();

Check warning on line 1133 in oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryImpl.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Merge this if statement with the enclosing one.

See more on https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ13GLbFus-xC6EIPvwV&open=AZ13GLbFus-xC6EIPvwV&pullRequest=2724
if (LOG.isDebugEnabled()) {
String plan = advIndex.getPlanDescription(p, rootState);
String msg = String.format("cost for [%s] of type (%s) with plan [%s] is %1.2f", p.getPlanName(), indexName, plan, c);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@

import java.text.ParseException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.jcr.RepositoryException;

Expand Down Expand Up @@ -88,7 +86,7 @@ public void limitUnionSize() throws ParseException {
+ " AND ((CONTAINS(*, '10') AND ([jcr:uuid] LIKE '11' OR [jcr:uuid] LIKE '12'))\n"
+ " OR (CONTAINS(*, '13') AND ([jcr:uuid] LIKE '14' OR [jcr:uuid] LIKE '15')))";
SQL2Parser parser = SQL2ParserTest.createTestSQL2Parser(
getMappings(), getNodeTypes(), qeSettings);
getMappings(), getNodeTypes());
Query original;
original = parser.parse(query, false);
assertNotNull(original);
Expand Down Expand Up @@ -219,7 +217,7 @@ private static Tree addChildWithProperty(@NotNull Tree father, @NotNull String n
@Test
public void optimise() throws ParseException {
SQL2Parser parser = SQL2ParserTest.createTestSQL2Parser(
getMappings(), getNodeTypes(), qeSettings);
getMappings(), getNodeTypes());
String statement;
Query original, optimised;

Expand Down Expand Up @@ -294,7 +292,7 @@ public void optimiseAndOrAnd() throws ParseException {

private void optimiseAndOrAnd(String statement, String expected) throws ParseException {
SQL2Parser parser = SQL2ParserTest.createTestSQL2Parser(
getMappings(), getNodeTypes(), qeSettings);
getMappings(), getNodeTypes());
Query original;
original = parser.parse(statement, false);
assertNotNull(original);
Expand All @@ -310,7 +308,7 @@ private void optimiseAndOrAnd(String statement, String expected) throws ParseExc
@Test
public void optimizeKeepsQueryOptions() throws ParseException {
SQL2Parser parser = SQL2ParserTest.createTestSQL2Parser(
getMappings(), getNodeTypes(), qeSettings);
getMappings(), getNodeTypes());
Query original;
String statement = "select * from [nt:unstructured] as [c] " +
"where [a]=1 or [b]=2 option(index tag x)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,22 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.BDDMockito.given;
import static org.mockito.Mockito.mock;

import java.text.ParseException;
import java.util.List;

import org.apache.jackrabbit.oak.namepath.NamePathMapper;
import org.apache.jackrabbit.oak.query.ast.NodeTypeInfoProvider;
import org.apache.jackrabbit.oak.query.stats.QueryStatsData;
import org.apache.jackrabbit.oak.query.xpath.XPathToSQL2Converter;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.jetbrains.annotations.NotNull;
import org.junit.Ignore;
import org.junit.Test;

Expand All @@ -40,14 +49,12 @@
private static final SQL2Parser p = createTestSQL2Parser();

public static SQL2Parser createTestSQL2Parser() {
return createTestSQL2Parser(NamePathMapper.DEFAULT, nodeTypes, new QueryEngineSettings());
return createTestSQL2Parser(NamePathMapper.DEFAULT, nodeTypes);
}

public static SQL2Parser createTestSQL2Parser(NamePathMapper mappings, NodeTypeInfoProvider nodeTypes2,
QueryEngineSettings qeSettings) {
public static SQL2Parser createTestSQL2Parser(NamePathMapper mappings, NodeTypeInfoProvider nodeTypes2) {
QueryStatsData data = new QueryStatsData("", "");
return new SQL2Parser(mappings, nodeTypes2, new QueryEngineSettings(),
data.new QueryExecutionStats());
return new SQL2Parser(mappings, nodeTypes2, new QueryEngineSettings(), data.new QueryExecutionStats());
}


Expand Down Expand Up @@ -87,6 +94,60 @@
assertTrue(q.contains(token));
}

/*
* When a query has a LIMIT option, it should still select the index with the fewest
* entries, as those entries might need to be traversed during post-filtering.
*
* See OAK-12057
*/
@Test
public void testPlanningWithLimit() throws ParseException {
// Given
var query = "SELECT * \n" +
"FROM [nt:base] AS s \n" +
"WHERE ISDESCENDANTNODE(s, '/content') AND s.[j:c]='/conf/wknd'\n" +
"OPTION (LIMIT 2)";

Check warning on line 109 in oak-core/src/test/java/org/apache/jackrabbit/oak/query/SQL2ParserTest.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Replace this String concatenation with Text block.

See more on https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ13GLexus-xC6EIPvwW&open=AZ13GLexus-xC6EIPvwW&pullRequest=2724

// - the first available option
var indexA = mock(DummyQueryIndex.class);
var planA = mock(QueryIndex.IndexPlan.class);
given(indexA.getPlans(any(), any(), any())).willReturn(List.of(planA));
given(planA.getPlanName()).willReturn("planA");
given(planA.getEstimatedEntryCount()).willReturn(10000L); // more entries
given(planA.getCostPerEntry()).willReturn(1.0);
given(planA.getCostPerExecution()).willReturn(100.0);

// - the better option
var indexB = mock(DummyQueryIndex.class);
var planB = mock(QueryIndex.IndexPlan.class);
given(indexB.getPlans(any(), any(), any())).willReturn(List.of(planB));
given(planB.getPlanName()).willReturn("planB");
given(planB.getEstimatedEntryCount()).willReturn(100L); // less entries
given(planB.getCostPerEntry()).willReturn(1.0);
given(planB.getCostPerExecution()).willReturn(100.0);
given(indexB.getPlanDescription(eq(planB), any())).willReturn("planB");

var indexProvider = new QueryIndexProvider() {
@Override
public @NotNull List<? extends QueryIndex> getQueryIndexes(NodeState nodeState) {
return List.of(indexA, indexB);
}
};

var context = mock(ExecutionContext.class);
given(context.getIndexProvider()).willReturn(indexProvider);

// When
var parsedQuery = p.parse(query,false);
parsedQuery.init();
parsedQuery.setExecutionContext(context);
parsedQuery.setTraversalEnabled(false);
parsedQuery.prepare();

// Then
assertEquals("[nt:base] as [s] /* planB */", parsedQuery.getPlan());
}

@Test
public void testCoalesce() throws ParseException {
p.parse("SELECT * FROM [nt:base] WHERE COALESCE([j:c/m/d:t], [j:c/j:t])='a'");
Expand Down Expand Up @@ -188,4 +249,14 @@
xpath = "//(element(*, type1) | element(*, type2))[@a='b' or @c='d'] order by @foo";
assertTrue("Converted xpath " + xpath + "doesn't end with 'order by [foo]'", c.convert(xpath).endsWith("order by [foo]"));
}

interface DummyQueryIndex extends QueryIndex, QueryIndex.AdvancedQueryIndex {
/*
* This is needed because AdvancedQueryIndex is what we want to mock, but the
* QueryIndexProvider returns QueryIndex instances. In reality, implementations
* of AdvancedQueryIndex also implement QueryIndex, but AdvancedQueryIndex does
* not explicitly extend QueryIndex.
*
* Making this link explicit would break the SPI versioning.
*/
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.jackrabbit.api.JackrabbitSession;
import org.apache.jackrabbit.oak.commons.junit.LogCustomizer;
import org.apache.jackrabbit.oak.jcr.Jcr;
import org.apache.jackrabbit.oak.jcr.query.QueryImpl;
import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexPlan;
import org.apache.jackrabbit.oak.query.QueryEngineSettings;
import org.apache.jackrabbit.oak.query.index.FilterImpl;
Expand All @@ -36,7 +37,6 @@
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import javax.jcr.Node;
import javax.jcr.Repository;
import javax.jcr.SimpleCredentials;
import java.io.File;
Expand All @@ -49,20 +49,12 @@ public class IndexCostEvaluationTest {
@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));

private static final String TEST_USER_NAME = "testUserName";

private Repository repository = null;
private JackrabbitSession session = null;
private Node root = null;
private LogCustomizer custom;

@Before
public void before() throws Exception {
custom = LogCustomizer
.forLogger(
"org.apache.jackrabbit.oak.query.QueryImpl")
.enable(Level.DEBUG).create();
custom.starting();


TestIndexProvider testProvider = new TestIndexProvider();
Expand All @@ -75,8 +67,14 @@ public void before() throws Exception {
.with((QueryIndexProvider) testProvider3);

repository = jcr.createRepository();
custom = LogCustomizer
.forLogger(
"org.apache.jackrabbit.oak.query.QueryImpl")
.enable(Level.DEBUG).create();
custom.starting();

session = (JackrabbitSession) repository.login(new SimpleCredentials("admin", "admin".toCharArray()));
root = session.getRootNode();
session.getRootNode();
}


Expand All @@ -93,6 +91,11 @@ public void after() {
// even of cost from previous index is less than min cost of new index.
@Test
public void costEvaluationTest() throws Exception {

// run some non-trivial query so that all indexes get a chance to reply
session.getWorkspace().getQueryManager().
createQuery("select * from [nt:base] where [abc] = 1", QueryImpl.JCR_SQL2).execute();

boolean evaluationContinueLogPresent = false;
boolean evaluationSkipLogPresent = false;
for (String log : custom.getLogs()) {
Expand Down Expand Up @@ -171,7 +174,7 @@ public String getIndexName() {
public List<IndexPlan> getPlans(Filter filter, List<OrderEntry> sortOrder, NodeState rootState) {
IndexPlan.Builder b = new IndexPlan.Builder();
Filter f = new FilterImpl(null, "SELECT * FROM [nt:file]", new QueryEngineSettings());
IndexPlan plan1 = b.setEstimatedEntryCount(10).setPlanName("testIndexPlan1").setFilter(f).build();
IndexPlan plan1 = b.setEstimatedEntryCount(1).setPlanName("testIndexPlan1").setFilter(f).build();
List<IndexPlan> indexList = new ArrayList<IndexPlan>();

indexList.add(plan1);
Expand All @@ -188,4 +191,4 @@ public Cursor query(IndexPlan plan, NodeState rootState) {
return null;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.util.stream.Collectors.toMap;
import static org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;

/**
Expand Down Expand Up @@ -373,9 +372,6 @@ default boolean logWarningForPathFilterMismatch() {
* A builder for index plans.
*/
class Builder {

private static Logger LOG = LoggerFactory.getLogger(QueryIndex.IndexPlan.Builder.class);

protected double costPerExecution = 1.0;
protected double costPerEntry = 1.0;
protected long estimatedEntryCount = 1000000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,14 @@ default boolean isInferenceEnabled(){
return false;
};

/**
 * See OAK-12057. By default, LIMIT is ignored when selecting indexes,
 * allowing the query engine to select the index based on the best plan
 * regardless of any LIMIT in the query.
 *
 * @return {@code true} to ignore LIMIT in index selection,
 *         {@code false} for the legacy behavior (cap the estimated entry
 *         count by offset + limit during cost calculation)
 */
default boolean isIgnoreLimitInIndexSelection() {
return true;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/**
* This package contains oak query index related classes.
*/
@Version("3.2.0")
@Version("3.3.0")
package org.apache.jackrabbit.oak.spi.query;

import org.osgi.annotation.versioning.Version;
Loading