/* * Copyright 2006 Amazon Technologies, Inc. or its affiliates. * Amazon, Amazon.com and Carbonado are trademarks or registered trademarks * of Amazon Technologies, Inc. or its affiliates. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.amazon.carbonado.qe; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import com.amazon.carbonado.FetchException; import com.amazon.carbonado.RepositoryException; import com.amazon.carbonado.Storable; import com.amazon.carbonado.SupportException; import com.amazon.carbonado.filter.AndFilter; import com.amazon.carbonado.filter.ExistsFilter; import com.amazon.carbonado.filter.Filter; import com.amazon.carbonado.filter.OrFilter; import com.amazon.carbonado.filter.PropertyFilter; import com.amazon.carbonado.filter.Visitor; import com.amazon.carbonado.info.ChainedProperty; import com.amazon.carbonado.info.Direction; import com.amazon.carbonado.info.OrderedProperty; import com.amazon.carbonado.info.StorableIndex; import com.amazon.carbonado.info.StorableInfo; import com.amazon.carbonado.info.StorableIntrospector; import com.amazon.carbonado.info.StorableKey; /** * Analyzes a query specification and determines how it can be executed as a * union of smaller queries. If necessary, the UnionQueryAnalyzer will alter * the query slightly, imposing a total ordering. Internally, an {@link * IndexedQueryAnalyzer} is used for selecting the best indexes. * *

UnionQueryAnalyzer is sharable and thread-safe. An instance for a * particular Storable type can be cached, avoiding repeated construction * cost. In addition, the analyzer caches learned foreign indexes. * * @author Brian S O'Neill */ public class UnionQueryAnalyzer implements QueryExecutorFactory { final IndexedQueryAnalyzer mIndexAnalyzer; final RepositoryAccess mRepoAccess; /** * @param type type of storable being queried * @param access repository access for examing available indexes * @throws IllegalArgumentException if type or indexProvider is null */ public UnionQueryAnalyzer(Class type, RepositoryAccess access) { mIndexAnalyzer = new IndexedQueryAnalyzer(type, access); mRepoAccess = access; } public Class getStorableType() { return mIndexAnalyzer.getStorableType(); } /** * @param filter optional filter which must be {@link Filter#isBound bound} * @param ordering optional properties which define desired ordering * @param hints optional query hints */ public Result analyze(Filter filter, OrderingList ordering, QueryHints hints) throws SupportException, RepositoryException { if (filter != null && !filter.isBound()) { throw new IllegalArgumentException("Filter must be bound"); } if (ordering == null) { ordering = OrderingList.emptyList(); } return buildResult(filter, ordering, hints); } /** * Returns an executor that handles the given query specification. * * @param filter optional filter which must be {@link Filter#isBound bound} * @param ordering optional properties which define desired ordering * @param hints optional query hints */ public QueryExecutor executor(Filter filter, OrderingList ordering, QueryHints hints) throws RepositoryException { return analyze(filter, ordering, hints).createExecutor(); } /** * Splits the filter into sub-results, merges sub-results, and possibly * imposes a total ordering. */ private Result buildResult(Filter filter, OrderingList ordering, QueryHints hints) throws SupportException, RepositoryException { List.Result> subResults; if (filter == null) { subResults = Collections .singletonList(mIndexAnalyzer.analyze(filter, ordering, hints)); } else { subResults = splitIntoSubResults(filter, ordering, hints); } if (subResults.size() <= 1) { // Total ordering not required. return new Result(subResults); } // If any orderings have an unspecified direction, switch to ASCENDING // or DESCENDING, depending on which is more popular. Then build new // sub-results. for (int pos = 0; pos < ordering.size(); pos++) { OrderedProperty op = ordering.get(pos); if (op.getDirection() != Direction.UNSPECIFIED) { continue; } // Find out which direction is most popular for this property. Tally tally = new Tally(op.getChainedProperty()); for (IndexedQueryAnalyzer.Result result : subResults) { tally.increment(findHandledDirection(result, op)); } ordering = ordering.replace(pos, op.direction(tally.getBestDirection())); // Re-calc with specified direction. Only do one property at a time // since one simple change might alter the query plan. subResults = splitIntoSubResults(filter, ordering, hints); if (subResults.size() <= 1) { // Total ordering no longer required. return new Result(subResults); } } // Gather all the keys available. As ordering properties touch key // properties, they are removed from all key sets. When a key set size // reaches zero, total ordering has been achieved. List>> keys = getKeys(); // Check if current ordering is total. for (OrderedProperty op : ordering) { ChainedProperty property = op.getChainedProperty(); if (pruneKeys(keys, property)) { // Found a key which is fully covered, indicating total ordering. return new Result(subResults, ordering); } } // Create a super key which contains all the properties required for // total ordering. The goal here is to append these properties to the // ordering in a fashion that takes advantage of each index's natural // ordering. This in turn should cause any sort operation to operate // over smaller groups. Smaller groups means smaller sort buffers. // Smaller sort buffers makes a merge sort happy. // Super key could be stored simply in a set, but a map makes it // convenient for tracking tallies. Map, Tally> superKey = new LinkedHashMap, Tally>(); for (Set> key : keys) { for (ChainedProperty property : key) { if (!superKey.containsKey(property)) { superKey.put(property, new Tally(property)); } } } // Keep looping until total ordering achieved. while (true) { // For each ordering score, iterate over the entire unused ordering // properties and select the next free property. If property is in // the super key increment a tally associated with property // direction. Choose the property with the best tally and augment // the orderings with it and create new sub-results. Remove the // property from the super key and the key set. If any key is now // fully covered, a total ordering has been achieved. for (IndexedQueryAnalyzer.Result result : subResults) { OrderingScore score = result.getCompositeScore().getOrderingScore(); OrderingList unused = score.getUnusedOrdering(); if (unused.size() > 0) { for (OrderedProperty prop : unused) { ChainedProperty chainedProp = prop.getChainedProperty(); Tally tally = superKey.get(chainedProp); if (tally != null) { tally.increment(prop.getDirection()); } } } OrderingList free = score.getFreeOrdering(); if (free.size() > 0) { OrderedProperty prop = free.get(0); ChainedProperty chainedProp = prop.getChainedProperty(); Tally tally = superKey.get(chainedProp); if (tally != null) { tally.increment(prop.getDirection()); } } } Tally best = bestTally(superKey.values()); ChainedProperty bestProperty = best.getProperty(); // Now augment the orderings and create new sub-results. ordering = ordering.concat(OrderedProperty.get(bestProperty, best.getBestDirection())); subResults = splitIntoSubResults(filter, ordering, hints); if (subResults.size() <= 1) { // Total ordering no longer required. break; } // Remove property from super key and key set... superKey.remove(bestProperty); if (superKey.size() == 0) { break; } if (pruneKeys(keys, bestProperty)) { break; } // Clear the tallies for the next run. for (Tally tally : superKey.values()) { tally.clear(); } } return new Result(subResults, ordering); } /** * Returns a list of all primary and alternate keys, stripped of ordering. */ private List>> getKeys() throws SupportException, RepositoryException { StorableInfo info = StorableIntrospector.examine(mIndexAnalyzer.getStorableType()); List>> keys = new ArrayList>>(); keys.add(stripOrdering(info.getPrimaryKey().getProperties())); for (StorableKey altKey : info.getAlternateKeys()) { keys.add(stripOrdering(altKey.getProperties())); } // Also fold in all unique indexes, just in case they weren't reported // as actual keys. Collection> indexes = mRepoAccess.storageAccessFor(getStorableType()).getAllIndexes(); for (StorableIndex index : indexes) { if (!index.isUnique()) { continue; } int propCount = index.getPropertyCount(); Set> props = new LinkedHashSet>(propCount); for (int i=0; i> stripOrdering(Set> orderedProps) { Set> props = new LinkedHashSet>(orderedProps.size()); for (OrderedProperty ordering : orderedProps) { props.add(ordering.getChainedProperty()); } return props; } /** * Removes the given property from all keys, returning true if any key has * zero properties as a result. */ private boolean pruneKeys(List>> keys, ChainedProperty property) { boolean result = false; for (Set> key : keys) { key.remove(property); if (key.size() == 0) { result = true; continue; } } return result; } private Tally bestTally(Iterable tallies) { Tally best = null; for (Tally tally : tallies) { if (best == null || tally.compareTo(best) > 0) { best = tally; } } return best; } private Direction findHandledDirection(IndexedQueryAnalyzer.Result result, OrderedProperty unspecified) { ChainedProperty chained = unspecified.getChainedProperty(); OrderingScore score = result.getCompositeScore().getOrderingScore(); OrderingList handled = score.getHandledOrdering(); for (OrderedProperty property : handled) { if (chained.equals(property.getChainedProperty())) { return property.getDirection(); } } return Direction.UNSPECIFIED; } /** * Splits the filter into sub-results and possibly merges them. */ private List.Result> splitIntoSubResults(Filter filter, OrderingList ordering, QueryHints hints) throws SupportException, RepositoryException { // Required for split to work. Filter dnfFilter = filter.disjunctiveNormalForm(); Splitter splitter = new Splitter(ordering, hints); RepositoryException e = dnfFilter.accept(splitter, null); if (e != null) { throw e; } List.Result> subResults = splitter.mSubResults; // Check if any sub-result handles nothing. If so, a full scan is the // best option for the entire query and all sub-results merge into a // single sub-result. Any sub-results which filter anything and contain // a join property in the filter are exempt from the merge. This is // because fewer joins are read than if a full scan is performed for // the entire query. The resulting union has both a full scan and an // index scan. IndexedQueryAnalyzer.Result full = null; for (IndexedQueryAnalyzer.Result result : subResults) { if (!result.handlesAnything()) { full = result; break; } if (!result.getCompositeScore().getFilteringScore().hasAnyMatches()) { if (full == null) { // This index is used only for its ordering, and it will be // tentatively selected as the "full scan". If a result is // found doesn't use an index for anything, then it becomes // the "full scan" index. full = result; } } } if (full == null) { // Okay, no full scan needed. return subResults; } List.Result> mergedResults = new ArrayList.Result>(); for (IndexedQueryAnalyzer.Result result : subResults) { if (result == full) { // Add after everything has been merged into it. continue; } boolean exempt = result.getCompositeScore().getFilteringScore().hasAnyMatches(); if (exempt) { // Must also have a join in the filter to be exempt. List> subFilters = PropertyFilterList.get(result.getFilter()); joinCheck: { for (PropertyFilter subFilter : subFilters) { if (subFilter.getChainedProperty().getChainCount() > 0) { // A chain implies a join was followed, so result is exempt. break joinCheck; } } // No joins found, result is not exempt from merging into full scan. exempt = false; } } if (exempt) { mergedResults.add(result); } else { full = full.mergeRemainderFilter(result.getFilter()); } } if (mergedResults.size() == 0) { // Nothing was exempt. Rather than return a result with a dnf // filter, return full scan with a simpler reduced filter. full = full.withRemainderFilter(filter.reduce()); } mergedResults.add(full); return mergedResults; } public class Result { private final List.Result> mSubResults; private final OrderingList mTotalOrdering; Result(List.Result> subResults) { this(subResults, null); } Result(List.Result> subResults, OrderingList totalOrdering) { if (subResults.size() < 1) { throw new IllegalArgumentException(); } mSubResults = Collections.unmodifiableList(subResults); mTotalOrdering = totalOrdering; } /** * Returns results for each sub-query to be executed in the union. If * only one result is returned, then no union needs to be performed. */ public List.Result> getSubResults() { return mSubResults; } /** * Returns a total ordering, if one was imposed. Otherwise, null is returned. */ public OrderingList getTotalOrdering() { return mTotalOrdering; } /** * Creates a QueryExecutor based on this result. */ public QueryExecutor createExecutor() throws SupportException, FetchException, RepositoryException { List.Result> subResults = getSubResults(); int size = subResults.size(); if (size == 1) { return subResults.get(0).createExecutor(); } List> executors = new ArrayList>(size); for (int i=0; i(executors, mTotalOrdering); } } /** * Used to track which property direction is most popular. */ private class Tally implements Comparable { private final ChainedProperty mProperty; private int mAscendingCount; private int mDescendingCount; Tally(ChainedProperty property) { mProperty = property; } ChainedProperty getProperty() { return mProperty; } void increment(Direction dir) { switch (dir) { case UNSPECIFIED: mAscendingCount++; mDescendingCount++; break; case ASCENDING: mAscendingCount++; break; case DESCENDING: mDescendingCount++; break; } } /** * Only returns ASCENDING or DESCENDING. */ Direction getBestDirection() { if (mAscendingCount >= mDescendingCount) { return Direction.ASCENDING; } return Direction.DESCENDING; } int getBestCount() { if (mAscendingCount >= mDescendingCount) { return mAscendingCount; } return mDescendingCount; } void clear() { mAscendingCount = 0; mDescendingCount = 0; } /** * Returns -1 if this tally is worse. */ public int compareTo(Tally other) { int thisBest = getBestCount(); int otherBest = other.getBestCount(); if (thisBest < otherBest) { return -1; } if (thisBest > otherBest) { return 1; } return 0; } @Override public String toString() { return "Tally: {property=" + mProperty + ", asc=" + mAscendingCount + ", desc=" + mDescendingCount + '}'; } } /** * Analyzes a disjunctive normal filter into sub-results over filters that * only contain 'and' operations. */ private class Splitter extends Visitor { private final OrderingList mOrdering; private final QueryHints mHints; final List.Result> mSubResults; Splitter(OrderingList ordering, QueryHints hints) { mOrdering = ordering; mHints = hints; mSubResults = new ArrayList.Result>(); } @Override public RepositoryException visit(OrFilter filter, Object param) { try { Filter left = filter.getLeftFilter(); if (!(left instanceof OrFilter)) { subAnalyze(left); } else { RepositoryException e = left.accept(this, param); if (e != null) { return e; } } Filter right = filter.getRightFilter(); if (!(right instanceof OrFilter)) { subAnalyze(right); } else { RepositoryException e = right.accept(this, param); if (e != null) { return e; } } return null; } catch (RepositoryException e) { return e; } } // This method should only be called if root filter has no 'or' operators. @Override public RepositoryException visit(AndFilter filter, Object param) { try { subAnalyze(filter); return null; } catch (RepositoryException e) { return e; } } // This method should only be called if root filter has no logical operators. @Override public RepositoryException visit(PropertyFilter filter, Object param) { try { subAnalyze(filter); return null; } catch (RepositoryException e) { return e; } } // This method should only be called if root filter has no logical operators. @Override public RepositoryException visit(ExistsFilter filter, Object param) { try { subAnalyze(filter); return null; } catch (RepositoryException e) { return e; } } private void subAnalyze(Filter subFilter) throws SupportException, RepositoryException { IndexedQueryAnalyzer.Result subResult = mIndexAnalyzer.analyze(subFilter, mOrdering, mHints); // Rather than blindly add to mSubResults, try to merge with // another result. This in turn reduces the number of cursors // needed by the union. int size = mSubResults.size(); for (int i=0; i.Result existing = mSubResults.get(i); if (existing.canMergeRemainder(subResult)) { mSubResults.set(i, existing.mergeRemainder(subResult)); return; } } // Couldn't merge, so add a new entry. mSubResults.add(subResult); } } }