From da30ae6ebdf60268de42813c6943da946ff87bad Mon Sep 17 00:00:00 2001 From: "Brian S. O'Neill" Date: Tue, 23 Apr 2013 18:09:45 +0000 Subject: Add index repair options to discard duplicates or only verify. --- .../carbonado/repo/indexed/IndexedRepository.java | 14 +++++++++++ .../repo/indexed/IndexedRepositoryBuilder.java | 19 +++++++++++++++ .../carbonado/repo/indexed/IndexedStorage.java | 5 +++- .../carbonado/repo/indexed/ManagedIndex.java | 27 +++++++++++++++------- 4 files changed, 56 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepository.java b/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepository.java index 26b8ea8..6924d23 100644 --- a/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepository.java +++ b/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepository.java @@ -63,6 +63,8 @@ class IndexedRepository implements Repository, private final String mName; private final boolean mIndexRepairEnabled; private final double mIndexThrottle; + private final boolean mIndexDiscardDuplicates; + private final boolean mIndexRepairVerifyOnly; private final boolean mAllClustered; private final boolean mStrictTriggers; private final StoragePool mStoragePool; @@ -72,6 +74,8 @@ class IndexedRepository implements Repository, Repository repository, boolean indexRepairEnabled, double indexThrottle, + boolean indexDiscardDuplicates, + boolean indexRepairVerifyOnly, boolean allClustered, boolean strictTriggers) { @@ -86,6 +90,8 @@ class IndexedRepository implements Repository, mName = name; mIndexRepairEnabled = indexRepairEnabled; mIndexThrottle = indexThrottle; + mIndexDiscardDuplicates = indexDiscardDuplicates; + mIndexRepairVerifyOnly = indexRepairVerifyOnly; mAllClustered = allClustered; mStrictTriggers = strictTriggers; mIndexAnalysisPool = new IndexAnalysisPool(this); @@ -264,6 +270,14 @@ class IndexedRepository implements Repository, return mIndexThrottle; } + boolean getIndexDiscardDuplicates() { + return mIndexDiscardDuplicates; + } + + boolean getIndexRepairVerifyOnly() { + return mIndexRepairVerifyOnly; + } + boolean isAllClustered() { return mAllClustered; } diff --git a/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepositoryBuilder.java b/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepositoryBuilder.java index 27b8011..2612477 100644 --- a/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepositoryBuilder.java +++ b/src/main/java/com/amazon/carbonado/repo/indexed/IndexedRepositoryBuilder.java @@ -49,6 +49,8 @@ public class IndexedRepositoryBuilder extends AbstractRepositoryBuilder { private RepositoryBuilder mRepoBuilder; private boolean mIndexRepairEnabled = true; private double mIndexThrottle = 1.0; + private boolean mIndexDiscardDuplicates; + private boolean mIndexRepairVerifyOnly; private boolean mAllClustered; private boolean mStrictTriggers; @@ -78,6 +80,8 @@ public class IndexedRepositoryBuilder extends AbstractRepositoryBuilder { Repository repo = new IndexedRepository(rootRef, getName(), wrapped, isIndexRepairEnabled(), getIndexRepairThrottle(), + mIndexDiscardDuplicates, + mIndexRepairVerifyOnly, isAllClustered(), mStrictTriggers); rootRef.set(repo); @@ -171,6 +175,21 @@ public class IndexedRepositoryBuilder extends AbstractRepositoryBuilder { mIndexThrottle = desiredSpeed; } + /** + * Set true to skip collisions during index repair, logging each as a + * warning. + */ + public void setIndexRepairDiscardDuplicates(boolean discardDuplicates) { + mIndexDiscardDuplicates = discardDuplicates; + } + + /** + * Set true to build and verify indexes, but don't apply any changes. + */ + public void setIndexRepairVerifyOnly(boolean verifyOnly) { + mIndexRepairVerifyOnly = verifyOnly; + } + /** * Returns true if all indexes should be identified as clustered. This * affects how indexes are selected by the query analyzer. diff --git a/src/main/java/com/amazon/carbonado/repo/indexed/IndexedStorage.java b/src/main/java/com/amazon/carbonado/repo/indexed/IndexedStorage.java index eec8522..ea131f1 100644 --- a/src/main/java/com/amazon/carbonado/repo/indexed/IndexedStorage.java +++ b/src/main/java/com/amazon/carbonado/repo/indexed/IndexedStorage.java @@ -338,7 +338,10 @@ class IndexedStorage implements Storage, StorageAccess } // New index, so build it. - managedIndex.buildIndex(mRepository.getIndexRepairThrottle()); + + managedIndex.buildIndex(mRepository.getIndexRepairThrottle(), + mRepository.getIndexDiscardDuplicates(), + false); // verifyOnly = false boolean top = true; while (true) { diff --git a/src/main/java/com/amazon/carbonado/repo/indexed/ManagedIndex.java b/src/main/java/com/amazon/carbonado/repo/indexed/ManagedIndex.java index ab46307..f7717b3 100644 --- a/src/main/java/com/amazon/carbonado/repo/indexed/ManagedIndex.java +++ b/src/main/java/com/amazon/carbonado/repo/indexed/ManagedIndex.java @@ -179,7 +179,9 @@ class ManagedIndex implements IndexEntryAccessor { // Required by IndexEntryAccessor interface. public void repair(double desiredSpeed) throws RepositoryException { - buildIndex(desiredSpeed); + buildIndex(desiredSpeed, + mRepository.getIndexDiscardDuplicates(), + mRepository.getIndexRepairVerifyOnly()); } // Required by IndexEntryAccessor interface. @@ -295,7 +297,9 @@ class ManagedIndex implements IndexEntryAccessor { * * @param repo used to enter transactions */ - void buildIndex(double desiredSpeed) throws RepositoryException { + void buildIndex(double desiredSpeed, boolean discardDuplicates, boolean verifyOnly) + throws RepositoryException + { final MergeSortBuffer buffer; final Comparator c; @@ -408,14 +412,15 @@ class ManagedIndex implements IndexEntryAccessor { // _before_ inserting index entries. If there are duplicates, // fail, since unique index cannot be built. - if (log.isInfoEnabled()) { - log.info("Verifying index"); - } + log.info("Verifying index"); Object last = null; for (Object obj : buffer) { - if (last != null) { - if (c.compare(last, obj) == 0) { + if (last != null && c.compare(last, obj) == 0) { + if (discardDuplicates) { + log.warn("Unique index contains duplicates; skipping: " + + this + ", " + last + " == " + obj); + } else { buffer.close(); throw new UniqueConstraintException ("Cannot build unique index because duplicates exist: " @@ -426,6 +431,12 @@ class ManagedIndex implements IndexEntryAccessor { } } + if (verifyOnly) { + log.info("Verification complete"); + buffer.close(); + return; + } + final int bufferSize = buffer.size(); if (log.isInfoEnabled()) { @@ -479,7 +490,7 @@ class ManagedIndex implements IndexEntryAccessor { if (indexEntry != null) { if (indexEntry.tryInsert()) { totalInserted++; - } else { + } else if (!discardDuplicates) { // Couldn't insert because an index entry already exists. Storable existing = indexEntry.copy(); boolean doUpdate = false; -- cgit v1.2.3