You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by sn...@apache.org on 2014/01/30 16:21:02 UTC
[26/34] update to master
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/9fec2baa/stack/core/src/test/java/org/apache/usergrid/persistence/query/ir/result/UnionIteratorTest.java
----------------------------------------------------------------------
diff --cc stack/core/src/test/java/org/apache/usergrid/persistence/query/ir/result/UnionIteratorTest.java
index f38cb66,0000000..e0e0383
mode 100644,000000..100644
--- a/stack/core/src/test/java/org/apache/usergrid/persistence/query/ir/result/UnionIteratorTest.java
+++ b/stack/core/src/test/java/org/apache/usergrid/persistence/query/ir/result/UnionIteratorTest.java
@@@ -1,379 -1,0 +1,467 @@@
+/*******************************************************************************
+ * Copyright 2012 Apigee Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.usergrid.persistence.query.ir.result;
+
+
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.UUID;
+
+import org.apache.usergrid.persistence.query.ir.result.ScanColumn;
+import org.apache.usergrid.persistence.query.ir.result.UnionIterator;
+import org.apache.usergrid.utils.UUIDUtils;
+import org.junit.Test;
+
+import me.prettyprint.cassandra.serializers.UUIDSerializer;
+import static org.apache.usergrid.persistence.query.ir.result.IteratorHelper.uuidColumn;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+
+/**
+ * @author tnine
+ */
+public class UnionIteratorTest {
+
+ @Test
+ public void testMutipleIterators() {
+
+ UUID id1 = UUIDUtils.minTimeUUID( 1 );
+ UUID id2 = UUIDUtils.minTimeUUID( 2 );
+ UUID id3 = UUIDUtils.minTimeUUID( 3 );
+ UUID id4 = UUIDUtils.minTimeUUID( 4 );
+ UUID id5 = UUIDUtils.minTimeUUID( 5 );
+ UUID id6 = UUIDUtils.minTimeUUID( 6 );
+ UUID id7 = UUIDUtils.minTimeUUID( 7 );
+ UUID id8 = UUIDUtils.minTimeUUID( 8 );
+ UUID id9 = UUIDUtils.minTimeUUID( 9 );
+ UUID id10 = UUIDUtils.minTimeUUID( 10 );
+
+ // the union should contain every id, 1 through 10
+ InOrderIterator first = new InOrderIterator( 100 );
+ first.add( id1 );
+ first.add( id2 );
+ first.add( id3 );
+ first.add( id8 );
+ first.add( id9 );
+
+ InOrderIterator second = new InOrderIterator( 100 );
+ second.add( id1 );
+ second.add( id2 );
+ second.add( id3 );
+ second.add( id4 );
+ second.add( id8 );
+ second.add( id10 );
+
+ InOrderIterator third = new InOrderIterator( 100 );
+ third.add( id6 );
+ third.add( id7 );
+ third.add( id1 );
+ third.add( id3 );
+ third.add( id5 );
+ third.add( id8 );
+
+ InOrderIterator fourth = new InOrderIterator( 100 );
+ fourth.add( id1 );
+ fourth.add( id6 );
+ fourth.add( id2 );
+ fourth.add( id3 );
+ fourth.add( id8 );
+ fourth.add( id9 );
+
+
+ UnionIterator iter = new UnionIterator( 100, 0, null );
+ iter.addIterator( first );
+ iter.addIterator( second );
+ iter.addIterator( third );
+ iter.addIterator( fourth );
+
+ Set<ScanColumn> union = iter.next();
+
+ // now make sure it's right, the union contains all of ids 1 through 10
+ assertTrue( union.contains( uuidColumn( id1 ) ) );
+ assertTrue( union.contains( uuidColumn( id2 ) ) );
+ assertTrue( union.contains( uuidColumn( id3 ) ) );
+ assertTrue( union.contains( uuidColumn( id4 ) ) );
+ assertTrue( union.contains( uuidColumn( id5 ) ) );
+ assertTrue( union.contains( uuidColumn( id6 ) ) );
+ assertTrue( union.contains( uuidColumn( id7 ) ) );
+ assertTrue( union.contains( uuidColumn( id8 ) ) );
+ assertTrue( union.contains( uuidColumn( id9 ) ) );
+ assertTrue( union.contains( uuidColumn( id10 ) ) );
+ }
+
+
+ @Test
+ public void testOneIterator() {
+
+ UUID id1 = UUIDUtils.minTimeUUID( 1 );
+ UUID id2 = UUIDUtils.minTimeUUID( 2 );
+ UUID id3 = UUIDUtils.minTimeUUID( 3 );
+ UUID id4 = UUIDUtils.minTimeUUID( 4 );
+
+ // with a single iterator the union should be ids 1 through 4
+ InOrderIterator first = new InOrderIterator( 100 );
+ first.add( id1 );
+ first.add( id2 );
+ first.add( id3 );
+ first.add( id4 );
+
+ UnionIterator union = new UnionIterator( 100, 0, null );
+ union.addIterator( first );
+
+ Set<ScanColumn> ids = union.next();
+
+ // now make sure it's right, all 4 ids are returned
+ assertTrue( ids.contains( uuidColumn( id1 ) ) );
+ assertTrue( ids.contains( uuidColumn( id2 ) ) );
+ assertTrue( ids.contains( uuidColumn( id3 ) ) );
+ assertTrue( ids.contains( uuidColumn( id4 ) ) );
+
+ assertFalse( union.hasNext() );
+ }
+
+
+ @Test
+ public void testEmptyFirstIterator() {
+
+ UUID id1 = UUIDUtils.minTimeUUID( 1 );
+ UUID id2 = UUIDUtils.minTimeUUID( 2 );
+ UUID id3 = UUIDUtils.minTimeUUID( 3 );
+ UUID id4 = UUIDUtils.minTimeUUID( 4 );
+
+ // the first iterator is empty, so the union is just the second's ids 1 through 4
+ InOrderIterator first = new InOrderIterator( 100 );
+
+ InOrderIterator second = new InOrderIterator( 100 );
+ second.add( id1 );
+ second.add( id2 );
+ second.add( id3 );
+ second.add( id4 );
+
+ UnionIterator union = new UnionIterator( 100, 0, null );
+ union.addIterator( first );
+ union.addIterator( second );
+
+ Set<ScanColumn> ids = union.next();
+
+ // now make sure it's right, all 4 ids from the second iterator are returned
+ assertTrue( ids.contains( uuidColumn( id1 ) ) );
+ assertTrue( ids.contains( uuidColumn( id2 ) ) );
+ assertTrue( ids.contains( uuidColumn( id3 ) ) );
+ assertTrue( ids.contains( uuidColumn( id4 ) ) );
+
+ assertFalse( union.hasNext() );
+ }
+
+
+ @Test
+ public void testNoIterator() {
+
+ UnionIterator union = new UnionIterator( 100, 0, null );
+
+ // no iterators were added, so there should be nothing to return
+ assertFalse( union.hasNext() );
+ }
+
+
+ @Test
+ public void largeUnionTest() {
+
+ int size = 10000;
+ int firstIntersection = 100;
+ int secondIntersection = 200;
+
+ int pageSize = 20;
+
+ UUID[] firstSet = new UUID[size];
+ UUID[] secondSet = new UUID[size];
+ UUID[] thirdSet = new UUID[size];
+
+ InOrderIterator first = new InOrderIterator( pageSize / 2 );
+ InOrderIterator second = new InOrderIterator( pageSize / 2 );
+ InOrderIterator third = new InOrderIterator( pageSize / 2 );
+
+ Set<UUID> results = new HashSet<UUID>( size );
+
+ for ( int i = 0; i < size; i++ ) {
+ firstSet[i] = UUIDUtils.newTimeUUID();
+ // every 100 elements, set the element equal to the first set. This way we
+ // have intersection
+
+ results.add( firstSet[i] );
+
+ if ( i % firstIntersection == 0 ) {
+ secondSet[i] = firstSet[i];
+ }
+ else {
+ secondSet[i] = UUIDUtils.newTimeUUID();
+ results.add( secondSet[i] );
+ }
+
+ if ( i % secondIntersection == 0 ) {
+ thirdSet[i] = firstSet[i];
+ }
+
+ else {
+ thirdSet[i] = UUIDUtils.newTimeUUID();
+ results.add( thirdSet[i] );
+ }
+ }
+
+ first.add( firstSet );
+
+ reverse( secondSet );
+ // reverse the second
+ second.add( secondSet );
+ third.add( thirdSet );
+
+ // now union them and make sure we get every result exactly once across the pages
+ UnionIterator union = new UnionIterator( pageSize, 0, null );
+ union.addIterator( first );
+ union.addIterator( second );
+ union.addIterator( third );
+
+
+ while ( union.hasNext() ) {
+
+ // get the next page
+ Set<ScanColumn> resultSet = union.next();
+
+ for ( ScanColumn col : resultSet ) {
+ boolean existed = results.remove( col.getUUID() );
+
+ assertTrue( "Duplicate element was detected", existed );
+ }
+ }
+
+ assertEquals( 0, results.size() );
+ assertFalse( union.hasNext() );
+ }
+
+
+ @Test
+ public void iterationCompleted() {
+
+ UUID id1 = UUIDUtils.minTimeUUID( 1 );
+ UUID id2 = UUIDUtils.minTimeUUID( 2 );
+ UUID id3 = UUIDUtils.minTimeUUID( 3 );
+ UUID id4 = UUIDUtils.minTimeUUID( 4 );
+ UUID id5 = UUIDUtils.minTimeUUID( 5 );
+
+
+ UnionIterator union = new UnionIterator( 5, 0, null );
+
+ InOrderIterator first = new InOrderIterator( 100 );
+
+ InOrderIterator second = new InOrderIterator( 100 );
+ second.add( id1 );
+ second.add( id2 );
+ second.add( id3 );
+ second.add( id4 );
+ second.add( id5 );
+
+ union.addIterator( first );
+ union.addIterator( second );
+
+
+ // the second iterator has data, so there should be a page available
+ assertTrue( union.hasNext() );
+
+ Set<ScanColumn> ids = union.next();
+
+ // now make sure it's right, all 5 ids are in the first page
+ assertTrue( ids.contains( uuidColumn( id1 ) ) );
+ assertTrue( ids.contains( uuidColumn( id2 ) ) );
+ assertTrue( ids.contains( uuidColumn( id3 ) ) );
+ assertTrue( ids.contains( uuidColumn( id4 ) ) );
+ assertTrue( ids.contains( uuidColumn( id5 ) ) );
+
+ //now try to get the next page
+ ids = union.next();
+ assertNull( ids );
+ }
+
+
+ @Test
+ public void nullCursorBytes() {
+
+ UUID id1 = UUIDUtils.minTimeUUID( 1 );
+ UUID id2 = UUIDUtils.minTimeUUID( 2 );
+ UUID id3 = UUIDUtils.minTimeUUID( 3 );
+ UUID id4 = UUIDUtils.minTimeUUID( 4 );
+ UUID id5 = UUIDUtils.minTimeUUID( 5 );
+
+
+ InOrderIterator second = new InOrderIterator( 100 );
+ second.add( id1 );
+ second.add( id2 );
+ second.add( id3 );
+ second.add( id4 );
+ second.add( id5 );
+
+ UnionIterator union = new UnionIterator( 100, 1, null );
+
+ union.addIterator( second );
+
+ Set<ScanColumn> ids = union.next();
+
+ // now make sure it's right, all 5 ids are returned
+ assertTrue( ids.contains( uuidColumn( id1 ) ) );
+ assertTrue( ids.contains( uuidColumn( id2 ) ) );
+ assertTrue( ids.contains( uuidColumn( id3 ) ) );
+ assertTrue( ids.contains( uuidColumn( id4 ) ) );
+ assertTrue( ids.contains( uuidColumn( id5 ) ) );
+ }
+
+
+ @Test
+ public void validCursorBytes() {
+
+
+ ByteBuffer cursor = UUIDSerializer.get().toByteBuffer( UUIDUtils.minTimeUUID( 4 ) );
+
+ UUID id1 = UUIDUtils.minTimeUUID( 1 );
+ UUID id2 = UUIDUtils.minTimeUUID( 2 );
+ UUID id3 = UUIDUtils.minTimeUUID( 3 );
+ UUID id4 = UUIDUtils.minTimeUUID( 4 );
+ UUID id5 = UUIDUtils.minTimeUUID( 5 );
+
+
+ InOrderIterator second = new InOrderIterator( 100 );
+ second.add( id1 );
+ second.add( id2 );
+ second.add( id3 );
+ second.add( id4 );
+ second.add( id5 );
+
+ UnionIterator union = new UnionIterator( 100, 1, cursor );
+
+ union.addIterator( second );
+
+ Set<ScanColumn> ids = union.next();
+
+ // now make sure it's right, ids 1 through 4 are not returned with this cursor, only 5 is
+ assertFalse( ids.contains( uuidColumn( id1 ) ) );
+ assertFalse( ids.contains( uuidColumn( id2 ) ) );
+ assertFalse( ids.contains( uuidColumn( id3 ) ) );
+ assertFalse( ids.contains( uuidColumn( id4 ) ) );
+ assertTrue( ids.contains( uuidColumn( id5 ) ) );
+ }
+
+
++ @Test
++ public void resetCorrect() {
++
++ UUID id1 = UUIDUtils.minTimeUUID( 1 );
++ UUID id2 = UUIDUtils.minTimeUUID( 2 );
++ UUID id3 = UUIDUtils.minTimeUUID( 3 );
++ UUID id4 = UUIDUtils.minTimeUUID( 4 );
++ UUID id5 = UUIDUtils.minTimeUUID( 5 );
++ UUID id6 = UUIDUtils.minTimeUUID( 6 );
++ UUID id7 = UUIDUtils.minTimeUUID( 75 );
++
++
++ UnionIterator union = new UnionIterator( 5, 0, null );
++
++ InOrderIterator first = new InOrderIterator( 100 );
++ first.add( id3 );
++ first.add( id6 );
++ first.add( id4 );
++
++
++ InOrderIterator second = new InOrderIterator( 100 );
++ second.add( id7 );
++ second.add( id1 );
++ second.add( id2 );
++ second.add( id5 );
++
++
++ union.addIterator( first );
++ union.addIterator( second );
++
++
++ // there is data in both iterators, so there should be a page available
++ assertTrue( union.hasNext() );
++
++ Set<ScanColumn> ids = union.next();
++
++
++ assertEquals(5, ids.size());
++
++ // now make sure it's right, the first page holds ids 1 through 5
++ assertTrue( ids.contains( uuidColumn( id1 ) ) );
++ assertTrue( ids.contains( uuidColumn( id2 ) ) );
++ assertTrue( ids.contains( uuidColumn( id3 ) ) );
++ assertTrue( ids.contains( uuidColumn( id4 ) ) );
++ assertTrue( ids.contains( uuidColumn( id5 ) ) );
++
++ ids = union.next();
++
++
++ assertEquals(2, ids.size());
++
++ assertTrue( ids.contains( uuidColumn( id6 ) ) );
++ assertTrue( ids.contains( uuidColumn( id7 ) ) );
++
++ //now try to get the next page
++ ids = union.next();
++ assertNull( ids );
++
++ //now reset and re-test
++ union.reset();
++
++ ids = union.next();
++
++ assertEquals(5, ids.size());
++
++
++ // after the reset the first page should again hold ids 1 through 5
++ assertTrue( ids.contains( uuidColumn( id1 ) ) );
++ assertTrue( ids.contains( uuidColumn( id2 ) ) );
++ assertTrue( ids.contains( uuidColumn( id3 ) ) );
++ assertTrue( ids.contains( uuidColumn( id4 ) ) );
++ assertTrue( ids.contains( uuidColumn( id5 ) ) );
++
++
++ ids = union.next();
++
++ assertEquals(2, ids.size());
++
++ assertTrue( ids.contains( uuidColumn( id6 ) ) );
++ assertTrue( ids.contains( uuidColumn( id7 ) ) );
++
++
++ //now try to get the next page
++ ids = union.next();
++ assertNull( ids );
++ }
++
++
+ private void reverse( UUID[] array ) {
+
+ UUID temp = null;
+
+ for ( int i = 0; i < array.length / 2; i++ ) {
+ temp = array[i];
+ array[i] = array[array.length - i - 1];
+ array[array.length - i - 1] = temp;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/9fec2baa/stack/tools/pom.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/9fec2baa/stack/tools/src/main/java/org/apache/usergrid/tools/EntityCleanup.java
----------------------------------------------------------------------
diff --cc stack/tools/src/main/java/org/apache/usergrid/tools/EntityCleanup.java
index 4da1c80,0000000..f379568
mode 100644,000000..100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/EntityCleanup.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/EntityCleanup.java
@@@ -1,174 -1,0 +1,174 @@@
+/*******************************************************************************
+ * Copyright 2012 Apigee Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.usergrid.tools;
+
+
+import java.nio.ByteBuffer;
+import java.util.LinkedHashSet;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.UUID;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.usergrid.persistence.Entity;
+import org.apache.usergrid.persistence.IndexBucketLocator;
+import org.apache.usergrid.persistence.Results;
+import org.apache.usergrid.persistence.Schema;
+import org.apache.usergrid.persistence.IndexBucketLocator.IndexType;
+import org.apache.usergrid.persistence.cassandra.CassandraService;
+import org.apache.usergrid.persistence.cassandra.EntityManagerImpl;
+import org.apache.usergrid.persistence.cassandra.index.IndexScanner;
+import org.apache.usergrid.persistence.query.ir.result.ScanColumn;
+import org.apache.usergrid.persistence.query.ir.result.ScanColumnTransformer;
+import org.apache.usergrid.persistence.query.ir.result.SliceIterator;
+import org.apache.usergrid.persistence.query.ir.result.UUIDIndexSliceParser;
+
+import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
+import me.prettyprint.hector.api.Keyspace;
+import me.prettyprint.hector.api.mutation.Mutator;
+import static me.prettyprint.hector.api.factory.HFactory.createMutator;
+import static org.apache.usergrid.persistence.Schema.DICTIONARY_COLLECTIONS;
+import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_ID_SETS;
+import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.addDeleteToMutator;
+import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.key;
+import static org.apache.usergrid.utils.UUIDUtils.getTimestampInMicros;
+import static org.apache.usergrid.utils.UUIDUtils.newTimeUUID;
+
+
+/**
+ * This is a utility to audit all available entity ids for existing target rows. If an entity id exists in the collection
+ * index with no target entity, the id is removed from the index. This is a cleanup tool as a result of the issue in
+ * USERGRID-323.
+ *
+ * @author tnine
+ */
+public class EntityCleanup extends ToolBase {
+
+ /**
+ *
+ */
+ private static final int PAGE_SIZE = 100;
+
+ public static final ByteBufferSerializer be = new ByteBufferSerializer();
+
+ private static final Logger logger = LoggerFactory.getLogger( EntityCleanup.class );
+
+
+ @Override
+ @SuppressWarnings("static-access")
+ public Options createOptions() {
+
+ Option hostOption =
+ OptionBuilder.withArgName( "host" ).hasArg().isRequired( true ).withDescription( "Cassandra host" )
+ .create( "host" );
+
+ Options options = new Options();
+ options.addOption( hostOption );
+
+ return options;
+ }
+
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * org.apache.usergrid.tools.ToolBase#runTool(org.apache.commons.cli.CommandLine)
+ */
+ @Override
+ public void runTool( CommandLine line ) throws Exception {
+ startSpring();
+
+ logger.info( "Starting entity cleanup" );
+
+ Results results = null;
+
+
+ for ( Entry<String, UUID> app : emf.getApplications().entrySet() ) {
+
+ logger.info( "Starting cleanup for app {}", app.getKey() );
+
+ UUID applicationId = app.getValue();
+ EntityManagerImpl em = ( EntityManagerImpl ) emf.getEntityManager( applicationId );
+
+ CassandraService cass = em.getCass();
+ IndexBucketLocator indexBucketLocator = em.getIndexBucketLocator();
+
+ UUID timestampUuid = newTimeUUID();
+ long timestamp = getTimestampInMicros( timestampUuid );
+
+ Set<String> collectionNames = em.getApplicationCollections();
+
+ // go through each collection and audit the value
+ for ( String collectionName : collectionNames ) {
+
+ IndexScanner scanner = cass.getIdList( cass.getApplicationKeyspace( applicationId ),
+ key( applicationId, DICTIONARY_COLLECTIONS, collectionName ), null, null, PAGE_SIZE, false,
- indexBucketLocator, applicationId, collectionName );
++ indexBucketLocator, applicationId, collectionName, false );
+
- SliceIterator itr = new SliceIterator( null, scanner, new UUIDIndexSliceParser(), false );
++ SliceIterator itr = new SliceIterator( null, scanner, new UUIDIndexSliceParser() );
+
+ while ( itr.hasNext() ) {
+
+ // load all entity ids from the index itself.
+
+ Set<ScanColumn> copy = new LinkedHashSet<ScanColumn>( itr.next() );
+
+ results = em.get( ScanColumnTransformer.getIds( copy ) );
+ // nothing to do: they're the same size, so there are no
+ // orphaned uuids in the entity index
+ if ( copy.size() == results.size() ) {
+ continue;
+ }
+
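+ // they're not the same, so some records are orphaned; drop the ids that still exist
+ // NOTE(review): copy holds ScanColumn but remove() is given getUuid() (presumably a UUID) - verify it matches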
+
+ for ( Entity returned : results.getEntities() ) {
+ copy.remove( returned.getUuid() );
+ }
+
+ // what's left needs to be deleted, do so
+
+ logger.info( "Cleaning up {} orphaned entities for app {}", copy.size(), app.getValue() );
+
+ Keyspace ko = cass.getApplicationKeyspace( applicationId );
+ Mutator<ByteBuffer> m = createMutator( ko, be );
+
+ for ( ScanColumn col : copy ) {
+
+ final UUID id = col.getUUID();
+
+ Object collections_key = key( applicationId, Schema.DICTIONARY_COLLECTIONS, collectionName,
+ indexBucketLocator
+ .getBucket( applicationId, IndexType.COLLECTION, id, collectionName ) );
+
+ addDeleteToMutator( m, ENTITY_ID_SETS, collections_key, id, timestamp );
+
+ logger.info( "Deleting entity with id '{}' from collection '{}'", id, collectionName );
+ }
+
+ m.execute();
+ }
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/9fec2baa/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
----------------------------------------------------------------------
diff --cc stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
index 6bf9412,0000000..886a934
mode 100644,000000..100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/UniqueIndexCleanup.java
@@@ -1,379 -1,0 +1,379 @@@
+/*******************************************************************************
+ * Copyright 2012 Apigee Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.usergrid.tools;
+
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.UUID;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.usergrid.management.ApplicationInfo;
+import org.apache.usergrid.persistence.Entity;
+import org.apache.usergrid.persistence.EntityManager;
+import org.apache.usergrid.persistence.EntityManagerFactory;
+import org.apache.usergrid.persistence.Identifier;
+import org.apache.usergrid.persistence.IndexBucketLocator;
+import org.apache.usergrid.persistence.IndexBucketLocator.IndexType;
+import org.apache.usergrid.persistence.cassandra.CassandraService;
+import org.apache.usergrid.persistence.cassandra.EntityManagerImpl;
+import org.apache.usergrid.persistence.cassandra.index.IndexScanner;
+import org.apache.usergrid.persistence.entities.Application;
+import org.apache.usergrid.persistence.query.ir.result.ScanColumn;
+import org.apache.usergrid.persistence.query.ir.result.SliceIterator;
+import org.apache.usergrid.persistence.query.ir.result.UUIDIndexSliceParser;
+import org.apache.usergrid.persistence.schema.CollectionInfo;
+
+import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
+import me.prettyprint.hector.api.Keyspace;
+import me.prettyprint.hector.api.beans.AbstractComposite.ComponentEquality;
+import me.prettyprint.hector.api.beans.DynamicComposite;
+import me.prettyprint.hector.api.beans.HColumn;
+import me.prettyprint.hector.api.mutation.Mutator;
+import static me.prettyprint.hector.api.factory.HFactory.createMutator;
+import static org.apache.usergrid.persistence.Schema.DICTIONARY_COLLECTIONS;
+import static org.apache.usergrid.persistence.Schema.getDefaultSchema;
+import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_INDEX;
+import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_INDEX_ENTRIES;
+import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.addDeleteToMutator;
+import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.key;
+import static org.apache.usergrid.persistence.cassandra.CassandraService.INDEX_ENTRY_LIST_COUNT;
+import static org.apache.usergrid.utils.CompositeUtils.setEqualityFlag;
+import static org.apache.usergrid.utils.UUIDUtils.getTimestampInMicros;
+import static org.apache.usergrid.utils.UUIDUtils.newTimeUUID;
+
+
+/**
+ * This is a utility to audit all available entity ids in the secondary index. It then checks to see if any index value
+ * is not present in the Entity_Index_Entries. If it is not, the value from the index is removed, and a forced re-index
+ * is triggered
+ * <p/>
+ * USERGRID-323
+ * <p/>
+ * <p/>
+ * UniqueIndexCleanup -app [appid] -col [collectionname]
+ *
+ * @author tnine
+ */
+public class UniqueIndexCleanup extends ToolBase {
+
+ /**
+ *
+ */
+ private static final int PAGE_SIZE = 100;
+
+ public static final ByteBufferSerializer be = new ByteBufferSerializer();
+
+
+ private static final Logger logger = LoggerFactory.getLogger( UniqueIndexCleanup.class );
+
+ /**
+ *
+ */
+ private static final String APPLICATION_ARG = "app";
+
+ /**
+ *
+ */
+ private static final String COLLECTION_ARG = "col";
+
+
+ @Override
+ @SuppressWarnings("static-access")
+ public Options createOptions() {
+
+
+ Options options = new Options();
+
+ Option hostOption =
+ OptionBuilder.withArgName( "host" ).hasArg().isRequired( true ).withDescription( "Cassandra host" )
+ .create( "host" );
+
+
+ options.addOption( hostOption );
+
+
+ Option appOption = OptionBuilder.withArgName( APPLICATION_ARG ).hasArg().isRequired( false )
+ .withDescription( "application id or app name" ).create( APPLICATION_ARG );
+
+
+ options.addOption( appOption );
+
+ Option collectionOption = OptionBuilder.withArgName( COLLECTION_ARG ).hasArg().isRequired( false )
+ .withDescription( "colleciton name" ).create( COLLECTION_ARG );
+
+ options.addOption( collectionOption );
+
+ return options;
+ }
+
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * org.apache.usergrid.tools.ToolBase#runTool(org.apache.commons.cli.CommandLine)
+ */
+ @Override
+ public void runTool( CommandLine line ) throws Exception {
+ startSpring();
+
+ logger.info( "Starting entity cleanup" );
+
+ Map<String, UUID> apps = getApplications( emf, line );
+
+
+ for ( Entry<String, UUID> app : apps.entrySet() ) {
+
+ logger.info( "Starting cleanup for app {}", app.getKey() );
+
+ UUID applicationId = app.getValue();
+ EntityManagerImpl em = ( EntityManagerImpl ) emf.getEntityManager( applicationId );
+
+ //sanity check for corrupt apps
+ Application appEntity = em.getApplication();
+
+ if ( appEntity == null ) {
+ logger.warn( "Application does not exist in data. {}", app.getKey() );
+ continue;
+ }
+
+ CassandraService cass = em.getCass();
+ IndexBucketLocator indexBucketLocator = em.getIndexBucketLocator();
+
+ Keyspace ko = cass.getApplicationKeyspace( applicationId );
+
+ UUID timestampUuid = newTimeUUID();
+ long timestamp = getTimestampInMicros( timestampUuid );
+
+
+ // go through each collection and audit the values
+ for ( String collectionName : getCollectionNames( em, line ) ) {
+
+
+ IndexScanner scanner = cass.getIdList( cass.getApplicationKeyspace( applicationId ),
+ key( applicationId, DICTIONARY_COLLECTIONS, collectionName ), null, null, PAGE_SIZE, false,
- indexBucketLocator, applicationId, collectionName );
++ indexBucketLocator, applicationId, collectionName, false );
+
- SliceIterator itr = new SliceIterator( null, scanner, new UUIDIndexSliceParser(), false );
++ SliceIterator itr = new SliceIterator( null, scanner, new UUIDIndexSliceParser() );
+
+
+ while ( itr.hasNext() ) {
+
+ Set<ScanColumn> ids = itr.next();
+
+ CollectionInfo collection = getDefaultSchema().getCollection( "application", collectionName );
+
+
+ //We shouldn't have to do this, but otherwise the cursor won't work
+ Set<String> indexed = collection.getPropertiesIndexed();
+
+ // audit every id in this page
+
+ logger.info( "Auditing {} entities for collection {} in app {}", new Object[] {
+ ids.size(), collectionName, app.getValue()
+ } );
+
+ for ( ScanColumn col : ids ) {
+ final UUID id = col.getUUID();
+ boolean reIndex = false;
+
+ Mutator<ByteBuffer> m = createMutator( ko, be );
+
+ try {
+
+ for ( String prop : indexed ) {
+
+ String bucket =
+ indexBucketLocator.getBucket( applicationId, IndexType.COLLECTION, id, prop );
+
+ Object rowKey = key( applicationId, collection.getName(), prop, bucket );
+
+ List<HColumn<ByteBuffer, ByteBuffer>> indexCols =
+ scanIndexForAllTypes( ko, indexBucketLocator, applicationId, rowKey, id, prop );
+
+ // loop through the indexed values and verify them as present in
+ // our entity_index_entries. If they aren't, we need to delete them
+ // from the secondary index, and mark
+ // this object for re-index via an update
+ for ( HColumn<ByteBuffer, ByteBuffer> index : indexCols ) {
+
+ DynamicComposite secondaryIndexValue =
+ DynamicComposite.fromByteBuffer( index.getName().duplicate() );
+
+ Object code = secondaryIndexValue.get( 0 );
+ Object propValue = secondaryIndexValue.get( 1 );
+ UUID timestampId = ( UUID ) secondaryIndexValue.get( 3 );
+
+ DynamicComposite existingEntryStart =
+ new DynamicComposite( prop, code, propValue, timestampId );
+ DynamicComposite existingEntryFinish =
+ new DynamicComposite( prop, code, propValue, timestampId );
+
+ setEqualityFlag( existingEntryFinish, ComponentEquality.GREATER_THAN_EQUAL );
+
+ // now search our EntityIndexEntry for previous values, see if
+ // they don't match this one
+
+ List<HColumn<ByteBuffer, ByteBuffer>> entries =
+ cass.getColumns( ko, ENTITY_INDEX_ENTRIES, id, existingEntryStart,
+ existingEntryFinish, INDEX_ENTRY_LIST_COUNT, false );
+
+ // we couldn't find this column in our entity_index_entries
+ // audit. Delete it, then mark this entity for update
+ if ( entries.size() == 0 ) {
+ logger.info(
+ "Could not find reference to value '{}' for property '{}' on entity " +
+ "{} in collection {}. " + " Forcing reindex",
+ new Object[] { propValue, prop, id, collectionName } );
+
+ addDeleteToMutator( m, ENTITY_INDEX, rowKey, index.getName().duplicate(),
+ timestamp );
+
+ reIndex = true;
+ }
+
+ if ( entries.size() > 1 ) {
+ logger.info(
+ "Found more than 1 entity referencing unique index for property '{}' " +
+ "with value " + "'{}'", prop, propValue );
+ reIndex = true;
+ }
+ }
+ }
+
+ //force this entity to be updated
+ if ( reIndex ) {
+ Entity entity = em.get( id );
+
+ //entity may not exist, but we should have deleted rows from the index
+ if ( entity == null ) {
+ logger.warn( "Entity with id {} did not exist in app {}", id, applicationId );
+ //now execute the cleanup. In this case the entity is gone,
+ // so we'll want to remove references from
+ // the secondary index
+ m.execute();
+ continue;
+ }
+
+
+ logger.info( "Reindex complete for entity with id '{} ", id );
+ em.update( entity );
+
+ //now execute the cleanup. This way if the above update fails,
+ // we still have enough data to run again
+ // later
+ m.execute();
+ }
+ }
+ catch ( Exception e ) {
+ logger.error( "Unable to process entity with id '{}'", id, e );
+ }
+ }
+ }
+ }
+ }
+
+ logger.info( "Completed audit of apps" );
+ }
+
+
+ private Map<String, UUID> getApplications( EntityManagerFactory emf, CommandLine line ) throws Exception {
+ String appName = line.getOptionValue( APPLICATION_ARG );
+
+ if ( appName == null ) {
+ return emf.getApplications();
+ }
+
+ ApplicationInfo app = managementService.getApplicationInfo( Identifier.from( appName ) );
+
+ if ( app == null ) {
+ logger.error( "Could not find application with id or name {}", appName );
+ System.exit( 3 );
+ }
+
+
+ Map<String, UUID> apps = new HashMap<String, UUID>();
+
+ apps.put( app.getName(), app.getId() );
+
+ return apps;
+ }
+
+
+ private Set<String> getCollectionNames( EntityManager em, CommandLine line ) throws Exception {
+
+ String collectionName = line.getOptionValue( COLLECTION_ARG );
+
+ if ( collectionName == null ) {
+ return em.getApplicationCollections();
+ }
+
+
+ Set<String> names = new HashSet<String>();
+ names.add( collectionName );
+
+ return names;
+ }
+
+
+ private List<HColumn<ByteBuffer, ByteBuffer>> scanIndexForAllTypes( Keyspace ko,
+ IndexBucketLocator indexBucketLocator,
+ UUID applicationId, Object rowKey,
+ UUID entityId, String prop ) throws Exception {
+
+ //TODO Determine the index bucket. Scan the entire index for properties with this entityId.
+
+
+ DynamicComposite start = null;
+
+ List<HColumn<ByteBuffer, ByteBuffer>> cols;
+
+ List<HColumn<ByteBuffer, ByteBuffer>> results = new ArrayList<HColumn<ByteBuffer, ByteBuffer>>();
+
+
+ do {
+ cols = cass.getColumns( ko, ENTITY_INDEX, rowKey, start, null, 100, false );
+
+ for ( HColumn<ByteBuffer, ByteBuffer> col : cols ) {
+ DynamicComposite secondaryIndexValue = DynamicComposite.fromByteBuffer( col.getName().duplicate() );
+
+ UUID storedId = ( UUID ) secondaryIndexValue.get( 2 );
+
+ //add it to the set. We can't short circuit due to property ordering
+ if ( entityId.equals( storedId ) ) {
+ results.add( col );
+ }
+
+ start = secondaryIndexValue;
+ }
+ }
+ while ( cols.size() == 100 );
+
+ return results;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/9fec2baa/stack/tools/src/main/java/org/apache/usergrid/tools/bean/ExportOrg.java
----------------------------------------------------------------------
diff --cc stack/tools/src/main/java/org/apache/usergrid/tools/bean/ExportOrg.java
index 1a04558,0000000..550a127
mode 100644,000000..100644
--- a/stack/tools/src/main/java/org/apache/usergrid/tools/bean/ExportOrg.java
+++ b/stack/tools/src/main/java/org/apache/usergrid/tools/bean/ExportOrg.java
@@@ -1,58 -1,0 +1,69 @@@
+/*******************************************************************************
+ * Copyright 2012 Apigee Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.usergrid.tools.bean;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.usergrid.management.OrganizationInfo;
+
+
+/** @author tnine */
+public class ExportOrg extends OrganizationInfo {
+
+ private List<String> adminUserNames;
++ private int passwordHistorySize;
+
+
+ public ExportOrg() {
+ adminUserNames = new ArrayList<String>();
+ }
+
+
+ public ExportOrg( OrganizationInfo info ) {
+ setName( info.getName() );
+ setUuid( info.getUuid() );
+ adminUserNames = new ArrayList<String>();
+ }
+
+
+ /** @return the admins */
+ public List<String> getAdmins() {
+ return adminUserNames;
+ }
+
+
+ /** @param admins the admins to set */
+ public void setAdmins( List<String> admins ) {
+ this.adminUserNames = admins;
+ }
+
+
+ public void addAdmin( String username ) {
+ adminUserNames.add( username );
+ }
++
++
++ public int getPasswordHistorySize() {
++ return passwordHistorySize;
++ }
++
++
++ public void setPasswordHistorySize( final int passwordHistorySize ) {
++ this.passwordHistorySize = passwordHistorySize;
++ }
+}