You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@rya.apache.org by ca...@apache.org on 2017/08/21 20:40:34 UTC

[1/3] incubator-rya git commit: RYA-250 Added data duplication detection methods to Smart URI/Entities. These use configured tolerances for each data type to decide if an Entity is considered nearly equal. Also, string terms that are considered equival

Repository: incubator-rya
Updated Branches:
  refs/heads/master 7cd70bf42 -> b319365e8


http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/test/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetectorTest.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/test/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetectorTest.java b/extras/indexing/src/test/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetectorTest.java
new file mode 100644
index 0000000..3385285
--- /dev/null
+++ b/extras/indexing/src/test/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetectorTest.java
@@ -0,0 +1,2053 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.rya.api.domain.RyaTypeUtils.booleanRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.byteRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.dateRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.doubleRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.floatRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.intRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.longRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.shortRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.stringRyaType;
+import static org.apache.rya.api.domain.RyaTypeUtils.uriRyaType;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.lang.builder.ReflectionToStringBuilder;
+import org.apache.rya.api.domain.RyaSchema;
+import org.apache.rya.api.domain.RyaType;
+import org.apache.rya.api.domain.RyaTypeUtils;
+import org.apache.rya.api.domain.RyaURI;
+import org.apache.rya.api.resolver.RdfToRyaConversions;
+import org.apache.rya.indexing.entity.model.Entity;
+import org.apache.rya.indexing.entity.model.Entity.Builder;
+import org.apache.rya.indexing.entity.model.Property;
+import org.apache.rya.indexing.entity.model.Type;
+import org.apache.rya.indexing.entity.storage.EntityStorage;
+import org.apache.rya.indexing.entity.storage.EntityStorage.EntityStorageException;
+import org.apache.rya.indexing.entity.storage.TypeStorage;
+import org.apache.rya.indexing.entity.storage.TypeStorage.TypeStorageException;
+import org.apache.rya.indexing.entity.storage.mongo.MongoEntityStorage;
+import org.apache.rya.indexing.entity.storage.mongo.MongoITBase;
+import org.apache.rya.indexing.entity.storage.mongo.MongoTypeStorage;
+import org.apache.rya.indexing.mongodb.update.RyaObjectStorage.ObjectStorageException;
+import org.apache.rya.indexing.smarturi.SmartUriException;
+import org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig;
+import org.joda.time.DateTime;
+import org.junit.Test;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.model.impl.ValueFactoryImpl;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+
+/**
+ * Tests the methods of {@link DuplicateDataDetector}.
+ */
+public class DuplicateDataDetectorTest extends MongoITBase {
+    private static final String RYA_INSTANCE_NAME = "testInstance";
+
+    private static final String NAMESPACE = RyaSchema.NAMESPACE; // Default namespace used by createRyaUri(String).
+    private static final ValueFactory VALUE_FACTORY = ValueFactoryImpl.getInstance(); // Builds the openrdf URIs that are converted to RyaURIs.
+
+    // People: subject URIs of the entities under test.
+    private static final RyaURI BOB = createRyaUri("Bob");
+
+    // Attributes: property name URIs assigned to the Bob entity by createBobEntity().
+    private static final RyaURI HAS_WEIGHT = createRyaUri("hasWeight");
+    private static final RyaURI HAS_HEIGHT = createRyaUri("hasHeight");
+    private static final RyaURI HAS_SSN = createRyaUri("hasSSN");
+    private static final RyaURI HAS_AGE = createRyaUri("hasAge");
+    private static final RyaURI HAS_INCOME = createRyaUri("hasIncome");
+    private static final RyaURI HAS_NUMBER_OF_CHILDREN = createRyaUri("hasNumberOfChildren");
+    private static final RyaURI HAS_LICENSE_NUMBER = createRyaUri("hasLicenseNumber");
+    private static final RyaURI HAS_EYE_COLOR = createRyaUri("hasEyeColor");
+    private static final RyaURI HAS_HAIR_COLOR = createRyaUri("hasHairColor");
+    private static final RyaURI HAS_DATE_OF_BIRTH = createRyaUri("hasDateOfBirth");
+    private static final RyaURI HAS_EXPIRATION_DATE = createRyaUri("hasExpirationDate");
+    private static final RyaURI HAS_GLASSES = createRyaUri("hasGlasses");
+    private static final RyaURI HAS_EMAIL_ADDRESS = createRyaUri("hasEmailAddress");
+    private static final RyaURI HAS_ATTRIBUTE_SPACE = createRyaUri("has Attribute Space"); // NOTE(review): local name contains spaces, presumably to exercise URI encoding - confirm.
+    private static final RyaURI HAS_MOTTO = createRyaUri("hasMotto");
+    private static final RyaURI HAS_BLOOD_TYPE = createRyaUri("hasBloodType");
+    private static final RyaURI HAS_SEX = createRyaUri("hasSex");
+    private static final RyaURI HAS_ADDRESS = createRyaUri("hasAddress");
+    private static final RyaURI HAS_POSITION_TITLE = createRyaUri("hasPositionTitle");
+    private static final RyaURI HAS_WORK_ADDRESS = createRyaUri("hasWorkAddress");
+    private static final RyaURI HAS_EXTENSION = createRyaUri("hasExtension");
+    private static final RyaURI HAS_OFFICE_ROOM_NUMBER = createRyaUri("hasOfficeRoomNumber");
+
+    // Type URIs: the two explicit types (person and employee) given to the Bob entity.
+    private static final RyaURI PERSON_TYPE_URI = new RyaURI("urn:example/person");
+    private static final RyaURI EMPLOYEE_TYPE_URI = new RyaURI("urn:example/employee");
+
+    private static final Date NOW = new Date(); // Captured once at class initialization so the fixture and the date test inputs share one reference instant.
+
+    /**
+     * Creates a {@link RyaURI} for the specified local name.
+     * @param localName the URI's local name.
+     * @return the {@link RyraURI}.
+     */
+    private static RyaURI createRyaUri(final String localName) {
+        return createRyaUri(NAMESPACE, localName);
+    }
+
+    /**
+     * Creates a {@link RyaURI} for the specified local name.
+     * @param namespace the namespace.
+     * @param localName the URI's local name.
+     * @return the {@link RyraURI}.
+     */
+    private static RyaURI createRyaUri(final String namespace, final String localName) {
+        return RdfToRyaConversions.convertURI(VALUE_FACTORY.createURI(namespace, localName));
+    }
+
+    /**
+     * Builds the reference "Bob" {@link Entity} used by the tests. Bob is
+     * explicitly typed as both a person and an employee and carries one
+     * property for every attribute URI declared in this class.
+     * @return the fully populated Bob {@link Entity}.
+     */
+    private static Entity createBobEntity() {
+        // Bob was born 40 years before the shared reference instant.
+        final DateTime dateOfBirth = new DateTime(NOW.getTime()).minusYears(40);
+        final URIImpl emailAddress = new URIImpl("mailto:bob.smitch00@gmail.com");
+
+        final Builder bob = Entity.builder();
+        bob.setSubject(BOB);
+        bob.setExplicitType(PERSON_TYPE_URI);
+        bob.setExplicitType(EMPLOYEE_TYPE_URI);
+
+        // Person properties.
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_WEIGHT, floatRyaType(250.75f)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_HEIGHT, doubleRyaType(72.5)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_SSN, stringRyaType("123-45-6789")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_AGE, shortRyaType((short) 40)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_INCOME, intRyaType(50000)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_NUMBER_OF_CHILDREN, byteRyaType((byte) 2)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_LICENSE_NUMBER, longRyaType(123456789012L)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_EYE_COLOR, stringRyaType("blue")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_HAIR_COLOR, stringRyaType("brown")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_DATE_OF_BIRTH, dateRyaType(dateOfBirth)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_EXPIRATION_DATE, dateRyaType(NOW)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_GLASSES, booleanRyaType(true)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_EMAIL_ADDRESS, uriRyaType(emailAddress)));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_ATTRIBUTE_SPACE, stringRyaType("attribute space")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_MOTTO, stringRyaType("!@#*\\&%20^ smörgåsbord")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_BLOOD_TYPE, stringRyaType("A+ blood type")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_SEX, stringRyaType("M")));
+        bob.setProperty(PERSON_TYPE_URI, new Property(HAS_ADDRESS, stringRyaType("123 Fake St. Washington, DC 20024")));
+
+        // Employee properties.
+        bob.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_POSITION_TITLE, stringRyaType("Assistant to the Regional Manager")));
+        bob.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_WORK_ADDRESS, stringRyaType("987 Fake Office Rd. Washington, DC 20024")));
+        bob.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_EXTENSION, shortRyaType((short) 555)));
+        bob.setProperty(EMPLOYEE_TYPE_URI, new Property(HAS_OFFICE_ROOM_NUMBER, shortRyaType((short) 9999)));
+
+        return bob.build();
+    }
+
+    /**
+     * Builds the person {@link Type}, identified by {@code PERSON_TYPE_URI},
+     * whose property names are the person attribute URIs.
+     * @return the person {@link Type}.
+     */
+    private static Type createPersonType() {
+        final ImmutableSet<RyaURI> personPropertyNames = ImmutableSet.of(
+            HAS_WEIGHT,
+            HAS_HEIGHT,
+            HAS_SSN,
+            HAS_AGE,
+            HAS_INCOME,
+            HAS_NUMBER_OF_CHILDREN,
+            HAS_LICENSE_NUMBER,
+            HAS_EYE_COLOR,
+            HAS_HAIR_COLOR,
+            HAS_DATE_OF_BIRTH,
+            HAS_EXPIRATION_DATE,
+            HAS_GLASSES,
+            HAS_EMAIL_ADDRESS,
+            HAS_ATTRIBUTE_SPACE,
+            HAS_MOTTO,
+            HAS_BLOOD_TYPE,
+            HAS_SEX,
+            HAS_ADDRESS
+        );
+        return new Type(PERSON_TYPE_URI, personPropertyNames);
+    }
+
+    /**
+     * Builds the employee {@link Type}, identified by
+     * {@code EMPLOYEE_TYPE_URI}, whose property names are the employee
+     * attribute URIs.
+     * @return the employee {@link Type}.
+     */
+    private static Type createEmployeeType() {
+        final ImmutableSet<RyaURI> employeePropertyNames = ImmutableSet.of(
+            HAS_POSITION_TITLE,
+            HAS_WORK_ADDRESS,
+            HAS_EXTENSION,
+            HAS_OFFICE_ROOM_NUMBER
+        );
+        return new Type(EMPLOYEE_TYPE_URI, employeePropertyNames);
+    }
+
+    /**
+     * Formats the failure message reported when a tolerance comparison does
+     * not produce the expected result.
+     * @param originalValue the value the test value was compared against.
+     * @param testValue the value that was tested.
+     * @param expected {@code true} if the values were expected to be
+     * considered almost equal.
+     * @param actual {@code true} if the values were actually considered
+     * almost equal.
+     * @param tolerance the {@link Tolerance} used for the comparison. Must not
+     * be {@code null} since its {@code toString()} is invoked directly.
+     * @return the formatted message.
+     */
+    private static String createErrorMessage(final Object originalValue, final Object testValue, final boolean expected, final boolean actual, final Tolerance tolerance) {
+        final String expectation = expected ? "" : "NOT ";
+        final String outcome = actual ? "was" : "wasn't";
+
+        final StringBuilder sb = new StringBuilder();
+        sb.append("The test value \"").append(testValue).append("\" was ");
+        sb.append(expectation).append("supposed to be almost equals to \"").append(originalValue);
+        sb.append("\" when the tolerance was \"").append(tolerance.toString());
+        sb.append("\" but ").append(outcome).append(".");
+        return sb.toString();
+    }
+
+    /**
+     * An exact copy of an entity must be reported as a duplicate of it.
+     */
+    @Test
+    public void testCompareEntities() throws SmartUriException, ConfigurationException {
+        final Entity original = createBobEntity();
+        final Entity copy = new Builder(original).build();
+
+        final DuplicateDataDetector detector = new DuplicateDataDetector();
+        assertTrue(detector.compareEntities(original, copy));
+    }
+
+    /**
+     * The Smart URIs of an entity and of its exact copy must be reported as
+     * duplicates.
+     */
+    @Test
+    public void testCompareSmartUris() throws SmartUriException, ConfigurationException {
+        final Entity original = createBobEntity();
+        final Entity copy = new Builder(original).build();
+
+        final DuplicateDataDetector detector = new DuplicateDataDetector();
+        assertTrue(detector.compareSmartUris(original.getSmartUri(), copy.getSmartUri()));
+    }
+
+    /**
+     * Two entities that differ only in their subject are still expected to be
+     * reported as duplicates.
+     */
+    @Test
+    public void testEntitySubjectsDifferent() throws SmartUriException, ConfigurationException {
+        final Entity bob = createBobEntity();
+        final Entity susan = new Builder(bob)
+            .setSubject(createRyaUri("Susan"))
+            .build();
+
+        final DuplicateDataDetector detector = new DuplicateDataDetector();
+        assertTrue(detector.compareEntities(bob, susan));
+    }
+
+    /**
+     * An entity that has an explicit type the other entity lacks must not be
+     * reported as a duplicate.
+     */
+    @Test
+    public void testEntityMissingType() throws SmartUriException, ConfigurationException {
+        final Entity bob = createBobEntity();
+        final Entity bobWithExtraType = new Builder(bob)
+            .setExplicitType(new RyaURI("urn:example/manager"))
+            .build();
+
+        final DuplicateDataDetector detector = new DuplicateDataDetector();
+        assertFalse(detector.compareEntities(bob, bobWithExtraType));
+    }
+
+    /**
+     * An entity that is missing one of the other entity's properties must not
+     * be reported as a duplicate.
+     */
+    @Test
+    public void testEntityMissingProperty() throws SmartUriException, ConfigurationException {
+        final Entity bob = createBobEntity();
+
+        // Copy Bob and drop his SSN so the copy has one property fewer.
+        final Builder withoutSsn = new Builder(bob);
+        withoutSsn.unsetProperty(PERSON_TYPE_URI, HAS_SSN);
+        final Entity bobWithoutSsn = withoutSsn.build();
+
+        final DuplicateDataDetector detector = new DuplicateDataDetector();
+        assertFalse(detector.compareEntities(bob, bobWithoutSsn));
+    }
+
+    /**
+     * A default-constructed {@link DuplicateDataConfig} must expose a value
+     * for every per-type tolerance, the equivalent terms map and the
+     * detection-enabled flag.
+     */
+    @Test
+    public void testReadConfigFile() throws SmartUriException, ConfigurationException {
+        final DuplicateDataConfig config = new DuplicateDataConfig();
+
+        // One tolerance per supported data type.
+        assertNotNull(config.getBooleanTolerance());
+        assertNotNull(config.getByteTolerance());
+        assertNotNull(config.getDateTolerance());
+        assertNotNull(config.getDoubleTolerance());
+        assertNotNull(config.getFloatTolerance());
+        assertNotNull(config.getIntegerTolerance());
+        assertNotNull(config.getLongTolerance());
+        assertNotNull(config.getShortTolerance());
+        assertNotNull(config.getStringTolerance());
+        assertNotNull(config.getUriTolerance());
+
+        // Remaining settings.
+        assertNotNull(config.getEquivalentTermsMap());
+        assertNotNull(config.isDetectionEnabled());
+    }
+
+    /**
+     * Checks the boolean {@code HAS_GLASSES} property (Bob's value is
+     * {@code true}) against several difference and percentage tolerances.
+     * Each {@link TestInput} pairs a candidate value and a tolerance with the
+     * expected comparison result.
+     */
+    @Test
+    public void testBooleanProperty() throws SmartUriException {
+        System.out.println("Boolean Property Test");
+        final ImmutableList.Builder<TestInput> inputs = ImmutableList.<TestInput>builder();
+
+        // Difference tolerance of 0.0
+        final Tolerance difference0 = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(false, difference0, false));
+        inputs.add(new TestInput(true, difference0, true)); // Same as Bob's value
+        // Difference tolerance of 1.0
+        final Tolerance difference1 = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(false, difference1, true));
+        inputs.add(new TestInput(true, difference1, true)); // Same as Bob's value
+        // Difference tolerance of 2.0
+        final Tolerance difference2 = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(false, difference2, true));
+        inputs.add(new TestInput(true, difference2, true)); // Same as Bob's value
+
+        // Percentage tolerance of 0.0%
+        final Tolerance percent0 = new Tolerance(0.00, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(false, percent0, false));
+        inputs.add(new TestInput(true, percent0, true)); // Same as Bob's value
+        // Percentage tolerance of 1.0%
+        final Tolerance percent1 = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(false, percent1, true));
+        inputs.add(new TestInput(true, percent1, true)); // Same as Bob's value
+        // Percentage tolerance of 100.0%
+        final Tolerance percent100 = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(false, percent100, true));
+        inputs.add(new TestInput(true, percent100, true)); // Same as Bob's value
+
+        final ImmutableList<TestInput> testInputs = inputs.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_GLASSES);
+    }
+
+    /**
+     * Checks the byte {@code HAS_NUMBER_OF_CHILDREN} property (Bob's value is
+     * {@code 2}) against several difference and percentage tolerances. Each
+     * {@link TestInput} pairs a candidate value and a tolerance with the
+     * expected comparison result.
+     */
+    @Test
+    public void testByteProperty() throws SmartUriException {
+        System.out.println("Byte Property Test");
+        final ImmutableList.Builder<TestInput> inputs = ImmutableList.<TestInput>builder();
+
+        // Difference tolerance of 0.0
+        final Tolerance difference0 = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(Byte.MIN_VALUE, difference0, false));
+        inputs.add(new TestInput((byte) 0xff, difference0, false));
+        inputs.add(new TestInput((byte) 0x00, difference0, false));
+        inputs.add(new TestInput((byte) 0x01, difference0, false));
+        inputs.add(new TestInput((byte) 0x02, difference0, true)); // Same as Bob's value
+        inputs.add(new TestInput((byte) 0x03, difference0, false));
+        inputs.add(new TestInput((byte) 0x04, difference0, false));
+        inputs.add(new TestInput((byte) 0x05, difference0, false));
+        inputs.add(new TestInput((byte) 0x10, difference0, false));
+        inputs.add(new TestInput(Byte.MAX_VALUE, difference0, false));
+        // Difference tolerance of 1.0
+        final Tolerance difference1 = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(Byte.MIN_VALUE, difference1, false));
+        inputs.add(new TestInput((byte) 0xff, difference1, false));
+        inputs.add(new TestInput((byte) 0x00, difference1, false));
+        inputs.add(new TestInput((byte) 0x01, difference1, true));
+        inputs.add(new TestInput((byte) 0x02, difference1, true)); // Same as Bob's value
+        inputs.add(new TestInput((byte) 0x03, difference1, true));
+        inputs.add(new TestInput((byte) 0x04, difference1, false));
+        inputs.add(new TestInput((byte) 0x05, difference1, false));
+        inputs.add(new TestInput((byte) 0x10, difference1, false));
+        inputs.add(new TestInput(Byte.MAX_VALUE, difference1, false));
+        // Difference tolerance of 2.0
+        final Tolerance difference2 = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(Byte.MIN_VALUE, difference2, false));
+        inputs.add(new TestInput((byte) 0xff, difference2, false));
+        inputs.add(new TestInput((byte) 0x00, difference2, true));
+        inputs.add(new TestInput((byte) 0x01, difference2, true));
+        inputs.add(new TestInput((byte) 0x02, difference2, true)); // Same as Bob's value
+        inputs.add(new TestInput((byte) 0x03, difference2, true));
+        inputs.add(new TestInput((byte) 0x04, difference2, true));
+        inputs.add(new TestInput((byte) 0x05, difference2, false));
+        inputs.add(new TestInput((byte) 0x10, difference2, false));
+        inputs.add(new TestInput(Byte.MAX_VALUE, difference2, false));
+
+        // Percentage tolerance of 0.0%
+        final Tolerance percent0 = new Tolerance(0.00, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(Byte.MIN_VALUE, percent0, false));
+        inputs.add(new TestInput((byte) 0xff, percent0, false));
+        inputs.add(new TestInput((byte) 0x00, percent0, false));
+        inputs.add(new TestInput((byte) 0x01, percent0, false));
+        inputs.add(new TestInput((byte) 0x02, percent0, true)); // Same as Bob's value
+        inputs.add(new TestInput((byte) 0x03, percent0, false));
+        inputs.add(new TestInput((byte) 0x04, percent0, false));
+        inputs.add(new TestInput((byte) 0x05, percent0, false));
+        inputs.add(new TestInput((byte) 0x10, percent0, false));
+        inputs.add(new TestInput(Byte.MAX_VALUE, percent0, false));
+        // Percentage tolerance of 50.0%
+        final Tolerance percent50 = new Tolerance(0.50, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(Byte.MIN_VALUE, percent50, false));
+        inputs.add(new TestInput((byte) 0xff, percent50, false));
+        inputs.add(new TestInput((byte) 0x00, percent50, false));
+        inputs.add(new TestInput((byte) 0x01, percent50, true));
+        inputs.add(new TestInput((byte) 0x02, percent50, true)); // Same as Bob's value
+        inputs.add(new TestInput((byte) 0x03, percent50, true));
+        inputs.add(new TestInput((byte) 0x04, percent50, false));
+        inputs.add(new TestInput((byte) 0x05, percent50, false));
+        inputs.add(new TestInput((byte) 0x10, percent50, false));
+        inputs.add(new TestInput(Byte.MAX_VALUE, percent50, false));
+        // Percentage tolerance of 100.0%
+        final Tolerance percent100 = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(Byte.MIN_VALUE, percent100, true));
+        inputs.add(new TestInput((byte) 0xff, percent100, true));
+        inputs.add(new TestInput((byte) 0x00, percent100, true));
+        inputs.add(new TestInput((byte) 0x01, percent100, true));
+        inputs.add(new TestInput((byte) 0x02, percent100, true)); // Same as Bob's value
+        inputs.add(new TestInput((byte) 0x03, percent100, true));
+        inputs.add(new TestInput((byte) 0x04, percent100, true));
+        inputs.add(new TestInput((byte) 0x05, percent100, true));
+        inputs.add(new TestInput((byte) 0x10, percent100, true));
+        inputs.add(new TestInput(Byte.MAX_VALUE, percent100, true));
+
+        final ImmutableList<TestInput> testInputs = inputs.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_NUMBER_OF_CHILDREN);
+    }
+
+    @Test
+    public void testDateProperty() throws SmartUriException {
+        System.out.println("Date Property Test");
+        final long ONE_YEAR_IN_MILLIS = 1000L * 60L * 60L * 24L * 365L;
+        final ImmutableList.Builder<TestInput> builder = ImmutableList.<TestInput>builder();
+        // Tolerance 0.0
+        Tolerance tolerance = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(new Date(0L), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1), tolerance, false));
+        builder.add(new TestInput(NOW, tolerance, true)); // Equals value
+        builder.add(new TestInput(new Date(NOW.getTime() + 1), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 1.0
+        tolerance = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(new Date(0L), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1), tolerance, true));
+        builder.add(new TestInput(NOW, tolerance, true)); // Equals value
+        builder.add(new TestInput(new Date(NOW.getTime() + 1), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 2.0
+        tolerance = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(new Date(0L), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1), tolerance, true));
+        builder.add(new TestInput(NOW, tolerance, true)); // Equals value
+        builder.add(new TestInput(new Date(NOW.getTime() + 1), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+
+        // Tolerance 0.0%
+        tolerance = new Tolerance(0.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(new Date(0L), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1), tolerance, false));
+        builder.add(new TestInput(NOW, tolerance, true)); // Equals value
+        builder.add(new TestInput(new Date(NOW.getTime() + 1), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 3), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 999), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1001), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + 10000), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 1.0%
+        tolerance = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(new Date(0L), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() - ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 0.985)), tolerance, false));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 0.989)), tolerance, false));
+        // It's tricky near the exact threshold since it might create a fraction
+        // of a time which is rounded. Check if it's fraction and test it with
+        // the floor and ceiling values if it is. Otherwise, use the exact value
+        // if it is not a fraction.
+        final double lowerThresholdFloor = Math.floor(NOW.getTime() * 0.99);
+        final double lowerThresholdCeiling = Math.ceil(NOW.getTime() * 0.99);
+        // If the floor equals the ceiling then it's not a fraction.
+        if (lowerThresholdFloor != lowerThresholdCeiling) {
+           builder.add(new TestInput(new Date((long) lowerThresholdFloor), tolerance, false));
+        }
+        builder.add(new TestInput(new Date((long) lowerThresholdCeiling), tolerance, true));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 0.991)), tolerance, true));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 0.995)), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 10000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1001), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 999), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 3), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1), tolerance, true));
+        builder.add(new TestInput(NOW, tolerance, true)); // Equals value
+        builder.add(new TestInput(new Date(NOW.getTime() + 1), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 3), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 999), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1001), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 10000), tolerance, true));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 1.005)), tolerance, true));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 1.009)), tolerance, true));
+        // It's tricky near the exact threshold since it might create a fraction
+        // of a time which is rounded. Check if it's fraction and test it with
+        // the floor and ceiling values if it is. Otherwise, use the exact value
+        // if it is not a fraction.
+        final double upperThresholdFloor = Math.floor(NOW.getTime() * 1.01);
+        final double upperThresholdCeiling = Math.ceil(NOW.getTime() * 1.01);
+        builder.add(new TestInput(new Date((long) upperThresholdFloor), tolerance, true));
+        // If the floor equals the ceiling then it's not a fraction.
+        if (upperThresholdFloor != upperThresholdCeiling) {
+           builder.add(new TestInput(new Date((long) upperThresholdCeiling), tolerance, false));
+        }
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 1.011)), tolerance, false));
+        builder.add(new TestInput(new Date((long) (NOW.getTime() * 1.015)), tolerance, false));
+        builder.add(new TestInput(new Date(NOW.getTime() + ONE_YEAR_IN_MILLIS), tolerance, false));
+        builder.add(new TestInput(new Date(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 100.0%
+        tolerance = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(new Date(0L), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - ONE_YEAR_IN_MILLIS), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 10000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1001), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 999), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 3), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 2), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() - 1), tolerance, true));
+        builder.add(new TestInput(NOW, tolerance, true)); // Equals value
+        builder.add(new TestInput(new Date(NOW.getTime() + 1), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 3), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 999), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 1001), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 2000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + 10000), tolerance, true));
+        builder.add(new TestInput(new Date(NOW.getTime() + ONE_YEAR_IN_MILLIS), tolerance, true));
+        builder.add(new TestInput(new Date(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, true));
+
+        final ImmutableList<TestInput> testInputs = builder.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_EXPIRATION_DATE);
+    }
+
+    @Test
+    public void testDateTimeProperty() throws SmartUriException {
+        System.out.println("DateTime Property Test");
+        final DateTime dob = new DateTime(NOW).minusYears(40);
+        final long ONE_YEAR_IN_MILLIS = 1000L * 60L * 60L * 24L * 365L;
+        final ImmutableList.Builder<TestInput> builder = ImmutableList.<TestInput>builder();
+        // Tolerance 0.0
+        Tolerance tolerance = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(new DateTime(0L), tolerance, false));
+        builder.add(new TestInput(dob.minusYears(1), tolerance, false));
+        builder.add(new TestInput(dob.minus(10000), tolerance, false));
+        builder.add(new TestInput(dob.minus(2000), tolerance, false));
+        builder.add(new TestInput(dob.minus(1001), tolerance, false));
+        builder.add(new TestInput(dob.minus(1000), tolerance, false));
+        builder.add(new TestInput(dob.minus(999), tolerance, false));
+        builder.add(new TestInput(dob.minus(3), tolerance, false));
+        builder.add(new TestInput(dob.minus(2), tolerance, false));
+        builder.add(new TestInput(dob.minus(1), tolerance, false));
+        builder.add(new TestInput(dob, tolerance, true)); // Equals value
+        builder.add(new TestInput(dob.plus(1), tolerance, false));
+        builder.add(new TestInput(dob.plus(2), tolerance, false));
+        builder.add(new TestInput(dob.plus(3), tolerance, false));
+        builder.add(new TestInput(dob.plus(999), tolerance, false));
+        builder.add(new TestInput(dob.plus(1000), tolerance, false));
+        builder.add(new TestInput(dob.plus(1001), tolerance, false));
+        builder.add(new TestInput(dob.plus(2000), tolerance, false));
+        builder.add(new TestInput(dob.plus(10000), tolerance, false));
+        builder.add(new TestInput(dob.plusYears(1), tolerance, false));
+        builder.add(new TestInput(new DateTime(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 1.0
+        tolerance = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(new DateTime(0L), tolerance, false));
+        builder.add(new TestInput(dob.minusYears(1), tolerance, false));
+        builder.add(new TestInput(dob.minus(10000), tolerance, false));
+        builder.add(new TestInput(dob.minus(2000), tolerance, false));
+        builder.add(new TestInput(dob.minus(1001), tolerance, false));
+        builder.add(new TestInput(dob.minus(1000), tolerance, false));
+        builder.add(new TestInput(dob.minus(999), tolerance, false));
+        builder.add(new TestInput(dob.minus(3), tolerance, false));
+        builder.add(new TestInput(dob.minus(2), tolerance, false));
+        builder.add(new TestInput(dob.minus(1), tolerance, true));
+        builder.add(new TestInput(dob, tolerance, true)); // Equals value
+        builder.add(new TestInput(dob.plus(1), tolerance, true));
+        builder.add(new TestInput(dob.plus(2), tolerance, false));
+        builder.add(new TestInput(dob.plus(3), tolerance, false));
+        builder.add(new TestInput(dob.plus(999), tolerance, false));
+        builder.add(new TestInput(dob.plus(1000), tolerance, false));
+        builder.add(new TestInput(dob.plus(1001), tolerance, false));
+        builder.add(new TestInput(dob.plus(2000), tolerance, false));
+        builder.add(new TestInput(dob.plus(10000), tolerance, false));
+        builder.add(new TestInput(dob.plusYears(1), tolerance, false));
+        builder.add(new TestInput(new DateTime(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 2.0
+        tolerance = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(new DateTime(0L), tolerance, false));
+        builder.add(new TestInput(dob.minusYears(1), tolerance, false));
+        builder.add(new TestInput(dob.minus(10000), tolerance, false));
+        builder.add(new TestInput(dob.minus(2000), tolerance, false));
+        builder.add(new TestInput(dob.minus(1001), tolerance, false));
+        builder.add(new TestInput(dob.minus(1000), tolerance, false));
+        builder.add(new TestInput(dob.minus(999), tolerance, false));
+        builder.add(new TestInput(dob.minus(3), tolerance, false));
+        builder.add(new TestInput(dob.minus(2), tolerance, true));
+        builder.add(new TestInput(dob.minus(1), tolerance, true));
+        builder.add(new TestInput(dob, tolerance, true)); // Equals value
+        builder.add(new TestInput(dob.plus(1), tolerance, true));
+        builder.add(new TestInput(dob.plus(2), tolerance, true));
+        builder.add(new TestInput(dob.plus(3), tolerance, false));
+        builder.add(new TestInput(dob.plus(999), tolerance, false));
+        builder.add(new TestInput(dob.plus(1000), tolerance, false));
+        builder.add(new TestInput(dob.plus(1001), tolerance, false));
+        builder.add(new TestInput(dob.plus(2000), tolerance, false));
+        builder.add(new TestInput(dob.plus(10000), tolerance, false));
+        builder.add(new TestInput(dob.plusYears(1), tolerance, false));
+        builder.add(new TestInput(new DateTime(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+
+        // Tolerance 0.0%
+        tolerance = new Tolerance(0.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(new DateTime(0L), tolerance, false));
+        builder.add(new TestInput(dob.minusYears(1), tolerance, false));
+        builder.add(new TestInput(dob.minus(10000), tolerance, false));
+        builder.add(new TestInput(dob.minus(2000), tolerance, false));
+        builder.add(new TestInput(dob.minus(1001), tolerance, false));
+        builder.add(new TestInput(dob.minus(1000), tolerance, false));
+        builder.add(new TestInput(dob.minus(999), tolerance, false));
+        builder.add(new TestInput(dob.minus(3), tolerance, false));
+        builder.add(new TestInput(dob.minus(2), tolerance, false));
+        builder.add(new TestInput(dob.minus(1), tolerance, false));
+        builder.add(new TestInput(dob, tolerance, true)); // Equals value
+        builder.add(new TestInput(dob.plus(1), tolerance, false));
+        builder.add(new TestInput(dob.plus(2), tolerance, false));
+        builder.add(new TestInput(dob.plus(3), tolerance, false));
+        builder.add(new TestInput(dob.plus(999), tolerance, false));
+        builder.add(new TestInput(dob.plus(1000), tolerance, false));
+        builder.add(new TestInput(dob.plus(1001), tolerance, false));
+        builder.add(new TestInput(dob.plus(2000), tolerance, false));
+        builder.add(new TestInput(dob.plus(10000), tolerance, false));
+        builder.add(new TestInput(dob.plusYears(1), tolerance, false));
+        builder.add(new TestInput(new DateTime(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 1.0%
+        tolerance = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(new DateTime(0L), tolerance, false));
+        builder.add(new TestInput(dob.minusYears(1), tolerance, false));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 0.985)), tolerance, false));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 0.989)), tolerance, false));
+        // It's tricky near the exact threshold since it might create a fraction
+        // of a time which is rounded. Check if it's fraction and test it with
+        // the floor and ceiling values if it is. Otherwise, use the exact value
+        // if it is not a fraction.
+        final double lowerThresholdFloor = Math.floor(dob.getMillis() * 0.99);
+        final double lowerThresholdCeiling = Math.ceil(dob.getMillis() * 0.99);
+        // If the floor equals the ceiling then it's not a fraction.
+        if (lowerThresholdFloor != lowerThresholdCeiling) {
+           builder.add(new TestInput(new DateTime((long) lowerThresholdFloor), tolerance, false));
+        }
+        builder.add(new TestInput(new DateTime((long) lowerThresholdCeiling), tolerance, true));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 0.991)), tolerance, true));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 0.995)), tolerance, true));
+        builder.add(new TestInput(dob.minus(10000), tolerance, true));
+        builder.add(new TestInput(dob.minus(2000), tolerance, true));
+        builder.add(new TestInput(dob.minus(1001), tolerance, true));
+        builder.add(new TestInput(dob.minus(1000), tolerance, true));
+        builder.add(new TestInput(dob.minus(999), tolerance, true));
+        builder.add(new TestInput(dob.minus(3), tolerance, true));
+        builder.add(new TestInput(dob.minus(2), tolerance, true));
+        builder.add(new TestInput(dob.minus(1), tolerance, true));
+        builder.add(new TestInput(dob, tolerance, true)); // Equals value
+        builder.add(new TestInput(dob.plus(1), tolerance, true));
+        builder.add(new TestInput(dob.plus(2), tolerance, true));
+        builder.add(new TestInput(dob.plus(3), tolerance, true));
+        builder.add(new TestInput(dob.plus(999), tolerance, true));
+        builder.add(new TestInput(dob.plus(1000), tolerance, true));
+        builder.add(new TestInput(dob.plus(1001), tolerance, true));
+        builder.add(new TestInput(dob.plus(2000), tolerance, true));
+        builder.add(new TestInput(dob.plus(10000), tolerance, true));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 1.005)), tolerance, true));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 1.009)), tolerance, true));
+        // It's tricky near the exact threshold since it might create a fraction
+        // of a time which is rounded. Check if it's fraction and test it with
+        // the floor and ceiling values if it is. Otherwise, use the exact value
+        // if it is not a fraction.
+        final double upperThresholdFloor = Math.floor(dob.getMillis() * 1.01);
+        final double upperThresholdCeiling = Math.ceil(dob.getMillis() * 1.01);
+        builder.add(new TestInput(new DateTime((long) upperThresholdFloor), tolerance, true));
+        // If the floor equals the ceiling then it's not a fraction.
+        if (upperThresholdFloor != upperThresholdCeiling) {
+           builder.add(new TestInput(new DateTime((long) upperThresholdCeiling), tolerance, false));
+        }
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 1.011)), tolerance, false));
+        builder.add(new TestInput(new DateTime((long) (dob.getMillis() * 1.015)), tolerance, false));
+        builder.add(new TestInput(dob.plusYears(1), tolerance, false));
+        builder.add(new TestInput(new DateTime(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, false));
+        // Tolerance 100.0%
+        tolerance = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(new DateTime(0L), tolerance, true));
+        builder.add(new TestInput(dob.minusYears(1), tolerance, true));
+        builder.add(new TestInput(dob.minus(10000), tolerance, true));
+        builder.add(new TestInput(dob.minus(2000), tolerance, true));
+        builder.add(new TestInput(dob.minus(1001), tolerance, true));
+        builder.add(new TestInput(dob.minus(1000), tolerance, true));
+        builder.add(new TestInput(dob.minus(999), tolerance, true));
+        builder.add(new TestInput(dob.minus(3), tolerance, true));
+        builder.add(new TestInput(dob.minus(2), tolerance, true));
+        builder.add(new TestInput(dob.minus(1), tolerance, true));
+        builder.add(new TestInput(dob, tolerance, true)); // Equals value
+        builder.add(new TestInput(dob.plus(1), tolerance, true));
+        builder.add(new TestInput(dob.plus(2), tolerance, true));
+        builder.add(new TestInput(dob.plus(3), tolerance, true));
+        builder.add(new TestInput(dob.plus(999), tolerance, true));
+        builder.add(new TestInput(dob.plus(1000), tolerance, true));
+        builder.add(new TestInput(dob.plus(1001), tolerance, true));
+        builder.add(new TestInput(dob.plus(2000), tolerance, true));
+        builder.add(new TestInput(dob.plus(10000), tolerance, true));
+        builder.add(new TestInput(dob.plusYears(1), tolerance, true));
+        builder.add(new TestInput(new DateTime(Long.MAX_VALUE - ONE_YEAR_IN_MILLIS), tolerance, true));
+
+        final ImmutableList<TestInput> testInputs = builder.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_DATE_OF_BIRTH);
+    }
+
+    /**
+     * Builds the inputs for the double-valued height property and hands them
+     * to {@code testProperty} for {@code PERSON_TYPE_URI} / {@code HAS_HEIGHT}.
+     * <p>
+     * Candidate values are spread around 72.5, the value marked below as the
+     * one that equals the reference. They are checked with absolute
+     * (DIFFERENCE) tolerances of 0.0, 0.01 and 0.02 and with PERCENTAGE
+     * tolerances of 0%, 1% and 100%. The boolean given to each
+     * {@code TestInput} is the expected outcome for that candidate under that
+     * tolerance. For the 1% tolerance the expected matches run from 71.775 to
+     * 73.225, i.e. 72.5 minus/plus 0.725.
+     * <p>
+     * NOTE(review): whole numbers (71, 72, 73, 74, 100) are written as int
+     * literals, so they are handed to {@code TestInput} as ints rather than
+     * doubles - confirm that is intended.
+     * @throws SmartUriException propagated from {@code testProperty}.
+     */
+    @Test
+    public void testDoubleProperty() throws SmartUriException {
+        System.out.println("Double Property Test");
+        final ImmutableList.Builder<TestInput> builder = ImmutableList.<TestInput>builder();
+        // Tolerance 0.0
+        Tolerance tolerance = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        // NOTE(review): Double.MIN_VALUE is the smallest positive nonzero
+        // double (about 4.9e-324), not the most negative one. In every section
+        // below it therefore acts as a value just above 0.0. If a large
+        // negative extreme was intended, -Double.MAX_VALUE would be needed.
+        builder.add(new TestInput(Double.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0, tolerance, false));
+        builder.add(new TestInput(0.0, tolerance, false));
+        builder.add(new TestInput(0.01, tolerance, false));
+        builder.add(new TestInput(0.02, tolerance, false));
+        builder.add(new TestInput(0.1, tolerance, false));
+        builder.add(new TestInput(0.2, tolerance, false));
+        builder.add(new TestInput(1.0, tolerance, false));
+        builder.add(new TestInput(71, tolerance, false));
+        builder.add(new TestInput(72, tolerance, false));
+        builder.add(new TestInput(72.4, tolerance, false));
+        builder.add(new TestInput(72.47, tolerance, false));
+        builder.add(new TestInput(72.48, tolerance, false));
+        builder.add(new TestInput(72.49, tolerance, false));
+        builder.add(new TestInput(72.5, tolerance, true)); // Equals value
+        builder.add(new TestInput(72.51, tolerance, false));
+        builder.add(new TestInput(72.52, tolerance, false));
+        builder.add(new TestInput(72.53, tolerance, false));
+        builder.add(new TestInput(72.6, tolerance, false));
+        builder.add(new TestInput(73, tolerance, false));
+        builder.add(new TestInput(74, tolerance, false));
+        builder.add(new TestInput(100, tolerance, false));
+        builder.add(new TestInput(Double.MAX_VALUE, tolerance, false));
+        // Tolerance 0.01
+        tolerance = new Tolerance(0.01, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Double.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0, tolerance, false));
+        builder.add(new TestInput(0.0, tolerance, false));
+        builder.add(new TestInput(0.01, tolerance, false));
+        builder.add(new TestInput(0.02, tolerance, false));
+        builder.add(new TestInput(0.1, tolerance, false));
+        builder.add(new TestInput(0.2, tolerance, false));
+        builder.add(new TestInput(1.0, tolerance, false));
+        builder.add(new TestInput(71, tolerance, false));
+        builder.add(new TestInput(72, tolerance, false));
+        builder.add(new TestInput(72.4, tolerance, false));
+        builder.add(new TestInput(72.47, tolerance, false));
+        builder.add(new TestInput(72.48, tolerance, false));
+        builder.add(new TestInput(72.49, tolerance, true));
+        builder.add(new TestInput(72.5, tolerance, true)); // Equals value
+        builder.add(new TestInput(72.51, tolerance, true));
+        builder.add(new TestInput(72.52, tolerance, false));
+        builder.add(new TestInput(72.53, tolerance, false));
+        builder.add(new TestInput(72.6, tolerance, false));
+        builder.add(new TestInput(73, tolerance, false));
+        builder.add(new TestInput(74, tolerance, false));
+        builder.add(new TestInput(100, tolerance, false));
+        builder.add(new TestInput(Double.MAX_VALUE, tolerance, false));
+        // Tolerance 0.02
+        tolerance = new Tolerance(0.02, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Double.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0, tolerance, false));
+        builder.add(new TestInput(0.0, tolerance, false));
+        builder.add(new TestInput(0.01, tolerance, false));
+        builder.add(new TestInput(0.02, tolerance, false));
+        builder.add(new TestInput(0.1, tolerance, false));
+        builder.add(new TestInput(0.2, tolerance, false));
+        builder.add(new TestInput(1.0, tolerance, false));
+        builder.add(new TestInput(71, tolerance, false));
+        builder.add(new TestInput(72, tolerance, false));
+        builder.add(new TestInput(72.4, tolerance, false));
+        builder.add(new TestInput(72.47, tolerance, false));
+        builder.add(new TestInput(72.48, tolerance, true));
+        builder.add(new TestInput(72.49, tolerance, true));
+        builder.add(new TestInput(72.5, tolerance, true)); // Equals value
+        builder.add(new TestInput(72.51, tolerance, true));
+        builder.add(new TestInput(72.52, tolerance, true));
+        builder.add(new TestInput(72.53, tolerance, false));
+        builder.add(new TestInput(72.6, tolerance, false));
+        builder.add(new TestInput(73, tolerance, false));
+        builder.add(new TestInput(74, tolerance, false));
+        builder.add(new TestInput(100, tolerance, false));
+        builder.add(new TestInput(Double.MAX_VALUE, tolerance, false));
+
+        // Tolerance 0%
+        // The percentage sections also probe 71.774/71.775/71.776 and
+        // 73.224/73.225/73.226, which straddle 99% and 101% of 72.5.
+        tolerance = new Tolerance(0.0, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Double.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0, tolerance, false));
+        builder.add(new TestInput(0.0, tolerance, false));
+        builder.add(new TestInput(0.01, tolerance, false));
+        builder.add(new TestInput(0.02, tolerance, false));
+        builder.add(new TestInput(0.1, tolerance, false));
+        builder.add(new TestInput(0.2, tolerance, false));
+        builder.add(new TestInput(1.0, tolerance, false));
+        builder.add(new TestInput(71, tolerance, false));
+        builder.add(new TestInput(71.774, tolerance, false));
+        builder.add(new TestInput(71.775, tolerance, false));
+        builder.add(new TestInput(71.776, tolerance, false));
+        builder.add(new TestInput(72, tolerance, false));
+        builder.add(new TestInput(72.4, tolerance, false));
+        builder.add(new TestInput(72.47, tolerance, false));
+        builder.add(new TestInput(72.48, tolerance, false));
+        builder.add(new TestInput(72.49, tolerance, false));
+        builder.add(new TestInput(72.5, tolerance, true)); // Equals value
+        builder.add(new TestInput(72.51, tolerance, false));
+        builder.add(new TestInput(72.52, tolerance, false));
+        builder.add(new TestInput(72.53, tolerance, false));
+        builder.add(new TestInput(72.6, tolerance, false));
+        builder.add(new TestInput(73, tolerance, false));
+        builder.add(new TestInput(73.224, tolerance, false));
+        builder.add(new TestInput(73.225, tolerance, false));
+        builder.add(new TestInput(73.226, tolerance, false));
+        builder.add(new TestInput(74, tolerance, false));
+        builder.add(new TestInput(100, tolerance, false));
+        builder.add(new TestInput(Double.MAX_VALUE, tolerance, false));
+        // Tolerance 1%
+        tolerance = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Double.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0, tolerance, false));
+        builder.add(new TestInput(0.0, tolerance, false));
+        builder.add(new TestInput(0.01, tolerance, false));
+        builder.add(new TestInput(0.02, tolerance, false));
+        builder.add(new TestInput(0.1, tolerance, false));
+        builder.add(new TestInput(0.2, tolerance, false));
+        builder.add(new TestInput(1.0, tolerance, false));
+        builder.add(new TestInput(71, tolerance, false));
+        builder.add(new TestInput(71.774, tolerance, false));
+        builder.add(new TestInput(71.775, tolerance, true));
+        builder.add(new TestInput(71.776, tolerance, true));
+        builder.add(new TestInput(72, tolerance, true));
+        builder.add(new TestInput(72.4, tolerance, true));
+        builder.add(new TestInput(72.47, tolerance, true));
+        builder.add(new TestInput(72.48, tolerance, true));
+        builder.add(new TestInput(72.49, tolerance, true));
+        builder.add(new TestInput(72.5, tolerance, true)); // Equals value
+        builder.add(new TestInput(72.51, tolerance, true));
+        builder.add(new TestInput(72.52, tolerance, true));
+        builder.add(new TestInput(72.53, tolerance, true));
+        builder.add(new TestInput(72.6, tolerance, true));
+        builder.add(new TestInput(73, tolerance, true));
+        builder.add(new TestInput(73.224, tolerance, true));
+        builder.add(new TestInput(73.225, tolerance, true));
+        builder.add(new TestInput(73.226, tolerance, false));
+        builder.add(new TestInput(74, tolerance, false));
+        builder.add(new TestInput(100, tolerance, false));
+        builder.add(new TestInput(Double.MAX_VALUE, tolerance, false));
+        // Tolerance 100%
+        tolerance = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Double.MIN_VALUE, tolerance, true));
+        builder.add(new TestInput(-1.0, tolerance, true));
+        builder.add(new TestInput(0.0, tolerance, true));
+        builder.add(new TestInput(0.01, tolerance, true));
+        builder.add(new TestInput(0.02, tolerance, true));
+        builder.add(new TestInput(0.1, tolerance, true));
+        builder.add(new TestInput(0.2, tolerance, true));
+        builder.add(new TestInput(1.0, tolerance, true));
+        builder.add(new TestInput(71, tolerance, true));
+        builder.add(new TestInput(71.774, tolerance, true));
+        builder.add(new TestInput(71.775, tolerance, true));
+        builder.add(new TestInput(71.776, tolerance, true));
+        builder.add(new TestInput(72, tolerance, true));
+        builder.add(new TestInput(72.4, tolerance, true));
+        builder.add(new TestInput(72.47, tolerance, true));
+        builder.add(new TestInput(72.48, tolerance, true));
+        builder.add(new TestInput(72.49, tolerance, true));
+        builder.add(new TestInput(72.5, tolerance, true)); // Equals value
+        builder.add(new TestInput(72.51, tolerance, true));
+        builder.add(new TestInput(72.52, tolerance, true));
+        builder.add(new TestInput(72.53, tolerance, true));
+        builder.add(new TestInput(72.6, tolerance, true));
+        builder.add(new TestInput(73, tolerance, true));
+        builder.add(new TestInput(73.224, tolerance, true));
+        builder.add(new TestInput(73.225, tolerance, true));
+        builder.add(new TestInput(73.226, tolerance, true));
+        builder.add(new TestInput(74, tolerance, true));
+        builder.add(new TestInput(100, tolerance, true));
+        builder.add(new TestInput(Double.MAX_VALUE, tolerance, true));
+
+        final ImmutableList<TestInput> testInputs = builder.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_HEIGHT);
+    }
+
+    @Test
+    public void testFloatProperty() throws SmartUriException {
+        System.out.println("Float Property Test");
+        final ImmutableList.Builder<TestInput> builder = ImmutableList.<TestInput>builder();
+        // Tolerance 0.0
+        Tolerance tolerance = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Float.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0f, tolerance, false));
+        builder.add(new TestInput(0.0f, tolerance, false));
+        builder.add(new TestInput(0.01f, tolerance, false));
+        builder.add(new TestInput(0.02f, tolerance, false));
+        builder.add(new TestInput(0.1f, tolerance, false));
+        builder.add(new TestInput(0.2f, tolerance, false));
+        builder.add(new TestInput(1.0f, tolerance, false));
+        builder.add(new TestInput(250f, tolerance, false));
+        builder.add(new TestInput(250.7f, tolerance, false));
+        builder.add(new TestInput(250.72f, tolerance, false));
+        builder.add(new TestInput(250.73f, tolerance, false));
+        builder.add(new TestInput(250.74f, tolerance, false));
+        builder.add(new TestInput(250.75f, tolerance, true)); // Equals value
+        builder.add(new TestInput(250.76f, tolerance, false));
+        builder.add(new TestInput(250.77f, tolerance, false));
+        builder.add(new TestInput(250.78f, tolerance, false));
+        builder.add(new TestInput(250.8f, tolerance, false));
+        builder.add(new TestInput(251f, tolerance, false));
+        builder.add(new TestInput(300.0f, tolerance, false));
+        builder.add(new TestInput(Float.MAX_VALUE, tolerance, false));
+        // Tolerance 0.01
+        tolerance = new Tolerance(0.01, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Float.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0f, tolerance, false));
+        builder.add(new TestInput(0.0f, tolerance, false));
+        builder.add(new TestInput(0.01f, tolerance, false));
+        builder.add(new TestInput(0.02f, tolerance, false));
+        builder.add(new TestInput(0.1f, tolerance, false));
+        builder.add(new TestInput(0.2f, tolerance, false));
+        builder.add(new TestInput(1.0f, tolerance, false));
+        builder.add(new TestInput(250f, tolerance, false));
+        builder.add(new TestInput(250.7f, tolerance, false));
+        builder.add(new TestInput(250.72f, tolerance, false));
+        builder.add(new TestInput(250.73f, tolerance, false));
+        builder.add(new TestInput(250.74f, tolerance, true));
+        builder.add(new TestInput(250.75f, tolerance, true)); // Equals value
+        builder.add(new TestInput(250.76f, tolerance, true));
+        builder.add(new TestInput(250.77f, tolerance, false));
+        builder.add(new TestInput(250.78f, tolerance, false));
+        builder.add(new TestInput(250.8f, tolerance, false));
+        builder.add(new TestInput(251f, tolerance, false));
+        builder.add(new TestInput(300.0f, tolerance, false));
+        builder.add(new TestInput(Float.MAX_VALUE, tolerance, false));
+        // Tolerance 0.02
+        tolerance = new Tolerance(0.02, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Float.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0f, tolerance, false));
+        builder.add(new TestInput(0.0f, tolerance, false));
+        builder.add(new TestInput(0.01f, tolerance, false));
+        builder.add(new TestInput(0.02f, tolerance, false));
+        builder.add(new TestInput(0.1f, tolerance, false));
+        builder.add(new TestInput(0.2f, tolerance, false));
+        builder.add(new TestInput(1.0f, tolerance, false));
+        builder.add(new TestInput(250f, tolerance, false));
+        builder.add(new TestInput(250.7f, tolerance, false));
+        builder.add(new TestInput(250.72f, tolerance, false));
+        builder.add(new TestInput(250.73f, tolerance, true));
+        builder.add(new TestInput(250.74f, tolerance, true));
+        builder.add(new TestInput(250.75f, tolerance, true)); // Equals value
+        builder.add(new TestInput(250.76f, tolerance, true));
+        builder.add(new TestInput(250.77f, tolerance, true));
+        builder.add(new TestInput(250.78f, tolerance, false));
+        builder.add(new TestInput(250.8f, tolerance, false));
+        builder.add(new TestInput(251f, tolerance, false));
+        builder.add(new TestInput(300.0f, tolerance, false));
+        builder.add(new TestInput(Float.MAX_VALUE, tolerance, false));
+
+        // Tolerance 0.0%
+        tolerance = new Tolerance(0.0, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Float.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0f, tolerance, false));
+        builder.add(new TestInput(0.0f, tolerance, false));
+        builder.add(new TestInput(0.01f, tolerance, false));
+        builder.add(new TestInput(0.02f, tolerance, false));
+        builder.add(new TestInput(0.1f, tolerance, false));
+        builder.add(new TestInput(0.2f, tolerance, false));
+        builder.add(new TestInput(1.0f, tolerance, false));
+        builder.add(new TestInput(248.2424f, tolerance, false));
+        builder.add(new TestInput(248.2425f, tolerance, false));
+        builder.add(new TestInput(248.2426f, tolerance, false));
+        builder.add(new TestInput(250f, tolerance, false));
+        builder.add(new TestInput(250.7f, tolerance, false));
+        builder.add(new TestInput(250.72f, tolerance, false));
+        builder.add(new TestInput(250.73f, tolerance, false));
+        builder.add(new TestInput(250.74f, tolerance, false));
+        builder.add(new TestInput(250.75f, tolerance, true)); // Equals value
+        builder.add(new TestInput(250.76f, tolerance, false));
+        builder.add(new TestInput(250.77f, tolerance, false));
+        builder.add(new TestInput(250.78f, tolerance, false));
+        builder.add(new TestInput(250.8f, tolerance, false));
+        builder.add(new TestInput(251f, tolerance, false));
+        builder.add(new TestInput(253.2574f, tolerance, false));
+        builder.add(new TestInput(253.2575f, tolerance, false));
+        builder.add(new TestInput(253.2576f, tolerance, false));
+        builder.add(new TestInput(300.0f, tolerance, false));
+        builder.add(new TestInput(Float.MAX_VALUE, tolerance, false));
+        // Tolerance 1.0%
+        tolerance = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Float.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1.0f, tolerance, false));
+        builder.add(new TestInput(0.0f, tolerance, false));
+        builder.add(new TestInput(0.01f, tolerance, false));
+        builder.add(new TestInput(0.02f, tolerance, false));
+        builder.add(new TestInput(0.1f, tolerance, false));
+        builder.add(new TestInput(0.2f, tolerance, false));
+        builder.add(new TestInput(1.0f, tolerance, false));
+        builder.add(new TestInput(248.2424f, tolerance, false));
+        builder.add(new TestInput(248.2425f, tolerance, true));
+        builder.add(new TestInput(248.2426f, tolerance, true));
+        builder.add(new TestInput(250f, tolerance, true));
+        builder.add(new TestInput(250.7f, tolerance, true));
+        builder.add(new TestInput(250.72f, tolerance, true));
+        builder.add(new TestInput(250.73f, tolerance, true));
+        builder.add(new TestInput(250.74f, tolerance, true));
+        builder.add(new TestInput(250.75f, tolerance, true)); // Equals value
+        builder.add(new TestInput(250.76f, tolerance, true));
+        builder.add(new TestInput(250.77f, tolerance, true));
+        builder.add(new TestInput(250.78f, tolerance, true));
+        builder.add(new TestInput(250.8f, tolerance, true));
+        builder.add(new TestInput(251f, tolerance, true));
+        builder.add(new TestInput(253.2574f, tolerance, true));
+        builder.add(new TestInput(253.2575f, tolerance, true));
+        builder.add(new TestInput(253.2576f, tolerance, false));
+        builder.add(new TestInput(300.0f, tolerance, false));
+        builder.add(new TestInput(Float.MAX_VALUE, tolerance, false));
+        // Tolerance 100.0%
+        tolerance = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Float.MIN_VALUE, tolerance, true));
+        builder.add(new TestInput(-1.0f, tolerance, true));
+        builder.add(new TestInput(0.0f, tolerance, true));
+        builder.add(new TestInput(0.01f, tolerance, true));
+        builder.add(new TestInput(0.02f, tolerance, true));
+        builder.add(new TestInput(0.1f, tolerance, true));
+        builder.add(new TestInput(0.2f, tolerance, true));
+        builder.add(new TestInput(1.0f, tolerance, true));
+        builder.add(new TestInput(248.2424f, tolerance, true));
+        builder.add(new TestInput(248.2425f, tolerance, true));
+        builder.add(new TestInput(248.2426f, tolerance, true));
+        builder.add(new TestInput(250f, tolerance, true));
+        builder.add(new TestInput(250.7f, tolerance, true));
+        builder.add(new TestInput(250.72f, tolerance, true));
+        builder.add(new TestInput(250.73f, tolerance, true));
+        builder.add(new TestInput(250.74f, tolerance, true));
+        builder.add(new TestInput(250.75f, tolerance, true)); // Equals value
+        builder.add(new TestInput(250.76f, tolerance, true));
+        builder.add(new TestInput(250.77f, tolerance, true));
+        builder.add(new TestInput(250.78f, tolerance, true));
+        builder.add(new TestInput(250.8f, tolerance, true));
+        builder.add(new TestInput(251f, tolerance, true));
+        builder.add(new TestInput(253.2574f, tolerance, true));
+        builder.add(new TestInput(253.2575f, tolerance, true));
+        builder.add(new TestInput(253.2576f, tolerance, true));
+        builder.add(new TestInput(300.0f, tolerance, true));
+        builder.add(new TestInput(Float.MAX_VALUE, tolerance, true));
+
+        final ImmutableList<TestInput> testInputs = builder.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_WEIGHT);
+    }
+
+    @Test
+    public void testIntegerProperty() throws SmartUriException {
+        System.out.println("Integer Property Test");
+        final ImmutableList.Builder<TestInput> builder = ImmutableList.<TestInput>builder();
+        // Tolerance 0.0
+        Tolerance tolerance = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Integer.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1, tolerance, false));
+        builder.add(new TestInput(0, tolerance, false));
+        builder.add(new TestInput(1, tolerance, false));
+        builder.add(new TestInput(49997, tolerance, false));
+        builder.add(new TestInput(49998, tolerance, false));
+        builder.add(new TestInput(49999, tolerance, false));
+        builder.add(new TestInput(50000, tolerance, true)); // Equals value
+        builder.add(new TestInput(50001, tolerance, false));
+        builder.add(new TestInput(50002, tolerance, false));
+        builder.add(new TestInput(50003, tolerance, false));
+        builder.add(new TestInput(60000, tolerance, false));
+        builder.add(new TestInput(Integer.MAX_VALUE, tolerance, false));
+        // Tolerance 1.0
+        tolerance = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Integer.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1, tolerance, false));
+        builder.add(new TestInput(0, tolerance, false));
+        builder.add(new TestInput(1, tolerance, false));
+        builder.add(new TestInput(49997, tolerance, false));
+        builder.add(new TestInput(49998, tolerance, false));
+        builder.add(new TestInput(49999, tolerance, true));
+        builder.add(new TestInput(50000, tolerance, true)); // Equals value
+        builder.add(new TestInput(50001, tolerance, true));
+        builder.add(new TestInput(50002, tolerance, false));
+        builder.add(new TestInput(50003, tolerance, false));
+        builder.add(new TestInput(60000, tolerance, false));
+        builder.add(new TestInput(Integer.MAX_VALUE, tolerance, false));
+        // Tolerance 2.0
+        tolerance = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Integer.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1, tolerance, false));
+        builder.add(new TestInput(0, tolerance, false));
+        builder.add(new TestInput(1, tolerance, false));
+        builder.add(new TestInput(49997, tolerance, false));
+        builder.add(new TestInput(49998, tolerance, true));
+        builder.add(new TestInput(49999, tolerance, true));
+        builder.add(new TestInput(50000, tolerance, true)); // Equals value
+        builder.add(new TestInput(50001, tolerance, true));
+        builder.add(new TestInput(50002, tolerance, true));
+        builder.add(new TestInput(50003, tolerance, false));
+        builder.add(new TestInput(60000, tolerance, false));
+        builder.add(new TestInput(Integer.MAX_VALUE, tolerance, false));
+
+        // Tolerance 0.0%
+        tolerance = new Tolerance(0.0, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Integer.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1, tolerance, false));
+        builder.add(new TestInput(0, tolerance, false));
+        builder.add(new TestInput(1, tolerance, false));
+        builder.add(new TestInput(48999, tolerance, false));
+        builder.add(new TestInput(49000, tolerance, false));
+        builder.add(new TestInput(49001, tolerance, false));
+        builder.add(new TestInput(49499, tolerance, false));
+        builder.add(new TestInput(49500, tolerance, false));
+        builder.add(new TestInput(49501, tolerance, false));
+        builder.add(new TestInput(49997, tolerance, false));
+        builder.add(new TestInput(49998, tolerance, false));
+        builder.add(new TestInput(49999, tolerance, false));
+        builder.add(new TestInput(50000, tolerance, true)); // Equals value
+        builder.add(new TestInput(50001, tolerance, false));
+        builder.add(new TestInput(50002, tolerance, false));
+        builder.add(new TestInput(50003, tolerance, false));
+        builder.add(new TestInput(50499, tolerance, false));
+        builder.add(new TestInput(50500, tolerance, false));
+        builder.add(new TestInput(50501, tolerance, false));
+        builder.add(new TestInput(50999, tolerance, false));
+        builder.add(new TestInput(51000, tolerance, false));
+        builder.add(new TestInput(51001, tolerance, false));
+        builder.add(new TestInput(60000, tolerance, false));
+        builder.add(new TestInput(Integer.MAX_VALUE, tolerance, false));
+        // Tolerance 1.0%
+        tolerance = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Integer.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput(-1, tolerance, false));
+        builder.add(new TestInput(0, tolerance, false));
+        builder.add(new TestInput(1, tolerance, false));
+        builder.add(new TestInput(48999, tolerance, false));
+        builder.add(new TestInput(49000, tolerance, false));
+        builder.add(new TestInput(49001, tolerance, false));
+        builder.add(new TestInput(49499, tolerance, false));
+        builder.add(new TestInput(49500, tolerance, true));
+        builder.add(new TestInput(49501, tolerance, true));
+        builder.add(new TestInput(49997, tolerance, true));
+        builder.add(new TestInput(49998, tolerance, true));
+        builder.add(new TestInput(49999, tolerance, true));
+        builder.add(new TestInput(50000, tolerance, true)); // Equals value
+        builder.add(new TestInput(50001, tolerance, true));
+        builder.add(new TestInput(50002, tolerance, true));
+        builder.add(new TestInput(50003, tolerance, true));
+        builder.add(new TestInput(50499, tolerance, true));
+        builder.add(new TestInput(50500, tolerance, true));
+        builder.add(new TestInput(50501, tolerance, false));
+        builder.add(new TestInput(50999, tolerance, false));
+        builder.add(new TestInput(51000, tolerance, false));
+        builder.add(new TestInput(51001, tolerance, false));
+        builder.add(new TestInput(60000, tolerance, false));
+        builder.add(new TestInput(Integer.MAX_VALUE, tolerance, false));
+        // Tolerance 100.0%
+        tolerance = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Integer.MIN_VALUE, tolerance, true));
+        builder.add(new TestInput(-1, tolerance, true));
+        builder.add(new TestInput(0, tolerance, true));
+        builder.add(new TestInput(1, tolerance, true));
+        builder.add(new TestInput(48999, tolerance, true));
+        builder.add(new TestInput(49000, tolerance, true));
+        builder.add(new TestInput(49001, tolerance, true));
+        builder.add(new TestInput(49499, tolerance, true));
+        builder.add(new TestInput(49500, tolerance, true));
+        builder.add(new TestInput(49501, tolerance, true));
+        builder.add(new TestInput(49997, tolerance, true));
+        builder.add(new TestInput(49998, tolerance, true));
+        builder.add(new TestInput(49999, tolerance, true));
+        builder.add(new TestInput(50000, tolerance, true)); // Equals value
+        builder.add(new TestInput(50001, tolerance, true));
+        builder.add(new TestInput(50002, tolerance, true));
+        builder.add(new TestInput(50003, tolerance, true));
+        builder.add(new TestInput(50499, tolerance, true));
+        builder.add(new TestInput(50500, tolerance, true));
+        builder.add(new TestInput(50501, tolerance, true));
+        builder.add(new TestInput(50999, tolerance, true));
+        builder.add(new TestInput(51000, tolerance, true));
+        builder.add(new TestInput(51001, tolerance, true));
+        builder.add(new TestInput(60000, tolerance, true));
+        builder.add(new TestInput(Integer.MAX_VALUE, tolerance, true));
+
+        final ImmutableList<TestInput> testInputs = builder.build();
+
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_INCOME);
+    }
+
+    /**
+     * Exercises duplicate detection on the {@code HAS_LICENSE_NUMBER} property
+     * of {@code PERSON_TYPE_URI} using long candidates.
+     * <p>
+     * A table of {@link TestInput} rows is assembled for three
+     * {@link ToleranceType#DIFFERENCE} tolerances (0, 1, 2) followed by three
+     * {@link ToleranceType#PERCENTAGE} tolerances (0%, 1%, 100%) and is then
+     * handed to {@code testProperty}. Each row carries a candidate value, the
+     * tolerance in effect, and a boolean that is presumably the expected
+     * "is a duplicate" outcome relative to a reference value of 123456789012
+     * (confirm against {@code TestInput} and {@code testProperty}).
+     *
+     * @throws SmartUriException propagated from {@code testProperty}.
+     */
+    @Test
+    public void testLongProperty() throws SmartUriException {
+        System.out.println("Long Property Test");
+        final ImmutableList.Builder<TestInput> inputs = ImmutableList.builder();
+
+        // Absolute difference of 0: only the exact value is expected to match.
+        final Tolerance diffZero = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(Long.MIN_VALUE, diffZero, false));
+        inputs.add(new TestInput(-1L, diffZero, false));
+        inputs.add(new TestInput(0L, diffZero, false));
+        inputs.add(new TestInput(1L, diffZero, false));
+        inputs.add(new TestInput(123456789009L, diffZero, false));
+        inputs.add(new TestInput(123456789010L, diffZero, false));
+        inputs.add(new TestInput(123456789011L, diffZero, false));
+        inputs.add(new TestInput(123456789012L, diffZero, true)); // exact match
+        inputs.add(new TestInput(123456789013L, diffZero, false));
+        inputs.add(new TestInput(123456789014L, diffZero, false));
+        inputs.add(new TestInput(123456789015L, diffZero, false));
+        inputs.add(new TestInput(223456789012L, diffZero, false));
+        inputs.add(new TestInput(Long.MAX_VALUE, diffZero, false));
+
+        // Absolute difference of 1: ...011 through ...013 are expected to match.
+        final Tolerance diffOne = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(Long.MIN_VALUE, diffOne, false));
+        inputs.add(new TestInput(-1L, diffOne, false));
+        inputs.add(new TestInput(0L, diffOne, false));
+        inputs.add(new TestInput(1L, diffOne, false));
+        inputs.add(new TestInput(123456789009L, diffOne, false));
+        inputs.add(new TestInput(123456789010L, diffOne, false));
+        inputs.add(new TestInput(123456789011L, diffOne, true));
+        inputs.add(new TestInput(123456789012L, diffOne, true)); // exact match
+        inputs.add(new TestInput(123456789013L, diffOne, true));
+        inputs.add(new TestInput(123456789014L, diffOne, false));
+        inputs.add(new TestInput(123456789015L, diffOne, false));
+        inputs.add(new TestInput(223456789012L, diffOne, false));
+        inputs.add(new TestInput(Long.MAX_VALUE, diffOne, false));
+
+        // Absolute difference of 2: ...010 through ...014 are expected to match.
+        final Tolerance diffTwo = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        inputs.add(new TestInput(Long.MIN_VALUE, diffTwo, false));
+        inputs.add(new TestInput(-1L, diffTwo, false));
+        inputs.add(new TestInput(0L, diffTwo, false));
+        inputs.add(new TestInput(1L, diffTwo, false));
+        inputs.add(new TestInput(123456789009L, diffTwo, false));
+        inputs.add(new TestInput(123456789010L, diffTwo, true));
+        inputs.add(new TestInput(123456789011L, diffTwo, true));
+        inputs.add(new TestInput(123456789012L, diffTwo, true)); // exact match
+        inputs.add(new TestInput(123456789013L, diffTwo, true));
+        inputs.add(new TestInput(123456789014L, diffTwo, true));
+        inputs.add(new TestInput(123456789015L, diffTwo, false));
+        inputs.add(new TestInput(223456789012L, diffTwo, false));
+        inputs.add(new TestInput(Long.MAX_VALUE, diffTwo, false));
+
+        // 0% tolerance: only the exact value is expected to match.
+        final Tolerance pctZero = new Tolerance(0.0, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(Long.MIN_VALUE, pctZero, false));
+        inputs.add(new TestInput(-1L, pctZero, false));
+        inputs.add(new TestInput(0L, pctZero, false));
+        inputs.add(new TestInput(1L, pctZero, false));
+        inputs.add(new TestInput(122222221121L, pctZero, false));
+        inputs.add(new TestInput(122222221122L, pctZero, false));
+        inputs.add(new TestInput(122222221123L, pctZero, false));
+        inputs.add(new TestInput(123456789009L, pctZero, false));
+        inputs.add(new TestInput(123456789010L, pctZero, false));
+        inputs.add(new TestInput(123456789011L, pctZero, false));
+        inputs.add(new TestInput(123456789012L, pctZero, true)); // exact match
+        inputs.add(new TestInput(123456789013L, pctZero, false));
+        inputs.add(new TestInput(123456789014L, pctZero, false));
+        inputs.add(new TestInput(123456789015L, pctZero, false));
+        inputs.add(new TestInput(124691356901L, pctZero, false));
+        inputs.add(new TestInput(124691356902L, pctZero, false));
+        inputs.add(new TestInput(124691356903L, pctZero, false));
+        inputs.add(new TestInput(223456789012L, pctZero, false));
+        inputs.add(new TestInput(Long.MAX_VALUE, pctZero, false));
+
+        // 1% tolerance (given as the fraction 0.01): 122222221122 through
+        // 124691356902 are expected to match.
+        final Tolerance pctOne = new Tolerance(0.01, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(Long.MIN_VALUE, pctOne, false));
+        inputs.add(new TestInput(-1L, pctOne, false));
+        inputs.add(new TestInput(0L, pctOne, false));
+        inputs.add(new TestInput(1L, pctOne, false));
+        inputs.add(new TestInput(122222221121L, pctOne, false));
+        inputs.add(new TestInput(122222221122L, pctOne, true));
+        inputs.add(new TestInput(122222221123L, pctOne, true));
+        inputs.add(new TestInput(123456789009L, pctOne, true));
+        inputs.add(new TestInput(123456789010L, pctOne, true));
+        inputs.add(new TestInput(123456789011L, pctOne, true));
+        inputs.add(new TestInput(123456789012L, pctOne, true)); // exact match
+        inputs.add(new TestInput(123456789013L, pctOne, true));
+        inputs.add(new TestInput(123456789014L, pctOne, true));
+        inputs.add(new TestInput(123456789015L, pctOne, true));
+        inputs.add(new TestInput(124691356901L, pctOne, true));
+        inputs.add(new TestInput(124691356902L, pctOne, true));
+        inputs.add(new TestInput(124691356903L, pctOne, false));
+        inputs.add(new TestInput(223456789012L, pctOne, false));
+        inputs.add(new TestInput(Long.MAX_VALUE, pctOne, false));
+
+        // 100% tolerance (given as the fraction 1.00): every candidate is
+        // expected to match.
+        final Tolerance pctHundred = new Tolerance(1.00, ToleranceType.PERCENTAGE);
+        inputs.add(new TestInput(Long.MIN_VALUE, pctHundred, true));
+        inputs.add(new TestInput(-1L, pctHundred, true));
+        inputs.add(new TestInput(0L, pctHundred, true));
+        inputs.add(new TestInput(1L, pctHundred, true));
+        inputs.add(new TestInput(122222221121L, pctHundred, true));
+        inputs.add(new TestInput(122222221122L, pctHundred, true));
+        inputs.add(new TestInput(122222221123L, pctHundred, true));
+        inputs.add(new TestInput(123456789009L, pctHundred, true));
+        inputs.add(new TestInput(123456789010L, pctHundred, true));
+        inputs.add(new TestInput(123456789011L, pctHundred, true));
+        inputs.add(new TestInput(123456789012L, pctHundred, true)); // exact match
+        inputs.add(new TestInput(123456789013L, pctHundred, true));
+        inputs.add(new TestInput(123456789014L, pctHundred, true));
+        inputs.add(new TestInput(123456789015L, pctHundred, true));
+        inputs.add(new TestInput(124691356901L, pctHundred, true));
+        inputs.add(new TestInput(124691356902L, pctHundred, true));
+        inputs.add(new TestInput(124691356903L, pctHundred, true));
+        inputs.add(new TestInput(223456789012L, pctHundred, true));
+        inputs.add(new TestInput(Long.MAX_VALUE, pctHundred, true));
+
+        final ImmutableList<TestInput> testInputs = inputs.build();
+        testProperty(testInputs, PERSON_TYPE_URI, HAS_LICENSE_NUMBER);
+    }
+
+    @Test
+    public void testShortProperty() throws SmartUriException {
+        System.out.println("Short Property Test");
+        final ImmutableList.Builder<TestInput> builder = ImmutableList.<TestInput>builder();
+        // Tolerance 0.0
+        Tolerance tolerance = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Short.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput((short) -1, tolerance, false));
+        builder.add(new TestInput((short) 0, tolerance, false));
+        builder.add(new TestInput((short) 1, tolerance, false));
+        builder.add(new TestInput((short) 37, tolerance, false));
+        builder.add(new TestInput((short) 38, tolerance, false));
+        builder.add(new TestInput((short) 39, tolerance, false));
+        builder.add(new TestInput((short) 40, tolerance, true)); // Equals value
+        builder.add(new TestInput((short) 41, tolerance, false));
+        builder.add(new TestInput((short) 42, tolerance, false));
+        builder.add(new TestInput((short) 43, tolerance, false));
+        builder.add(new TestInput((short) 100, tolerance, false));
+        builder.add(new TestInput(Short.MAX_VALUE, tolerance, false));
+        // Tolerance 1.0
+        tolerance = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Short.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput((short) -1, tolerance, false));
+        builder.add(new TestInput((short) 0, tolerance, false));
+        builder.add(new TestInput((short) 1, tolerance, false));
+        builder.add(new TestInput((short) 37, tolerance, false));
+        builder.add(new TestInput((short) 38, tolerance, false));
+        builder.add(new TestInput((short) 39, tolerance, true));
+        builder.add(new TestInput((short) 40, tolerance, true)); // Equals value
+        builder.add(new TestInput((short) 41, tolerance, true));
+        builder.add(new TestInput((short) 42, tolerance, false));
+        builder.add(new TestInput((short) 43, tolerance, false));
+        builder.add(new TestInput((short) 100, tolerance, false));
+        builder.add(new TestInput(Short.MAX_VALUE, tolerance, false));
+        // Tolerance 2.0
+        tolerance = new Tolerance(2.0, ToleranceType.DIFFERENCE);
+        builder.add(new TestInput(Short.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput((short) -1, tolerance, false));
+        builder.add(new TestInput((short) 0, tolerance, false));
+        builder.add(new TestInput((short) 1, tolerance, false));
+        builder.add(new TestInput((short) 37, tolerance, false));
+        builder.add(new TestInput((short) 38, tolerance, true));
+        builder.add(new TestInput((short) 39, tolerance, true));
+        builder.add(new TestInput((short) 40, tolerance, true)); // Equals value
+        builder.add(new TestInput((short) 41, tolerance, true));
+        builder.add(new TestInput((short) 42, tolerance, true));
+        builder.add(new TestInput((short) 43, tolerance, false));
+        builder.add(new TestInput((short) 100, tolerance, false));
+        builder.add(new TestInput(Short.MAX_VALUE, tolerance, false));
+
+        // Tolerance 0.0%
+        tolerance = new Tolerance(0.00, ToleranceType.PERCENTAGE);
+        builder.add(new TestInput(Short.MIN_VALUE, tolerance, false));
+        builder.add(new TestInput((short) -1, tolerance, false));
+        builder.add(new TestInput((short) 0, tolerance, false));
+        builder.add(new TestInput((short) 1, tolerance, false));
+        builder.add(new TestInput((short) 37, tolerance, false));


<TRUNCATED>


[2/3] incubator-rya git commit: RYA-250 Added data duplication detection methods to Smart URI/Entities. These use configured tolerances for each data type to decide if an Entity is considered nearly equal. Also, string terms that are considered equival

Posted by ca...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetector.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetector.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetector.java
new file mode 100644
index 0000000..220db30
--- /dev/null
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/DuplicateDataDetector.java
@@ -0,0 +1,1066 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication;
+
+import static java.util.Objects.requireNonNull;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.lang.StringUtils;
+import org.apache.rya.api.domain.RyaType;
+import org.apache.rya.api.domain.RyaURI;
+import org.apache.rya.api.resolver.impl.DateTimeRyaTypeResolver;
+import org.apache.rya.indexing.entity.model.Entity;
+import org.apache.rya.indexing.entity.model.Property;
+import org.apache.rya.indexing.smarturi.SmartUriAdapter;
+import org.apache.rya.indexing.smarturi.SmartUriException;
+import org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig;
+import org.calrissian.mango.types.exception.TypeEncodingException;
+import org.joda.time.DateTime;
+import org.openrdf.model.URI;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.model.vocabulary.XMLSchema;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * Detects if two entities contain data that's nearly identical based on a set
+ * tolerance for each field's type. Two entities are considered nearly
+ * identical if all their properties are equal and/or within the specified
+ * tolerance for the property's object type. Setting all object type tolerances
+ * to 0 means that the objects need to be exactly equal to each other to be
+ * considered duplicates. Duplicate data detection can be enabled/disabled
+ * through configuration and each object type can have a tolerance based on
+ * either the difference or the percentage difference between the objects being
+ * compared.
+ */
+public class DuplicateDataDetector {
+    private final Map<URI, ApproxEqualsDetector<?>> uriMap = new HashMap<>();
+    private final Map<Class<?>, ApproxEqualsDetector<?>> classMap = new HashMap<>();
+
+    private boolean isDetectionEnabled;
+
+    /**
+     * Creates a new instance of {@link DuplicateDataDetector} with the
+     * values provided by the configuration file.
+     * @param duplicateDataConfig the {@link DuplicateDataConfig}
+     */
+    public DuplicateDataDetector(final DuplicateDataConfig duplicateDataConfig) {
+        // Pull every per-type tolerance, the equivalent terms map and the
+        // enabled flag out of the config and hand them to the constructor
+        // that takes each value individually. The argument order must match
+        // that constructor's parameter order exactly.
+        this(duplicateDataConfig.getBooleanTolerance(),
+            duplicateDataConfig.getByteTolerance(),
+            duplicateDataConfig.getDateTolerance(),
+            duplicateDataConfig.getDoubleTolerance(),
+            duplicateDataConfig.getFloatTolerance(),
+            duplicateDataConfig.getIntegerTolerance(),
+            duplicateDataConfig.getLongTolerance(),
+            duplicateDataConfig.getShortTolerance(),
+            duplicateDataConfig.getStringTolerance(),
+            duplicateDataConfig.getUriTolerance(),
+            duplicateDataConfig.getEquivalentTermsMap(),
+            duplicateDataConfig.isDetectionEnabled()
+        );
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataDetector} with the values
+     * from the config.
+     * @throws ConfigurationException
+     */
+    public DuplicateDataDetector() throws ConfigurationException {
+        // Builds a DuplicateDataConfig with its no-arg constructor and
+        // delegates to the config-based constructor.
+        this(new DuplicateDataConfig());
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataDetector}.
+     * @param tolerance the tolerance to assign to all types.
+     */
+    public DuplicateDataDetector(final double tolerance) {
+        // The raw value is wrapped as a DIFFERENCE tolerance (not a
+        // percentage) and paired with an empty equivalent terms map.
+        this(new Tolerance(tolerance, ToleranceType.DIFFERENCE), new LinkedHashMap<>());
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataDetector}.
+     * @param tolerance the tolerance to assign to all types.
+     * @param equivalentTermsMap the {@link Map} of terms that are considered
+     * equivalent to each other. (not {@code null})
+     */
+    public DuplicateDataDetector(final Tolerance tolerance, final Map<String, List<String>> equivalentTermsMap) {
+        // The same tolerance is passed for all ten per-type tolerance
+        // parameters and detection is always enabled (the trailing "true").
+        this(tolerance, tolerance, tolerance, tolerance, tolerance,
+            tolerance, tolerance, tolerance, tolerance, tolerance , equivalentTermsMap, true);
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataDetector}.
+     * @param booleanTolerance the {@link Boolean} tolerance value or
+     * {@code null} if not specified.
+     * @param byteTolerance the {@link Byte} tolerance value or {@code null} if
+     * not specified.
+     * @param dateTolerance the {@link Date} tolerance value or {@code null} if
+     * not specified.
+     * @param doubleTolerance the {@link Double} tolerance value or {@code null}
+     * if not specified.
+     * @param floatTolerance the {@link Float} tolerance value or {@code null}
+     * if not specified.
+     * @param integerTolerance the {@link Integer} tolerance value or
+     * {@code null} if not specified.
+     * @param longTolerance the {@link Long} tolerance value or {@code null} if
+     * not specified.
+     * @param shortTolerance the {@link Short} tolerance value or {@code null}
+     * if not specified.
+     * @param stringTolerance the {@link String} tolerance value or {@code null}
+     * if not specified.
+     * @param uriTolerance the {@link URI} tolerance value or {@code null} if
+     * not specified.
+     * @param equivalentTermsMap the {@link Map} of terms that are considered
+     * equivalent to each other. (not {@code null})
+     * @param isDetectionEnabled {@code true} to enable detection. {@code false}
+     * to disable detection.
+     */
+    public DuplicateDataDetector(final Tolerance booleanTolerance, final Tolerance byteTolerance,
+            final Tolerance dateTolerance, final Tolerance doubleTolerance, final Tolerance floatTolerance,
+            final Tolerance integerTolerance, final Tolerance longTolerance, final Tolerance shortTolerance,
+            final Tolerance stringTolerance, final Tolerance uriTolerance, final Map<String, List<String>> equivalentTermsMap,
+            final boolean isDetectionEnabled)
+    {
+        // All other constructors funnel into this one; the actual detector
+        // registration and flag assignment happen in init().
+        init(booleanTolerance, byteTolerance, dateTolerance, doubleTolerance, floatTolerance,
+            integerTolerance, longTolerance, shortTolerance, stringTolerance, uriTolerance, equivalentTermsMap, isDetectionEnabled);
+    }
+
+    private void init(final Tolerance booleanTolerance, final Tolerance byteTolerance,
+            final Tolerance dateTolerance, final Tolerance doubleTolerance, final Tolerance floatTolerance,
+            final Tolerance integerTolerance, final Tolerance longTolerance, final Tolerance shortTolerance,
+            final Tolerance stringTolerance, final Tolerance uriTolerance, final Map<String, List<String>> equivalentTermsMap,
+            final boolean isDetectionEnabled)
+    {
+        this.isDetectionEnabled = isDetectionEnabled;
+
+        // One detector per supported value type. Date and DateTime both use
+        // the date tolerance.
+        final List<ApproxEqualsDetector<?>> supportedDetectors = new ArrayList<>();
+        supportedDetectors.add(new BooleanApproxEqualsDetector(booleanTolerance));
+        supportedDetectors.add(new ByteApproxEqualsDetector(byteTolerance));
+        supportedDetectors.add(new DateApproxEqualsDetector(dateTolerance));
+        supportedDetectors.add(new DateTimeApproxEqualsDetector(dateTolerance));
+        supportedDetectors.add(new DoubleApproxEqualsDetector(doubleTolerance));
+        supportedDetectors.add(new FloatApproxEqualsDetector(floatTolerance));
+        supportedDetectors.add(new IntegerApproxEqualsDetector(integerTolerance));
+        supportedDetectors.add(new LongApproxEqualsDetector(longTolerance));
+        supportedDetectors.add(new ShortApproxEqualsDetector(shortTolerance));
+        supportedDetectors.add(new StringApproxEqualsDetector(stringTolerance, equivalentTermsMap));
+        supportedDetectors.add(new UriApproxEqualsDetector(uriTolerance));
+
+        // Index each detector twice: by the XML Schema datatype URI it
+        // reports and by the Java class it reports.
+        supportedDetectors.forEach(detector -> {
+            uriMap.put(detector.getXmlSchemaUri(), detector);
+            classMap.put(detector.getTypeClass(), detector);
+        });
+    }
+
+    /**
+     * @return {@code true} to enable detection. {@code false} to disable
+     * detection.
+     */
+    public boolean isDetectionEnabled() {
+        // Plain accessor for the flag assigned in init().
+        return isDetectionEnabled;
+    }
+
+    /**
+     * Removes any duplicate (nearly identical) entities from the collection
+     * of entities.
+     * @param entities the {@link List} of {@link Entity}s. (not {@code null})
+     * @throws SmartUriException
+     */
+    public void removeDuplicatesFromCollection(final List<Entity> entities) throws SmartUriException {
+        requireNonNull(entities);
+        // Indices are kept in descending order so that removing one never
+        // shifts the position of an index still waiting to be removed.
+        final Set<Integer> duplicateIndices = new TreeSet<>((a, b) -> Integer.compare(b, a));
+        final int count = entities.size();
+        // Pair every entity with each entity after it exactly once. When a
+        // pair matches, the later element is the one flagged for removal.
+        // With fewer than two entities neither loop body runs.
+        for (int first = 0; first < count - 1; first++) {
+            final Entity candidate = entities.get(first);
+            for (int second = count - 1; second > first; second--) {
+                final Entity other = entities.get(second);
+                if (compareEntities(candidate, other)) {
+                    duplicateIndices.add(second);
+                }
+            }
+        }
+        for (final int index : duplicateIndices) {
+            // "index" is a primitive int, so this resolves to
+            // List.remove(int) rather than List.remove(Object).
+            entities.remove(index);
+        }
+    }
+
+    /**
+     * Compares two Smart URI's to determine if they have nearly identical data.
+     * @param uri1 the first Smart {@link URI}. (not {@code null})
+     * @param uri2 the second Smart {@link URI}. (not {@code null})
+     * @return {@code true} if the two Smart URI's have nearly identical data.
+     * {@code false} otherwise.
+     * @throws SmartUriException
+     */
+    public boolean compareSmartUris(final URI uri1, final URI uri2) throws SmartUriException {
+        // Both arguments are validated before either one is deserialized.
+        requireNonNull(uri1);
+        requireNonNull(uri2);
+        // Turn each Smart URI back into an Entity (first, then second) and
+        // let the entity comparison decide.
+        return compareEntities(
+            SmartUriAdapter.deserializeUriEntity(uri1),
+            SmartUriAdapter.deserializeUriEntity(uri2));
+    }
+
+    /**
+     * Compares two entities to determine if they have nearly identical data.
+     * @param entity1 the first {@link Entity}. (not {@code null})
+     * @param entity2 the second {@link Entity}. (not {@code null})
+     * @return {@code true} if the two entities have nearly identical data.
+     * {@code false} otherwise.
+     * @throws SmartUriException
+     */
+    public boolean compareEntities(final Entity entity1, final Entity entity2) throws SmartUriException {
+        requireNonNull(entity1);
+        requireNonNull(entity2);
+
+        // The explicit types must match in both directions. Checking only
+        // types1.containsAll(types2) made the outcome depend on the order
+        // of the arguments.
+        final List<RyaURI> types1 = entity1.getExplicitTypeIds();
+        final List<RyaURI> types2 = entity2.getExplicitTypeIds();
+        if (!types1.containsAll(types2) || !types2.containsAll(types1)) {
+            return false;
+        }
+
+        // NOTE(review): only entity1's properties are walked, so properties
+        // that exist solely on entity2 are not inspected - confirm whether
+        // that is intended.
+        for (final Entry<RyaURI, ImmutableMap<RyaURI, Property>> entry : entity1.getProperties().entrySet()) {
+            final RyaURI typeIdUri = entry.getKey();
+            for (final Entry<RyaURI, Property> typeProperty : entry.getValue().entrySet()) {
+                final RyaURI propertyNameUri = typeProperty.getKey();
+                final Optional<Property> p2 = entity2.lookupTypeProperty(typeIdUri, propertyNameUri);
+                if (!p2.isPresent()) {
+                    // entity2 lacks a property that entity1 has.
+                    return false;
+                }
+                final RyaType value1 = typeProperty.getValue().getValue();
+                final RyaType value2 = p2.get().getValue();
+
+                // The detector is chosen from the datatype of entity1's
+                // value; both data strings are handed to that detector.
+                final URI xmlSchemaUri1 = value1.getDataType();
+                final ApproxEqualsDetector<?> approxEqualsDetector = uriMap.get(xmlSchemaUri1);
+                if (approxEqualsDetector == null) {
+                    throw new SmartUriException("No appropriate detector found for the type: " + xmlSchemaUri1);
+                }
+                if (!approxEqualsDetector.areApproxEquals(value1.getData(), value2.getData())) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Gets the appropriate {@link ApproxEqualsDetector} for the specified
+     * class.
+     * @param clazz the {@link Class} to find an {@link ApproxEqualsDetector}
+     * for.
+     * @return the {@link ApproxEqualsDetector} for the class or {@code null} if
+     * none could be found.
+     */
+    public ApproxEqualsDetector<?> getDetectorForType(final Class<?> clazz) {
+        // Exact-key lookup in the class index filled by init(); a class that
+        // was never registered yields null.
+        return classMap.get(clazz);
+    }
+
+    private static boolean isOnlyOneNull(final Object lhs, final Object rhs) {
+        // True exactly when the two "is null" answers disagree.
+        return (lhs == null) != (rhs == null);
+    }
+
+    /**
+     * Class to detect if two booleans are considered approximately equal to
+     * each other.
+     */
+    public static class BooleanApproxEqualsDetector implements ApproxEqualsDetector<Boolean> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link BooleanApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance} to apply, or {@code null} to
+         * fall back to {@link #getDefaultTolerance()}.
+         */
+        public BooleanApproxEqualsDetector(final Tolerance tolerance) {
+            if (tolerance == null) {
+                this.tolerance = getDefaultTolerance();
+            } else {
+                this.tolerance = tolerance;
+            }
+        }
+
+        /**
+         * Any non-zero tolerance makes every pair of booleans match. A zero
+         * tolerance requires the two values to be exactly equal.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Boolean lhs, final Boolean rhs) {
+            if (tolerance.getValue() != 0) {
+                return true;
+            }
+            return Objects.equals(lhs, rhs);
+        }
+
+        /**
+         * @return the tolerance used when none is supplied: a difference of
+         * zero.
+         */
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with {@link Boolean#valueOf(String)}.
+         */
+        @Override
+        public Boolean convertStringToObject(final String string) throws SmartUriException {
+            final Boolean parsed = Boolean.valueOf(string);
+            return parsed;
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Boolean.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.BOOLEAN;
+        }
+    }
+
+    /**
+     * Class to detect if two bytes are considered approximately equal to each
+     * other.
+     */
+    public static class ByteApproxEqualsDetector implements ApproxEqualsDetector<Byte> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0, ToleranceType.DIFFERENCE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link ByteApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}, or {@code null} to use
+         * {@link #getDefaultTolerance()}.
+         */
+        public ByteApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two bytes are within the configured tolerance.
+         * For a percentage tolerance the difference is measured relative to
+         * the magnitude of {@code lhs}.
+         * @param lhs the reference value (may be {@code null}).
+         * @param rhs the value compared against it (may be {@code null}).
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Byte lhs, final Byte rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // Byte operands are promoted to int, so this cannot overflow.
+            final int difference = Math.abs(lhs.intValue() - rhs.intValue());
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs == 0) {
+                        // No percentage can be taken of zero. Compare by
+                        // value rather than by boxed reference.
+                        return lhs.byteValue() == rhs.byteValue();
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhs: dividing by a negative
+                    // lhs produced a negative ratio that always passed.
+                    return ((double) difference / Math.abs(lhs.intValue())) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return difference <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with {@link Byte#valueOf(String)}.
+         */
+        @Override
+        public Byte convertStringToObject(final String string) throws SmartUriException {
+            return Byte.valueOf(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Byte.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.BYTE;
+        }
+    }
+
+    /**
+     * Class to detect if two dates are considered approximately equal to each
+     * other.
+     */
+    public static class DateApproxEqualsDetector implements ApproxEqualsDetector<Date> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(500.0, ToleranceType.DIFFERENCE); // milliseconds
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link DateApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}, or {@code null} to use
+         * {@link #getDefaultTolerance()}.
+         */
+        public DateApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two dates are within the configured tolerance,
+         * comparing their {@link Date#getTime()} millisecond values. For a
+         * percentage tolerance the difference is measured relative to the
+         * magnitude of {@code lhs}'s millisecond value.
+         * @param lhs the reference date (may be {@code null}).
+         * @param rhs the date compared against it (may be {@code null}).
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Date lhs, final Date rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // Check based on tolerance
+            final long lhsTime = lhs.getTime();
+            final long rhsTime = rhs.getTime();
+            final long difference = Math.abs(lhsTime - rhsTime);
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhsTime == 0) {
+                        // No percentage can be taken of zero.
+                        return lhsTime == rhsTime;
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhsTime: a negative
+                    // (pre-epoch) lhsTime produced a negative ratio that
+                    // always passed.
+                    return ((double) difference / Math.abs(lhsTime)) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return difference <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with
+         * {@link DateTimeRyaTypeResolver#XMLDATETIME_PARSER} and converts the
+         * result to a {@link Date}.
+         * @throws SmartUriException if parsing raises a
+         * {@link TypeEncodingException}.
+         */
+        @Override
+        public Date convertStringToObject(final String string) throws SmartUriException {
+            final DateTime dateTime;
+            try {
+                dateTime = DateTime.parse(string, DateTimeRyaTypeResolver.XMLDATETIME_PARSER);
+            } catch (final TypeEncodingException e) {
+                // NOTE(review): other exception types raised by the parser
+                // are not caught here and propagate unchanged - confirm
+                // which exceptions the parser can throw.
+                throw new SmartUriException("Exception occurred serializing data[" + string + "]", e);
+            }
+            return dateTime.toDate();
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Date.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.DATE;
+        }
+    }
+
+    /**
+     * Class to detect if two datetimes are considered approximately equal to
+     * each other.
+     */
+    public static class DateTimeApproxEqualsDetector implements ApproxEqualsDetector<DateTime> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(500.0, ToleranceType.DIFFERENCE); // milliseconds
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link DateTimeApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}, or {@code null} to use
+         * {@link #getDefaultTolerance()}.
+         */
+        public DateTimeApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two datetimes are within the configured tolerance,
+         * comparing their {@code getMillis()} values. For a percentage
+         * tolerance the difference is measured relative to the magnitude of
+         * {@code lhs}'s millisecond value.
+         * @param lhs the reference datetime (may be {@code null}).
+         * @param rhs the datetime compared against it (may be {@code null}).
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final DateTime lhs, final DateTime rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // Check based on tolerance
+            final long lhsTime = lhs.getMillis();
+            final long rhsTime = rhs.getMillis();
+            final long difference = Math.abs(lhsTime - rhsTime);
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhsTime == 0) {
+                        // No percentage can be taken of zero.
+                        return lhsTime == rhsTime;
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhsTime: a negative
+                    // (pre-epoch) lhsTime produced a negative ratio that
+                    // always passed.
+                    return ((double) difference / Math.abs(lhsTime)) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return difference <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with
+         * {@link DateTimeRyaTypeResolver#XMLDATETIME_PARSER}.
+         * @throws SmartUriException if parsing raises a
+         * {@link TypeEncodingException}.
+         */
+        @Override
+        public DateTime convertStringToObject(final String string) throws SmartUriException {
+            try {
+                return DateTime.parse(string, DateTimeRyaTypeResolver.XMLDATETIME_PARSER);
+            } catch (final TypeEncodingException e) {
+                // NOTE(review): other exception types raised by the parser
+                // are not caught here and propagate unchanged - confirm
+                // which exceptions the parser can throw.
+                throw new SmartUriException("Exception occurred serializing data[" + string + "]", e);
+            }
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return DateTime.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.DATETIME;
+        }
+    }
+
+    /**
+     * Class to detect if two doubles are considered approximately equal to each
+     * other.
+     */
+    public static class DoubleApproxEqualsDetector implements ApproxEqualsDetector<Double> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0001, ToleranceType.PERCENTAGE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link DoubleApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}, or {@code null} to use
+         * {@link #getDefaultTolerance()}.
+         */
+        public DoubleApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two doubles are within the configured tolerance.
+         * For a percentage tolerance the difference is measured relative to
+         * the magnitude of {@code lhs}. A NaN or infinite operand that is
+         * not exactly equal to the other operand never matches.
+         * @param lhs the reference value (may be {@code null}).
+         * @param rhs the value compared against it (may be {@code null}).
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Double lhs, final Double rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // NaN and the infinities have no decimal representation, so the
+            // BigDecimal String constructor below would throw a
+            // NumberFormatException for them.
+            if (lhs.isNaN() || lhs.isInfinite() || rhs.isNaN() || rhs.isInfinite()) {
+                return false;
+            }
+            // Doubles can be unpredictable with how they store a value
+            // like 0.1. So use BigDecimal with its String constructor
+            // to make things more predictable.
+            final BigDecimal lhsBd = new BigDecimal(String.valueOf(lhs));
+            final BigDecimal rhsBd = new BigDecimal(String.valueOf(rhs));
+            final BigDecimal absDiff = lhsBd.subtract(rhsBd).abs();
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs == 0) {
+                        // No percentage can be taken of zero. Compare by
+                        // value (so 0.0 matches -0.0) rather than by boxed
+                        // reference.
+                        return lhs.doubleValue() == rhs.doubleValue();
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhs: dividing by a negative
+                    // lhs produced a negative ratio that always passed.
+                    try {
+                        final BigDecimal percent = absDiff.divide(lhsBd.abs());
+                        return percent.doubleValue() <= tolerance.getValue();
+                    } catch (final ArithmeticException e) {
+                        // BigDecimal quotient did not have a terminating
+                        // decimal expansion. So, try without BigDecimal.
+                        return (Math.abs(lhs - rhs) / Math.abs(lhs)) <= tolerance.getValue();
+                    }
+                case DIFFERENCE:
+                default:
+                    return absDiff.doubleValue() <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with {@link Double#valueOf(String)}.
+         */
+        @Override
+        public Double convertStringToObject(final String string) throws SmartUriException {
+            return Double.valueOf(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Double.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.DOUBLE;
+        }
+    }
+
+    /**
+     * Class to detect if two floats are considered approximately equal to each
+     * other.
+     */
+    public static class FloatApproxEqualsDetector implements ApproxEqualsDetector<Float> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0001, ToleranceType.PERCENTAGE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link FloatApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}, or {@code null} to use
+         * {@link #getDefaultTolerance()}.
+         */
+        public FloatApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two floats are within the configured tolerance.
+         * For a percentage tolerance the difference is measured relative to
+         * the magnitude of {@code lhs}. A NaN or infinite operand that is
+         * not exactly equal to the other operand never matches.
+         * @param lhs the reference value (may be {@code null}).
+         * @param rhs the value compared against it (may be {@code null}).
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Float lhs, final Float rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // NaN and the infinities have no decimal representation, so the
+            // BigDecimal String constructor below would throw a
+            // NumberFormatException for them.
+            if (lhs.isNaN() || lhs.isInfinite() || rhs.isNaN() || rhs.isInfinite()) {
+                return false;
+            }
+            // Check based on tolerance
+            // Floats can be unpredictable with how they store a value
+            // like 0.1. So use BigDecimal with its String constructor
+            // to make things more predictable.
+            final BigDecimal lhsBd = new BigDecimal(String.valueOf(lhs));
+            final BigDecimal rhsBd = new BigDecimal(String.valueOf(rhs));
+            final BigDecimal absDiff = lhsBd.subtract(rhsBd).abs();
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs == 0) {
+                        // No percentage can be taken of zero. Compare by
+                        // value (so 0.0 matches -0.0) rather than by boxed
+                        // reference.
+                        return lhs.floatValue() == rhs.floatValue();
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhs: dividing by a negative
+                    // lhs produced a negative ratio that always passed.
+                    try {
+                        final BigDecimal percent = absDiff.divide(lhsBd.abs());
+                        // Compare as a double: narrowing the exact decimal
+                        // to a float first can push a value sitting on the
+                        // tolerance boundary to the wrong side of it.
+                        return percent.doubleValue() <= tolerance.getValue();
+                    } catch (final ArithmeticException e) {
+                        // BigDecimal quotient did not have a terminating
+                        // decimal expansion. So, try without BigDecimal.
+                        final double plainDiff = Math.abs(lhs.doubleValue() - rhs.doubleValue());
+                        return (plainDiff / Math.abs(lhs.doubleValue())) <= tolerance.getValue();
+                    }
+                case DIFFERENCE:
+                default:
+                    // Compared as a double for the same boundary reason.
+                    return absDiff.doubleValue() <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with {@link Float#valueOf(String)}.
+         */
+        @Override
+        public Float convertStringToObject(final String string) throws SmartUriException {
+            return Float.valueOf(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Float.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.FLOAT;
+        }
+    }
+
+    /**
+     * Class to detect if two integers are considered approximately equal to
+     * each other.
+     */
+    public static class IntegerApproxEqualsDetector implements ApproxEqualsDetector<Integer> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link IntegerApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}, or {@code null} to use
+         * {@link #getDefaultTolerance()}.
+         */
+        public IntegerApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two integers are within the configured tolerance.
+         * For a percentage tolerance the difference is measured relative to
+         * the magnitude of {@code lhs}.
+         * @param lhs the reference value (may be {@code null}).
+         * @param rhs the value compared against it (may be {@code null}).
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Integer lhs, final Integer rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // Subtract in long arithmetic: in int arithmetic the difference
+            // of two far-apart values overflows, and
+            // Math.abs(Integer.MIN_VALUE) is itself negative, so such pairs
+            // were reported as within tolerance.
+            final long difference = Math.abs(lhs.longValue() - rhs.longValue());
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs == 0) {
+                        // No percentage can be taken of zero. Compare by
+                        // value rather than by boxed reference.
+                        return lhs.intValue() == rhs.intValue();
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhs: dividing by a negative
+                    // lhs produced a negative ratio that always passed.
+                    return ((double) difference / Math.abs(lhs.longValue())) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return difference <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the string with {@link Integer#valueOf(String)}.
+         */
+        @Override
+        public Integer convertStringToObject(final String string) throws SmartUriException {
+            return Integer.valueOf(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Integer.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.INTEGER;
+        }
+    }
+
+    /**
+     * Class to detect if two longs are considered approximately equal to
+     * each other. With a {@code DIFFERENCE} tolerance the absolute difference
+     * of the two values must not exceed the tolerance value. With a
+     * {@code PERCENTAGE} tolerance the absolute difference divided by the
+     * magnitude of the left-hand value must not exceed the tolerance value.
+     */
+    public static class LongApproxEqualsDetector implements ApproxEqualsDetector<Long> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link LongApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}. When {@code null} the
+         * default tolerance is used instead.
+         */
+        public LongApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        /**
+         * Decides whether two longs are within the configured tolerance.
+         * @param lhs the left-hand value; used as the reference value for
+         * percentage tolerances. May be {@code null}.
+         * @param rhs the right-hand value. May be {@code null}.
+         * @return {@code true} if both are {@code null}, exactly equal, or
+         * within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final Long lhs, final Long rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals (or both null) so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            final double absDiff = absDifference(lhs, rhs);
+            // Check based on tolerance
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs == 0) {
+                        // The values are known to differ at this point and
+                        // no value is within a percentage of zero.
+                        return false;
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    // Divide by the magnitude of lhs. Dividing by a negative
+                    // lhs would yield a negative ratio that always passes.
+                    return (absDiff / Math.abs(lhs.doubleValue())) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return absDiff <= tolerance.getValue();
+            }
+        }
+
+        /**
+         * Computes {@code |a - b|} as a double without being fooled by long
+         * overflow or by {@code Math.abs(Long.MIN_VALUE)} being negative.
+         */
+        private static double absDifference(final long a, final long b) {
+            final long diff = a - b;
+            // Same overflow test as Math.subtractExact: the subtraction
+            // overflowed iff the operands have different signs and the sign
+            // of the result differs from the sign of a.
+            final boolean overflowed = ((a ^ b) & (a ^ diff)) < 0;
+            if (overflowed) {
+                return Math.abs((double) a - (double) b);
+            }
+            // Convert before taking the magnitude so Long.MIN_VALUE is safe.
+            return Math.abs((double) diff);
+        }
+
+        /**
+         * @return the default {@link Tolerance}: a difference of 1.
+         */
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Parses the text with {@link Long#valueOf(String)}.
+         * @param string the text to parse.
+         * @return the parsed {@link Long}.
+         */
+        @Override
+        public Long convertStringToObject(final String string) throws SmartUriException {
+            return Long.valueOf(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Long.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.LONG;
+        }
+    }
+
+    /**
+     * Class to detect if two shorts are considered approximately equal to each
+     * other.
+     */
+    public static class ShortApproxEqualsDetector implements ApproxEqualsDetector<Short> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link ShortApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}.
+         */
+        public ShortApproxEqualsDetector(final Tolerance tolerance) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+        }
+
+        @Override
+        public boolean areObjectsApproxEquals(final Short lhs, final Short rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get out
+                return false;
+            }
+            // Check based on tolerance
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs == 0) {
+                        return lhs == rhs;
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    return ((double)Math.abs(lhs - rhs) / lhs) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return Math.abs(lhs - rhs) <= tolerance.getValue();
+            }
+        }
+
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        @Override
+        public Short convertStringToObject(final String string) throws SmartUriException {
+            return Short.valueOf(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return Short.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.SHORT;
+        }
+    }
+
+    /**
+     * Class to detect if two strings are considered approximately equal to
+     * each other. Strings that match ignoring case are always equal. Otherwise
+     * the configured equivalent terms are consulted and finally the
+     * Levenshtein distance between the strings is compared to the tolerance.
+     */
+    public static class StringApproxEqualsDetector implements ApproxEqualsDetector<String> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.05, ToleranceType.PERCENTAGE);
+        private final Tolerance tolerance;
+        private final Map<String, List<String>> equivalentTermsMap;
+
+        /**
+         * Creates a new instance of {@link StringApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance}. When {@code null} the
+         * default tolerance is used instead.
+         * @param equivalentTermsMap maps a term to the terms that are treated
+         * as equivalent to it. May be {@code null}, in which case no terms are
+         * treated as equivalent.
+         */
+        public StringApproxEqualsDetector(final Tolerance tolerance, final Map<String, List<String>> equivalentTermsMap) {
+            this.tolerance = tolerance != null ? tolerance : getDefaultTolerance();
+            this.equivalentTermsMap = equivalentTermsMap;
+        }
+
+        /**
+         * Decides whether two strings are within the configured tolerance.
+         * @param lhs the left-hand string; its length is the reference for
+         * percentage tolerances and it is the key used to look up equivalent
+         * terms. May be {@code null}.
+         * @param rhs the right-hand string. May be {@code null}.
+         * @return {@code true} if both are {@code null}, equal ignoring case,
+         * configured as equivalent, or within tolerance. {@code false}
+         * otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final String lhs, final String rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (StringUtils.equalsIgnoreCase(lhs, rhs)) {
+                // They're exactly equals so get out
+                return true;
+            } else if (tolerance.getValue() == 0) {
+                // If they're not exactly equals with zero tolerance then get
+                // out. Note that equivalent terms are not consulted in this
+                // case.
+                return false;
+            }
+
+            // Only check one-way. Terms are not bi-directionally equivalent
+            // unless specified. A missing map simply means no equivalents.
+            final List<String> lhsTermEquivalents = equivalentTermsMap != null ? equivalentTermsMap.get(lhs) : null;
+            if (lhsTermEquivalents != null && lhsTermEquivalents.contains(rhs)) {
+                return true;
+            }
+            final int distance = StringUtils.getLevenshteinDistance(lhs, rhs);
+            // Check based on tolerance
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE:
+                    if (lhs.length() == 0) {
+                        return lhs.length() == rhs.length();
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    return ((double)distance / lhs.length()) <= tolerance.getValue();
+                case DIFFERENCE:
+                default:
+                    return distance <= tolerance.getValue();
+            }
+        }
+
+        /**
+         * @return the default {@link Tolerance}: 5 percent.
+         */
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * @param string the text to convert.
+         * @return the same string, unchanged.
+         */
+        @Override
+        public String convertStringToObject(final String string) throws SmartUriException {
+            return string;
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return String.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.STRING;
+        }
+    }
+
+    /**
+     * Detector that decides whether two URIs are close enough to be treated
+     * as the same value. The comparison is made on the string form of each
+     * URI using the Levenshtein distance between them.
+     */
+    public static class UriApproxEqualsDetector implements ApproxEqualsDetector<URI> {
+        private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE);
+        private final Tolerance tolerance;
+
+        /**
+         * Creates a new instance of {@link UriApproxEqualsDetector}.
+         * @param tolerance the {@link Tolerance} to apply, or {@code null} to
+         * fall back to the default tolerance.
+         */
+        public UriApproxEqualsDetector(final Tolerance tolerance) {
+            if (tolerance != null) {
+                this.tolerance = tolerance;
+            } else {
+                this.tolerance = getDefaultTolerance();
+            }
+        }
+
+        /**
+         * Compares the string values of two URIs against the tolerance.
+         * @param lhs the left-hand URI; its length is the reference for
+         * percentage tolerances. May be {@code null}.
+         * @param rhs the right-hand URI. May be {@code null}.
+         * @return {@code true} when both are {@code null}, equal, equal
+         * ignoring case, or within tolerance. {@code false} otherwise.
+         */
+        @Override
+        public boolean areObjectsApproxEquals(final URI lhs, final URI rhs) {
+            if (isOnlyOneNull(lhs, rhs)) {
+                return false;
+            }
+            if (Objects.equals(lhs, rhs)) {
+                return true;
+            }
+            final String lhsText = lhs.stringValue();
+            final String rhsText = rhs.stringValue();
+            if (StringUtils.equalsIgnoreCase(lhsText, rhsText)) {
+                // Same text apart from case, nothing more to check.
+                return true;
+            }
+            if (tolerance.getValue() == 0) {
+                // Different text and no slack allowed.
+                return false;
+            }
+            final int editDistance = StringUtils.getLevenshteinDistance(lhsText, rhsText);
+            // Apply the tolerance according to its type.
+            switch (tolerance.getToleranceType()) {
+                case PERCENTAGE: {
+                    final int referenceLength = lhsText.length();
+                    if (referenceLength == 0) {
+                        return rhsText.length() == 0;
+                    }
+                    if (tolerance.getValue() >= 1) {
+                        return true;
+                    }
+                    final double ratio = (double) editDistance / referenceLength;
+                    return ratio <= tolerance.getValue();
+                }
+                case DIFFERENCE:
+                default:
+                    return editDistance <= tolerance.getValue();
+            }
+        }
+
+        /**
+         * @return the default {@link Tolerance}: a difference of 1.
+         */
+        @Override
+        public Tolerance getDefaultTolerance() {
+            return DEFAULT_TOLERANCE;
+        }
+
+        /**
+         * Wraps the text in a new {@link URIImpl}.
+         * @param string the URI text.
+         * @return the created {@link URI}.
+         */
+        @Override
+        public URI convertStringToObject(final String string) throws SmartUriException {
+            return new URIImpl(string);
+        }
+
+        @Override
+        public Class<?> getTypeClass() {
+            return URI.class;
+        }
+
+        @Override
+        public URI getXmlSchemaUri() {
+            return XMLSchema.ANYURI;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/EntityNearDuplicateException.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/EntityNearDuplicateException.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/EntityNearDuplicateException.java
new file mode 100644
index 0000000..8bdf54f
--- /dev/null
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/EntityNearDuplicateException.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication;
+
+import org.apache.rya.indexing.entity.model.Entity;
+import org.apache.rya.indexing.entity.storage.EntityStorage.EntityStorageException;
+
+/**
+ * An {@link Entity} could not be created because another entity is a nearly
+ * identical duplicate based on the configured tolerances.
+ */
+public class EntityNearDuplicateException extends EntityStorageException {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates a new instance of {@link EntityNearDuplicateException}.
+     * @param message the message to be displayed by the exception.
+     */
+    public EntityNearDuplicateException(final String message) {
+        super(message);
+    }
+
+    /**
+     * Creates a new instance of {@link EntityNearDuplicateException}.
+     * @param message the message to be displayed by the exception.
+     * @param throwable the source {@link Throwable} cause of the exception.
+     */
+    public EntityNearDuplicateException(final String message, final Throwable throwable) {
+        super(message, throwable);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/Tolerance.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/Tolerance.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/Tolerance.java
new file mode 100644
index 0000000..772522c
--- /dev/null
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/Tolerance.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication;
+
+import static java.util.Objects.requireNonNull;
+
+import java.text.NumberFormat;
+
+/**
+ * A tolerance setting: a numeric value paired with the
+ * {@link ToleranceType} that says how the value is to be interpreted.
+ */
+public class Tolerance {
+    private final Double value;
+    private final ToleranceType toleranceType;
+
+    /**
+     * Creates a new instance of {@link Tolerance}.
+     * @param value the numeric tolerance. (not {@code null})
+     * @param toleranceType how {@code value} is interpreted.
+     * (not {@code null})
+     */
+    public Tolerance(final Double value, final ToleranceType toleranceType) {
+        this.value = requireNonNull(value);
+        this.toleranceType = requireNonNull(toleranceType);
+    }
+
+    /**
+     * @return the numeric tolerance.
+     */
+    public Double getValue() {
+        return value;
+    }
+
+    /**
+     * @return the {@link ToleranceType} describing how the value is
+     * interpreted.
+     */
+    public ToleranceType getToleranceType() {
+        return toleranceType;
+    }
+
+    /**
+     * @return the value formatted as a percentage for {@code PERCENTAGE}
+     * tolerances, or the plain number for {@code DIFFERENCE} tolerances.
+     */
+    @Override
+    public String toString() {
+        if (toleranceType == ToleranceType.PERCENTAGE) {
+            return NumberFormat.getPercentInstance().format(value);
+        }
+        if (toleranceType == ToleranceType.DIFFERENCE) {
+            return value.toString();
+        }
+        return "Unknown Tolerance Type with value: " + value.toString();
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ToleranceType.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ToleranceType.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ToleranceType.java
new file mode 100644
index 0000000..29faff1
--- /dev/null
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ToleranceType.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * The types of methods available to use for calculating tolerance.
+ */
+public enum ToleranceType {
+    /**
+     * Two values are accepted when the difference between them is within
+     * the specified tolerance value.
+     */
+    DIFFERENCE,
+    /**
+     * Two values are accepted when the difference between them, divided by
+     * the original value, is within the specified tolerance percentage.
+     */
+    PERCENTAGE;
+
+    /**
+     * Looks up the {@link ToleranceType} constant with the specified name.
+     * @param name the exact name of the constant to look up.
+     * @return the matching {@link ToleranceType}, or {@code null} when
+     * {@code name} is {@code null}, empty or only whitespace.
+     * @throws IllegalArgumentException if {@code name} is not blank and does
+     * not exactly match the name of a constant.
+     */
+    public static ToleranceType getToleranceTypeByName(final String name) {
+        if (StringUtils.isBlank(name)) {
+            return null;
+        }
+        return ToleranceType.valueOf(name);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/conf/DuplicateDataConfig.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/conf/DuplicateDataConfig.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/conf/DuplicateDataConfig.java
new file mode 100644
index 0000000..98f65c7
--- /dev/null
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/conf/DuplicateDataConfig.java
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication.conf;
+
+import static java.util.Objects.requireNonNull;
+
+import java.text.NumberFormat;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.configuration.XMLConfiguration;
+import org.apache.rya.indexing.smarturi.duplication.Tolerance;
+import org.apache.rya.indexing.smarturi.duplication.ToleranceType;
+
+/**
+ * Configuration options for data duplication.
+ */
+public class DuplicateDataConfig {
+    public static final String DEFAULT_CONFIG_FILE_PATH = "conf/duplicate_data_detection_config.xml";
+
+    private Tolerance booleanTolerance;
+    private Tolerance byteTolerance;
+    private Tolerance dateTolerance;
+    private Tolerance doubleTolerance;
+    private Tolerance floatTolerance;
+    private Tolerance integerTolerance;
+    private Tolerance longTolerance;
+    private Tolerance shortTolerance;
+    private Tolerance stringTolerance;
+    private Tolerance uriTolerance;
+
+    private Map<String, List<String>> equivalentTermsMap;
+
+    private boolean isDetectionEnabled;
+
+    /**
+     * Creates a new instance of {@link DuplicateDataConfig} from the file at
+     * {@link #DEFAULT_CONFIG_FILE_PATH}.
+     * @throws ConfigurationException
+     */
+    public DuplicateDataConfig() throws ConfigurationException {
+        this(new XMLConfiguration(DEFAULT_CONFIG_FILE_PATH));
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataConfig}.
+     * @param xmlFileLocation the config's XML file path. (not {@code null})
+     * @throws ConfigurationException
+     */
+    public DuplicateDataConfig(final String xmlFileLocation) throws ConfigurationException {
+        this(new XMLConfiguration(requireNonNull(xmlFileLocation)));
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataConfig}.
+     * @param xmlConfig the {@link XMLConfiguration} file. (not {@code null})
+     * @throws ConfigurationException
+     */
+    public DuplicateDataConfig(final XMLConfiguration xmlConfig) throws ConfigurationException {
+        requireNonNull(xmlConfig);
+
+        final Tolerance booleanTolerance = parseTolerance("tolerances.booleanTolerance", xmlConfig);
+        final Tolerance byteTolerance = parseTolerance("tolerances.byteTolerance", xmlConfig);
+        final Tolerance dateTolerance = parseTolerance("tolerances.dateTolerance", xmlConfig);
+        final Tolerance doubleTolerance = parseTolerance("tolerances.doubleTolerance", xmlConfig);
+        final Tolerance floatTolerance = parseTolerance("tolerances.floatTolerance", xmlConfig);
+        final Tolerance integerTolerance = parseTolerance("tolerances.integerTolerance", xmlConfig);
+        final Tolerance longTolerance = parseTolerance("tolerances.longTolerance", xmlConfig);
+        final Tolerance shortTolerance = parseTolerance("tolerances.shortTolerance", xmlConfig);
+        final Tolerance stringTolerance = parseTolerance("tolerances.stringTolerance", xmlConfig);
+        final Tolerance uriTolerance = parseTolerance("tolerances.uriTolerance", xmlConfig);
+
+        final Map<String, List<String>> equivalentTermsMap = parseEquivalentTermsMap(xmlConfig);
+
+        final boolean isDetectionEnabled = xmlConfig.getBoolean("enableDetection", false);
+        init(booleanTolerance, byteTolerance, dateTolerance, doubleTolerance, floatTolerance, integerTolerance, longTolerance, shortTolerance, stringTolerance, uriTolerance, equivalentTermsMap, isDetectionEnabled);
+    }
+
+    /**
+     * Creates a new instance of {@link DuplicateDataConfig}.
+     * @param booleanTolerance the {@link Boolean} tolerance value or
+     * {@code null} if not specified.
+     * @param byteTolerance the {@link Byte} tolerance value or {@code null} if
+     * not specified.
+     * @param dateTolerance the {@link Date} tolerance value or {@code null} if
+     * not specified.
+     * @param doubleTolerance the {@link Double} tolerance value or {@code null}
+     * if not specified.
+     * @param floatTolerance the {@link Float} tolerance value or {@code null}
+     * if not specified.
+     * @param integerTolerance the {@link Integer} tolerance value or
+     * {@code null} if not specified.
+     * @param longTolerance the {@link Long} tolerance value or {@code null} if
+     * not specified.
+     * @param shortTolerance the {@link Short} tolerance value or {@code null}
+     * if not specified.
+     * @param stringTolerance the {@link String} tolerance value or {@code null}
+     * if not specified.
+     * @param uriTolerance the {@link URI} tolerance value or {@code null} if
+     * not specified.
+     * @param equivalentTermsMap the {@link Map} of terms that are considered
+     * equivalent to each other. (not {@code null})
+     * @param isDetectionEnabled {@code true} to enable detection. {@code false}
+     * to disable detection.
+     */
+    public DuplicateDataConfig(final Tolerance booleanTolerance, final Tolerance byteTolerance,
+        final Tolerance dateTolerance, final Tolerance doubleTolerance, final Tolerance floatTolerance,
+        final Tolerance integerTolerance, final Tolerance longTolerance, final Tolerance shortTolerance,
+        final Tolerance stringTolerance, final Tolerance uriTolerance, final Map<String, List<String>> equivalentTermsMap,
+        final boolean isDetectionEnabled)
+    {
+        init(booleanTolerance, byteTolerance, dateTolerance, doubleTolerance, floatTolerance, integerTolerance, longTolerance, shortTolerance, stringTolerance, uriTolerance, equivalentTermsMap, isDetectionEnabled);
+    }
+
+    /**
+     * Stores the supplied settings in the fields. Shared by all constructors.
+     */
+    private void init(final Tolerance booleanTolerance, final Tolerance byteTolerance,
+        final Tolerance dateTolerance, final Tolerance doubleTolerance, final Tolerance floatTolerance,
+        final Tolerance integerTolerance, final Tolerance longTolerance, final Tolerance shortTolerance,
+        final Tolerance stringTolerance, final Tolerance uriTolerance, final Map<String, List<String>> equivalentTermsMap,
+        final boolean isDetectionEnabled)
+    {
+        this.booleanTolerance = booleanTolerance;
+        this.byteTolerance = byteTolerance;
+        this.dateTolerance= dateTolerance;
+        this.doubleTolerance = doubleTolerance;
+        this.floatTolerance = floatTolerance;
+        this.integerTolerance = integerTolerance;
+        this.longTolerance = longTolerance;
+        this.shortTolerance = shortTolerance;
+        this.stringTolerance = stringTolerance;
+        this.uriTolerance = uriTolerance;
+        this.equivalentTermsMap = requireNonNull(equivalentTermsMap);
+        this.isDetectionEnabled = isDetectionEnabled;
+    }
+
+    /**
+     * Reads the {@code <key>.type} and {@code <key>.value} entries and turns
+     * them into a {@link Tolerance}.
+     * @param key the config key prefix of the tolerance element.
+     * @param xmlConfig the {@link XMLConfiguration} to read from.
+     * @return the parsed {@link Tolerance} or {@code null} if the type or the
+     * value is not specified.
+     * @throws ConfigurationException if the type name is unknown, the value
+     * is negative, a percentage is greater than 100%, or a percent string can
+     * not be parsed.
+     */
+    private static Tolerance parseTolerance(final String key, final XMLConfiguration xmlConfig) throws ConfigurationException {
+        final String type = xmlConfig.getString(key + ".type", null);
+        final ToleranceType toleranceType;
+        try {
+            toleranceType = ToleranceType.getToleranceTypeByName(type);
+        } catch (final IllegalArgumentException e) {
+            // The enum lookup rejects unknown names with an unchecked
+            // exception; report it as the configuration problem it is.
+            throw new ConfigurationException("Unknown Tolerance Type specified in config for \"" + key + "\": " + type, e);
+        }
+        Double doubleValue = null;
+        if (toleranceType != null) {
+            switch (toleranceType) {
+                case PERCENTAGE:
+                    final String value = xmlConfig.getString(key + ".value", null);
+                    if (value != null && value.contains("%")) {
+                        // Values such as "5%" are parsed as a fraction (0.05).
+                        try {
+                            final Number number = NumberFormat.getPercentInstance().parse(value);
+                            doubleValue = number.doubleValue();
+                        } catch (final ParseException e) {
+                            throw new ConfigurationException(e);
+                        }
+                    } else {
+                        doubleValue = xmlConfig.getDouble(key + ".value", null);
+                    }
+                    if (doubleValue != null) {
+                        if (doubleValue < 0) {
+                            throw new ConfigurationException("The " + toleranceType + " tolerance type for \"" + key + "\" must be a positive value. Found this value: " + doubleValue);
+                        }
+                        if (doubleValue > 1) {
+                            throw new ConfigurationException("The " + toleranceType + " tolerance type for \"" + key + "\" can NOT be greater than 100%. Found this value: " + doubleValue);
+                        }
+                    }
+                    break;
+                case DIFFERENCE:
+                    doubleValue = xmlConfig.getDouble(key + ".value", null);
+                    if (doubleValue != null && doubleValue < 0) {
+                        throw new ConfigurationException("The " + toleranceType + " tolerance type for \"" + key + "\" must be a positive value. Found this value: " + doubleValue);
+                    }
+                    break;
+                default:
+                    throw new ConfigurationException("Unknown Tolerance Type specified in config for <" + type + ">: " + toleranceType);
+            }
+            if (doubleValue != null) {
+                return new Tolerance(doubleValue, toleranceType);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Reads every {@code termMappings.termMapping} element into a map of term
+     * to its equivalent terms, in document order.
+     */
+    private static Map<String, List<String>> parseEquivalentTermsMap(final XMLConfiguration xmlConfig) {
+        final Map<String, List<String>> equivalentTermsMap = new LinkedHashMap<>();
+        final Object prop = xmlConfig.getProperty("termMappings.termMapping.term");
+        if (prop != null) {
+            if (prop instanceof Collection) {
+                // Several mappings: address each one by its index.
+                final int size = ((Collection<?>) prop).size();
+                for (int i = 0; i < size; i++) {
+                    final String termElement = "termMappings.termMapping(" + i + ")";
+                    parseTermMapping(termElement, xmlConfig, equivalentTermsMap);
+                }
+            } else {
+                final String termElement = "termMappings.termMapping";
+                parseTermMapping(termElement, xmlConfig, equivalentTermsMap);
+            }
+        }
+        return equivalentTermsMap;
+    }
+
+    /**
+     * Reads one term mapping element and, when it has a term and at least one
+     * equivalent, adds it to the supplied map.
+     */
+    private static void parseTermMapping(final String termElement, final XMLConfiguration xmlConfig, final Map<String, List<String>> equivalentTermsMap) {
+        final String term = xmlConfig.getString(termElement + ".term");
+        if (term == null) {
+            return;
+        }
+        final String equivalentKey = termElement + ".equivalents.equivalent";
+        // getProperty (unlike getString) hands back a Collection when the
+        // element occurs more than once, which is what the check below needs
+        // in order to see every equivalent rather than just the first.
+        final Object equivalentProp = xmlConfig.getProperty(equivalentKey);
+        final List<String> equivalents = new ArrayList<>();
+        if (equivalentProp instanceof Collection) {
+            final int equivalentSize = ((Collection<?>) equivalentProp).size();
+            for (int j = 0; j < equivalentSize; j++) {
+                final String equivalent = xmlConfig.getString(equivalentKey + "(" + j + ")");
+                if (equivalent != null) {
+                    equivalents.add(equivalent);
+                }
+            }
+        } else {
+            final String equivalent = xmlConfig.getString(equivalentKey);
+            if (equivalent != null) {
+                equivalents.add(equivalent);
+            }
+        }
+        if (!equivalents.isEmpty()) {
+            equivalentTermsMap.put(term, equivalents);
+        }
+    }
+
+    /**
+     * @return the {@link Boolean} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getBooleanTolerance() {
+        return booleanTolerance;
+    }
+
+    /**
+     * @return the {@link Byte} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getByteTolerance() {
+        return byteTolerance;
+    }
+
+    /**
+     * @return the {@link Date} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getDateTolerance() {
+        return dateTolerance;
+    }
+
+    /**
+     * @return the {@link Double} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getDoubleTolerance() {
+        return doubleTolerance;
+    }
+
+    /**
+     * @return the {@link Float} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getFloatTolerance() {
+        return floatTolerance;
+    }
+
+    /**
+     * @return the {@link Integer} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getIntegerTolerance() {
+        return integerTolerance;
+    }
+
+    /**
+     * @return the {@link Long} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getLongTolerance() {
+        return longTolerance;
+    }
+
+    /**
+     * @return the {@link Short} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getShortTolerance() {
+        return shortTolerance;
+    }
+
+    /**
+     * @return the {@link String} tolerance value or {@code null} if not
+     * specified.
+     */
+    public Tolerance getStringTolerance() {
+        return stringTolerance;
+    }
+
+    /**
+     * @return the {@link URI} tolerance value or {@code null} if not specified.
+     */
+    public Tolerance getUriTolerance() {
+        return uriTolerance;
+    }
+
+    /**
+     * @return the {@link Map} of terms that are considered equivalent to each
+     * other.
+     */
+    public Map<String, List<String>> getEquivalentTermsMap() {
+        return equivalentTermsMap;
+    }
+
+    /**
+     * @return {@code true} to enable detection. {@code false} to disable
+     * detection.
+     */
+    public boolean isDetectionEnabled() {
+        return isDetectionEnabled;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/test/java/org/apache/rya/indexing/mongo/MongoDbSmartUriTest.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/test/java/org/apache/rya/indexing/mongo/MongoDbSmartUriTest.java b/extras/indexing/src/test/java/org/apache/rya/indexing/mongo/MongoDbSmartUriTest.java
index 60efbed..dff271f 100644
--- a/extras/indexing/src/test/java/org/apache/rya/indexing/mongo/MongoDbSmartUriTest.java
+++ b/extras/indexing/src/test/java/org/apache/rya/indexing/mongo/MongoDbSmartUriTest.java
@@ -245,7 +245,6 @@ public class MongoDbSmartUriTest {
         final Entity resultEntity = SmartUriAdapter.deserializeUriEntity(smartUri);
         System.out.println(resultEntity);
         assertEquals(BOB_ENTITY.getSubject(), resultEntity.getSubject());
-        //assertTrue(Paths.get(BOB_ENTITY.getSubject().getData()).equals(Paths.get(resultEntity.getSubject().getData())));
     }
 
     @Test


[3/3] incubator-rya git commit: RYA-250 Added data duplication detection methods to Smart URI/Entities. These use configured tolerances for each data type to decide if an Entity is considered nearly equal. Also, string terms that are considered equival

Posted by ca...@apache.org.
RYA-250 Added data duplication detection methods to Smart URI/Entities.  These use configured tolerances for each data type to decide if an Entity is considered nearly equal.  Also, string terms that are considered equivalent can be configured. Closes #153.


Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/b319365e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/b319365e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/b319365e

Branch: refs/heads/master
Commit: b319365e8c06fe5f19c6ada611f596be2695930c
Parents: 7cd70bf
Author: eric.white <Er...@parsons.com>
Authored: Wed Apr 12 11:15:03 2017 -0400
Committer: Caleb Meier <ca...@parsons.com>
Committed: Mon Aug 21 13:39:38 2017 -0700

----------------------------------------------------------------------
 .../org/apache/rya/api/domain/RyaTypeUtils.java |   54 +-
 extras/indexing/README.md                       |  212 ++
 .../conf/duplicate_data_detection_config.xml    |   80 +
 .../indexing/entity/EntityIndexOptimizer.java   |    7 +-
 .../rya/indexing/entity/model/Entity.java       |    2 +-
 .../storage/mongo/MongoEntityStorage.java       |  131 +-
 .../entity/update/BaseEntityIndexer.java        |    9 +-
 .../indexing/entity/update/EntityIndexer.java   |    4 +-
 .../entity/update/mongo/MongoEntityIndexer.java |    3 +-
 .../rya/indexing/mongodb/MongoDbSmartUri.java   |    5 +-
 .../rya/indexing/smarturi/SmartUriAdapter.java  |   88 +-
 .../indexing/smarturi/SmartUriException.java    |    8 +
 .../duplication/ApproxEqualsDetector.java       |   78 +
 .../duplication/DuplicateDataDetector.java      | 1066 +++++++++
 .../EntityNearDuplicateException.java           |   47 +
 .../smarturi/duplication/Tolerance.java         |   67 +
 .../smarturi/duplication/ToleranceType.java     |   50 +
 .../duplication/conf/DuplicateDataConfig.java   |  337 +++
 .../rya/indexing/mongo/MongoDbSmartUriTest.java |    1 -
 .../duplication/DuplicateDataDetectorTest.java  | 2053 ++++++++++++++++++
 20 files changed, 4210 insertions(+), 92 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaTypeUtils.java
----------------------------------------------------------------------
diff --git a/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaTypeUtils.java b/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaTypeUtils.java
index 8ca65fc..6f9902e 100644
--- a/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaTypeUtils.java
+++ b/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaTypeUtils.java
@@ -24,12 +24,44 @@ import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 import org.joda.time.format.ISODateTimeFormat;
 import org.openrdf.model.URI;
+import org.openrdf.model.impl.URIImpl;
 import org.openrdf.model.vocabulary.XMLSchema;
 
+import com.google.common.collect.ImmutableMap;
+
 /**
  * Utility methods for using {@link RyaType}.
  */
 public final class RyaTypeUtils {
+    private static final ImmutableMap<Class<?>, RyaTypeMethod> METHOD_MAP =
+        ImmutableMap.<Class<?>, RyaTypeMethod>builder()
+            .put(Boolean.class, (v) -> booleanRyaType((Boolean) v))
+            .put(Byte.class, (v) -> byteRyaType((Byte) v))
+            .put(Date.class, (v) -> dateRyaType((Date) v))
+            .put(DateTime.class, (v) -> dateRyaType((DateTime) v))
+            .put(Double.class, (v) -> doubleRyaType((Double) v))
+            .put(Float.class, (v) -> floatRyaType((Float) v))
+            .put(Integer.class, (v) -> intRyaType((Integer) v))
+            .put(Long.class, (v) -> longRyaType((Long) v))
+            .put(Short.class, (v) -> shortRyaType((Short) v))
+            .put(String.class, (v) -> stringRyaType((String) v))
+            .put(URI.class, (v) -> uriRyaType((URI) v))
+            .put(URIImpl.class, (v) -> uriRyaType((URIImpl) v))
+            .build();
+
+    /**
+     * Represents a method inside the {@link RyaTypeUtils} class that can be
+     * called.
+     */
+    private static interface RyaTypeMethod {
+        /**
+         * Calls the method within {@link RyaTypeUtils} with the supplied value.
+         * @param value the object value.
+         * @return the {@link RyaType}.
+         */
+        public RyaType callRyaTypeMethod(final Object value);
+    }
+
     /**
      * Private constructor to prevent instantiation.
      */
@@ -66,7 +98,10 @@ public final class RyaTypeUtils {
      */
     public static RyaType dateRyaType(final Date value) {
         final DateTime dateTime = new DateTime(value.getTime());
-        return dateRyaType(dateTime);
+        final StringBuffer sb = new StringBuffer();
+        ISODateTimeFormat.dateTime().withZone(DateTimeZone.UTC).printTo(sb, dateTime.getMillis());
+        final String formattedDate = sb.toString();
+        return new RyaType(XMLSchema.DATE, formattedDate);
     }
 
     /**
@@ -154,4 +189,21 @@ public final class RyaTypeUtils {
     public static RyaType uriRyaType(final URI value) {
         return new RyaType(XMLSchema.ANYURI, value.stringValue());
     }
+
+    /**
+     * Calls the appropriate {@link RyaTypeUtils} method based on the class
+     * specified and initializes it with the supplied value.
+     * @param classType the {@link Class} of {@link RyaType} to find.
+     * @param value the value to initialize the {@link RyaType} with.
+     * @return the {@link RyaType} or {@code null} if none could be found for
+     * the specified {@code classType}.
+     */
+    public static RyaType getRyaTypeForClass(final Class<?> classType, final Object value) {
+        final RyaTypeMethod method = METHOD_MAP.get(classType);
+        RyaType ryaType = null;
+        if (method != null) {
+            ryaType = method.callRyaTypeMethod(value);
+        }
+        return ryaType;
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/README.md
----------------------------------------------------------------------
diff --git a/extras/indexing/README.md b/extras/indexing/README.md
new file mode 100644
index 0000000..a2f7497
--- /dev/null
+++ b/extras/indexing/README.md
@@ -0,0 +1,212 @@
+<!-- Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. -->
+
+# Rya Indexing
+
+___
+
+This project contains implementations of Rya's Indexing components.
+
+Rya Indexing supports the following datastores:
+
+  * Accumulo
+
+  * MongoDB
+
+## Entity Indexing
+An Entity is a named concept that has at least one defined structure
+and a bunch of values that fit within each of those structures. A structure is
+defined by a Type. A value that fits within that Type is a Property.
+
+For example, suppose we want to represent a type of icecream as an Entity.
+First we must define what properties an icecream entity may have:
+```
+    Type ID: <urn:icecream>
+ Properties: <urn:brand>
+             <urn:flavor>
+             <urn:ingredients>
+             <urn:nutritionalInformation>
+```
+Now we can represent our icecream whose brand is "Awesome Icecream" and whose
+flavor is "Chocolate", but has no ingredients or nutritional information, as
+an Entity by doing the following:
+```
+final Entity entity = Entity.builder()
+             .setSubject(new RyaURI("urn:GTIN-14/00012345600012"))
+             .setExplicitType(new RyaURI("urn:icecream"))
+             .setProperty(new RyaURI("urn:icecream"), new Property(new RyaURI("urn:brand"), new RyaType(XMLSchema.STRING, "Awesome Icecream")))
+             .setProperty(new RyaURI("urn:icecream"), new Property(new RyaURI("urn:flavor"), new RyaType(XMLSchema.STRING, "Chocolate")))
+             .build();
+```
+The two types of Entities that may be created are implicit and explicit.
+An implicit Entity is one that has at least one Property that matches
+the Type, but nothing has explicitly indicated it is of that Type.
+Once something has done so, it is an explicitly typed Entity.
+
+### Smart URI
+A Smart URI is an identifier of a specific Entity with all its data fields and associated values. Smart URIs are currently only supported by MongoDB instances of Rya.
+
+The Smart URI format:
+
+[subject URI]?ryaTypes=[encoded map of type URIs to type names]&[type1.propertyName1]=[value1]&[type1.propertyName2]=[value2]&[type2.propertyName1]=[value3]
+
+The "subject URI" can be any URI.  The Entity's properties are expressed as query parameters in the Smart URI.  Since the Smart URI may represent an Entity of multiple Types, the "ryaTypes" query parameter is used to keep track of which property belongs to which Type.  The Type's mapped short name is prepended to each property name (separated by a period) and each name/value pair has its characters escaped properly to produce a valid URI.  This means all foreign and special characters will be encoded so that they conform to [RFC-3986](https://www.ietf.org/rfc/rfc3986.txt).
+
+Using the "icecream" Type Entity example from the "Entity Indexing" section above, the Smart URI representation would be:
+
+`urn://GTIN-14/00012345600012?ryaTypes=urn%3A%2F%2FentityTypeMap%3Furn%253Aicecream%3Dicecream&icecream.brand=Awesome+Icecream&icecream.flavor=Chocolate`
+
+As an example of an Entity with multiple Types, let's consider the Entity as also being part of the following "dessert" type:
+```
+    Type ID: <urn:dessert>
+ Properties: <urn:storageNeeded>
+             <urn:utensilUsed>
+```
+
+The Smart URI representation would be:
+
+`urn://GTIN-14/00012345600012?ryaTypes=urn%3A%2F%2FentityTypeMap%3Furn%253Aicecream%3Dicecream%26urn%253Adessert%3Ddessert&dessert.storageNeeded=Freezer&dessert.utensilUsed=Spoon&icecream.brand=Awesome+Icecream&icecream.flavor=Chocolate`
+
+#### Smart URI Entity Duplication Detection
+
+In some cases, data that is close enough together to be considered nearly identical should be treated as duplicate Entities.  Duplicate data detection can be enabled so that newly found Entities that appear close enough to existing Entities are not created.
+
+##### Configuring Duplication Detection
+
+This section discusses how to configure the various options of Smart URI Entity Duplication Detection.  To configure Duplication Detection, create or modify the `duplicate_data_detection_config.xml` in the `conf` directory.
+
+It should look similar to this:
+```xml
+<duplicateDataDetectionConfiguration>
+    <enableDetection>true</enableDetection>
+    <tolerances>
+        <booleanTolerance>
+            <value>0</value>
+            <type>DIFFERENCE</type>
+        </booleanTolerance>
+        <byteTolerance>
+            <value>0</value>
+            <type>DIFFERENCE</type>
+        </byteTolerance>
+        <dateTolerance>
+            <value>500</value>
+            <type>DIFFERENCE</type>
+        </dateTolerance>
+        <doubleTolerance>
+            <value>0.01%</value>
+            <type>PERCENTAGE</type>
+        </doubleTolerance>
+        <floatTolerance>
+            <value>0.01%</value>
+            <type>PERCENTAGE</type>
+        </floatTolerance>
+        <integerTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </integerTolerance>
+        <longTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </longTolerance>
+        <shortTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </shortTolerance>
+        <stringTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </stringTolerance>
+        <uriTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </uriTolerance>
+    </tolerances>
+    <termMappings>
+        <termMapping>
+            <term>example</term>
+            <equivalents>
+                <equivalent>sample</equivalent>
+                <equivalent>case</equivalent>
+            </equivalents>
+        </termMapping>
+        <termMapping>
+            <term>test</term>
+            <equivalents>
+                <equivalent>exam</equivalent>
+                <equivalent>quiz</equivalent>
+            </equivalents>
+        </termMapping>
+    </termMappings>
+</duplicateDataDetectionConfiguration>
+```
+
+###### Enabling/Disabling Duplication Detection
+
+To enable detection, set `<enableDetection>` to "true".  Setting to "false" will disable it.
+
+###### Tolerance Type Configuration
+
+Each data type can have a tolerance type set for it (either "PERCENTAGE" or "DIFFERENCE").  If "DIFFERENCE" is selected then two values are considered duplicates if the absolute difference between them is within the `<value>` specified.  The `<value>` must be positive.  So, the two integer values 50000 and 50001 are considered duplicates when configured like this:
+```xml
+        <integerTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </integerTolerance>
+```
+
+If "PERCENTAGE" is selected then the values must be within a certain percentage of one another to be considered nearly identical duplicates.  This is useful when dealing with really small numbers or really large numbers where an exact absolute difference is hard to determine to consider them nearly equal.  The `<value>` can be expressed in percent or decimal form.  So, one percent can be entered as "1%" or "0.01" in the `<value>` field.  The `<value>` must be positive and cannot be greater than 100%.
+
+If a Type's tolerance is not specified it defers to a default value.
+The default values are:
+
+| **Class** | **Tolerance Type** | **Value** |
+|-----------|--------------------|-----------|
+| boolean   | DIFFERENCE         | 0         |
+| byte      | DIFFERENCE         | 0         |
+| date      | DIFFERENCE         | 500 (ms)  |
+| double    | PERCENTAGE         | 0.01%     |
+| float     | PERCENTAGE         | 0.01%     |
+| integer   | DIFFERENCE         | 1         |
+| long      | DIFFERENCE         | 1         |
+| short     | DIFFERENCE         | 1         |
+| string    | PERCENTAGE         | 5%        |
+| uri       | DIFFERENCE         | 1         |
+
+###### Equivalent Terms Configuration
+
+Words that one wants to consider equivalent can be inserted into a map under the `<termMappings>` part of the configuration file.  An example `<termMapping>` entry is shown below:
+
+```xml
+        <termMapping>
+            <term>example</term>
+            <equivalents>
+                <equivalent>sample</equivalent>
+                <equivalent>case</equivalent>
+            </equivalents>
+        </termMapping>
+```
+
+This `<termMapping>` means that a new Entity with a field value of "sample" or "case" would be equivalent to an existing field value of "example" but this relationship is only one way.  If a field value of "case" already exists then a new field value "example" is not equivalent.  The relationship can be made bidirectional by adding another `<termMapping>`.
+
+```xml
+        <termMapping>
+            <term>case</term>
+            <equivalents>
+                <equivalent>example</equivalent>
+            </equivalents>
+        </termMapping>
+```

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/conf/duplicate_data_detection_config.xml
----------------------------------------------------------------------
diff --git a/extras/indexing/conf/duplicate_data_detection_config.xml b/extras/indexing/conf/duplicate_data_detection_config.xml
new file mode 100644
index 0000000..7855ca7
--- /dev/null
+++ b/extras/indexing/conf/duplicate_data_detection_config.xml
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. -->
+<duplicateDataDetectionConfiguration>
+    <enableDetection>true</enableDetection>
+    <tolerances>
+        <booleanTolerance>
+            <value>0</value>
+            <type>DIFFERENCE</type>
+        </booleanTolerance>
+        <byteTolerance>
+            <value>0</value>
+            <type>DIFFERENCE</type>
+        </byteTolerance>
+        <dateTolerance>
+            <value>500</value>
+            <type>DIFFERENCE</type>
+        </dateTolerance>
+        <doubleTolerance>
+            <value>0.01%</value>
+            <type>PERCENTAGE</type>
+        </doubleTolerance>
+        <floatTolerance>
+            <value>0.01%</value>
+            <type>PERCENTAGE</type>
+        </floatTolerance>
+        <integerTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </integerTolerance>
+        <longTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </longTolerance>
+        <shortTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </shortTolerance>
+        <stringTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </stringTolerance>
+        <uriTolerance>
+            <value>1</value>
+            <type>DIFFERENCE</type>
+        </uriTolerance>
+    </tolerances>
+    <!-- Uncomment to define equivalent terms
+    <termMappings>
+        <termMapping>
+            <term>example</term>
+            <equivalents>
+                <equivalent>sample</equivalent>
+                <equivalent>case</equivalent>
+            </equivalents>
+        </termMapping>
+        <termMapping>
+            <term>test</term>
+            <equivalents>
+                <equivalent>exam</equivalent>
+                <equivalent>quiz</equivalent>
+            </equivalents>
+        </termMapping>
+    </termMappings>
+    -->
+</duplicateDataDetectionConfiguration>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/entity/EntityIndexOptimizer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/EntityIndexOptimizer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/EntityIndexOptimizer.java
index 5a45dc8..cd5278e 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/EntityIndexOptimizer.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/EntityIndexOptimizer.java
@@ -26,6 +26,7 @@ import org.apache.log4j.Logger;
 import org.apache.rya.indexing.entity.model.Entity;
 import org.apache.rya.indexing.entity.query.EntityQueryNode;
 import org.apache.rya.indexing.entity.storage.EntityStorage;
+import org.apache.rya.indexing.entity.storage.EntityStorage.EntityStorageException;
 import org.apache.rya.indexing.entity.storage.TypeStorage;
 import org.apache.rya.indexing.entity.update.mongo.MongoEntityIndexer;
 import org.apache.rya.indexing.external.matching.AbstractExternalSetOptimizer;
@@ -71,7 +72,11 @@ public class EntityIndexOptimizer extends AbstractExternalSetOptimizer<EntityQue
         indexer.setConf(conf);
 
         typeStorage = indexer.getTypeStorage(conf);
-        entityStorage = indexer.getEntityStorage(conf);
+        try {
+            entityStorage = indexer.getEntityStorage(conf);
+        } catch (final EntityStorageException e) {
+            log.error("Error getting entity storage", e);
+        }
 
         provider = new EntityIndexSetProvider(typeStorage, entityStorage);
     }

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/entity/model/Entity.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/model/Entity.java b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/model/Entity.java
index a90e469..3804de4 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/model/Entity.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/model/Entity.java
@@ -208,7 +208,7 @@ public class Entity {
         final ImmutableMap<RyaURI, Property> typePropertyMap = properties.get(typeRyaUri);
         Optional<Property> property = Optional.empty();
         if (typePropertyMap != null) {
-            property = Optional.of(typePropertyMap.get(propertyRyaUri));
+            property = Optional.ofNullable(typePropertyMap.get(propertyRyaUri));
         }
         return property;
     }

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/entity/storage/mongo/MongoEntityStorage.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/storage/mongo/MongoEntityStorage.java b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/storage/mongo/MongoEntityStorage.java
index a71d673..87634d7 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/storage/mongo/MongoEntityStorage.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/storage/mongo/MongoEntityStorage.java
@@ -20,25 +20,35 @@ package org.apache.rya.indexing.entity.storage.mongo;
 
 import static java.util.Objects.requireNonNull;
 
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.log4j.Logger;
 import org.apache.rya.api.domain.RyaURI;
 import org.apache.rya.indexing.entity.model.Entity;
 import org.apache.rya.indexing.entity.model.Property;
 import org.apache.rya.indexing.entity.model.Type;
 import org.apache.rya.indexing.entity.model.TypedEntity;
 import org.apache.rya.indexing.entity.storage.EntityStorage;
+import org.apache.rya.indexing.entity.storage.TypeStorage.TypeStorageException;
 import org.apache.rya.indexing.entity.storage.mongo.ConvertingCursor.Converter;
 import org.apache.rya.indexing.entity.storage.mongo.DocumentConverter.DocumentConverterException;
 import org.apache.rya.indexing.entity.storage.mongo.key.MongoDbSafeKey;
+import org.apache.rya.indexing.smarturi.SmartUriException;
+import org.apache.rya.indexing.smarturi.duplication.DuplicateDataDetector;
+import org.apache.rya.indexing.smarturi.duplication.EntityNearDuplicateException;
 import org.bson.Document;
 import org.bson.conversions.Bson;
 
 import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
 import com.mongodb.ErrorCategory;
 import com.mongodb.MongoClient;
 import com.mongodb.MongoException;
@@ -54,6 +64,7 @@ import edu.umd.cs.findbugs.annotations.NonNull;
  */
 @DefaultAnnotation(NonNull.class)
 public class MongoEntityStorage implements EntityStorage {
+    private static final Logger log = Logger.getLogger(MongoEntityStorage.class);
 
     protected static final String COLLECTION_NAME = "entity-entities";
 
@@ -69,15 +80,40 @@ public class MongoEntityStorage implements EntityStorage {
      */
     protected final String ryaInstanceName;
 
+    private final DuplicateDataDetector duplicateDataDetector;
+    private MongoTypeStorage mongoTypeStorage = null;
+
+    /**
+     * Constructs an instance of {@link MongoEntityStorage}.
+     *
+     * @param mongo - A client connected to the Mongo instance that hosts the Rya instance. (not null)
+     * @param ryaInstanceName - The name of the Rya instance the {@link TypedEntity}s are for. (not null)
+     * @throws EntityStorageException if a {@link DuplicateDataDetector} could not be created.
+     */
+    public MongoEntityStorage(final MongoClient mongo, final String ryaInstanceName) throws EntityStorageException {
+        this(mongo, ryaInstanceName, null);
+    }
+
     /**
      * Constructs an instance of {@link MongoEntityStorage}.
      *
      * @param mongo - A client connected to the Mongo instance that hosts the Rya instance. (not null)
      * @param ryaInstanceName - The name of the Rya instance the {@link TypedEntity}s are for. (not null)
+     * @param duplicateDataDetector - The {@link DuplicateDataDetector}.
+     * @throws EntityStorageException
      */
-    public MongoEntityStorage(final MongoClient mongo, final String ryaInstanceName) {
+    public MongoEntityStorage(final MongoClient mongo, final String ryaInstanceName, final DuplicateDataDetector duplicateDataDetector) throws EntityStorageException {
         this.mongo = requireNonNull(mongo);
         this.ryaInstanceName = requireNonNull(ryaInstanceName);
+        if (duplicateDataDetector == null) {
+            try {
+                this.duplicateDataDetector = new DuplicateDataDetector();
+            } catch (final ConfigurationException e) {
+                throw new EntityStorageException("Could not create duplicate data detector.", e);
+            }
+        } else {
+            this.duplicateDataDetector = duplicateDataDetector;
+        }
     }
 
     @Override
@@ -85,10 +121,15 @@ public class MongoEntityStorage implements EntityStorage {
         requireNonNull(entity);
 
         try {
-            mongo.getDatabase(ryaInstanceName)
-                .getCollection(COLLECTION_NAME)
-                .insertOne( ENTITY_CONVERTER.toDocument(entity) );
-
+            final boolean hasDuplicate = detectDuplicates(entity);
+
+            if (!hasDuplicate) {
+                mongo.getDatabase(ryaInstanceName)
+                    .getCollection(COLLECTION_NAME)
+                    .insertOne( ENTITY_CONVERTER.toDocument(entity) );
+            } else {
+                throw new EntityNearDuplicateException("Duplicate data found and will not be inserted for Entity with Subject: "  + entity);
+            }
         } catch(final MongoException e) {
             final ErrorCategory category = ErrorCategory.fromErrorCode( e.getCode() );
             if(category == ErrorCategory.DUPLICATE_KEY) {
@@ -242,4 +283,84 @@ public class MongoEntityStorage implements EntityStorage {
 
         return Stream.of(dataTypeFilter, valueFilter);
     }
+
+    private boolean detectDuplicates(final Entity entity) throws EntityStorageException {
+        boolean hasDuplicate = false;
+        if (duplicateDataDetector.isDetectionEnabled()) {
+            // Grab all entities that have all the same explicit types as our
+            // original Entity.
+            final List<Entity> comparisonEntities = searchHasAllExplicitTypes(entity.getExplicitTypeIds());
+
+            // Now that we have our set of potential duplicates, compare them.
+            // We can stop when we find one duplicate.
+            for (final Entity compareEntity : comparisonEntities) {
+                try {
+                    hasDuplicate = duplicateDataDetector.compareEntities(entity, compareEntity);
+                } catch (final SmartUriException e) {
+                    throw new EntityStorageException("Encountered an error while comparing entities.", e);
+                }
+                if (hasDuplicate) {
+                    break;
+                }
+            }
+        }
+        return hasDuplicate;
+    }
+
+    /**
+     * Searches the Entity storage for all Entities that contain all the
+     * specified explicit type IDs.
+     * @param explicitTypeIds the {@link ImmutableList} of {@link RyaURI}s that
+     * are being searched for.
+     * @return the {@link List} of {@link Entity}s that have all the specified
+     * explicit type IDs. If nothing was found an empty {@link List} is
+     * returned.
+     * @throws EntityStorageException
+     */
+    private List<Entity> searchHasAllExplicitTypes(final ImmutableList<RyaURI> explicitTypeIds) throws EntityStorageException {
+        final List<Entity> hasAllExplicitTypesEntities = new ArrayList<>();
+        if (!explicitTypeIds.isEmpty()) {
+            // Grab the first type from the explicit type IDs.
+            final RyaURI firstType = explicitTypeIds.get(0);
+
+            // Check if that type exists anywhere in storage.
+            final List<RyaURI> subjects = new ArrayList<>();
+            Optional<Type> type;
+            try {
+                if (mongoTypeStorage == null) {
+                    mongoTypeStorage = new MongoTypeStorage(mongo, ryaInstanceName);
+                }
+                type = mongoTypeStorage.get(firstType);
+            } catch (final TypeStorageException e) {
+                throw new EntityStorageException("Unable to get entity type: " + firstType, e);
+            }
+            if (type.isPresent()) {
+                // Grab the subjects for all the types we found matching "firstType"
+                final ConvertingCursor<TypedEntity> cursor = search(Optional.empty(), type.get(), Collections.emptySet());
+                while (cursor.hasNext()) {
+                    final TypedEntity typedEntity = cursor.next();
+                    final RyaURI subject = typedEntity.getSubject();
+                    subjects.add(subject);
+                }
+            }
+
+            // Now grab all the Entities that have the subjects we found.
+            for (final RyaURI subject : subjects) {
+                final Optional<Entity> entityFromSubject = get(subject);
+                if (entityFromSubject.isPresent()) {
+                    final Entity candidateEntity = entityFromSubject.get();
+                    // Filter out any entities that don't have all the same
+                    // types associated with them as our original list of
+                    // explicit type IDs. We already know the entities we found
+                    // have "firstType" but now we have access to all the other
+                    // types they have.
+                    if (candidateEntity.getExplicitTypeIds().containsAll(explicitTypeIds)) {
+                        hasAllExplicitTypesEntities.add(candidateEntity);
+                    }
+                }
+            }
+        }
+
+        return hasAllExplicitTypesEntities;
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/BaseEntityIndexer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/BaseEntityIndexer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/BaseEntityIndexer.java
index 84b0bdc..e73eeb3 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/BaseEntityIndexer.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/BaseEntityIndexer.java
@@ -33,6 +33,7 @@ import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.Logger;
 import org.apache.rya.api.domain.RyaStatement;
 import org.apache.rya.api.domain.RyaType;
 import org.apache.rya.api.domain.RyaURI;
@@ -40,6 +41,7 @@ import org.apache.rya.indexing.entity.model.Entity;
 import org.apache.rya.indexing.entity.model.Property;
 import org.apache.rya.indexing.entity.model.Type;
 import org.apache.rya.indexing.entity.storage.EntityStorage;
+import org.apache.rya.indexing.entity.storage.EntityStorage.EntityStorageException;
 import org.apache.rya.indexing.entity.storage.TypeStorage;
 import org.apache.rya.indexing.entity.storage.TypeStorage.TypeStorageException;
 import org.apache.rya.indexing.entity.storage.mongo.ConvertingCursor;
@@ -60,6 +62,7 @@ import edu.umd.cs.findbugs.annotations.NonNull;
  */
 @DefaultAnnotation(NonNull.class)
 public abstract class BaseEntityIndexer implements EntityIndexer, MongoSecondaryIndex {
+    private static final Logger log = Logger.getLogger(BaseEntityIndexer.class);
 
     /**
      * When this URI is the Predicate of a Statement, it indicates a {@link Type} for an {@link Entity}.
@@ -73,7 +76,11 @@ public abstract class BaseEntityIndexer implements EntityIndexer, MongoSecondary
     @Override
     public void setConf(final Configuration conf) {
         requireNonNull(conf);
-        entities.set( getEntityStorage(conf) );
+        try {
+            entities.set( getEntityStorage(conf) );
+        } catch (final EntityStorageException e) { // best effort: entities keeps its prior value, types is still set below
+            log.error("Unable to set entity storage.", e); // pass the cause so the stack trace reaches the log
+        }
         types.set( getTypeStorage(conf) );
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/EntityIndexer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/EntityIndexer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/EntityIndexer.java
index 48cb0b1..aeb5a41 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/EntityIndexer.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/EntityIndexer.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.rya.api.domain.RyaStatement;
 import org.apache.rya.api.persist.index.RyaSecondaryIndexer;
 import org.apache.rya.indexing.entity.storage.EntityStorage;
+import org.apache.rya.indexing.entity.storage.EntityStorage.EntityStorageException;
 import org.apache.rya.indexing.entity.storage.TypeStorage;
 
 import edu.umd.cs.findbugs.annotations.Nullable;
@@ -37,8 +38,9 @@ public interface EntityIndexer extends RyaSecondaryIndexer {
      *
      * @param conf - Indicates how the {@link EntityStorage} is initialized. (not null)
      * @return The {@link EntityStorage} that will be used by this indexer.
+     * @throws EntityStorageException
      */
-    public @Nullable EntityStorage getEntityStorage(Configuration conf);
+    public @Nullable EntityStorage getEntityStorage(Configuration conf) throws EntityStorageException;
 
     /**
      * Creates the {@link TypeStorage} that will be used by the indexer.

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/mongo/MongoEntityIndexer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/mongo/MongoEntityIndexer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/mongo/MongoEntityIndexer.java
index 84eebaa..1ab48b6 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/mongo/MongoEntityIndexer.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/entity/update/mongo/MongoEntityIndexer.java
@@ -20,6 +20,7 @@ package org.apache.rya.indexing.entity.update.mongo;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.rya.indexing.entity.storage.EntityStorage;
+import org.apache.rya.indexing.entity.storage.EntityStorage.EntityStorageException;
 import org.apache.rya.indexing.entity.storage.TypeStorage;
 import org.apache.rya.indexing.entity.storage.mongo.MongoEntityStorage;
 import org.apache.rya.indexing.entity.storage.mongo.MongoTypeStorage;
@@ -41,7 +42,7 @@ public class MongoEntityIndexer extends BaseEntityIndexer {
     private MongoClient client;
 
     @Override
-    public EntityStorage getEntityStorage(final Configuration conf) {
+    public EntityStorage getEntityStorage(final Configuration conf) throws EntityStorageException {
         final MongoDBRdfConfiguration mongoConf = (MongoDBRdfConfiguration) conf;
         if (client == null) {
             if(mongoConf.getMongoClient() != null) {

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/MongoDbSmartUri.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/MongoDbSmartUri.java b/extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/MongoDbSmartUri.java
index cbc8796..b40c9b6 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/MongoDbSmartUri.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/mongodb/MongoDbSmartUri.java
@@ -135,7 +135,7 @@ public class MongoDbSmartUri implements SmartUriStorage {
         if (!isInit) {
             try {
                 setupClient(conf);
-            } catch (final UnknownHostException | MongoException e) {
+            } catch (final UnknownHostException | MongoException | EntityStorageException e) {
                 throw new SmartUriException("Failed to setup MongoDB client", e);
             }
         }
@@ -146,8 +146,9 @@ public class MongoDbSmartUri implements SmartUriStorage {
      * @param conf the {@link Configuration}.
      * @throws UnknownHostException
      * @throws MongoException
+     * @throws EntityStorageException
      */
-    private void setupClient(final Configuration conf) throws UnknownHostException, MongoException {
+    private void setupClient(final Configuration conf) throws UnknownHostException, MongoException, EntityStorageException {
         final MongoDBRdfConfiguration mongoConf = (MongoDBRdfConfiguration) conf;
         mongoClient = mongoConf.getMongoClient();
         if (mongoClient == null) {

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriAdapter.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriAdapter.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriAdapter.java
index d6a5e8a..f637d0d 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriAdapter.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriAdapter.java
@@ -21,8 +21,6 @@ package org.apache.rya.indexing.smarturi;
 import java.io.UnsupportedEncodingException;
 import java.net.URISyntaxException;
 import java.net.URLDecoder;
-import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -98,40 +96,23 @@ public class SmartUriAdapter {
     }
 
     private static String getShortNameForType(final RyaURI type) throws SmartUriException {
-        String typeUriString;
-        try {
-            typeUriString = new java.net.URI(type.getData()).getRawSchemeSpecificPart();
-        } catch (final URISyntaxException e) {
-            throw new SmartUriException("Unable to get create URI for type", e);
-        }
-        final Path path = Paths.get(typeUriString);
-        final String shortName = path.getFileName().toString();
+        final String shortName = new URIImpl(type.getData()).getLocalName(); // NOTE(review): no statement here throws SmartUriException any more; confirm the throws clause is kept only for callers
         return shortName;
     }
 
 
     private static String addTypePrefixToUri(final String uriString, final String typePrefix) {
-        int location = StringUtils.lastIndexOf(uriString, "#");
-        if (location == - 1) {
-            location = StringUtils.lastIndexOf(uriString, "/");
-        }
-
-        final String lastSegment = uriString.substring(location + 1);
-
-        final String formattedUriString = uriString.substring(0, location + 1) + typePrefix + lastSegment;
+        final String localName = new URIImpl(uriString).getLocalName(); // trailing name segment as reported by URIImpl
+        final String beginning = StringUtils.removeEnd(uriString, localName); // the URI with that trailing local name stripped
+        final String formattedUriString = beginning + typePrefix + localName; // prefix goes directly before the local name; no separator is added here
         return formattedUriString;
     }
 
     private static String removeTypePrefixFromUri(final String uriString, final String typePrefix) {
-        int location = StringUtils.lastIndexOf(uriString, "#");
-        if (location == - 1) {
-            location = StringUtils.lastIndexOf(uriString, "/");
-        }
-
-        final String lastSegment = uriString.substring(location + 1);
-        final String replacement = lastSegment.replaceFirst(typePrefix + ".", "");
-
-        final String formattedUriString = uriString.substring(0, location + 1) + replacement;
+        final String localName = new URIImpl(uriString).getLocalName(); // trailing name segment as reported by URIImpl
+        final String beginning = StringUtils.removeEnd(uriString, localName); // the URI with that trailing local name stripped
+        final String replacement = localName.replaceFirst(typePrefix + ".", ""); // NOTE(review): replaceFirst takes a regex, so typePrefix is read as a pattern, "." matches any character, and the match is not anchored to the start; confirm this is intended
+        final String formattedUriString = beginning + replacement; // reassemble with the first match removed from the local name
         return formattedUriString;
     }
 
@@ -310,7 +291,7 @@ public class SmartUriAdapter {
 
                 String formattedKey = key.getData();
                 if (StringUtils.isNotBlank(typeShortName)) {
-                    formattedKey = addTypePrefixToUri(key.getData(), typeShortName);
+                    formattedKey = addTypePrefixToUri(formattedKey, typeShortName);
                 }
                 final URI uri = new URIImpl(formattedKey);
                 objectMap.put(uri, value);
@@ -343,7 +324,7 @@ public class SmartUriAdapter {
             if (fragmentPosition > -1) {
                 uriBuilder = new URIBuilder(new java.net.URI("urn://" + fragment));
             } else {
-                uriBuilder = new URIBuilder(new java.net.URI(subjectData));
+                uriBuilder = new URIBuilder(new java.net.URI(subjectData.replaceFirst(":", "://")));
             }
         } catch (final URISyntaxException e) {
             throw new SmartUriException("Unable to serialize a Smart URI from the provided properties", e);
@@ -447,55 +428,6 @@ public class SmartUriAdapter {
         return map;
     }
 
-//    public static Map<URI, Value> deserializeUri(final URI uri) throws SmartUriException {
-//        final String uriString = uri.stringValue();
-//        final int fragmentPosition = uriString.indexOf("#");
-//        String prefix = uriString.substring(0, fragmentPosition + 1);
-//        if (fragmentPosition == -1) {
-//            prefix = uriString.split("\\?", 2)[0];
-//        }
-//        final String fragment = uriString.substring(fragmentPosition + 1, uriString.length());
-//        java.net.URI queryUri;
-//
-//        URIBuilder uriBuilder = null;
-//        try {
-//             if (fragmentPosition > -1) {
-//                 queryUri = new java.net.URI("urn://" + fragment);
-//             } else {
-//                 queryUri = new java.net.URI(uriString);
-//             }
-//            uriBuilder = new URIBuilder(queryUri);
-//        } catch (final URISyntaxException e) {
-//            throw new SmartUriException("Unable to deserialize Smart URI", e);
-//        }
-//        final Map<URI, Value> map = new HashMap<>();
-//        final List<NameValuePair> parameters = uriBuilder.getQueryParams();
-//        Map<RyaURI, String> entityTypeMap = new LinkedHashMap<>();
-//        for (final NameValuePair pair : parameters) {
-//            final String keyString = pair.getName();
-//            final String valueString = pair.getValue();
-//
-//            final URI keyUri = new URIImpl(prefix + keyString);
-//            final URI type = TypeDeterminer.determineType(valueString);
-//            if (type == XMLSchema.ANYURI) {
-//                final String decoded;
-//                try {
-//                    decoded = URLDecoder.decode(valueString, Charsets.UTF_8.name());
-//                } catch (final UnsupportedEncodingException e) {
-//                    throw new SmartUriException("", e);
-//                }
-//                entityTypeMap = convertUriToTypeMap(new URIImpl(decoded));
-//            } else {
-//                final RyaType ryaType = new RyaType(type, valueString);
-//
-//                final Value value = RyaToRdfConversions.convertValue(ryaType);
-//
-//                map.put(keyUri, value);
-//            }
-//        }
-//        return map;
-//    }
-
     public static Entity deserializeUriEntity(final URI uri) throws SmartUriException {
         final String uriString = uri.stringValue();
         final int fragmentPosition = uriString.indexOf("#");

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriException.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriException.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriException.java
index 979b6b9..6ec1e40 100644
--- a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriException.java
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/SmartUriException.java
@@ -28,6 +28,14 @@ public class SmartUriException extends Exception {
     /**
      * Creates a new instance of {@link SmartUriException}.
      * @param message the message to be displayed by the exception.
+     */
+    public SmartUriException(final String message) {
+        super(message); // message-only variant: no cause is passed to the superclass
+    }
+
+    /**
+     * Creates a new instance of {@link SmartUriException}.
+     * @param message the message to be displayed by the exception.
      * @param throwable the source {#link Throwable} cause of the exception.
      */
     public SmartUriException(final String message, final Throwable throwable) {

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/b319365e/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ApproxEqualsDetector.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ApproxEqualsDetector.java b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ApproxEqualsDetector.java
new file mode 100644
index 0000000..c450951
--- /dev/null
+++ b/extras/indexing/src/main/java/org/apache/rya/indexing/smarturi/duplication/ApproxEqualsDetector.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.rya.indexing.smarturi.duplication;
+
+import org.apache.rya.indexing.smarturi.SmartUriException;
+import org.openrdf.model.URI;
+
+/**
+ * Contract for deciding whether two values of type {@code T} are close enough
+ * to be treated as approximately equal.
+ * @param <T> the value type handled by an implementation of
+ * {@link ApproxEqualsDetector}.
+ */
+public interface ApproxEqualsDetector<T> {
+    /**
+     * Tests two objects for approximate equality.
+     * @param lhs the left hand side object.
+     * @param rhs the right hand side object.
+     * @return {@code true} when the two objects are considered approximately
+     * equal; {@code false} otherwise.
+     */
+    boolean areObjectsApproxEquals(T lhs, T rhs);
+
+    /**
+     * @return the tolerance used by default for this type.
+     */
+    Tolerance getDefaultTolerance();
+
+    /**
+     * Parses a string representation into an object of the class reported by
+     * {@link #getTypeClass()}.
+     * @param string the {@link String} to convert to an object.
+     * @return the converted object.
+     * @throws SmartUriException when an implementation's conversion fails.
+     */
+    T convertStringToObject(String string) throws SmartUriException;
+
+    /**
+     * @return the object {@link Class} this detector is used for.
+     */
+    Class<?> getTypeClass();
+
+    /**
+     * @return the {@link URI} of the XML schema type this detector is used
+     * for.
+     */
+    URI getXmlSchemaUri();
+
+    /**
+     * Tests two string representations for approximate equality by converting
+     * each with {@link #convertStringToObject(String)} ({@code lhs} first) and
+     * handing the results to {@link #areObjectsApproxEquals(Object, Object)}.
+     * @param lhs the left hand side string object representation.
+     * @param rhs the right hand side string object representation.
+     * @return {@code true} when the converted objects are considered
+     * approximately equal; {@code false} otherwise.
+     * @throws SmartUriException propagated from the string conversion.
+     */
+    default boolean areApproxEquals(String lhs, String rhs) throws SmartUriException {
+        return areObjectsApproxEquals(convertStringToObject(lhs), convertStringToObject(rhs));
+    }
+}
\ No newline at end of file