You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by th...@apache.org on 2014/02/05 10:27:23 UTC

svn commit: r1564687 [2/6] - in /jackrabbit/trunk: ./ jackrabbit-aws-ext/ jackrabbit-aws-ext/src/main/java/org/apache/jackrabbit/aws/ext/ jackrabbit-aws-ext/src/main/java/org/apache/jackrabbit/aws/ext/ds/ jackrabbit-aws-ext/src/test/java/org/apache/jac...

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+
+
+/**
+ * The interface defines the backend which can be plugged into
+ * {@link CachingDataStore}.
+ */
+public interface Backend {
+
+    /**
+     * This method initialize backend with the configuration.
+     * 
+     * @param store {@link CachingDataStore}
+     * @param homeDir path of repository home dir.
+     * @param config path of config property file.
+     * @throws DataStoreException
+     */
+    void init(CachingDataStore store, String homeDir, String config)
+            throws DataStoreException;
+
+    /**
+     * Return inputstream of record identified by identifier.
+     * 
+     * @param identifier identifier of record.
+     * @return inputstream of the record.
+     * @throws DataStoreException if record not found or any error.
+     */
+    InputStream read(DataIdentifier identifier) throws DataStoreException;
+
+    /**
+     * Return length of record identified by identifier.
+     * 
+     * @param identifier identifier of record.
+     * @return length of the record.
+     * @throws DataStoreException if record not found or any error.
+     */
+    long getLength(DataIdentifier identifier) throws DataStoreException;
+
+    /**
+     * Return lastModified of record identified by identifier.
+     * 
+     * @param identifier identifier of record.
+     * @return lastModified of the record.
+     * @throws DataStoreException if record not found or any error.
+     */
+    long getLastModified(DataIdentifier identifier) throws DataStoreException;
+
+    /**
+     * Stores file to backend with identifier used as key. If key pre-exists, it
+     * updates the timestamp of the key.
+     * 
+     * @param identifier key of the file 
+     * @param file file that would be stored in backend.
+     * @throws DataStoreException for any error.
+     */
+    void write(DataIdentifier identifier, File file) throws DataStoreException;
+
+    /**
+     * Returns identifiers of all records that exists in backend. 
+     * @return iterator consisting of all identifiers
+     * @throws DataStoreException
+     */
+    Iterator<DataIdentifier> getAllIdentifiers() throws DataStoreException;
+
+    /**
+     * Update timestamp of record identified by identifier if minModifiedDate is
+     * greater than record's lastModified else no op.
+     * 
+     * @throws DataStoreException if record not found.
+     */
+    void touch(DataIdentifier identifier, long minModifiedDate)
+            throws DataStoreException;
+    /**
+     * This method check the existence of record in backend. 
+     * @param identifier identifier to be checked. 
+     * @return true if records exists else false.
+     * @throws DataStoreException
+     */
+    boolean exists(DataIdentifier identifier) throws DataStoreException;
+
+    /**
+     * Close backend and release resources like database connection if any.
+     * @throws DataStoreException
+     */
+    void close() throws DataStoreException;
+
+    /**
+     * Delete all records which are older than timestamp.
+     * @param timestamp
+     * @return list of identifiers which are deleted. 
+     * @throws DataStoreException
+     */
+    List<DataIdentifier> deleteAllOlderThan(long timestamp) throws DataStoreException;
+
+    /**
+     * Delete record identified by identifier. No-op if identifier not found.
+     * @param identifier
+     * @throws DataStoreException
+     */
+    void deleteRecord(DataIdentifier identifier) throws DataStoreException;
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+import java.io.InputStream;
+
+
+/**
+ * CachingDataRecord which stores reference to {@link CachingDataStore}. This
+ * class doesn't store any references to attributes but attributes are fetched
+ * on demand from {@link CachingDataStore}.
+ */
+public class CachingDataRecord extends AbstractDataRecord {
+
+    private final CachingDataStore store;
+
+    public CachingDataRecord(CachingDataStore store, DataIdentifier identifier) {
+        super(store, identifier);
+        this.store = store;
+    }
+
+    @Override
+    public long getLastModified() {
+        try {
+            return store.getLastModified(getIdentifier());
+        } catch (DataStoreException dse) {
+            return 0;
+        }
+    }
+
+    @Override
+    public long getLength() throws DataStoreException {
+        return store.getLength(getIdentifier());
+    }
+
+    @Override
+    public InputStream getStream() throws DataStoreException {
+        return store.getStream(getIdentifier());
+    }
+
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,605 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.lang.ref.WeakReference;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A caching data store that consists of {@link LocalCache} and {@link Backend}.
+ * {@link Backend} is single source of truth. All methods first try to fetch
+ * information from {@link LocalCache}. If record is not available in
+ * {@link LocalCache}, then it is fetched from {@link Backend} and saved to
+ * {@link LocalCache} for further access. This class is designed to work without
+ * {@link LocalCache} and then all information is fetched from {@link Backend}.
+ * To disable {@link LocalCache} set {@link #setCacheSize(long)} to 0. *
+ * Configuration:
+ * 
+ * <pre>
+ * &lt;DataStore class="org.apache.jackrabbit.aws.ext.ds.CachingDataStore">
+ * 
+ *     &lt;param name="{@link #setPath(String) path}" value="/data/datastore"/>
+ *     &lt;param name="{@link #setConfig(String) config}" value="${rep.home}/backend.properties"/>
+ *     &lt;param name="{@link #setCacheSize(long) cacheSize}" value="68719476736"/>
+ *     &lt;param name="{@link #setSecret(String) secret}" value="123456"/>
+ *     &lt;param name="{@link #setCachePurgeTrigFactor(double)}" value="0.95d"/>
+ *     &lt;param name="{@link #setCachePurgeResizeFactor(double) cacheSize}" value="0.85d"/>
+ *     &lt;param name="{@link #setMinRecordLength(int) minRecordLength}" value="1024"/>
+ * &lt/DataStore>
+ */
+public abstract class CachingDataStore extends AbstractDataStore implements
+        MultiDataStoreAware {
+
+    /**
+     * Logger instance.
+     */
+    private static final Logger LOG = LoggerFactory.getLogger(CachingDataStore.class);
+
+    /**
+     * The digest algorithm used to uniquely identify records.
+     */
+    private static final String DIGEST = "SHA-1";
+
+    private static final String DS_STORE = ".DS_Store";
+
+    /**
+     * Name of the directory used for temporary files. Must be at least 3
+     * characters.
+     */
+    private static final String TMP = "tmp";
+
+    /**
+     * All data identifiers that are currently in use are in this set until they
+     * are garbage collected.
+     */
+    protected Map<DataIdentifier, WeakReference<DataIdentifier>> inUse =
+            Collections.synchronizedMap(new WeakHashMap<DataIdentifier,
+                    WeakReference<DataIdentifier>>());
+
+    protected Backend backend;
+
+    /**
+     * The minimum size of an object that should be stored in this data store.
+     */
+    private int minRecordLength = 16 * 1024;
+
+    private String path;
+
+    private File directory;
+
+    private File tmpDir;
+
+    private String secret;
+
+    /**
+     * The optional backend configuration.
+     */
+    private String config;
+
+    /**
+     * The minimum modified date. If a file is accessed (read or write) with a
+     * modified date older than this value, the modified date is updated to the
+     * current time.
+     */
+    private long minModifiedDate;
+
+    /**
+     * Cache purge trigger factor. Cache will undergo in auto-purge mode if
+     * cache current size is greater than cachePurgeTrigFactor * cacheSize
+     */
+    private double cachePurgeTrigFactor = 0.95d;
+
+    /**
+     * Cache resize factor. After auto-purge mode, cache current size would just
+     * greater than cachePurgeResizeFactor * cacheSize cacheSize
+     */
+    private double cachePurgeResizeFactor = 0.85d;
+
+    /**
+     * The number of bytes in the cache. The default value is 64 GB.
+     */
+    private long cacheSize = 64L * 1024 * 1024 * 1024;
+
+    /**
+     * The local file system cache.
+     */
+    private LocalCache cache;
+
+    protected abstract Backend createBackend();
+
+    protected abstract String getMarkerFile();
+
+    /**
+     * Initialized the data store. If the path is not set, &lt;repository
+     * home&gt;/repository/datastore is used. This directory is automatically
+     * created if it does not yet exist. During first initialization, it upload
+     * all files from local datastore to backed and local datastore act as a
+     * local cache.
+     */
+    @Override
+    public void init(String homeDir) throws RepositoryException {
+        if (path == null) {
+            path = homeDir + "/repository/datastore";
+        }
+        directory = new File(path);
+        try {
+            mkdirs(directory);
+        } catch (IOException e) {
+            throw new DataStoreException("Could not create directory "
+                    + directory.getAbsolutePath(), e);
+        }
+        tmpDir = new File(homeDir, "/repository/s3tmp");
+        try {
+            if (!mkdirs(tmpDir)) {
+                FileUtils.cleanDirectory(tmpDir);
+                LOG.info("tmp = " + tmpDir.getPath() + " cleaned");
+            }
+        } catch (IOException e) {
+            throw new DataStoreException("Could not create directory "
+                    + tmpDir.getAbsolutePath(), e);
+        }
+        LOG.info("cachePurgeTrigFactor = " + cachePurgeTrigFactor
+                + ", cachePurgeResizeFactor = " + cachePurgeResizeFactor);
+        backend = createBackend();
+        backend.init(this, path, config);
+        String markerFileName = getMarkerFile();
+        if (markerFileName != null) {
+            // create marker file in homeDir to avoid deletion in cache cleanup.
+            File markerFile = new File(homeDir, markerFileName);
+            if (!markerFile.exists()) {
+                LOG.info("load files from local cache");
+                loadFilesFromCache();
+                try {
+                    markerFile.createNewFile();
+                } catch (IOException e) {
+                    throw new DataStoreException(
+                            "Could not create marker file "
+                                    + markerFile.getAbsolutePath(), e);
+                }
+            } else {
+                LOG.info("marker file = " + markerFile.getAbsolutePath()
+                        + " exists");
+            }
+        }
+        cache = new LocalCache(path, tmpDir.getAbsolutePath(), cacheSize,
+                cachePurgeTrigFactor, cachePurgeResizeFactor);
+    }
+
+    /**
+     * Creates a new data record in {@link Backend}. The stream is first
+     * consumed and the contents are saved in a temporary file and the SHA-1
+     * message digest of the stream is calculated. If a record with the same
+     * SHA-1 digest (and length) is found then it is returned. Otherwise new
+     * record is created in {@link Backend} and the temporary file is moved in
+     * place to {@link LocalCache}.
+     * 
+     * @param input
+     *            binary stream
+     * @return {@link CachingDataRecord}
+     * @throws DataStoreException
+     *             if the record could not be created.
+     */
+    @Override
+    public DataRecord addRecord(InputStream input) throws DataStoreException {
+        File temporary = null;
+        try {
+            temporary = newTemporaryFile();
+            DataIdentifier tempId = new DataIdentifier(temporary.getName());
+            usesIdentifier(tempId);
+            // Copy the stream to the temporary file and calculate the
+            // stream length and the message digest of the stream
+            MessageDigest digest = MessageDigest.getInstance(DIGEST);
+            OutputStream output = new DigestOutputStream(new FileOutputStream(
+                    temporary), digest);
+            try {
+                IOUtils.copyLarge(input, output);
+            } finally {
+                output.close();
+            }
+            DataIdentifier identifier = new DataIdentifier(
+                    encodeHexString(digest.digest()));
+            synchronized (this) {
+                usesIdentifier(identifier);
+                backend.write(identifier, temporary);
+                String fileName = getFileName(identifier);
+                cache.store(fileName, temporary);
+            }
+            // this will also make sure that
+            // tempId is not garbage collected until here
+            inUse.remove(tempId);
+            return new CachingDataRecord(this, identifier);
+        } catch (NoSuchAlgorithmException e) {
+            throw new DataStoreException(DIGEST + " not available", e);
+        } catch (IOException e) {
+            throw new DataStoreException("Could not add record", e);
+        } finally {
+            if (temporary != null) {
+                // try to delete - but it's not a big deal if we can't
+                temporary.delete();
+            }
+        }
+    }
+
+    /**
+     * Get a data record for the given identifier or null it data record doesn't
+     * exist in {@link Backend}
+     * 
+     * @param identifier
+     *            identifier of record.
+     * @return the {@link CachingDataRecord} or null.
+     */
+    @Override
+    public DataRecord getRecordIfStored(DataIdentifier identifier)
+            throws DataStoreException {
+        synchronized (this) {
+            usesIdentifier(identifier);
+            if (!backend.exists(identifier)) {
+                return null;
+            }
+            backend.touch(identifier, minModifiedDate);
+            return new CachingDataRecord(this, identifier);
+        }
+    }
+
+    @Override
+    public void updateModifiedDateOnAccess(long before) {
+        LOG.info("minModifiedDate set to: " + before);
+        minModifiedDate = before;
+    }
+
+    /**
+     * Retrieves all identifiers from {@link Backend}.
+     */
+    @Override
+    public Iterator<DataIdentifier> getAllIdentifiers()
+            throws DataStoreException {
+        return backend.getAllIdentifiers();
+    }
+
+    /**
+     * This method deletes record from {@link Backend} and then from
+     * {@link LocalCache}
+     */
+    @Override
+    public void deleteRecord(DataIdentifier identifier)
+            throws DataStoreException {
+        String fileName = getFileName(identifier);
+        synchronized (this) {
+            backend.deleteRecord(identifier);
+            cache.delete(fileName);
+        }
+    }
+
+    @Override
+    public synchronized int deleteAllOlderThan(long min)
+            throws DataStoreException {
+        List<DataIdentifier> diList = backend.deleteAllOlderThan(min);
+        // remove entries from local cache
+        for (DataIdentifier identifier : diList) {
+            cache.delete(getFileName(identifier));
+        }
+        return diList.size();
+    }
+
+    /**
+     * Get stream of record from {@link LocalCache}. If record is not available
+     * in {@link LocalCache}, this method fetches record from {@link Backend}
+     * and stores it to {@link LocalCache}. Stream is then returned from cached
+     * record.
+     */
+    InputStream getStream(DataIdentifier identifier) throws DataStoreException {
+        InputStream in = null;
+        try {
+            String fileName = getFileName(identifier);
+            InputStream cached = cache.getIfStored(fileName);
+            if (cached != null) {
+                return cached;
+            }
+            in = backend.read(identifier);
+            return cache.store(fileName, in);
+        } catch (IOException e) {
+            throw new DataStoreException("IO Exception: " + identifier, e);
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+    /**
+     * Return lastModified of record from {@link Backend} assuming
+     * {@link Backend} as a single source of truth.
+     */
+    public long getLastModified(DataIdentifier identifier) throws DataStoreException {
+        LOG.info("accessed lastModified");
+        return backend.getLastModified(identifier);
+    }
+
+    /**
+     * Return the length of record from {@link LocalCache} if available,
+     * otherwise retrieve it from {@link Backend}.
+     */
+    public long getLength(DataIdentifier identifier) throws DataStoreException {
+        String fileName = getFileName(identifier);
+        Long length = cache.getFileLength(fileName);
+        if (length != null) {
+            return length.longValue();
+        }
+        return backend.getLength(identifier);
+    }
+
+    @Override
+    protected byte[] getOrCreateReferenceKey() throws DataStoreException {
+        try {
+            return secret.getBytes("UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new DataStoreException(e);
+        }
+    }
+
+    /**
+     * Returns a unique temporary file to be used for creating a new data
+     * record.
+     */
+    private File newTemporaryFile() throws IOException {
+        return File.createTempFile(TMP, null, tmpDir);
+    }
+
+    /**
+     * Load files from {@link LocalCache} to {@link Backend}.
+     */
+    private void loadFilesFromCache() throws RepositoryException {
+        ArrayList<File> files = new ArrayList<File>();
+        listRecursive(files, directory);
+        long totalSize = 0;
+        for (File f : files) {
+            totalSize += f.length();
+        }
+        long currentSize = 0;
+        long time = System.currentTimeMillis();
+        for (File f : files) {
+            long now = System.currentTimeMillis();
+            if (now > time + 5000) {
+                LOG.info("Uploaded {" + currentSize + "}/{" + totalSize + "}");
+                time = now;
+            }
+            currentSize += f.length();
+            String name = f.getName();
+            LOG.debug("upload file = " + name);
+            if (!name.startsWith(TMP) && !name.endsWith(DS_STORE)
+                    && f.length() > 0) {
+                loadFileToBackEnd(f);
+            }
+        }
+        LOG.info("Uploaded {" + currentSize + "}/{" + totalSize + "}");
+    }
+
+    /**
+     * Traverse recursively and populate list with files.
+     */
+    private void listRecursive(List<File> list, File file) {
+        File[] files = file.listFiles();
+        if (files != null) {
+            for (File f : files) {
+                if (f.isDirectory()) {
+                    listRecursive(list, f);
+                } else {
+                    list.add(f);
+                }
+            }
+        }
+    }
+
+    /**
+     * Upload file from {@link LocalCache} to {@link Backend}.
+     * 
+     * @param f
+     *            file to uploaded.
+     * @throws DataStoreException
+     */
+    private void loadFileToBackEnd(File f) throws DataStoreException {
+        DataIdentifier identifier = new DataIdentifier(f.getName());
+        usesIdentifier(identifier);
+        backend.write(identifier, f);
+        LOG.debug(f.getName() + "uploaded.");
+
+    }
+
+    /**
+     * Derive file name from identifier.
+     */
+    private static String getFileName(DataIdentifier identifier) {
+        String name = identifier.toString();
+        name = name.substring(0, 2) + "/" + name.substring(2, 4) + "/"
+                + name.substring(4, 6) + "/" + name;
+        return name;
+    }
+
+    private void usesIdentifier(DataIdentifier identifier) {
+        inUse.put(identifier, new WeakReference<DataIdentifier>(identifier));
+    }
+
+    private static boolean mkdirs(File dir) throws IOException {
+        if (dir.exists()) {
+            if (dir.isFile()) {
+                throw new IOException("Can not create a directory "
+                        + "because a file exists with the same name: "
+                        + dir.getAbsolutePath());
+            }
+            return false;
+        }
+        boolean created = dir.mkdirs();
+        if (!created) {
+            throw new IOException("Could not create directory: "
+                    + dir.getAbsolutePath());
+        }
+        return created;
+    }
+
+    @Override
+    public void clearInUse() {
+        inUse.clear();
+    }
+
+    public boolean isInUse(DataIdentifier identifier) {
+        return inUse.containsKey(identifier);
+    }
+
+    @Override
+    public void close() throws DataStoreException {
+        cache.close();
+        backend.close();
+        cache = null;
+    }
+
+    /**
+     * Setter for configuration based secret
+     * 
+     * @param secret
+     *            the secret used to sign reference binaries
+     */
+    public void setSecret(String secret) {
+        this.secret = secret;
+    }
+
+    /**
+     * Set the minimum object length.
+     * 
+     * @param minRecordLength
+     *            the length
+     */
+    public void setMinRecordLength(int minRecordLength) {
+        this.minRecordLength = minRecordLength;
+    }
+
+    /**
+     * Return mininum object length.
+     */
+    @Override
+    public int getMinRecordLength() {
+        return minRecordLength;
+    }
+
+    /**
+     * Return path of configuration properties.
+     * 
+     * @return path of configuration properties.
+     */
+    public String getConfig() {
+        return config;
+    }
+
+    /**
+     * Set the configuration properties path.
+     * 
+     * @param config
+     *            path of configuration properties.
+     */
+    public void setConfig(String config) {
+        this.config = config;
+    }
+
+    /**
+     * @return size of {@link LocalCache}.
+     */
+    public long getCacheSize() {
+        return cacheSize;
+    }
+
+    /**
+     * Set size of {@link LocalCache}.
+     * 
+     * @param cacheSize
+     *            size of {@link LocalCache}.
+     */
+    public void setCacheSize(long cacheSize) {
+        this.cacheSize = cacheSize;
+    }
+
+    /**
+     * 
+     * @return path of {@link LocalCache}.
+     */
+    public String getPath() {
+        return path;
+    }
+
+    /**
+     * Set path of {@link LocalCache}.
+     * 
+     * @param path
+     *            of {@link LocalCache}.
+     */
+    public void setPath(String path) {
+        this.path = path;
+    }
+
+    /**
+     * @return Purge trigger factor of {@link LocalCache}.
+     */
+    public double getCachePurgeTrigFactor() {
+        return cachePurgeTrigFactor;
+    }
+
+    /**
+     * Set purge trigger factor of {@link LocalCache}.
+     * 
+     * @param cachePurgeTrigFactor
+     *            purge trigger factor.
+     */
+    public void setCachePurgeTrigFactor(double cachePurgeTrigFactor) {
+        this.cachePurgeTrigFactor = cachePurgeTrigFactor;
+    }
+
+    /**
+     * @return Purge resize factor of {@link LocalCache}.
+     */
+    public double getCachePurgeResizeFactor() {
+        return cachePurgeResizeFactor;
+    }
+
+    /**
+     * Set purge resize factor of {@link LocalCache}.
+     * 
+     * @param cachePurgeResizeFactor
+     *            purge resize factor.
+     */
+    public void setCachePurgeResizeFactor(double cachePurgeResizeFactor) {
+        this.cachePurgeResizeFactor = cachePurgeResizeFactor;
+    }
+
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.Serializable;
+
+/**
+ * Opaque data identifier used to identify records in a data store.
+ * All identifiers must be serializable and implement the standard
+ * object equality and hash code methods.
+ */
+public class DataIdentifier implements Serializable {
+
+    /**
+     * Serial version UID.
+     */
+    private static final long serialVersionUID = -9197191401131100016L;
+
+    /**
+     * Data identifier.
+     */
+    private final String identifier;
+
+    /**
+     * Creates a data identifier from the given string.
+     *
+     * @param identifier data identifier
+     */
+    public DataIdentifier(String identifier) {
+        this.identifier  = identifier;
+    }
+
+    //-------------------------------------------------------------< Object >
+
+    /**
+     * Returns the identifier string.
+     *
+     * @return identifier string
+     */
+    public String toString() {
+        return identifier;
+    }
+
+    /**
+     * Checks if the given object is a data identifier and has the same
+     * string representation as this one.
+     *
+     * @param object other object
+     * @return <code>true</code> if the given object is the same identifier,
+     *         <code>false</code> otherwise
+     */
+    public boolean equals(Object object) {
+        return (object instanceof DataIdentifier)
+            && identifier.equals(object.toString());
+    }
+
+    /**
+     * Returns the hash code of the identifier string.
+     *
+     * @return hash code
+     */
+    public int hashCode() {
+        return identifier.hashCode();
+    }
+
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.InputStream;
+
+/**
+ * Immutable data record that consists of a binary stream.
+ */
+public interface DataRecord {
+
+    /**
+     * Returns the identifier of this record.
+     *
+     * @return data identifier
+     */
+    DataIdentifier getIdentifier();
+
+    /**
+     * Returns a secure reference to this binary, or {@code null} if no such
+     * reference is available.
+     *
+     * @return binary reference, or {@code null}
+     */
+    String getReference();
+
+    /**
+     * Returns the length of the binary stream in this record.
+     *
+     * @return length of the binary stream
+     * @throws DataStoreException if the record could not be accessed
+     */
+    long getLength() throws DataStoreException;
+
+    /**
+     * Returns the the binary stream in this record.
+     *
+     * @return binary stream
+     * @throws DataStoreException if the record could not be accessed
+     */
+    InputStream getStream() throws DataStoreException;
+
+    /**
+     * Returns the last modified of the record.
+     * 
+     * @return last modified time of the binary stream
+     */
+    long getLastModified();
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.InputStream;
+import java.util.Iterator;
+
+import javax.jcr.RepositoryException;
+
+/**
+ * Append-only store for binary streams. A data store consists of a number
+ * of identifiable data records that each contain a distinct binary stream.
+ * New binary streams can be added to the data store, but existing streams
+ * are never removed or modified.
+ * <p>
+ * A data store should be fully thread-safe, i.e. it should be possible to
+ * add and access data records concurrently. Optimally even separate processes
+ * should be able to concurrently access the data store with zero interprocess
+ * synchronization.
+ */
+public interface DataStore {
+
+    /**
+     * Check if a record for the given identifier exists, and return it if yes.
+     * If no record exists, this method returns null.
+     * 
+     * @param identifier data identifier
+     * @return the record if found, and null if not
+     * @throws DataStoreException if the data store could not be accessed
+     */
+    DataRecord getRecordIfStored(DataIdentifier identifier)
+            throws DataStoreException;
+
+    /**
+     * Returns the identified data record. The given identifier should be
+     * the identifier of a previously saved data record. Since records are
+     * never removed, there should never be cases where the identified record
+     * is not found. Abnormal cases like that are treated as errors and
+     * handled by throwing an exception.
+     *
+     * @param identifier data identifier
+     * @return identified data record
+     * @throws DataStoreException if the data store could not be accessed,
+     *                     or if the given identifier is invalid
+     */
+    DataRecord getRecord(DataIdentifier identifier) throws DataStoreException;
+
+    /**
+     * Returns the record that matches the given binary reference.
+     * Returns {@code null} if the reference is invalid, for example if it
+     * points to a record that does not exist.
+     *
+     * @param reference binary reference
+     * @return matching record, or {@code null}
+     * @throws DataStoreException if the data store could not be accessed
+     */
+    DataRecord getRecordFromReference(String reference)
+        throws DataStoreException;
+
+    /**
+     * Creates a new data record. The given binary stream is consumed and
+     * a binary record containing the consumed stream is created and returned.
+     * If the same stream already exists in another record, then that record
+     * is returned instead of creating a new one.
+     * <p>
+     * The given stream is consumed and <strong>not closed</strong> by this
+     * method. It is the responsibility of the caller to close the stream.
+     * A typical call pattern would be:
+     * <pre>
+     *     InputStream stream = ...;
+     *     try {
+     *         record = store.addRecord(stream);
+     *     } finally {
+     *         stream.close();
+     *     }
+     * </pre>
+     *
+     * @param stream binary stream
+     * @return data record that contains the given stream
+     * @throws DataStoreException if the data store could not be accessed
+     */
+    DataRecord addRecord(InputStream stream) throws DataStoreException;
+
+    /**
+     * From now on, update the modified date of an object even when accessing it.
+     * Usually, the modified date is only updated when creating a new object,
+     * or when a new link is added to an existing object. When this setting is enabled,
+     * even getLength() will update the modified date.
+     *
+     * @param before - update the modified date to the current time if it is older than this value
+     */
+    void updateModifiedDateOnAccess(long before);
+
+    /**
+     * Delete objects that have a modified date older than the specified date.
+     *
+     * @param min the minimum time
+     * @return the number of data records deleted
+     * @throws DataStoreException
+     */
+    int deleteAllOlderThan(long min) throws DataStoreException;
+
+    /**
+     * Get all identifiers.
+     *
+     * @return an iterator over all DataIdentifier objects
+     * @throws DataStoreException if the list could not be read
+     */
+    Iterator<DataIdentifier> getAllIdentifiers() throws DataStoreException;
+
+    /**
+     * Initialized the data store
+     *
+     * @param homeDir the home directory of the repository
+     * @throws RepositoryException
+     */
+    void init(String homeDir) throws RepositoryException;
+
+    /**
+     * Get the minimum size of an object that should be stored in this data store.
+     * Depending on the overhead and configuration, each store may return a different value.
+     *
+     * @return the minimum size in bytes
+     */
+    int getMinRecordLength();
+
+    /**
+     * Close the data store
+     *
+     * @throws DataStoreException if a problem occurred
+     */
+    void close() throws DataStoreException;
+
+    /**
+     * Clear the in-use list. This is only used for testing to make the the garbage collection
+     * think that objects are no longer in use.
+     */
+    void clearInUse();
+
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import javax.jcr.RepositoryException;
+
+/**
+ * Exception thrown by the Data Store module.
+ */
+public class DataStoreException extends RepositoryException {
+
+    /**
+     * Constructs a new instance of this class with the specified detail
+     * message.
+     *
+     * @param message the detailed message.
+     */
+    public DataStoreException(String message) {
+        super(message);
+    }
+
+    /**
+     * Constructs a new instance of this class with the specified detail
+     * message and root cause.
+     *
+     * @param message the detailed message.
+     * @param cause root failure cause
+     */
+    public DataStoreException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * Constructs a new instance of this class with the specified root cause.
+     *
+     * @param rootCause root failure cause
+     */
+    public DataStoreException(Throwable rootCause) {
+        super(rootCause);
+    }
+
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import javax.jcr.RepositoryException;
+
+
+/**
+ * Factory interface for creating {@link DataStore} instances. Used
+ * to decouple the repository internals from the repository configuration
+ * mechanism.
+ *
+ * @since Jackrabbit 1.5
+ * @see <a href="https://issues.apache.org/jira/browse/JCR-1438">JCR-1438</a>
+ */
+public interface DataStoreFactory {
+
+    /**
+     * Creates, initializes, and returns a {@link DataStore} instance
+     * for use by the repository. Note that no information is passed from
+     * the client, so all required configuration information must be
+     * encapsulated in the factory.
+     *
+     * @return initialized data store
+     * @throws RepositoryException if the data store can not be created
+     */
+    DataStore getDataStore() throws RepositoryException;
+
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+
+/**
+ * Data record that is based on a normal file.
+ */
+public class FileDataRecord extends AbstractDataRecord {
+
+    /**
+     * The file that contains the binary stream.
+     */
+    private final File file;
+
+    /**
+     * Creates a data record based on the given identifier and file.
+     *
+     * @param identifier data identifier
+     * @param file file that contains the binary stream
+     */
+    public FileDataRecord(
+            AbstractDataStore store, DataIdentifier identifier, File file) {
+        super(store, identifier);
+        assert file.isFile();
+        this.file = file;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public long getLength() {
+        return file.length();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public InputStream getStream() throws DataStoreException {
+        try {
+            return new LazyFileInputStream(file);
+        } catch (IOException e) {
+            throw new DataStoreException("Error opening input stream of " + file.getAbsolutePath(), e);
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public long getLastModified() {
+        return file.lastModified();
+    }
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+import java.lang.ref.WeakReference;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Simple file-based data store. Data records are stored as normal files
+ * named using a message digest of the contained binary stream.
+ *
+ * Configuration:
+ * <pre>
+ * &lt;DataStore class="org.apache.jackrabbit.core.data.FileDataStore">
+ *     &lt;param name="{@link #setPath(String) path}" value="/data/datastore"/>
+ *     &lt;param name="{@link #setMinRecordLength(int) minRecordLength}" value="1024"/>
+ * &lt/DataStore>
+ * </pre>
+ * <p>
+ * If the directory is not set, the directory &lt;repository home&gt;/repository/datastore is used.
+ * <p>
+ * A three level directory structure is used to avoid placing too many
+ * files in a single directory. The chosen structure is designed to scale
+ * up to billions of distinct records.
+ * <p>
+ * This implementation relies on the underlying file system to support
+ * atomic O(1) move operations with {@link File#renameTo(File)}.
+ */
+public class FileDataStore extends AbstractDataStore
+        implements MultiDataStoreAware {
+
+    /**
+     * Logger instance
+     */
+    private static Logger log = LoggerFactory.getLogger(FileDataStore.class);
+
+    /**
+     * The digest algorithm used to uniquely identify records.
+     */
+    private static final String DIGEST = "SHA-1";
+
+    /**
+     * The default value for the minimum object size.
+     */
+    private static final int DEFAULT_MIN_RECORD_LENGTH = 100;
+
+    /**
+     * The maximum last modified time resolution of the file system.
+     */
+    private static final int ACCESS_TIME_RESOLUTION = 2000;
+
+    /**
+     * Name of the directory used for temporary files.
+     * Must be at least 3 characters.
+     */
+    private static final String TMP = "tmp";
+
+    /**
+     * The minimum modified date. If a file is accessed (read or write) with a modified date
+     * older than this value, the modified date is updated to the current time.
+     */
+    private long minModifiedDate;
+
+    /**
+     * The directory that contains all the data record files. The structure
+     * of content within this directory is controlled by this class.
+     */
+    private File directory;
+
+    /**
+     * The name of the directory that contains all the data record files. The structure
+     * of content within this directory is controlled by this class.
+     */
+    private String path;
+
+    /**
+     * The minimum size of an object that should be stored in this data store.
+     */
+    private int minRecordLength = DEFAULT_MIN_RECORD_LENGTH;
+
+    /**
+     * All data identifiers that are currently in use are in this set until they are garbage collected.
+     */
+    protected Map<DataIdentifier, WeakReference<DataIdentifier>> inUse =
+        Collections.synchronizedMap(new WeakHashMap<DataIdentifier, WeakReference<DataIdentifier>>());
+
+    /**
+     * Initialized the data store.
+     * If the path is not set, &lt;repository home&gt;/repository/datastore is used.
+     * This directory is automatically created if it does not yet exist.
+     *
+     * @param homeDir
+     */
+    public void init(String homeDir) {
+        if (path == null) {
+            path = homeDir + "/repository/datastore";
+        }
+        directory = new File(path);
+        directory.mkdirs();
+    }
+
+    /**
+     * Get a data record for the given identifier.
+     *
+     * @param identifier the identifier
+     * @return the data record or null
+     */
+    public DataRecord getRecordIfStored(DataIdentifier identifier) throws DataStoreException {
+        File file = getFile(identifier);
+        synchronized (this) {
+            if (!file.exists()) {
+                return null;
+            }
+            if (minModifiedDate != 0) {
+                // only check when running garbage collection
+                if (getLastModified(file) < minModifiedDate) {
+                    setLastModified(file, System.currentTimeMillis() + ACCESS_TIME_RESOLUTION);
+                }
+            }
+            usesIdentifier(identifier);
+            return new FileDataRecord(this, identifier, file);
+        }
+    }
+
+    private void usesIdentifier(DataIdentifier identifier) {
+        inUse.put(identifier, new WeakReference<DataIdentifier>(identifier));
+    }
+
+    /**
+     * Creates a new data record.
+     * The stream is first consumed and the contents are saved in a temporary file
+     * and the SHA-1 message digest of the stream is calculated. If a
+     * record with the same SHA-1 digest (and length) is found then it is
+     * returned. Otherwise the temporary file is moved in place to become
+     * the new data record that gets returned.
+     *
+     * @param input binary stream
+     * @return data record that contains the given stream
+     * @throws DataStoreException if the record could not be created
+     */
+    public DataRecord addRecord(InputStream input) throws DataStoreException {
+        File temporary = null;
+        try {
+            temporary = newTemporaryFile();
+            DataIdentifier tempId = new DataIdentifier(temporary.getName());
+            usesIdentifier(tempId);
+            // Copy the stream to the temporary file and calculate the
+            // stream length and the message digest of the stream
+            long length = 0;
+            MessageDigest digest = MessageDigest.getInstance(DIGEST);
+            OutputStream output = new DigestOutputStream(
+                    new FileOutputStream(temporary), digest);
+            try {
+                length = IOUtils.copyLarge(input, output);
+            } finally {
+                output.close();
+            }
+            DataIdentifier identifier =
+                    new DataIdentifier(encodeHexString(digest.digest()));
+            File file;
+
+            synchronized (this) {
+                // Check if the same record already exists, or
+                // move the temporary file in place if needed
+                usesIdentifier(identifier);
+                file = getFile(identifier);
+                if (!file.exists()) {
+                    File parent = file.getParentFile();
+                    parent.mkdirs();
+                    if (temporary.renameTo(file)) {
+                        // no longer need to delete the temporary file
+                        temporary = null;
+                    } else {
+                        throw new IOException(
+                                "Can not rename " + temporary.getAbsolutePath()
+                                + " to " + file.getAbsolutePath()
+                                + " (media read only?)");
+                    }
+                } else {
+                    long now = System.currentTimeMillis();
+                    if (getLastModified(file) < now + ACCESS_TIME_RESOLUTION) {
+                        setLastModified(file, now + ACCESS_TIME_RESOLUTION);
+                    }
+                }
+                if (file.length() != length) {
+                    // Sanity checks on the record file. These should never fail,
+                    // but better safe than sorry...
+                    if (!file.isFile()) {
+                        throw new IOException("Not a file: " + file);
+                    }
+                    throw new IOException(DIGEST + " collision: " + file);
+                }
+            }
+            // this will also make sure that
+            // tempId is not garbage collected until here
+            inUse.remove(tempId);
+            return new FileDataRecord(this, identifier, file);
+        } catch (NoSuchAlgorithmException e) {
+            throw new DataStoreException(DIGEST + " not available", e);
+        } catch (IOException e) {
+            throw new DataStoreException("Could not add record", e);
+        } finally {
+            if (temporary != null) {
+                temporary.delete();
+            }
+        }
+    }
+
+    /**
+     * Returns the identified file. This method implements the pattern
+     * used to avoid problems with too many files in a single directory.
+     * <p>
+     * No sanity checks are performed on the given identifier.
+     *
+     * @param identifier data identifier
+     * @return identified file
+     */
+    private File getFile(DataIdentifier identifier) {
+        usesIdentifier(identifier);
+        String string = identifier.toString();
+        File file = directory;
+        file = new File(file, string.substring(0, 2));
+        file = new File(file, string.substring(2, 4));
+        file = new File(file, string.substring(4, 6));
+        return new File(file, string);
+    }
+
+    /**
+     * Returns a unique temporary file to be used for creating a new
+     * data record.
+     *
+     * @return temporary file
+     * @throws IOException
+     */
+    private File newTemporaryFile() throws IOException {
+        // the directory is already created in the init method
+        return File.createTempFile(TMP, null, directory);
+    }
+
+    public void updateModifiedDateOnAccess(long before) {
+        minModifiedDate = before;
+    }
+
+    public void deleteRecord(DataIdentifier identifier)
+			throws DataStoreException {
+        File file = getFile(identifier);
+        synchronized (this) {
+            if (file.exists()) {
+                if (!file.delete()) {
+                    log.warn("Failed to delete file " + file.getAbsolutePath());
+                }
+            }
+        }
+	}
+
+    public int deleteAllOlderThan(long min) {
+        int count = 0;
+        for (File file : directory.listFiles()) {
+            if (file.isDirectory()) { // skip top-level files
+                count += deleteOlderRecursive(file, min);
+            }
+        }
+        return count;
+    }
+
+    private int deleteOlderRecursive(File file, long min) {
+        int count = 0;
+        if (file.isFile() && file.exists() && file.canWrite()) {
+            synchronized (this) {
+                long lastModified;
+                try {
+                    lastModified = getLastModified(file);
+                } catch (DataStoreException e) {
+                    log.warn("Failed to read modification date; file not deleted", e);
+                    // don't delete the file, since the lastModified date is uncertain
+                    lastModified = min;
+                }
+                if (lastModified < min) {
+                    DataIdentifier id = new DataIdentifier(file.getName());
+                    if (!inUse.containsKey(id)) {
+                        if (log.isInfoEnabled()) {
+                            log.info("Deleting old file " + file.getAbsolutePath() +
+                                    " modified: " + new Timestamp(lastModified).toString() +
+                                    " length: " + file.length());
+                        }
+                        if (!file.delete()) {
+                            log.warn("Failed to delete old file " + file.getAbsolutePath());
+                        }
+                        count++;
+                    }
+                }
+            }
+        } else if (file.isDirectory()) {
+            File[] list = file.listFiles();
+            if (list != null) {
+                for (File f: list) {
+                    count += deleteOlderRecursive(f, min);
+                }
+            }
+
+            // JCR-1396: FileDataStore Garbage Collector and empty directories
+            // Automatic removal of empty directories (but not the root!)
+            synchronized (this) {
+                list = file.listFiles();
+                if (list != null && list.length == 0) {
+                    file.delete();
+                }
+            }
+        }
+        return count;
+    }
+
+    private void listRecursive(List<File> list, File file) {
+        File[] files = file.listFiles();
+        if (files != null) {
+            for (File f : files) {
+                if (f.isDirectory()) {
+                    listRecursive(list, f);
+                } else {
+                    list.add(f);
+                }
+            }
+        }
+    }
+
+    public Iterator<DataIdentifier> getAllIdentifiers() {
+        ArrayList<File> files = new ArrayList<File>();
+        for (File file : directory.listFiles()) {
+            if (file.isDirectory()) { // skip top-level files
+                listRecursive(files, file);
+            }
+        }
+
+        ArrayList<DataIdentifier> identifiers = new ArrayList<DataIdentifier>();
+        for (File f: files) {
+            String name = f.getName();
+            identifiers.add(new DataIdentifier(name));
+        }
+        log.debug("Found " + identifiers.size() + " identifiers.");
+        return identifiers.iterator();
+    }
+
+    public void clearInUse() {
+        inUse.clear();
+    }
+
+    /**
+     * Get the name of the directory where this data store keeps the files.
+     *
+     * @return the full path name
+     */
+    public String getPath() {
+        return path;
+    }
+
+    /**
+     * Set the name of the directory where this data store keeps the files.
+     *
+     * @param directoryName the path name
+     */
+    public void setPath(String directoryName) {
+        this.path = directoryName;
+    }
+
+    public int getMinRecordLength() {
+        return minRecordLength;
+    }
+
+    /**
+     * Set the minimum object length.
+     *
+     * @param minRecordLength the length
+     */
+    public void setMinRecordLength(int minRecordLength) {
+        this.minRecordLength = minRecordLength;
+    }
+
+    public void close() {
+        // nothing to do
+    }
+
+    //---------------------------------------------------------< protected >--
+
+    @Override
+    protected byte[] getOrCreateReferenceKey() throws DataStoreException {
+        File file = new File(directory, "reference.key");
+        try {
+            if (file.exists()) {
+                return FileUtils.readFileToByteArray(file);
+            } else {
+                byte[] key = super.getOrCreateReferenceKey();
+                FileUtils.writeByteArrayToFile(file, key);
+                return key;
+            }
+        } catch (IOException e) {
+            throw new DataStoreException(
+                    "Unable to access reference key file " + file.getPath(), e);
+        }
+    }
+
+    //-----------------------------------------------------------< private >--
+
+    /**
+     * Get the last modified date of a file.
+     *
+     * @param file the file
+     * @return the last modified date
+     * @throws DataStoreException if reading fails
+     */
+    private static long getLastModified(File file) throws DataStoreException {
+        long lastModified = file.lastModified();
+        if (lastModified == 0) {
+            throw new DataStoreException("Failed to read record modified date: " + file.getAbsolutePath());
+        }
+        return lastModified;
+    }
+
+    /**
+     * Set the last modified date of a file, if the file is writable.
+     *
+     * @param file the file
+     * @param time the new last modified date
+     * @throws DataStoreException if the file is writable but modifying the date fails
+     */
+    private static void setLastModified(File file, long time) throws DataStoreException {
+        if (!file.setLastModified(time)) {
+            if (!file.canWrite()) {
+                // if we can't write to the file, so garbage collection will also not delete it
+                // (read only files or file systems)
+                return;
+            }
+            try {
+                // workaround for Windows: if the file is already open for reading
+                // (in this or another process), then setting the last modified date
+                // doesn't work - see also JCR-2872
+                RandomAccessFile r = new RandomAccessFile(file, "rw");
+                try {
+                    r.setLength(r.length());
+                } finally {
+                    r.close();
+                }
+            } catch (IOException e) {
+                throw new DataStoreException("An IO Exception occurred while trying to set the last modified date: " + file.getAbsolutePath(), e);
+            }
+        }
+    }
+}

Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java Wed Feb  5 09:27:20 2014
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.commons.io.input.AutoCloseInputStream;
+
+/**
+ * This input stream delays opening the file until the first byte is read, and
+ * closes and discards the underlying stream as soon as the end of input has
+ * been reached or when the stream is explicitly closed.
+ */
+public class LazyFileInputStream extends AutoCloseInputStream {
+
+    /**
+     * The file descriptor to use.
+     */
+    protected final FileDescriptor fd;
+
+    /**
+     * The file to read from.
+     */
+    protected final File file;
+
+    /**
+     * True if the input stream was opened. It is also set to true if the stream
+     * was closed without reading (to avoid opening the file after the stream
+     * was closed).
+     */
+    protected boolean opened;
+
+    /**
+     * Creates a new <code>LazyFileInputStream</code> for the given file. If the
+     * file is unreadable, a FileNotFoundException is thrown.
+     * The file is not opened until the first byte is read from the stream.
+     *
+     * @param file the file
+     * @throws java.io.FileNotFoundException
+     */
+    public LazyFileInputStream(File file)
+            throws FileNotFoundException {
+        super(null);
+        if (!file.canRead()) {
+            throw new FileNotFoundException(file.getPath());
+        }
+        this.file = file;
+        this.fd = null;
+    }
+
+    /**
+     * Creates a new <code>LazyFileInputStream</code> for the given file
+     * descriptor.
+     * The file is not opened until the first byte is read from the stream.
+     *
+     * @param fd
+     */
+    public LazyFileInputStream(FileDescriptor fd) {
+        super(null);
+        this.file = null;
+        this.fd = fd;
+    }
+
+    /**
+     * Creates a new <code>LazyFileInputStream</code> for the given file. If the
+     * file is unreadable, a FileNotFoundException is thrown.
+     *
+     * @param name
+     * @throws java.io.FileNotFoundException
+     */
+    public LazyFileInputStream(String name) throws FileNotFoundException {
+        this(new File(name));
+    }
+
+    /**
+     * Open the stream if required.
+     *
+     * @throws java.io.IOException
+     */
+    protected void open() throws IOException {
+        if (!opened) {
+            opened = true;
+            if (fd != null) {
+                in = new FileInputStream(fd);
+            } else {
+                in = new FileInputStream(file);
+            }
+        }
+    }
+
+    public int read() throws IOException {
+        open();
+        return super.read();
+    }
+
+    public int available() throws IOException {
+        open();
+        return super.available();
+    }
+
+    public void close() throws IOException {
+        // make sure the file is not opened afterwards
+        opened = true;
+        
+        // only close the file if it was in fact opened
+        if (in != null) {
+            super.close();
+        }
+    }
+
+    public synchronized void reset() throws IOException {
+        open();
+        super.reset();
+    }
+
+    public boolean markSupported() {
+        try {
+            open();
+        } catch (IOException e) {
+            throw new IllegalStateException(e.toString());
+        }
+        return super.markSupported();
+    }
+
+    public synchronized void mark(int readlimit) {
+        try {
+            open();
+        } catch (IOException e) {
+            throw new IllegalStateException(e.toString());
+        }
+        super.mark(readlimit);
+    }
+
+    public long skip(long n) throws IOException {
+        open();
+        return super.skip(n);
+    }
+
+    public int read(byte[] b) throws IOException {
+        open();
+        return super.read(b, 0, b.length);
+    }
+
+    public int read(byte[] b, int off, int len) throws IOException {
+        open();
+        return super.read(b, off, len);
+    }
+
+}