You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by th...@apache.org on 2014/02/05 10:27:23 UTC
svn commit: r1564687 [2/6] - in /jackrabbit/trunk: ./ jackrabbit-aws-ext/
jackrabbit-aws-ext/src/main/java/org/apache/jackrabbit/aws/ext/
jackrabbit-aws-ext/src/main/java/org/apache/jackrabbit/aws/ext/ds/
jackrabbit-aws-ext/src/test/java/org/apache/jac...
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/Backend.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+
+
+/**
+ * The interface defines the backend which can be plugged into
+ * {@link CachingDataStore}.
+ */
+public interface Backend {
+
+ /**
+ * Initializes the backend with the given configuration.
+ *
+ * @param store {@link CachingDataStore}
+ * @param homeDir path of repository home dir.
+ * @param config path of config property file.
+ * @throws DataStoreException for any error.
+ */
+ void init(CachingDataStore store, String homeDir, String config)
+ throws DataStoreException;
+
+ /**
+ * Returns the input stream of the record identified by identifier.
+ *
+ * @param identifier identifier of record.
+ * @return input stream of the record.
+ * @throws DataStoreException if record not found or any error.
+ */
+ InputStream read(DataIdentifier identifier) throws DataStoreException;
+
+ /**
+ * Returns the length of the record identified by identifier.
+ *
+ * @param identifier identifier of record.
+ * @return length of the record.
+ * @throws DataStoreException if record not found or any error.
+ */
+ long getLength(DataIdentifier identifier) throws DataStoreException;
+
+ /**
+ * Returns the lastModified of the record identified by identifier.
+ *
+ * @param identifier identifier of record.
+ * @return lastModified of the record.
+ * @throws DataStoreException if record not found or any error.
+ */
+ long getLastModified(DataIdentifier identifier) throws DataStoreException;
+
+ /**
+ * Stores the file in the backend with identifier used as key. If the key
+ * already exists, the timestamp of the key is updated.
+ *
+ * @param identifier key of the file
+ * @param file file that would be stored in backend.
+ * @throws DataStoreException for any error.
+ */
+ void write(DataIdentifier identifier, File file) throws DataStoreException;
+
+ /**
+ * Returns identifiers of all records that exist in the backend.
+ * @return iterator consisting of all identifiers
+ * @throws DataStoreException for any error.
+ */
+ Iterator<DataIdentifier> getAllIdentifiers() throws DataStoreException;
+
+ /**
+ * Updates the timestamp of the record identified by identifier if
+ * minModifiedDate is greater than the record's lastModified, else no-op.
+ *
+ * @throws DataStoreException if record not found.
+ */
+ void touch(DataIdentifier identifier, long minModifiedDate)
+ throws DataStoreException;
+ /**
+ * Checks the existence of a record in the backend.
+ * @param identifier identifier to be checked.
+ * @return true if the record exists, else false.
+ * @throws DataStoreException for any error.
+ */
+ boolean exists(DataIdentifier identifier) throws DataStoreException;
+
+ /**
+ * Closes the backend and releases resources like database connection if any.
+ * @throws DataStoreException for any error.
+ */
+ void close() throws DataStoreException;
+
+ /**
+ * Deletes all records which are older than timestamp.
+ * @param timestamp records older than this timestamp are deleted.
+ * @return list of identifiers which are deleted.
+ * @throws DataStoreException for any error.
+ */
+ List<DataIdentifier> deleteAllOlderThan(long timestamp) throws DataStoreException;
+
+ /**
+ * Deletes the record identified by identifier. No-op if identifier not found.
+ * @param identifier identifier of the record to delete.
+ * @throws DataStoreException for any error.
+ */
+ void deleteRecord(DataIdentifier identifier) throws DataStoreException;
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataRecord.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+import java.io.InputStream;
+
+
+/**
+ * CachingDataRecord which stores reference to {@link CachingDataStore}. This
+ * class doesn't store any references to attributes but attributes are fetched
+ * on demand from {@link CachingDataStore}.
+ */
+public class CachingDataRecord extends AbstractDataRecord {
+ private final CachingDataStore store;
+
+ public CachingDataRecord(CachingDataStore store, DataIdentifier identifier) {
+ super(store, identifier);
+ this.store = store;
+ }
+
+ @Override
+ public long getLastModified() {
+ long modified = 0;
+ try {
+ modified = store.getLastModified(getIdentifier());
+ } catch (DataStoreException ignored) {
+ // the signature cannot throw, so a failed lookup is reported as 0
+ }
+ return modified;
+ }
+
+ @Override
+ public long getLength() throws DataStoreException {
+ return store.getLength(getIdentifier());
+ }
+
+ @Override
+ public InputStream getStream() throws DataStoreException {
+ return store.getStream(getIdentifier());
+ }
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/CachingDataStore.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,605 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.lang.ref.WeakReference;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A caching data store that consists of {@link LocalCache} and {@link Backend}.
+ * {@link Backend} is single source of truth. All methods first try to fetch
+ * information from {@link LocalCache}. If record is not available in
+ * {@link LocalCache}, then it is fetched from {@link Backend} and saved to
+ * {@link LocalCache} for further access. This class is designed to work without
+ * {@link LocalCache} and then all information is fetched from {@link Backend}.
+ * To disable {@link LocalCache} set {@link #setCacheSize(long)} to 0.
+ * Configuration:
+ *
+ * <pre>
+ * <DataStore class="org.apache.jackrabbit.aws.ext.ds.CachingDataStore">
+ *
+ * <param name="{@link #setPath(String) path}" value="/data/datastore"/>
+ * <param name="{@link #setConfig(String) config}" value="${rep.home}/backend.properties"/>
+ * <param name="{@link #setCacheSize(long) cacheSize}" value="68719476736"/>
+ * <param name="{@link #setSecret(String) secret}" value="123456"/>
+ * <param name="{@link #setCachePurgeTrigFactor(double) cachePurgeTrigFactor}" value="0.95d"/>
+ * <param name="{@link #setCachePurgeResizeFactor(double) cachePurgeResizeFactor}" value="0.85d"/>
+ * <param name="{@link #setMinRecordLength(int) minRecordLength}" value="1024"/>
+ * </DataStore>
+ */
+public abstract class CachingDataStore extends AbstractDataStore implements
+ MultiDataStoreAware {
+
+ /**
+ * Logger instance.
+ */
+ private static final Logger LOG = LoggerFactory.getLogger(CachingDataStore.class);
+
+ /**
+ * The digest algorithm used to uniquely identify records.
+ */
+ private static final String DIGEST = "SHA-1";
+
+ private static final String DS_STORE = ".DS_Store";
+
+ /**
+ * Name of the directory used for temporary files. Must be at least 3
+ * characters.
+ */
+ private static final String TMP = "tmp";
+
+ /**
+ * All data identifiers that are currently in use are in this set until they
+ * are garbage collected.
+ */
+ protected Map<DataIdentifier, WeakReference<DataIdentifier>> inUse =
+ Collections.synchronizedMap(new WeakHashMap<DataIdentifier,
+ WeakReference<DataIdentifier>>());
+
+ protected Backend backend;
+
+ /**
+ * The minimum size of an object that should be stored in this data store.
+ */
+ private int minRecordLength = 16 * 1024;
+
+ private String path;
+
+ private File directory;
+
+ private File tmpDir;
+
+ private String secret;
+
+ /**
+ * The optional backend configuration.
+ */
+ private String config;
+
+ /**
+ * The minimum modified date. If a file is accessed (read or write) with a
+ * modified date older than this value, the modified date is updated to the
+ * current time.
+ */
+ private long minModifiedDate;
+
+ /**
+ * Cache purge trigger factor. The cache enters auto-purge mode if its
+ * current size is greater than cachePurgeTrigFactor * cacheSize.
+ */
+ private double cachePurgeTrigFactor = 0.95d;
+
+ /**
+ * Cache resize factor. After auto-purge mode, the cache's current size would
+ * be just greater than cachePurgeResizeFactor * cacheSize.
+ */
+ private double cachePurgeResizeFactor = 0.85d;
+
+ /**
+ * The number of bytes in the cache. The default value is 64 GB.
+ */
+ private long cacheSize = 64L * 1024 * 1024 * 1024;
+
+ /**
+ * The local file system cache.
+ */
+ private LocalCache cache;
+
+ protected abstract Backend createBackend();
+
+ protected abstract String getMarkerFile();
+
+ /**
+ * Initializes the data store. If the path is not set, &lt;repository
+ * home&gt;/repository/datastore is used. This directory is automatically
+ * created if it does not yet exist. During first initialization, all files
+ * from the local datastore are uploaded to the backend; afterwards the
+ * local datastore acts as a local cache.
+ */
+ @Override
+ public void init(String homeDir) throws RepositoryException {
+ if (path == null) {
+ path = homeDir + "/repository/datastore";
+ }
+ directory = new File(path);
+ try {
+ mkdirs(directory);
+ } catch (IOException e) {
+ throw new DataStoreException("Could not create directory "
+ + directory.getAbsolutePath(), e);
+ }
+ tmpDir = new File(homeDir, "/repository/s3tmp");
+ try {
+ if (!mkdirs(tmpDir)) {
+ FileUtils.cleanDirectory(tmpDir);
+ LOG.info("tmp = {} cleaned", tmpDir.getPath());
+ }
+ } catch (IOException e) {
+ throw new DataStoreException("Could not create directory "
+ + tmpDir.getAbsolutePath(), e);
+ }
+ LOG.info("cachePurgeTrigFactor = {}, cachePurgeResizeFactor = {}",
+ cachePurgeTrigFactor, cachePurgeResizeFactor);
+ backend = createBackend();
+ backend.init(this, path, config);
+ String markerFileName = getMarkerFile();
+ if (markerFileName != null) {
+ // create marker file in homeDir to avoid deletion in cache cleanup.
+ File markerFile = new File(homeDir, markerFileName);
+ if (!markerFile.exists()) {
+ LOG.info("load files from local cache");
+ loadFilesFromCache();
+ try {
+ markerFile.createNewFile();
+ } catch (IOException e) {
+ throw new DataStoreException(
+ "Could not create marker file "
+ + markerFile.getAbsolutePath(), e);
+ }
+ } else {
+ LOG.info("marker file = {} exists",
+ markerFile.getAbsolutePath());
+ }
+ }
+ cache = new LocalCache(path, tmpDir.getAbsolutePath(), cacheSize,
+ cachePurgeTrigFactor, cachePurgeResizeFactor);
+ }
+
+ /**
+ * Creates a new data record in {@link Backend}. The stream is first
+ * consumed into a temporary file while its SHA-1 message digest is
+ * calculated. The hex-encoded digest becomes the record identifier, under
+ * which the temporary file is written to {@link Backend} and then stored in
+ * {@link LocalCache}. The temporary file is deleted on a best-effort basis
+ * before this method returns.
+ *
+ * @param input
+ * binary stream
+ * @return {@link CachingDataRecord}
+ * @throws DataStoreException
+ * if the record could not be created.
+ */
+ @Override
+ public DataRecord addRecord(InputStream input) throws DataStoreException {
+ File temporary = null;
+ try {
+ temporary = newTemporaryFile();
+ DataIdentifier tempId = new DataIdentifier(temporary.getName());
+ usesIdentifier(tempId);
+ // Copy the stream to the temporary file and calculate the
+ // message digest of the stream while it is being written
+ MessageDigest digest = MessageDigest.getInstance(DIGEST);
+ OutputStream output = new DigestOutputStream(new FileOutputStream(
+ temporary), digest);
+ try {
+ IOUtils.copyLarge(input, output);
+ } finally {
+ output.close();
+ }
+ DataIdentifier identifier = new DataIdentifier(
+ encodeHexString(digest.digest()));
+ synchronized (this) {
+ usesIdentifier(identifier);
+ backend.write(identifier, temporary);
+ String fileName = getFileName(identifier);
+ cache.store(fileName, temporary);
+ }
+ // this will also make sure that
+ // tempId is not garbage collected until here
+ inUse.remove(tempId);
+ return new CachingDataRecord(this, identifier);
+ } catch (NoSuchAlgorithmException e) {
+ throw new DataStoreException(DIGEST + " not available", e);
+ } catch (IOException e) {
+ throw new DataStoreException("Could not add record", e);
+ } finally {
+ if (temporary != null) {
+ // try to delete - but it's not a big deal if we can't
+ temporary.delete();
+ }
+ }
+ }
+
+ /**
+ * Looks up the record for the given identifier in {@link Backend}. When it
+ * exists, {@link Backend#touch} is invoked on it with minModifiedDate.
+ *
+ * @param identifier identifier of record.
+ * @return the {@link CachingDataRecord}, or null if the backend has no record.
+ */
+ @Override
+ public DataRecord getRecordIfStored(DataIdentifier identifier)
+ throws DataStoreException {
+ DataRecord record = null;
+ synchronized (this) {
+ usesIdentifier(identifier);
+ if (backend.exists(identifier)) {
+ backend.touch(identifier, minModifiedDate);
+ record = new CachingDataRecord(this, identifier);
+ }
+ }
+ return record;
+ }
+
+ @Override
+ public void updateModifiedDateOnAccess(long before) {
+ LOG.info("minModifiedDate set to: " + before);
+ minModifiedDate = before;
+ }
+
+ /**
+ * Retrieves all identifiers from {@link Backend}.
+ */
+ @Override
+ public Iterator<DataIdentifier> getAllIdentifiers()
+ throws DataStoreException {
+ return backend.getAllIdentifiers();
+ }
+
+ /**
+ * This method deletes record from {@link Backend} and then from
+ * {@link LocalCache}
+ */
+ @Override
+ public void deleteRecord(DataIdentifier identifier)
+ throws DataStoreException {
+ String fileName = getFileName(identifier);
+ synchronized (this) {
+ backend.deleteRecord(identifier);
+ cache.delete(fileName);
+ }
+ }
+
+ @Override
+ public synchronized int deleteAllOlderThan(long min)
+ throws DataStoreException {
+ List<DataIdentifier> diList = backend.deleteAllOlderThan(min);
+ // remove entries from local cache
+ for (DataIdentifier identifier : diList) {
+ cache.delete(getFileName(identifier));
+ }
+ return diList.size();
+ }
+
+ /**
+ * Returns the content stream of a record, serving it from
+ * {@link LocalCache} when present. On a cache miss the record is read from
+ * {@link Backend}, stored into {@link LocalCache}, and the stream returned
+ * by the cache is handed back; the backend stream is closed quietly.
+ */
+ InputStream getStream(DataIdentifier identifier) throws DataStoreException {
+ InputStream backendStream = null;
+ try {
+ String cacheKey = getFileName(identifier);
+ InputStream result = cache.getIfStored(cacheKey);
+ if (result == null) {
+ backendStream = backend.read(identifier);
+ result = cache.store(cacheKey, backendStream);
+ }
+ return result;
+ } catch (IOException ioe) {
+ throw new DataStoreException("IO Exception: " + identifier, ioe);
+ } finally {
+ IOUtils.closeQuietly(backendStream);
+ }
+ }
+
+ /**
+ * Returns lastModified of the record, always fetched from {@link Backend}
+ * since it is the single source of truth. Each access is logged at debug.
+ */
+ public long getLastModified(DataIdentifier identifier) throws DataStoreException {
+ LOG.debug("accessed lastModified of identifier: {}", identifier);
+ return backend.getLastModified(identifier);
+ }
+
+ /**
+ * Returns the record length, preferring the value known to
+ * {@link LocalCache} and falling back to {@link Backend} otherwise.
+ */
+ public long getLength(DataIdentifier identifier) throws DataStoreException {
+ Long cachedLength = cache.getFileLength(getFileName(identifier));
+ if (cachedLength == null) {
+ // not known to the local cache: ask the backend
+ return backend.getLength(identifier);
+ }
+ return cachedLength.longValue();
+ }
+
+ @Override
+ protected byte[] getOrCreateReferenceKey() throws DataStoreException {
+ try {
+ return secret.getBytes("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new DataStoreException(e);
+ }
+ }
+
+ /**
+ * Returns a unique temporary file to be used for creating a new data
+ * record.
+ */
+ private File newTemporaryFile() throws IOException {
+ return File.createTempFile(TMP, null, tmpDir);
+ }
+
+ /**
+ * Uploads the files found under the local datastore directory to
+ * {@link Backend}, skipping "tmp"-prefixed, ".DS_Store" and empty files.
+ */
+ private void loadFilesFromCache() throws RepositoryException {
+ List<File> files = new ArrayList<File>();
+ listRecursive(files, directory);
+ long totalSize = 0;
+ for (File f : files) {
+ totalSize += f.length();
+ }
+ long currentSize = 0;
+ long time = System.currentTimeMillis();
+ for (File f : files) {
+ long now = System.currentTimeMillis();
+ if (now > time + 5000) { // report progress at most every 5 seconds
+ LOG.info("Uploaded {}/{}", currentSize, totalSize);
+ time = now;
+ }
+ currentSize += f.length();
+ String name = f.getName();
+ LOG.debug("upload file = {}", name);
+ if (!name.startsWith(TMP) && !name.endsWith(DS_STORE) && f.length() > 0) {
+ loadFileToBackEnd(f);
+ }
+ }
+ LOG.info("Uploaded {}/{}", currentSize, totalSize);
+ }
+
+ /** Walks the tree rooted at file and appends every non-directory entry
+ * to list; a file whose children cannot be listed contributes nothing. */
+ private void listRecursive(List<File> list, File file) {
+ File[] children = file.listFiles();
+ if (children == null) {
+ return;
+ }
+ for (File child : children) {
+ if (!child.isDirectory()) {
+ list.add(child);
+ } else {
+ listRecursive(list, child);
+ }
+ }
+ }
+
+ /**
+ * Uploads a file from {@link LocalCache} to {@link Backend}, using the
+ * file name as the record identifier.
+ *
+ * @param f
+ * file to be uploaded.
+ * @throws DataStoreException if the backend write fails.
+ */
+ private void loadFileToBackEnd(File f) throws DataStoreException {
+ DataIdentifier identifier = new DataIdentifier(f.getName());
+ usesIdentifier(identifier);
+ backend.write(identifier, f);
+ LOG.debug("{} uploaded.", f.getName());
+ }
+
+ /**
+ * Maps an identifier to a relative file name of the form
+ * "aa/bb/cc/aabbcc...", built from its first three character pairs.
+ */
+ private static String getFileName(DataIdentifier identifier) {
+ String id = identifier.toString();
+ return id.substring(0, 2) + "/" + id.substring(2, 4) + "/"
+ + id.substring(4, 6) + "/" + id;
+ }
+
+ private void usesIdentifier(DataIdentifier identifier) {
+ inUse.put(identifier, new WeakReference<DataIdentifier>(identifier));
+ }
+
+ /** Creates dir with parents; returns false if it already was a directory. */
+ private static boolean mkdirs(File dir) throws IOException {
+ if (!dir.exists()) {
+ if (dir.mkdirs()) {
+ return true;
+ }
+ throw new IOException("Could not create directory: "
+ + dir.getAbsolutePath());
+ }
+ if (!dir.isFile()) {
+ return false;
+ }
+ throw new IOException("Can not create a directory "
+ + "because a file exists with the same name: "
+ + dir.getAbsolutePath());
+ }
+
+ @Override
+ public void clearInUse() {
+ inUse.clear();
+ }
+
+ public boolean isInUse(DataIdentifier identifier) {
+ return inUse.containsKey(identifier);
+ }
+
+ @Override
+ public void close() throws DataStoreException {
+ cache.close();
+ backend.close();
+ cache = null;
+ }
+
+ /**
+ * Setter for configuration based secret
+ *
+ * @param secret
+ * the secret used to sign reference binaries
+ */
+ public void setSecret(String secret) {
+ this.secret = secret;
+ }
+
+ /**
+ * Set the minimum object length.
+ *
+ * @param minRecordLength
+ * the length
+ */
+ public void setMinRecordLength(int minRecordLength) {
+ this.minRecordLength = minRecordLength;
+ }
+
+ /**
+ * Return minimum object length.
+ */
+ @Override
+ public int getMinRecordLength() {
+ return minRecordLength;
+ }
+
+ /**
+ * Return path of configuration properties.
+ *
+ * @return path of configuration properties.
+ */
+ public String getConfig() {
+ return config;
+ }
+
+ /**
+ * Set the configuration properties path.
+ *
+ * @param config
+ * path of configuration properties.
+ */
+ public void setConfig(String config) {
+ this.config = config;
+ }
+
+ /**
+ * @return size of {@link LocalCache}.
+ */
+ public long getCacheSize() {
+ return cacheSize;
+ }
+
+ /**
+ * Set size of {@link LocalCache}.
+ *
+ * @param cacheSize
+ * size of {@link LocalCache}.
+ */
+ public void setCacheSize(long cacheSize) {
+ this.cacheSize = cacheSize;
+ }
+
+ /**
+ *
+ * @return path of {@link LocalCache}.
+ */
+ public String getPath() {
+ return path;
+ }
+
+ /**
+ * Set path of {@link LocalCache}.
+ *
+ * @param path
+ * of {@link LocalCache}.
+ */
+ public void setPath(String path) {
+ this.path = path;
+ }
+
+ /**
+ * @return Purge trigger factor of {@link LocalCache}.
+ */
+ public double getCachePurgeTrigFactor() {
+ return cachePurgeTrigFactor;
+ }
+
+ /**
+ * Set purge trigger factor of {@link LocalCache}.
+ *
+ * @param cachePurgeTrigFactor
+ * purge trigger factor.
+ */
+ public void setCachePurgeTrigFactor(double cachePurgeTrigFactor) {
+ this.cachePurgeTrigFactor = cachePurgeTrigFactor;
+ }
+
+ /**
+ * @return Purge resize factor of {@link LocalCache}.
+ */
+ public double getCachePurgeResizeFactor() {
+ return cachePurgeResizeFactor;
+ }
+
+ /**
+ * Set purge resize factor of {@link LocalCache}.
+ *
+ * @param cachePurgeResizeFactor
+ * purge resize factor.
+ */
+ public void setCachePurgeResizeFactor(double cachePurgeResizeFactor) {
+ this.cachePurgeResizeFactor = cachePurgeResizeFactor;
+ }
+
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataIdentifier.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.Serializable;
+
+/**
+ * Opaque data identifier used to identify records in a data store.
+ * All identifiers must be serializable and implement the standard
+ * object equality and hash code methods.
+ */
+public class DataIdentifier implements Serializable {
+
+ /** Serial version UID. */
+ private static final long serialVersionUID = -9197191401131100016L;
+
+ /** Data identifier. */
+ private final String identifier;
+
+ /**
+ * Creates a data identifier from the given string.
+ *
+ * @param identifier data identifier; should be non-null because
+ * {@link #equals(Object)} and {@link #hashCode()} dereference it
+ */
+ public DataIdentifier(String identifier) {
+ this.identifier = identifier;
+ }
+
+ //-------------------------------------------------------------< Object >
+
+ /**
+ * Returns the identifier string.
+ *
+ * @return identifier string
+ */
+ @Override
+ public String toString() {
+ return identifier;
+ }
+
+ /**
+ * Checks if the given object is a data identifier and has the same
+ * string representation as this one.
+ *
+ * @param object other object
+ * @return <code>true</code> if the given object is the same identifier,
+ * <code>false</code> otherwise
+ */
+ @Override
+ public boolean equals(Object object) {
+ return (object instanceof DataIdentifier)
+ && identifier.equals(object.toString());
+ }
+
+ /**
+ * Returns the hash code of the identifier string.
+ *
+ * @return hash code
+ */
+ @Override
+ public int hashCode() {
+ return identifier.hashCode();
+ }
+
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataRecord.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.InputStream;
+
+/**
+ * Immutable data record that consists of a binary stream.
+ */
+public interface DataRecord {
+
+ /**
+ * Returns the identifier of this record.
+ *
+ * @return data identifier
+ */
+ DataIdentifier getIdentifier();
+
+ /**
+ * Returns a secure reference to this binary, or {@code null} if no such
+ * reference is available.
+ *
+ * @return binary reference, or {@code null}
+ */
+ String getReference();
+
+ /**
+ * Returns the length of the binary stream in this record.
+ *
+ * @return length of the binary stream
+ * @throws DataStoreException if the record could not be accessed
+ */
+ long getLength() throws DataStoreException;
+
+ /**
+ * Returns the binary stream in this record.
+ *
+ * @return binary stream
+ * @throws DataStoreException if the record could not be accessed
+ */
+ InputStream getStream() throws DataStoreException;
+
+ /**
+ * Returns the last modified time of the record.
+ *
+ * @return last modified time of the binary stream
+ */
+ long getLastModified();
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStore.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.InputStream;
+import java.util.Iterator;
+
+import javax.jcr.RepositoryException;
+
+/**
+ * Append-only store for binary streams. A data store consists of a number
+ * of identifiable data records that each contain a distinct binary stream.
+ * New binary streams can be added to the data store, but existing streams
+ * are never removed or modified.
+ * <p>
+ * A data store should be fully thread-safe, i.e. it should be possible to
+ * add and access data records concurrently. Optimally even separate processes
+ * should be able to concurrently access the data store with zero interprocess
+ * synchronization.
+ */
+public interface DataStore {
+
+ /**
+ * Check if a record for the given identifier exists, and return it if yes.
+ * If no record exists, this method returns null.
+ *
+ * @param identifier data identifier
+ * @return the record if found, and null if not
+ * @throws DataStoreException if the data store could not be accessed
+ */
+ DataRecord getRecordIfStored(DataIdentifier identifier)
+ throws DataStoreException;
+
+ /**
+ * Returns the identified data record. The given identifier should be
+ * the identifier of a previously saved data record. Since records are
+ * never removed, there should never be cases where the identified record
+ * is not found. Abnormal cases like that are treated as errors and
+ * handled by throwing an exception.
+ *
+ * @param identifier data identifier
+ * @return identified data record
+ * @throws DataStoreException if the data store could not be accessed,
+ * or if the given identifier is invalid
+ */
+ DataRecord getRecord(DataIdentifier identifier) throws DataStoreException;
+
+ /**
+ * Returns the record that matches the given binary reference.
+ * Returns {@code null} if the reference is invalid, for example if it
+ * points to a record that does not exist.
+ *
+ * @param reference binary reference
+ * @return matching record, or {@code null}
+ * @throws DataStoreException if the data store could not be accessed
+ */
+ DataRecord getRecordFromReference(String reference)
+ throws DataStoreException;
+
+ /**
+ * Creates a new data record. The given binary stream is consumed and
+ * a binary record containing the consumed stream is created and returned.
+ * If the same stream already exists in another record, then that record
+ * is returned instead of creating a new one.
+ * <p>
+ * The given stream is consumed and <strong>not closed</strong> by this
+ * method. It is the responsibility of the caller to close the stream.
+ * A typical call pattern would be:
+ * <pre>
+ * InputStream stream = ...;
+ * try {
+ * record = store.addRecord(stream);
+ * } finally {
+ * stream.close();
+ * }
+ * </pre>
+ *
+ * @param stream binary stream
+ * @return data record that contains the given stream
+ * @throws DataStoreException if the data store could not be accessed
+ */
+ DataRecord addRecord(InputStream stream) throws DataStoreException;
+
+ /**
+ * From now on, update the modified date of an object even when accessing it.
+ * Usually, the modified date is only updated when creating a new object,
+ * or when a new link is added to an existing object. When this setting is enabled,
+ * even getLength() will update the modified date.
+ *
+ * @param before - update the modified date to the current time if it is older than this value
+ */
+ void updateModifiedDateOnAccess(long before);
+
+ /**
+ * Delete objects that have a modified date older than the specified date.
+ *
+ * @param min the minimum modified date; older objects are deleted
+ * @return the number of data records deleted
+ * @throws DataStoreException if a problem occurred
+ */
+ int deleteAllOlderThan(long min) throws DataStoreException;
+
+ /**
+ * Get all identifiers.
+ *
+ * @return an iterator over all DataIdentifier objects
+ * @throws DataStoreException if the list could not be read
+ */
+ Iterator<DataIdentifier> getAllIdentifiers() throws DataStoreException;
+
+ /**
+ * Initializes the data store.
+ *
+ * @param homeDir the home directory of the repository
+ * @throws RepositoryException if the data store could not be initialized
+ */
+ void init(String homeDir) throws RepositoryException;
+
+ /**
+ * Get the minimum size of an object that should be stored in this data store.
+ * Depending on the overhead and configuration, each store may return a different value.
+ *
+ * @return the minimum size in bytes
+ */
+ int getMinRecordLength();
+
+ /**
+ * Closes the data store.
+ *
+ * @throws DataStoreException if a problem occurred
+ */
+ void close() throws DataStoreException;
+
+ /**
+ * Clear the in-use list. This is only used for testing to make the garbage collection
+ * think that objects are no longer in use.
+ */
+ void clearInUse();
+
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreException.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import javax.jcr.RepositoryException;
+
+/**
+ * Exception thrown by the Data Store module.
+ */
+public class DataStoreException extends RepositoryException {
+
+ /**
+ * Constructs a new instance of this class with the specified detail
+ * message.
+ *
+ * @param message the detail message
+ */
+ public DataStoreException(String message) {
+ super(message);
+ }
+
+ /**
+ * Constructs a new instance of this class with the specified detail
+ * message and root cause.
+ *
+ * @param message the detail message
+ * @param cause root failure cause
+ */
+ public DataStoreException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ /**
+ * Constructs a new instance of this class with the specified root cause.
+ *
+ * @param rootCause root failure cause
+ */
+ public DataStoreException(Throwable rootCause) {
+ super(rootCause);
+ }
+
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/DataStoreFactory.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import javax.jcr.RepositoryException;
+
+
+/**
+ * Factory interface for creating {@link DataStore} instances. Used
+ * to decouple the repository internals from the repository configuration
+ * mechanism.
+ *
+ * @since Jackrabbit 1.5
+ * @see <a href="https://issues.apache.org/jira/browse/JCR-1438">JCR-1438</a>
+ */
+public interface DataStoreFactory {
+
+ /**
+ * Creates, initializes, and returns a {@link DataStore} instance
+ * for use by the repository. Note that no information is passed from
+ * the client, so all required configuration information must be
+ * encapsulated in the factory.
+ *
+ * @return initialized data store
+ * @throws RepositoryException if the data store cannot be created
+ */
+ DataStore getDataStore() throws RepositoryException;
+
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataRecord.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+
+/**
+ * Data record that is based on a normal file.
+ */
+public class FileDataRecord extends AbstractDataRecord {
+
+ /**
+ * The file that contains the binary stream.
+ */
+ private final File file;
+
+ /**
+ * Creates a data record based on the given identifier and file.
+ *
+ * @param identifier data identifier
+ * @param file file that contains the binary stream
+ */
+ public FileDataRecord(
+ AbstractDataStore store, DataIdentifier identifier, File file) {
+ super(store, identifier);
+ assert file.isFile();
+ this.file = file;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getLength() {
+ return file.length();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public InputStream getStream() throws DataStoreException {
+ try {
+ return new LazyFileInputStream(file);
+ } catch (IOException e) {
+ throw new DataStoreException("Error opening input stream of " + file.getAbsolutePath(), e);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getLastModified() {
+ return file.lastModified();
+ }
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/FileDataStore.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+import java.lang.ref.WeakReference;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Simple file-based data store. Data records are stored as normal files
+ * named using a message digest of the contained binary stream.
+ *
+ * Configuration:
+ * <pre>
+ * &lt;DataStore class="org.apache.jackrabbit.core.data.FileDataStore"&gt;
+ * &lt;param name="{@link #setPath(String) path}" value="/data/datastore"/&gt;
+ * &lt;param name="{@link #setMinRecordLength(int) minRecordLength}" value="1024"/&gt;
+ * &lt;/DataStore&gt;
+ * </pre>
+ * <p>
+ * If the directory is not set, the directory &lt;repository home&gt;/repository/datastore is used.
+ * <p>
+ * A three level directory structure is used to avoid placing too many
+ * files in a single directory. The chosen structure is designed to scale
+ * up to billions of distinct records.
+ * <p>
+ * This implementation relies on the underlying file system to support
+ * atomic O(1) move operations with {@link File#renameTo(File)}.
+ */
+public class FileDataStore extends AbstractDataStore
+ implements MultiDataStoreAware {
+
+ /**
+ * Logger instance
+ */
+ private static Logger log = LoggerFactory.getLogger(FileDataStore.class);
+
+ /**
+ * The digest algorithm used to uniquely identify records.
+ */
+ private static final String DIGEST = "SHA-1";
+
+ /**
+ * The default value for the minimum object size.
+ */
+ private static final int DEFAULT_MIN_RECORD_LENGTH = 100;
+
+ /**
+ * The maximum last modified time resolution of the file system, in milliseconds.
+ */
+ private static final int ACCESS_TIME_RESOLUTION = 2000;
+
+ /**
+ * Prefix of the temporary files created while adding records.
+ * Must be at least 3 characters.
+ */
+ private static final String TMP = "tmp";
+
+ /**
+ * The minimum modified date. If a file is accessed (read or write) with a modified date
+ * older than this value, the modified date is updated to the current time.
+ */
+ private long minModifiedDate;
+
+ /**
+ * The directory that contains all the data record files. The structure
+ * of content within this directory is controlled by this class.
+ */
+ private File directory;
+
+ /**
+ * The name of the directory that contains all the data record files. The structure
+ * of content within this directory is controlled by this class.
+ */
+ private String path;
+
+ /**
+ * The minimum size of an object that should be stored in this data store.
+ */
+ private int minRecordLength = DEFAULT_MIN_RECORD_LENGTH;
+
+ /**
+ * All data identifiers that are currently in use are in this map until they are garbage collected.
+ */
+ protected Map<DataIdentifier, WeakReference<DataIdentifier>> inUse =
+ Collections.synchronizedMap(new WeakHashMap<DataIdentifier, WeakReference<DataIdentifier>>());
+
+    /**
+     * Initializes the data store.
+     * If the path is not set, &lt;repository home&gt;/repository/datastore is used.
+     * This directory is automatically created if it does not yet exist.
+     * This method declares no checked exception, so a failure to create
+     * the directory is logged instead of being silently ignored.
+     *
+     * @param homeDir the home directory of the repository
+     */
+    public void init(String homeDir) {
+        if (path == null) {
+            path = homeDir + "/repository/datastore";
+        }
+        directory = new File(path);
+        // mkdirs() also returns false when the directory already exists, so
+        // only report a problem if there is still no directory afterwards
+        if (!directory.mkdirs() && !directory.isDirectory()) {
+            log.warn("Failed to create data store directory " + directory.getAbsolutePath());
+        }
+    }
+
+ /**
+ * Get a data record for the given identifier.
+ *
+ * @param identifier the identifier
+ * @return the data record or null
+ */
+ public DataRecord getRecordIfStored(DataIdentifier identifier) throws DataStoreException {
+ File file = getFile(identifier);
+ synchronized (this) {
+ if (!file.exists()) {
+ return null;
+ }
+ if (minModifiedDate != 0) {
+ // only check when running garbage collection
+ if (getLastModified(file) < minModifiedDate) {
+ setLastModified(file, System.currentTimeMillis() + ACCESS_TIME_RESOLUTION);
+ }
+ }
+ usesIdentifier(identifier);
+ return new FileDataRecord(this, identifier, file);
+ }
+ }
+
+    /**
+     * Records the identifier in the in-use map, keyed by itself and held
+     * through a weak reference.
+     *
+     * @param identifier the identifier to mark as in use
+     */
+    private void usesIdentifier(DataIdentifier identifier) {
+        WeakReference<DataIdentifier> reference =
+                new WeakReference<DataIdentifier>(identifier);
+        inUse.put(identifier, reference);
+    }
+
+ /**
+ * Creates a new data record.
+ * The stream is first copied to a temporary file while its length and
+ * SHA-1 message digest are calculated. If a record file named after that
+ * digest already exists, its last modified date is refreshed if needed
+ * and that record is returned. Otherwise the temporary file is renamed to
+ * become the new record that gets returned. In both cases the record file
+ * must have the same length as the consumed stream, or the call fails.
+ * <p>
+ * The given stream is consumed but not closed by this method.
+ *
+ * @param input binary stream
+ * @return data record that contains the given stream
+ * @throws DataStoreException if the record could not be created
+ */
+ public DataRecord addRecord(InputStream input) throws DataStoreException {
+ File temporary = null;
+ try {
+ temporary = newTemporaryFile();
+ DataIdentifier tempId = new DataIdentifier(temporary.getName());
+ usesIdentifier(tempId);
+ // Copy the stream to the temporary file and calculate the
+ // stream length and the message digest of the stream
+ long length = 0;
+ MessageDigest digest = MessageDigest.getInstance(DIGEST);
+ OutputStream output = new DigestOutputStream(
+ new FileOutputStream(temporary), digest);
+ try {
+ length = IOUtils.copyLarge(input, output);
+ } finally {
+ output.close();
+ }
+ DataIdentifier identifier =
+ new DataIdentifier(encodeHexString(digest.digest()));
+ File file;
+
+ synchronized (this) {
+ // Check if the same record already exists, or
+ // move the temporary file in place if needed
+ usesIdentifier(identifier);
+ file = getFile(identifier);
+ if (!file.exists()) {
+ File parent = file.getParentFile();
+ parent.mkdirs();
+ if (temporary.renameTo(file)) {
+ // no longer need to delete the temporary file
+ temporary = null;
+ } else {
+ throw new IOException(
+ "Can not rename " + temporary.getAbsolutePath()
+ + " to " + file.getAbsolutePath()
+ + " (media read only?)");
+ }
+ } else {
+ long now = System.currentTimeMillis();
+ if (getLastModified(file) < now + ACCESS_TIME_RESOLUTION) {
+ setLastModified(file, now + ACCESS_TIME_RESOLUTION);
+ }
+ }
+ if (file.length() != length) {
+ // Sanity checks on the record file. These should never fail,
+ // but better safe than sorry...
+ if (!file.isFile()) {
+ throw new IOException("Not a file: " + file);
+ }
+ throw new IOException(DIGEST + " collision: " + file);
+ }
+ }
+ // this will also make sure that
+ // tempId is not garbage collected until here
+ inUse.remove(tempId);
+ return new FileDataRecord(this, identifier, file);
+ } catch (NoSuchAlgorithmException e) {
+ throw new DataStoreException(DIGEST + " not available", e);
+ } catch (IOException e) {
+ throw new DataStoreException("Could not add record", e);
+ } finally {
+ if (temporary != null) {
+ temporary.delete();
+ }
+ }
+ }
+
+ /**
+ * Returns the identified file. This method implements the pattern
+ * used to avoid problems with too many files in a single directory.
+ * <p>
+ * No sanity checks are performed on the given identifier.
+ *
+ * @param identifier data identifier
+ * @return identified file
+ */
+ private File getFile(DataIdentifier identifier) {
+ usesIdentifier(identifier);
+ String string = identifier.toString();
+ File file = directory;
+ file = new File(file, string.substring(0, 2));
+ file = new File(file, string.substring(2, 4));
+ file = new File(file, string.substring(4, 6));
+ return new File(file, string);
+ }
+
+ /**
+ * Returns a unique temporary file to be used for creating a new
+ * data record.
+ *
+ * @return temporary file
+ * @throws IOException
+ */
+ private File newTemporaryFile() throws IOException {
+ // the directory is already created in the init method
+ return File.createTempFile(TMP, null, directory);
+ }
+
+    /**
+     * Sets the minimum modified date. While it is non-zero, record files
+     * found by {@link #getRecordIfStored(DataIdentifier)} that are older
+     * than this value get their modified date updated.
+     *
+     * @param before the new minimum modified date
+     */
+    public void updateModifiedDateOnAccess(long before) {
+        this.minModifiedDate = before;
+    }
+
+    /**
+     * Deletes the file of the given record if it exists. A failed
+     * deletion is logged as a warning and not reported to the caller.
+     *
+     * @param identifier identifier of the record to delete
+     * @throws DataStoreException declared by the signature; not thrown by
+     *         the code in this method itself
+     */
+    public void deleteRecord(DataIdentifier identifier)
+            throws DataStoreException {
+        final File target = getFile(identifier);
+        synchronized (this) {
+            if (target.exists() && !target.delete()) {
+                log.warn("Failed to delete file " + target.getAbsolutePath());
+            }
+        }
+    }
+
+    /**
+     * Deletes record files with a modified date older than the given
+     * value. Only sub-directories of the data store directory are
+     * scanned; files placed directly in it are skipped.
+     *
+     * @param min the minimum modified date; older files are deleted
+     * @return the number of deleted files, or 0 if the data store
+     *         directory could not be listed
+     */
+    public int deleteAllOlderThan(long min) {
+        int count = 0;
+        // listFiles() returns null if the directory is missing or unreadable
+        File[] topLevel = directory.listFiles();
+        if (topLevel == null) {
+            log.warn("Unable to list data store directory " + directory.getAbsolutePath());
+            return count;
+        }
+        for (File file : topLevel) {
+            if (file.isDirectory()) { // skip top-level files
+                count += deleteOlderRecursive(file, min);
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Deletes the given file, or all files below the given directory,
+     * when they are writable, older than the given modified date and not
+     * registered in the in-use map. Sub-directories that end up empty are
+     * removed as well.
+     *
+     * @param file file or directory to process
+     * @param min the minimum modified date; older files are deleted
+     * @return the number of files that were actually deleted
+     */
+    private int deleteOlderRecursive(File file, long min) {
+        int count = 0;
+        if (file.isFile() && file.exists() && file.canWrite()) {
+            synchronized (this) {
+                long lastModified;
+                try {
+                    lastModified = getLastModified(file);
+                } catch (DataStoreException e) {
+                    log.warn("Failed to read modification date; file not deleted", e);
+                    // don't delete the file, since the lastModified date is uncertain
+                    lastModified = min;
+                }
+                if (lastModified < min) {
+                    DataIdentifier id = new DataIdentifier(file.getName());
+                    if (!inUse.containsKey(id)) {
+                        if (log.isInfoEnabled()) {
+                            log.info("Deleting old file " + file.getAbsolutePath() +
+                                    " modified: " + new Timestamp(lastModified).toString() +
+                                    " length: " + file.length());
+                        }
+                        // only count files that were really removed
+                        if (file.delete()) {
+                            count++;
+                        } else {
+                            log.warn("Failed to delete old file " + file.getAbsolutePath());
+                        }
+                    }
+                }
+            }
+        } else if (file.isDirectory()) {
+            File[] list = file.listFiles();
+            if (list != null) {
+                for (File f: list) {
+                    count += deleteOlderRecursive(f, min);
+                }
+            }
+
+            // JCR-1396: FileDataStore Garbage Collector and empty directories
+            // Automatic removal of empty directories (but not the root!)
+            synchronized (this) {
+                list = file.listFiles();
+                if (list != null && list.length == 0) {
+                    file.delete();
+                }
+            }
+        }
+        return count;
+    }
+
+ private void listRecursive(List<File> list, File file) {
+ File[] files = file.listFiles();
+ if (files != null) {
+ for (File f : files) {
+ if (f.isDirectory()) {
+ listRecursive(list, f);
+ } else {
+ list.add(f);
+ }
+ }
+ }
+ }
+
+ public Iterator<DataIdentifier> getAllIdentifiers() {
+ ArrayList<File> files = new ArrayList<File>();
+ for (File file : directory.listFiles()) {
+ if (file.isDirectory()) { // skip top-level files
+ listRecursive(files, file);
+ }
+ }
+
+ ArrayList<DataIdentifier> identifiers = new ArrayList<DataIdentifier>();
+ for (File f: files) {
+ String name = f.getName();
+ identifiers.add(new DataIdentifier(name));
+ }
+ log.debug("Found " + identifiers.size() + " identifiers.");
+ return identifiers.iterator();
+ }
+
+    /**
+     * Removes all entries from the in-use map.
+     */
+    public void clearInUse() {
+        this.inUse.clear();
+    }
+
+    /**
+     * Returns the configured name of the directory in which this data
+     * store keeps its files.
+     *
+     * @return the path name
+     */
+    public String getPath() {
+        return this.path;
+    }
+
+    /**
+     * Configures the name of the directory in which this data store keeps
+     * its files.
+     *
+     * @param directoryName the path name
+     */
+    public void setPath(String directoryName) {
+        path = directoryName;
+    }
+
+    /**
+     * Returns the configured minimum object length.
+     *
+     * @return the minimum length
+     */
+    public int getMinRecordLength() {
+        return this.minRecordLength;
+    }
+
+    /**
+     * Configures the minimum object length.
+     *
+     * @param minRecordLength the length
+     */
+    public void setMinRecordLength(int minRecordLength) {
+        final int configured = minRecordLength;
+        this.minRecordLength = configured;
+    }
+
+    /**
+     * Closes the data store. This implementation has nothing to release.
+     */
+    public void close() {
+        // intentionally empty
+    }
+
+ //---------------------------------------------------------< protected >--
+
+ @Override
+ protected byte[] getOrCreateReferenceKey() throws DataStoreException {
+ File file = new File(directory, "reference.key");
+ try {
+ if (file.exists()) {
+ return FileUtils.readFileToByteArray(file);
+ } else {
+ byte[] key = super.getOrCreateReferenceKey();
+ FileUtils.writeByteArrayToFile(file, key);
+ return key;
+ }
+ } catch (IOException e) {
+ throw new DataStoreException(
+ "Unable to access reference key file " + file.getPath(), e);
+ }
+ }
+
+ //-----------------------------------------------------------< private >--
+
+ /**
+ * Get the last modified date of a file.
+ *
+ * @param file the file
+ * @return the last modified date
+ * @throws DataStoreException if reading fails
+ */
+ private static long getLastModified(File file) throws DataStoreException {
+ long lastModified = file.lastModified();
+ if (lastModified == 0) {
+ throw new DataStoreException("Failed to read record modified date: " + file.getAbsolutePath());
+ }
+ return lastModified;
+ }
+
+    /**
+     * Updates the last modified date of a file, provided the file is
+     * writable. If the direct update fails on a writable file, the file
+     * is opened for writing and set to its current length as a fallback.
+     *
+     * @param file the file
+     * @param time the new last modified date
+     * @throws DataStoreException if the file is writable but modifying the date fails
+     */
+    private static void setLastModified(File file, long time) throws DataStoreException {
+        if (file.setLastModified(time)) {
+            return;
+        }
+        if (!file.canWrite()) {
+            // a file we cannot write to will not be deleted by garbage
+            // collection either (read only files or file systems)
+            return;
+        }
+        try {
+            // workaround for Windows: if the file is already open for reading
+            // (in this or another process), then setting the last modified date
+            // doesn't work - see also JCR-2872
+            RandomAccessFile r = new RandomAccessFile(file, "rw");
+            try {
+                r.setLength(r.length());
+            } finally {
+                r.close();
+            }
+        } catch (IOException e) {
+            throw new DataStoreException("An IO Exception occurred while trying to set the last modified date: " + file.getAbsolutePath(), e);
+        }
+    }
+}
Added: jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java?rev=1564687&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java (added)
+++ jackrabbit/trunk/jackrabbit-data/src/main/java/org/apache/jackrabbit/core/data/LazyFileInputStream.java Wed Feb 5 09:27:20 2014
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.data;
+
+import java.io.File;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.commons.io.input.AutoCloseInputStream;
+import org.apache.commons.io.input.ClosedInputStream;
+
+/**
+ * This input stream delays opening the file until the stream is first used,
+ * and closes and discards the underlying stream as soon as the end of input
+ * has been reached or when the stream is explicitly closed.
+ * <p>
+ * Every operation other than {@link #close()} opens the file on demand; this
+ * includes {@link #available()}, {@link #skip(long)}, {@link #mark(int)},
+ * {@link #reset()} and {@link #markSupported()}.
+ * <p>
+ * Closing a stream that was never opened does not touch the file; reading
+ * from it afterwards reports end of input.
+ */
+public class LazyFileInputStream extends AutoCloseInputStream {
+
+    /**
+     * The file descriptor to use, or <code>null</code> if the stream was
+     * created for a file.
+     */
+    protected final FileDescriptor fd;
+
+    /**
+     * The file to read from, or <code>null</code> if the stream was created
+     * for a file descriptor.
+     */
+    protected final File file;
+
+    /**
+     * True if the input stream was opened. It is also set to true if the stream
+     * was closed without reading (to avoid opening the file after the stream
+     * was closed).
+     */
+    protected boolean opened;
+
+    /**
+     * Creates a new <code>LazyFileInputStream</code> for the given file. If the
+     * file is unreadable, a FileNotFoundException is thrown.
+     * The file is not opened until the first byte is read from the stream.
+     *
+     * @param file the file
+     * @throws java.io.FileNotFoundException if the file cannot be read
+     */
+    public LazyFileInputStream(File file)
+            throws FileNotFoundException {
+        super(null);
+        if (!file.canRead()) {
+            throw new FileNotFoundException(file.getPath());
+        }
+        this.file = file;
+        this.fd = null;
+    }
+
+    /**
+     * Creates a new <code>LazyFileInputStream</code> for the given file
+     * descriptor.
+     * The file is not opened until the first byte is read from the stream.
+     *
+     * @param fd the file descriptor to read from
+     */
+    public LazyFileInputStream(FileDescriptor fd) {
+        super(null);
+        this.file = null;
+        this.fd = fd;
+    }
+
+    /**
+     * Creates a new <code>LazyFileInputStream</code> for the given file. If the
+     * file is unreadable, a FileNotFoundException is thrown.
+     *
+     * @param name the path name of the file
+     * @throws java.io.FileNotFoundException if the file cannot be read
+     */
+    public LazyFileInputStream(String name) throws FileNotFoundException {
+        this(new File(name));
+    }
+
+    /**
+     * Open the stream if required. Does nothing once the stream has been
+     * opened or closed.
+     *
+     * @throws java.io.IOException if the underlying file cannot be opened
+     */
+    protected void open() throws IOException {
+        if (!opened) {
+            if (fd != null) {
+                in = new FileInputStream(fd);
+            } else {
+                in = new FileInputStream(file);
+            }
+            // Only mark the stream as opened once the underlying stream
+            // exists. Otherwise a failed attempt would leave "in" unset and
+            // later calls would fail with a NullPointerException instead of
+            // reporting the I/O problem again.
+            opened = true;
+        }
+    }
+
+    @Override
+    public int read() throws IOException {
+        open();
+        return super.read();
+    }
+
+    @Override
+    public int available() throws IOException {
+        open();
+        return super.available();
+    }
+
+    @Override
+    public void close() throws IOException {
+        // make sure the file is not opened afterwards
+        opened = true;
+
+        if (in != null) {
+            // only close the file if it was in fact opened
+            super.close();
+        } else {
+            // Never opened: install a closed placeholder so that later calls
+            // see end of input rather than dereferencing a null stream.
+            in = new ClosedInputStream();
+        }
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        open();
+        super.reset();
+    }
+
+    /**
+     * Opens the stream if required and reports whether the underlying stream
+     * supports mark and reset.
+     *
+     * @throws IllegalStateException if the underlying file cannot be opened
+     */
+    @Override
+    public boolean markSupported() {
+        try {
+            open();
+        } catch (IOException e) {
+            // this method cannot throw IOException; keep the original as cause
+            throw new IllegalStateException(e.toString(), e);
+        }
+        return super.markSupported();
+    }
+
+    /**
+     * Opens the stream if required and marks the current position.
+     *
+     * @throws IllegalStateException if the underlying file cannot be opened
+     */
+    @Override
+    public synchronized void mark(int readlimit) {
+        try {
+            open();
+        } catch (IOException e) {
+            // this method cannot throw IOException; keep the original as cause
+            throw new IllegalStateException(e.toString(), e);
+        }
+        super.mark(readlimit);
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        open();
+        return super.skip(n);
+    }
+
+    @Override
+    public int read(byte[] b) throws IOException {
+        open();
+        return super.read(b, 0, b.length);
+    }
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        open();
+        return super.read(b, off, len);
+    }
+
+}