You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2010/06/28 15:47:09 UTC
svn commit: r958577 -
/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
Author: mreutegg
Date: Mon Jun 28 13:47:08 2010
New Revision: 958577
URL: http://svn.apache.org/viewvc?rev=958577&view=rev
Log:
JCR-2647: Reduce temporary memory usage of hierarchy cache initialization
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=958577&r1=958576&r2=958577&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Mon Jun 28 13:47:08 2010
@@ -17,8 +17,8 @@
package org.apache.jackrabbit.core.query.lucene;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -52,6 +52,14 @@ class CachingIndexReader extends FilterI
private static final Logger log = LoggerFactory.getLogger(CachingIndexReader.class);
/**
+ * The number of nodes that are processed in a batch when the hierarchy
+ * cache is initialized. The value is 400'000, which limits the
+ * temporary memory usage for initializing the hierarchy cache of a
+ * segment to about 64MB (roughly 170 bytes per node * 400'000 nodes).
+ */
+ private static final int MAX_CACHE_INIT_BATCH_SIZE = 400 * 1000;
+
+ /**
* The current value of the global creation tick counter.
*/
private static long currentTick;
@@ -419,61 +427,112 @@ class CachingIndexReader extends FilterI
* @throws IOException if an error occurs while reading from the index.
*/
private void initializeParents(IndexReader reader) throws IOException {
+ double foreignParents = 0;
long time = System.currentTimeMillis();
- final Map<Object, NodeInfo> docs = new HashMap<Object, NodeInfo>();
- // read UUIDs
- collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector() {
- public void collect(Term term, TermDocs tDocs) throws IOException {
- NodeId id = new NodeId(term.text());
- while (tDocs.next()) {
- int doc = tDocs.doc();
- // skip shareable nodes
- if (!shareableNodes.get(doc)) {
- NodeInfo info = new NodeInfo(doc, id);
- docs.put(doc, info);
+
+ // initialize in multiple passes with
+ // a fixed number of nodes at a time
+ final Term[] startUUID = new Term[]{new Term(FieldNames.UUID, "")};
+
+ for (;;) {
+ final Map<Object, NodeInfo> docs = new HashMap<Object, NodeInfo>();
+ final Map<NodeId, Integer> parents = new HashMap<NodeId, Integer>();
+
+ if (startUUID[0].text().length() != 0) {
+ // force reading the next uuid after startUUID
+ startUUID[0] = new Term(FieldNames.UUID, startUUID[0].text() + "_");
+ }
+ // read UUIDs
+ collectTermDocs(reader, startUUID[0], new TermDocsCollector() {
+ public boolean collect(Term term, TermDocs tDocs) throws IOException {
+ // remember start term for next batch
+ startUUID[0] = term;
+ if (docs.size() >= MAX_CACHE_INIT_BATCH_SIZE) {
+ return false;
+ }
+ NodeId id = new NodeId(term.text());
+ while (tDocs.next()) {
+ int doc = tDocs.doc();
+ // skip shareable nodes
+ if (!shareableNodes.get(doc)) {
+ NodeInfo info = new NodeInfo(doc, id);
+ docs.put(doc, info);
+ }
}
+ return true;
}
- }
- });
+ });
- // read PARENTs
- collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector() {
- public void collect(Term term, TermDocs tDocs) throws IOException {
- NodeId id = new NodeId(term.text());
- while (tDocs.next()) {
- Integer docId = tDocs.doc();
- NodeInfo info = docs.get(docId);
- if (info == null) {
- // shareable node, see above
- } else {
- info.parent = id;
- docs.remove(docId);
- docs.put(info.id, info);
+ if (docs.isEmpty()) {
+ // no more nodes to initialize
+ break;
+ }
+
+ // read PARENTs (full scan)
+ collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector() {
+ public boolean collect(Term term, TermDocs tDocs) throws IOException {
+ NodeId id = new NodeId(term.text());
+ while (tDocs.next()) {
+ Integer docId = tDocs.doc();
+ NodeInfo info = docs.get(docId);
+ if (info == null) {
+ // shareable node, see above
+ // or cache init is batched
+ } else {
+ info.parent = id;
+ docs.remove(docId);
+ docs.put(info.id, info);
+ parents.put(id, null);
+ }
}
+ return true;
}
- }
- });
+ });
- if (stopRequested) {
- return;
- }
+ // scan UUIDs again to get document numbers for parents
+ collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector() {
+ public boolean collect(Term term, TermDocs tDocs) throws IOException {
+ NodeId id = new NodeId(term.text());
+ while (tDocs.next()) {
+ int doc = tDocs.doc();
+ if (parents.containsKey(id)) {
+ parents.put(id, doc);
+ }
+ }
+ return true;
+ }
+ });
- double foreignParents = 0;
- for (NodeInfo info : docs.values()) {
- NodeInfo parent = docs.get(info.parent);
- if (parent != null) {
- inSegmentParents[info.docId] = parent.docId;
- } else if (info.parent != null) {
- foreignParents++;
- foreignParentDocIds.put(info.docId, DocId.create(info.parent));
- } else if (shareableNodes.get(info.docId)) {
- Document doc = reader.document(info.docId, FieldSelectors.UUID_AND_PARENT);
- foreignParentDocIds.put(info.docId, DocId.create(doc.getValues(FieldNames.PARENT)));
- } else {
- // no parent -> root node
- foreignParentDocIds.put(info.docId, DocId.NULL);
+ if (stopRequested) {
+ return;
+ }
+
+ for (NodeInfo info : docs.values()) {
+ int parentDocId = -1;
+ NodeInfo parent = docs.get(info.parent);
+ if (parent != null) {
+ parentDocId = parent.docId;
+ } else {
+ Integer docId = parents.get(info.parent);
+ if (docId != null) {
+ parentDocId = docId;
+ }
+ }
+ if (parentDocId != -1) {
+ inSegmentParents[info.docId] = parentDocId;
+ } else if (info.parent != null) {
+ foreignParents++;
+ foreignParentDocIds.put(info.docId, DocId.create(info.parent));
+ } else if (shareableNodes.get(info.docId)) {
+ Document doc = reader.document(info.docId, FieldSelectors.UUID_AND_PARENT);
+ foreignParentDocIds.put(info.docId, DocId.create(doc.getValues(FieldNames.PARENT)));
+ } else {
+ // no parent -> root node
+ foreignParentDocIds.put(info.docId, DocId.NULL);
+ }
}
}
+
if (log.isDebugEnabled()) {
NumberFormat nf = NumberFormat.getPercentInstance();
nf.setMaximumFractionDigits(1);
@@ -512,7 +571,10 @@ class CachingIndexReader extends FilterI
Term t = terms.term();
if (t != null && t.field() == start.field()) {
tDocs.seek(terms);
- collector.collect(t, tDocs);
+ if (!collector.collect(t, tDocs)) {
+ // collector indicated break
+ break;
+ }
} else {
break;
}
@@ -542,9 +604,10 @@ class CachingIndexReader extends FilterI
*
* @param term the term.
* @param tDocs the term docs of <code>term</code>.
+ * @return false if the collector does not wish to collect more TermDocs.
* @throws IOException if an error occurs while reading from the index.
*/
- void collect(Term term, TermDocs tDocs) throws IOException;
+ boolean collect(Term term, TermDocs tDocs) throws IOException;
}
private final static class NodeInfo {