You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@jena.apache.org by Kevin Pauli <kp...@dragon-research.com> on 2011/12/22 20:36:22 UTC

TDB: memory leak with named graphs?

Greetings TDB team!

Consider this program:

import java.io.File;

import java.util.UUID;

import org.apache.commons.io.FileUtils;

import com.hp.hpl.jena.graph.Graph;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.tdb.TDBFactory;

public class SampleDataGenerator {

  private static final String DATABASE_LOCATION = "temp/tests/sampleData/tdb";
  private static final boolean DELETE_EXISTING_DATA = true;
  private static final String NAMESPACE = "http://example.com/";
  private static final int NUM_TRIPLES_TO_LOAD = 1000000;
  private static final int SHOW_PROGRESS_INTERVAL = 5000;

  public static void main(String[] args) throws Exception {
    File directory = new File(DATABASE_LOCATION);
    if (DELETE_EXISTING_DATA)
      FileUtils.deleteDirectory(directory);
    directory.mkdirs();
//    Graph graph = TDBFactory.createNamedGraph(NAMESPACE + "graph1", DATABASE_LOCATION);  // this one fails
    Graph graph = TDBFactory.createGraph(DATABASE_LOCATION); // this one works
    for (int i = 1; i <= NUM_TRIPLES_TO_LOAD; i++) {
      graph.add(new Triple(createRandomNode(), createRandomNode(), createRandomNode()));
      if (i % SHOW_PROGRESS_INTERVAL == 0) {
        int percent = 100 * i / NUM_TRIPLES_TO_LOAD;
        System.out.println("loaded " + i + " of " + NUM_TRIPLES_TO_LOAD + " triples (" + percent + "%)");
      }
    }
    graph.close();
  }

  private static Node createRandomNode() {
    return Node.createURI(NAMESPACE + UUID.randomUUID().toString());
  }

}


It runs fine as it is, creating 1M random triples.  

But if I try to use a named graph (rather than the default graph), by commenting out the line "this one works" and uncommenting the line "this one fails", it fails somewhere between 575K and 580K triples, with the following stack trace:

loaded 575000 of 1000000 triples (57%)
Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:39)
at java.nio.ByteBuffer.allocate(ByteBuffer.java:312)
at com.hp.hpl.jena.tdb.base.block.BlockMgrDirect.allocateBuffer(BlockMgrDirect.java:35)
at com.hp.hpl.jena.tdb.base.block.BlockMgrDirect.getByteBuffer(BlockMgrDirect.java:52)
at com.hp.hpl.jena.tdb.base.block.BlockMgrDirect.get(BlockMgrDirect.java:46)
at com.hp.hpl.jena.tdb.base.block.BlockMgrSync.get(BlockMgrSync.java:46)
at com.hp.hpl.jena.tdb.base.block.BlockMgrCache.get(BlockMgrCache.java:106)
at com.hp.hpl.jena.tdb.base.block.BlockConverter.get(BlockConverter.java:62)
at com.hp.hpl.jena.tdb.base.recordfile.RecordBufferPageMgr.get(RecordBufferPageMgr.java:45)
at com.hp.hpl.jena.tdb.index.bplustree.BPTreeRecordsMgr.get(BPTreeRecordsMgr.java:32)
at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.get(BPTreeNode.java:125)
at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.internalInsert(BPTreeNode.java:379)
at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.internalInsert(BPTreeNode.java:399)
at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.insert(BPTreeNode.java:167)
at com.hp.hpl.jena.tdb.index.bplustree.BPlusTree.addAndReturnOld(BPlusTree.java:297)
at com.hp.hpl.jena.tdb.index.bplustree.BPlusTree.add(BPlusTree.java:289)
at com.hp.hpl.jena.tdb.nodetable.NodeTableNative.accessIndex(NodeTableNative.java:133)
at com.hp.hpl.jena.tdb.nodetable.NodeTableNative._idForNode(NodeTableNative.java:98)
at com.hp.hpl.jena.tdb.nodetable.NodeTableNative.getAllocateNodeId(NodeTableNative.java:67)
at com.hp.hpl.jena.tdb.nodetable.NodeTableCache._idForNode(NodeTableCache.java:108)
at com.hp.hpl.jena.tdb.nodetable.NodeTableCache.getAllocateNodeId(NodeTableCache.java:67)
at com.hp.hpl.jena.tdb.nodetable.NodeTableWrapper.getAllocateNodeId(NodeTableWrapper.java:32)
at com.hp.hpl.jena.tdb.nodetable.NodeTableInline.getAllocateNodeId(NodeTableInline.java:39)
at com.hp.hpl.jena.tdb.nodetable.NodeTupleTableConcrete.addRow(NodeTupleTableConcrete.java:72)
at com.hp.hpl.jena.tdb.store.QuadTable.add(QuadTable.java:58)
at com.hp.hpl.jena.tdb.store.QuadTable.add(QuadTable.java:52)
at com.hp.hpl.jena.tdb.store.GraphNamedTDB._performAdd(GraphNamedTDB.java:77)
at com.hp.hpl.jena.tdb.store.GraphTDBBase.performAdd(GraphTDBBase.java:80)
at com.hp.hpl.jena.sparql.graph.GraphBase2.add(GraphBase2.java:189)
at SampleDataGenerator.main(SampleDataGenerator.java:29)

 

Regards,
Kevin Pauli

Re: TDB: memory leak with named graphs?

Posted by Andy Seaborne <an...@apache.org>.
Quick question:

Is this 32-bit Java?  It looks like it from the stack trace.

	Andy

On 22/12/11 19:36, Kevin Pauli wrote:
> Greetings TDB team!
>
> Consider this program:
>
> import java.io.File;
>
> import java.util.UUID;
>
> import org.apache.commons.io.FileUtils;
>
> import com.hp.hpl.jena.graph.Graph;
> import com.hp.hpl.jena.graph.Node;
> import com.hp.hpl.jena.graph.Triple;
> import com.hp.hpl.jena.tdb.TDBFactory;
>
> public class SampleDataGenerator {
>
>    private static final String DATABASE_LOCATION = "temp/tests/sampleData/tdb";
>    private static final boolean DELETE_EXISTING_DATA = true;
>    private static final String NAMESPACE = "http://example.com/";
>    private static final int NUM_TRIPLES_TO_LOAD = 1000000;
>    private static final int SHOW_PROGRESS_INTERVAL = 5000;
>
>    public static void main(String[] args) throws Exception {
>      File directory = new File(DATABASE_LOCATION);
>      if (DELETE_EXISTING_DATA)
>        FileUtils.deleteDirectory(directory);
>      directory.mkdirs();
> //    Graph graph = TDBFactory.createNamedGraph(NAMESPACE + "graph1", DATABASE_LOCATION);  // this one fails
>      Graph graph = TDBFactory.createGraph(DATABASE_LOCATION); // this one works
>      for (int i = 1; i<= NUM_TRIPLES_TO_LOAD; i++) {
>        graph.add(new Triple(createRandomNode(), createRandomNode(), createRandomNode()));
>        if (i % SHOW_PROGRESS_INTERVAL == 0) {
>          int percent = 100 * i / NUM_TRIPLES_TO_LOAD;
>          System.out.println("loaded " + i + " of " + NUM_TRIPLES_TO_LOAD + " triples (" + percent + "%)");
>        }
>      }
>      graph.close();
>    }
>
>    private static Node createRandomNode() {
>      return Node.createURI(NAMESPACE + UUID.randomUUID().toString());
>    }
>
> }
>
>
> It runs fine as it is, creating 1M random triples.
>
> But if I try to use a named graph (rather than the default graph), by commenting out the line "this one works" and uncommenting the line "this one fails", it fails somewhere between 575K and 580K triples, with the following stack trace:
>
> loaded 575000 of 1000000 triples (57%)
> Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
> at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:39)
> at java.nio.ByteBuffer.allocate(ByteBuffer.java:312)
> at com.hp.hpl.jena.tdb.base.block.BlockMgrDirect.allocateBuffer(BlockMgrDirect.java:35)
> at com.hp.hpl.jena.tdb.base.block.BlockMgrDirect.getByteBuffer(BlockMgrDirect.java:52)
> at com.hp.hpl.jena.tdb.base.block.BlockMgrDirect.get(BlockMgrDirect.java:46)
> at com.hp.hpl.jena.tdb.base.block.BlockMgrSync.get(BlockMgrSync.java:46)
> at com.hp.hpl.jena.tdb.base.block.BlockMgrCache.get(BlockMgrCache.java:106)
> at com.hp.hpl.jena.tdb.base.block.BlockConverter.get(BlockConverter.java:62)
> at com.hp.hpl.jena.tdb.base.recordfile.RecordBufferPageMgr.get(RecordBufferPageMgr.java:45)
> at com.hp.hpl.jena.tdb.index.bplustree.BPTreeRecordsMgr.get(BPTreeRecordsMgr.java:32)
> at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.get(BPTreeNode.java:125)
> at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.internalInsert(BPTreeNode.java:379)
> at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.internalInsert(BPTreeNode.java:399)
> at com.hp.hpl.jena.tdb.index.bplustree.BPTreeNode.insert(BPTreeNode.java:167)
> at com.hp.hpl.jena.tdb.index.bplustree.BPlusTree.addAndReturnOld(BPlusTree.java:297)
> at com.hp.hpl.jena.tdb.index.bplustree.BPlusTree.add(BPlusTree.java:289)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableNative.accessIndex(NodeTableNative.java:133)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableNative._idForNode(NodeTableNative.java:98)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableNative.getAllocateNodeId(NodeTableNative.java:67)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableCache._idForNode(NodeTableCache.java:108)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableCache.getAllocateNodeId(NodeTableCache.java:67)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableWrapper.getAllocateNodeId(NodeTableWrapper.java:32)
> at com.hp.hpl.jena.tdb.nodetable.NodeTableInline.getAllocateNodeId(NodeTableInline.java:39)
> at com.hp.hpl.jena.tdb.nodetable.NodeTupleTableConcrete.addRow(NodeTupleTableConcrete.java:72)
> at com.hp.hpl.jena.tdb.store.QuadTable.add(QuadTable.java:58)
> at com.hp.hpl.jena.tdb.store.QuadTable.add(QuadTable.java:52)
> at com.hp.hpl.jena.tdb.store.GraphNamedTDB._performAdd(GraphNamedTDB.java:77)
> at com.hp.hpl.jena.tdb.store.GraphTDBBase.performAdd(GraphTDBBase.java:80)
> at com.hp.hpl.jena.sparql.graph.GraphBase2.add(GraphBase2.java:189)
> at SampleDataGenerator.main(SampleDataGenerator.java:29)
>
>
>
> Regards,
> Kevin Pauli