You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/01 12:40:05 UTC
svn commit: r1607036 - in /manifoldcf/trunk: CHANGES.txt
framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java
Author: kwright
Date: Tue Jul 1 10:40:04 2014
New Revision: 1607036
URL: http://svn.apache.org/r1607036
Log:
Fix for CONNECTORS-988
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1607036&r1=1607035&r2=1607036&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Jul 1 10:40:04 2014
@@ -3,6 +3,9 @@ $Id$
======================= 1.7-dev =====================
+CONNECTORS-988: Performance improvements for split pipeline crawls.
+(Karl Wright)
+
CONNECTORS-985: Get UI tests working again.
(Karl Wright)
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java?rev=1607036&r1=1607035&r2=1607036&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java Tue Jul 1 10:40:04 2014
@@ -40,10 +40,10 @@ public class RepositoryDocumentFactory
protected final RepositoryDocument original;
// The binary stream file and stream (if any)
- protected BinaryTracker binaryTracker;
+ protected BinaryInput binaryTracker;
// Readers (organized by metadata)
- protected Map<String,ReaderTracker[]> metadataReaders = new HashMap<String,ReaderTracker[]>();
+ protected Map<String,CharacterInput[]> metadataReaders = new HashMap<String,CharacterInput[]>();
/** Constructor.
* Pass a RepositoryDocument. This constructor reads all streams and stores them in
@@ -59,7 +59,7 @@ public class RepositoryDocumentFactory
try
{
- this.binaryTracker = new BinaryTracker(document.getBinaryStream());
+ this.binaryTracker = new TempFileInput(document.getBinaryStream());
// Copy all reader streams
Iterator<String> iter = document.getFields();
if (iter.hasNext())
@@ -68,12 +68,12 @@ public class RepositoryDocumentFactory
Object[] objects = document.getField(fieldName);
if (objects instanceof Reader[])
{
- ReaderTracker[] newValues = new ReaderTracker[objects.length];
+ CharacterInput[] newValues = new CharacterInput[objects.length];
metadataReaders.put(fieldName,newValues);
// Populate newValues
for (int i = 0; i < newValues.length; i++)
{
- newValues[i] = new ReaderTracker((Reader)objects[i]);
+ newValues[i] = new TempFileCharacterInput((Reader)objects[i]);
}
}
}
@@ -82,14 +82,14 @@ public class RepositoryDocumentFactory
{
// Clean up everything we've done so far.
if (this.binaryTracker != null)
- this.binaryTracker.close();
+ this.binaryTracker.discard();
for (String key : metadataReaders.keySet())
{
- ReaderTracker[] rt = metadataReaders.get(key);
- for (ReaderTracker r : rt)
+ CharacterInput[] rt = metadataReaders.get(key);
+ for (CharacterInput r : rt)
{
if (r != null)
- r.close();
+ r.discard();
}
}
if (e instanceof IOException)
@@ -129,7 +129,8 @@ public class RepositoryDocumentFactory
}
// Copy binary
- rd.setBinary(binaryTracker.createNewInputStream(),original.getBinaryLength());
+ binaryTracker.doneWithStream();
+ rd.setBinary(binaryTracker.getStream(),original.getBinaryLength());
// Copy metadata fields (including minting new Readers where needed)
Iterator<String> iter = original.getFields();
if (iter.hasNext())
@@ -138,11 +139,12 @@ public class RepositoryDocumentFactory
Object[] objects = original.getField(fieldName);
if (objects instanceof Reader[])
{
- ReaderTracker[] rts = metadataReaders.get(fieldName);
+ CharacterInput[] rts = metadataReaders.get(fieldName);
Reader[] newReaders = new Reader[rts.length];
for (int i = 0; i < rts.length; i++)
{
- newReaders[i] = rts[i].createNewReader();
+ rts[i].doneWithStream();
+ newReaders[i] = rts[i].getStream();
}
rd.addField(fieldName,newReaders);
}
@@ -166,199 +168,15 @@ public class RepositoryDocumentFactory
public void close()
throws ManifoldCFException
{
- binaryTracker.close();
+ binaryTracker.discard();
for (String key : metadataReaders.keySet())
{
- ReaderTracker[] rt = metadataReaders.get(key);
- for (ReaderTracker r : rt)
+ CharacterInput[] rt = metadataReaders.get(key);
+ for (CharacterInput r : rt)
{
- r.close();
+ r.discard();
}
}
}
- protected static class ReaderTracker
- {
- protected File readerFile;
- protected Reader reader = null;
-
- public ReaderTracker(Reader r)
- throws IOException
- {
- // Make a local copy
- readerFile = File.createTempFile("mcfrdr","tmp");
- try
- {
- FileOutputStream os = new FileOutputStream(readerFile);
- try
- {
- OutputStreamWriter ow = new OutputStreamWriter(os,"utf-8");
- try
- {
- char[] byteArray = new char[65536];
- while (true)
- {
- int amt = r.read(byteArray,0,byteArray.length);
- if (amt == -1)
- break;
- ow.write(byteArray,0,amt);
- }
- }
- finally
- {
- ow.flush();
- }
- }
- finally
- {
- os.close();
- }
- }
- catch (Throwable e)
- {
- readerFile.delete();
- if (e instanceof IOException)
- throw (IOException)e;
- else if (e instanceof RuntimeException)
- throw (RuntimeException)e;
- else if (e instanceof Error)
- throw (Error)e;
- else
- throw new RuntimeException("Unknown error type: "+e.getClass().getName()+": "+e.getMessage(),e);
- }
- }
-
- public Reader createNewReader()
- throws ManifoldCFException
- {
- try
- {
- // Close existing inputstream and create a new one.
- if (reader != null)
- {
- reader.close();
- reader = null;
- }
- reader = new InputStreamReader(new FileInputStream(readerFile),"utf-8");
- return reader;
- }
- catch (IOException e)
- {
- handleIOException(e);
- return null;
- }
- }
-
- public void close()
- throws ManifoldCFException
- {
- try
- {
- // Close all streams and delete file
- if (reader != null)
- {
- reader.close();
- reader = null;
- }
- readerFile.delete();
- }
- catch (IOException e)
- {
- handleIOException(e);
- }
- }
- }
-
- protected static class BinaryTracker
- {
- protected File binaryFile;
- protected InputStream inputStream = null;
-
- public BinaryTracker(InputStream is)
- throws IOException
- {
- // Make a local copy
- binaryFile = File.createTempFile("mcfbin","tmp");
- try
- {
- FileOutputStream os = new FileOutputStream(binaryFile);
- try
- {
- byte[] byteArray = new byte[65536];
- while (true)
- {
- int amt = is.read(byteArray,0,byteArray.length);
- if (amt == -1)
- break;
- os.write(byteArray,0,amt);
- }
- }
- finally
- {
- os.close();
- }
- }
- catch (Throwable e)
- {
- binaryFile.delete();
- if (e instanceof IOException)
- throw (IOException)e;
- else if (e instanceof RuntimeException)
- throw (RuntimeException)e;
- else if (e instanceof Error)
- throw (Error)e;
- else
- throw new RuntimeException("Unknown error type: "+e.getClass().getName()+": "+e.getMessage(),e);
- }
- }
-
- public InputStream createNewInputStream()
- throws ManifoldCFException
- {
- try
- {
- // Close existing inputstream and create a new one.
- if (inputStream != null)
- {
- inputStream.close();
- inputStream = null;
- }
- inputStream = new FileInputStream(binaryFile);
- return inputStream;
- }
- catch (IOException e)
- {
- handleIOException(e);
- return null;
- }
- }
-
- public void close()
- throws ManifoldCFException
- {
- try
- {
- // Close all streams and delete file
- if (inputStream != null)
- {
- inputStream.close();
- inputStream = null;
- }
- binaryFile.delete();
- }
- catch (IOException e)
- {
- handleIOException(e);
- }
- }
- }
-
- protected static void handleIOException(IOException e)
- throws ManifoldCFException
- {
- if (e instanceof InterruptedIOException)
- throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- throw new ManifoldCFException(e.getMessage(),e);
- }
-
}