You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 15:32:11 UTC
svn commit: r1630099 -
/manifoldcf/branches/CONNECTORS-1067/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
Author: kwright
Date: Wed Oct 8 13:32:11 2014
New Revision: 1630099
URL: http://svn.apache.org/r1630099
Log:
Update jdbc connector
Modified:
manifoldcf/branches/CONNECTORS-1067/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
Modified: manifoldcf/branches/CONNECTORS-1067/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java?rev=1630099&r1=1630098&r2=1630099&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java Wed Oct 8 13:32:11 2014
@@ -526,154 +526,167 @@ public class JDBCConnector extends org.a
if (o != null)
contentType = JDBCConnection.readAsString(o);
else
- contentType = null;
-
- if (contentType == null || activities.checkMimeTypeIndexable(contentType))
{
if (contents instanceof BinaryInput)
- {
- // An ingestion will take place for this document.
- RepositoryDocument rd = new RepositoryDocument();
+ contentType = "application/octet-stream";
+ else if (contents instanceof CharacterInput)
+ contentType = "text/plain; charset=utf-8";
+ else
+ contentType = "text/plain";
+ }
+
+ if (!activities.checkMimeTypeIndexable(contentType))
+ {
+ Logging.connectors.debug("JDBC: Document '"+id+"' excluded because of mime type - skipping");
+ activities.noDocument(id,version);
+ continue;
+ }
+
+ if (!activities.checkURLIndexable(url))
+ {
+ Logging.connectors.debug("JDBC: Document '"+id+"' excluded because of url - skipping");
+ activities.noDocument(id,version);
+ continue;
+ }
- // Default content type is application/octet-stream for binary data
- if (contentType == null)
- rd.setMimeType("application/octet-stream");
- else
- rd.setMimeType(contentType);
+ // An ingestion will take place for this document.
+ RepositoryDocument rd = new RepositoryDocument();
+ rd.setMimeType(contentType);
- applyAccessTokens(rd,ts);
- applyMetadata(rd,row);
+ applyAccessTokens(rd,ts);
+ applyMetadata(rd,row);
+
+ if (contents instanceof BinaryInput)
+ {
+
+ BinaryInput bi = (BinaryInput)contents;
+ long fileLength = bi.getLength();
+
+ if (!activities.checkLengthIndexable(fileLength))
+ {
+ Logging.connectors.debug("JDBC: Document '"+id+"' excluded because of length - skipping");
+ activities.noDocument(id, version);
+ continue;
+ }
- BinaryInput bi = (BinaryInput)contents;
+ try
+ {
+ // Read the stream
+ InputStream is = bi.getStream();
try
{
- // Read the stream
- InputStream is = bi.getStream();
- try
- {
- rd.setBinary(is,bi.getLength());
- activities.ingestDocumentWithException(id, version, url, rd);
- }
- finally
- {
- is.close();
- }
+ rd.setBinary(is,fileLength);
+ activities.ingestDocumentWithException(id, version, url, rd);
}
- catch (java.net.SocketTimeoutException e)
+ finally
{
- throw new ManifoldCFException("Socket timeout reading database data: "+e.getMessage(),e);
+ is.close();
}
- catch (InterruptedIOException e)
+ }
+ catch (java.net.SocketTimeoutException e)
+ {
+ throw new ManifoldCFException("Socket timeout reading database data: "+e.getMessage(),e);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
+ }
+ }
+ else if (contents instanceof CharacterInput)
+ {
+ CharacterInput ci = (CharacterInput)contents;
+ long fileLength = ci.getUtf8StreamLength();
+
+ if (!activities.checkLengthIndexable(fileLength))
+ {
+ Logging.connectors.debug("JDBC: Document '"+id+"' excluded because of length - skipping");
+ activities.noDocument(id, version);
+ continue;
+ }
+
+ try
+ {
+ // Read the stream
+ InputStream is = ci.getUtf8Stream();
+ try
{
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ rd.setBinary(is,fileLength);
+ activities.ingestDocumentWithException(id, version, url, rd);
}
- catch (IOException e)
+ finally
{
- throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
+ is.close();
}
}
- else if (contents instanceof CharacterInput)
+ catch (java.net.SocketTimeoutException e)
+ {
+ throw new ManifoldCFException("Socket timeout reading database data: "+e.getMessage(),e);
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
{
- // An ingestion will take place for this document.
- RepositoryDocument rd = new RepositoryDocument();
+ throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
+ }
+ }
+ else
+ {
+ // Turn it into a string, and then into a stream
+ String value = contents.toString();
+ byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
+ long fileLength = bytes.length;
- // Default content type is application/octet-stream for binary data
- if (contentType == null)
- rd.setMimeType("text/plain; charset=utf-8");
- else
- rd.setMimeType(contentType);
-
- applyAccessTokens(rd,ts);
- applyMetadata(rd,row);
+ if (!activities.checkLengthIndexable(fileLength))
+ {
+ Logging.connectors.debug("JDBC: Document '"+id+"' excluded because of length - skipping");
+ activities.noDocument(id, version);
+ continue;
+ }
- CharacterInput ci = (CharacterInput)contents;
+ try
+ {
+ InputStream is = new ByteArrayInputStream(bytes);
try
{
- // Read the stream
- InputStream is = ci.getUtf8Stream();
- try
- {
- rd.setBinary(is,ci.getUtf8StreamLength());
- activities.ingestDocumentWithException(id, version, url, rd);
- }
- finally
- {
- is.close();
- }
- }
- catch (java.net.SocketTimeoutException e)
- {
- throw new ManifoldCFException("Socket timeout reading database data: "+e.getMessage(),e);
- }
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ rd.setBinary(is,fileLength);
+ activities.ingestDocumentWithException(id, version, url, rd);
}
- catch (IOException e)
+ finally
{
- throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
+ is.close();
}
}
- else
+ catch (InterruptedIOException e)
{
- // Turn it into a string, and then into a stream
- String value = contents.toString();
- try
- {
- byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
- RepositoryDocument rd = new RepositoryDocument();
-
- // Default content type is text/plain for character data
- if (contentType == null)
- rd.setMimeType("text/plain");
- else
- rd.setMimeType(contentType);
-
- applyAccessTokens(rd,ts);
- applyMetadata(rd,row);
-
- InputStream is = new ByteArrayInputStream(bytes);
- try
- {
- rd.setBinary(is,bytes.length);
- activities.ingestDocumentWithException(id, version, url, rd);
- }
- finally
- {
- is.close();
- }
- }
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
- {
- throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
- }
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
}
- }
- else
- {
- Logging.connectors.warn("JDBC: Document '"+id+"' excluded because of mime type - skipping");
- activities.noDocument(id,version);
}
}
else
{
- Logging.connectors.warn("JDBC: Document '"+id+"' seems to have null data - skipping");
+ Logging.connectors.debug("JDBC: Document '"+id+"' seems to have null data - skipping");
activities.noDocument(id,version);
}
}
else
{
- Logging.connectors.warn("JDBC: Document '"+id+"' has an illegal url: '"+url+"' - skipping");
+ Logging.connectors.debug("JDBC: Document '"+id+"' has an illegal url: '"+url+"' - skipping");
activities.noDocument(id,version);
}
}
else
{
- Logging.connectors.warn("JDBC: Document '"+id+"' has a null url - skipping");
+ Logging.connectors.debug("JDBC: Document '"+id+"' has a null url - skipping");
activities.noDocument(id,version);
}
}