You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by sh...@apache.org on 2015/07/05 13:41:53 UTC
svn commit: r1689239 - in
/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src:
main/java/org/apache/manifoldcf/agents/output/lucene/
test/java/org/apache/manifoldcf/agents/output/lucene/tests/
Author: shinichiro
Date: Sun Jul 5 11:41:52 2015
New Revision: 1689239
URL: http://svn.apache.org/r1689239
Log:
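fix bugs: return MatchNoDocsQuery instead of MatchAllDocsQuery when query parsing fails; move the default index path under "collection1"; compare settings against the active client's own version string in LuceneClientManager; re-create the client in LuceneConnector.getSession when it is no longer open; log and record failures raised while building a document for indexing.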
Modified:
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java Sun Jul 5 11:41:52 2015
@@ -41,7 +41,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -130,12 +130,11 @@ public class LuceneClient implements Clo
Analyzer indexAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldIndexAnalyzers);
Analyzer queryAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldQueryAnalyzers);
- IndexWriterConfig config
- = new IndexWriterConfig(indexAnalyzer)
- .setOpenMode(OpenMode.CREATE_OR_APPEND)
- .setUseCompoundFile(false)
- .setCommitOnClose(IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE)
- .setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB * 6);
+ IndexWriterConfig config = new IndexWriterConfig(indexAnalyzer)
+ .setOpenMode(OpenMode.CREATE_OR_APPEND)
+ .setUseCompoundFile(false)
+ .setCommitOnClose(IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE)
+ .setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB * 6);
Directory fsDir = FSDirectory.open(path);
NRTCachingDirectory cachedDir = new NRTCachingDirectory(fsDir, 4, 48);
@@ -215,7 +214,7 @@ public class LuceneClient implements Clo
return analyzersMap;
}
- private Map<String,Analyzer> createFieldAnalyzers(Map<String,Analyzer> analyzersMap, String target) throws IOException {
+ private Map<String,Analyzer> createFieldAnalyzers(Map<String,Analyzer> analyzersMap, String target) {
Map<String,Analyzer> fieldAnalyzers = Maps.newHashMap();
for (Map.Entry<String,Map<String,Object>> e : fieldsInfo.entrySet()) {
if (e.getValue().get(ATTR_FIELDTYPE).toString().equals(FIELDTYPE_TEXT)) {
@@ -360,11 +359,11 @@ public class LuceneClient implements Clo
public Query newQuery(String queryString) {
String qstr = Objects.firstNonNull(queryString, "*:*");
- Query query = null;
+ Query query;
try {
query = queryParser.parse(qstr, contentField);
} catch (QueryNodeException e) {
- query = new MatchAllDocsQuery();
+ query = new MatchNoDocsQuery();
}
return query;
}
@@ -372,7 +371,7 @@ public class LuceneClient implements Clo
public static String defaultPath() {
String sep = StandardSystemProperty.FILE_SEPARATOR.value();
String userDir = StandardSystemProperty.USER_DIR.value();
- return userDir+sep+"lucene"+sep+"data"+sep+"index";
+ return userDir+sep+"lucene"+sep+"collection1"+sep+"data"+sep+"index";
}
public static String defaultCharfilters() {
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java Sun Jul 5 11:41:52 2015
@@ -8,7 +8,6 @@ import com.google.common.collect.Maps;
public class LuceneClientManager {
private static Map<String,LuceneClient> clients = Maps.newHashMap();
- private static Map<String,String> versionStrings = Maps.newHashMap();
private LuceneClientManager() { }
@@ -36,7 +35,7 @@ public class LuceneClientManager {
LuceneClient.parseAsMap(analyzers),
LuceneClient.parseAsMap(fields),
idField, contentField);
- String activeVersion = versionStrings.get(path);
+ String activeVersion = client.versionString();
if (!activeVersion.equals(latestVersion)) {
throw new IllegalStateException("The connection on this path is active. Can not update to the latest settings."
+ " Active settings:" + activeVersion
@@ -56,7 +55,6 @@ public class LuceneClientManager {
charfilters, tokenizers, filters, analyzers, fields,
idField, contentField);
clients.put(path, client);
- versionStrings.put(path, client.versionString());
return client;
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java Sun Jul 5 11:41:52 2015
@@ -118,7 +118,7 @@ public class LuceneConnector extends org
{
client.close();
} catch (IOException e) {
- Logging.connectors.error("disconnect fails:", e);
+ Logging.connectors.error("Failed to disconnect:", e);
}
client = null;
expirationTime = -1L;
@@ -128,7 +128,7 @@ public class LuceneConnector extends org
protected void getSession() throws ManifoldCFException
{
- if (client == null)
+ if (client == null || !client.isOpen())
{
final String path = params.getParameter(LuceneConfig.PARAM_PATH);
if (path == null)
@@ -203,7 +203,7 @@ public class LuceneConnector extends org
{
client.close();
} catch (IOException e) {
- Logging.connectors.error("poll fails:", e);
+ Logging.connectors.error("Failed to poll:", e);
}
client = null;
expirationTime = -1L;
@@ -313,14 +313,14 @@ public class LuceneConnector extends org
{
getSession();
- LuceneDocument inputDoc = buildDocument(documentURI, document);
-
long startTime = System.currentTimeMillis();
try
{
+ LuceneDocument inputDoc = buildDocument(documentURI, document);
client.addOrReplace(documentURI, inputDoc);
activities.recordActivity(startTime, INGEST_ACTIVITY, null, documentURI, "OK", "Document Indexed");
- } catch (IOException e) {
+ } catch (Exception e) {
+ Logging.connectors.error("Failed to addOrReplaceDocumentWithException:" + documentURI, e);
String activityCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
String activityDetails = e.getMessage() + ((e.getCause() != null) ? ": "+ e.getCause().getMessage() : "");
activities.recordActivity(startTime, INGEST_ACTIVITY, null, documentURI, activityCode, activityDetails);
@@ -329,7 +329,7 @@ public class LuceneConnector extends org
return DOCUMENTSTATUS_ACCEPTED;
}
- private LuceneDocument buildDocument(String documentURI, RepositoryDocument document) {
+ private LuceneDocument buildDocument(String documentURI, RepositoryDocument document) throws Exception {
LuceneDocument doc = new LuceneDocument();
doc = LuceneDocument.addField(doc, client.idField(), documentURI, client.fieldsInfo());
@@ -339,8 +339,12 @@ public class LuceneConnector extends org
StringWriter writer = new StringWriter();
IOUtils.copy(document.getBinaryStream(), writer, StandardCharsets.UTF_8);
doc = LuceneDocument.addField(doc, client.contentField(), writer.toString(), client.fieldsInfo());
- } catch (IOException e) {
- Logging.connectors.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer", e);
+ } catch (Exception e) {
+ if (e instanceof IOException) {
+ Logging.connectors.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer " + documentURI, e);
+ } else {
+ throw e;
+ }
}
Iterator<String> it = document.getFields();
@@ -354,7 +358,7 @@ public class LuceneConnector extends org
doc = LuceneDocument.addField(doc, rdField, value, client.fieldsInfo());
}
} catch (IOException e) {
- Logging.connectors.error("[Getting Field Values]Impossible to read value for metadata " + rdField, e);
+ Logging.connectors.error("[Getting Field Values]Impossible to read value for metadata " + rdField + " " + documentURI, e);
}
}
}
@@ -413,7 +417,7 @@ public class LuceneConnector extends org
{
client.optimize();
} catch (IOException e) {
- Logging.connectors.error("noteJobComplete fails:", e);
+ Logging.connectors.error("Failed to noteJobComplete:", e);
}
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java Sun Jul 5 11:41:52 2015
@@ -112,7 +112,7 @@ public class LuceneClientTest {
@Test
public void testGetClientFromManager() throws Exception {
- String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"getclientfrommager-index";
+ String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"getclientfrommanager-index";
LuceneClient client1 =
LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
@@ -139,6 +139,7 @@ public class LuceneClientTest {
client1.close();
assertThat(client1.isOpen(), is(false));
assertThat(client2.isOpen(), is(false));
+ assertThat(client1, is(client2));
client3 =
LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
@@ -272,18 +273,18 @@ public class LuceneClientTest {
}
assertThat(client.reader().docFreq(new Term(CONTENT, br)), is(3));
- hits = searcher.search(client.newQuery(ID+":\\/repo\\/003"), 1);
+ hits = searcher.search(client.newQuery("id:\\/repo\\/003"), 1);
Document storedDocument = searcher.doc(hits.scoreDocs[0].doc);
assertThat(storedDocument.getField(CONTENT).stringValue(), is("Apache Solr"));
- String rt = "realtime";
+ String nrt = "near-real-time";
LuceneDocument doc4 = new LuceneDocument()
- .addStringField(ID, rt, true);
- client.addOrReplace(rt, doc4);
- ManifoldCF.sleep(2000L);
- assertThat(searcher.count(client.newQuery(ID+":"+rt)), is(0));
- assertThat(client.newSearcher().count(client.newQuery(ID+":"+rt)), is(0));
- assertThat(client.newRealtimeSearcher().count(client.newQuery(ID+":"+rt)), is(1));
+ .addStringField(ID, nrt, true);
+ client.addOrReplace(nrt, doc4);
+ ManifoldCF.sleep(1500L);
+ assertThat(searcher.count(client.newQuery(ID+":"+nrt)), is(0));
+ assertThat(client.newSearcher().count(client.newQuery(ID+":"+nrt)), is(0));
+ assertThat(client.newRealtimeSearcher().count(client.newQuery(ID+":"+nrt)), is(1));
}
}
@@ -297,19 +298,17 @@ public class LuceneClientTest {
String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"rd-index";
try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
-
- Map<String,Map<String,Object>> fieldsInfo = client.fieldsInfo();
-
LuceneDocument doc = new LuceneDocument();
- doc = LuceneDocument.addField(doc, client.idField(), documentURI, fieldsInfo);
+
+ doc = LuceneDocument.addField(doc, client.idField(), documentURI, client.fieldsInfo());
Iterator<String> it = rd.getFields();
while (it.hasNext()) {
String rdField = it.next();
- if (fieldsInfo.containsKey(rdField)) {
+ if (client.fieldsInfo().containsKey(rdField)) {
String[] values = rd.getFieldAsStrings(rdField);
for (String value : values) {
- doc = LuceneDocument.addField(doc, rdField, value, fieldsInfo);
+ doc = LuceneDocument.addField(doc, rdField, value, client.fieldsInfo());
}
}
}