You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by sh...@apache.org on 2015/07/05 13:41:53 UTC

svn commit: r1689239 - in /manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src: main/java/org/apache/manifoldcf/agents/output/lucene/ test/java/org/apache/manifoldcf/agents/output/lucene/tests/

Author: shinichiro
Date: Sun Jul  5 11:41:52 2015
New Revision: 1689239

URL: http://svn.apache.org/r1689239
Log:
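Fix bugs: return MatchNoDocsQuery instead of MatchAllDocsQuery when a query fails to parse; move the default index path under "collection1"; compare against the live client's versionString() in LuceneClientManager instead of a separately cached map; re-create the session in LuceneConnector.getSession() when the client is closed; build the document inside the try block and log/record any exception while indexing; include the document URI in error logs; tidy the related tests.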

Modified:
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java Sun Jul  5 11:41:52 2015
@@ -41,7 +41,7 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
 import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -130,12 +130,11 @@ public class LuceneClient implements Clo
     Analyzer indexAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldIndexAnalyzers);
     Analyzer queryAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldQueryAnalyzers);
 
-    IndexWriterConfig config
-      = new IndexWriterConfig(indexAnalyzer)
-        .setOpenMode(OpenMode.CREATE_OR_APPEND)
-        .setUseCompoundFile(false)
-        .setCommitOnClose(IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE)
-        .setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB * 6);
+    IndexWriterConfig config = new IndexWriterConfig(indexAnalyzer)
+      .setOpenMode(OpenMode.CREATE_OR_APPEND)
+      .setUseCompoundFile(false)
+      .setCommitOnClose(IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE)
+      .setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB * 6);
 
     Directory fsDir = FSDirectory.open(path);
     NRTCachingDirectory cachedDir = new NRTCachingDirectory(fsDir, 4, 48);
@@ -215,7 +214,7 @@ public class LuceneClient implements Clo
     return analyzersMap;
   }
 
-  private Map<String,Analyzer> createFieldAnalyzers(Map<String,Analyzer> analyzersMap, String target) throws IOException {
+  private Map<String,Analyzer> createFieldAnalyzers(Map<String,Analyzer> analyzersMap, String target) {
     Map<String,Analyzer> fieldAnalyzers = Maps.newHashMap();
     for (Map.Entry<String,Map<String,Object>> e : fieldsInfo.entrySet()) {
       if (e.getValue().get(ATTR_FIELDTYPE).toString().equals(FIELDTYPE_TEXT)) {
@@ -360,11 +359,11 @@ public class LuceneClient implements Clo
 
   public Query newQuery(String queryString) {
     String qstr = Objects.firstNonNull(queryString, "*:*");
-    Query query = null;
+    Query query;
     try {
       query = queryParser.parse(qstr, contentField);
     } catch (QueryNodeException e) {
-      query = new MatchAllDocsQuery();
+      query = new MatchNoDocsQuery();
     }
     return query;
   }
@@ -372,7 +371,7 @@ public class LuceneClient implements Clo
   public static String defaultPath() {
     String sep = StandardSystemProperty.FILE_SEPARATOR.value();
     String userDir = StandardSystemProperty.USER_DIR.value();
-    return userDir+sep+"lucene"+sep+"data"+sep+"index";
+    return userDir+sep+"lucene"+sep+"collection1"+sep+"data"+sep+"index";
   }
 
   public static String defaultCharfilters() {

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java Sun Jul  5 11:41:52 2015
@@ -8,7 +8,6 @@ import com.google.common.collect.Maps;
 public class LuceneClientManager {
 
   private static Map<String,LuceneClient> clients = Maps.newHashMap();
-  private static Map<String,String> versionStrings = Maps.newHashMap();
 
   private LuceneClientManager() { }
 
@@ -36,7 +35,7 @@ public class LuceneClientManager {
           LuceneClient.parseAsMap(analyzers),
           LuceneClient.parseAsMap(fields),
           idField, contentField);
-      String activeVersion = versionStrings.get(path);
+      String activeVersion = client.versionString();
       if (!activeVersion.equals(latestVersion)) {
         throw new IllegalStateException("The connection on this path is active. Can not update to the latest settings."
           + " Active settings:" + activeVersion
@@ -56,7 +55,6 @@ public class LuceneClientManager {
                            charfilters, tokenizers, filters, analyzers, fields,
                            idField, contentField);
     clients.put(path, client);
-    versionStrings.put(path, client.versionString());
     return client;
   }
 

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java Sun Jul  5 11:41:52 2015
@@ -118,7 +118,7 @@ public class LuceneConnector extends org
       {
         client.close();
       } catch (IOException e) {
-        Logging.connectors.error("disconnect fails:", e);
+        Logging.connectors.error("Failed to disconnect:", e);
       }
       client = null;
       expirationTime = -1L;
@@ -128,7 +128,7 @@ public class LuceneConnector extends org
 
   protected void getSession() throws ManifoldCFException
   {
-    if (client == null)
+    if (client == null || !client.isOpen())
     {
       final String path = params.getParameter(LuceneConfig.PARAM_PATH);
       if (path == null)
@@ -203,7 +203,7 @@ public class LuceneConnector extends org
         {
           client.close();
         } catch (IOException e) {
-          Logging.connectors.error("poll fails:", e);
+          Logging.connectors.error("Failed to poll:", e);
         }
         client = null;
         expirationTime = -1L;
@@ -313,14 +313,14 @@ public class LuceneConnector extends org
   {
     getSession();
 
-    LuceneDocument inputDoc = buildDocument(documentURI, document);
-
     long startTime = System.currentTimeMillis();
     try
     {
+      LuceneDocument inputDoc = buildDocument(documentURI, document);
       client.addOrReplace(documentURI, inputDoc);
       activities.recordActivity(startTime, INGEST_ACTIVITY, null, documentURI, "OK", "Document Indexed");
-    } catch (IOException e) {
+    } catch (Exception e) {
+      Logging.connectors.error("Failed to addOrReplaceDocumentWithException:" + documentURI, e);
       String activityCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
       String activityDetails = e.getMessage() + ((e.getCause() != null) ? ": "+ e.getCause().getMessage() : "");
       activities.recordActivity(startTime, INGEST_ACTIVITY, null, documentURI, activityCode, activityDetails);
@@ -329,7 +329,7 @@ public class LuceneConnector extends org
     return DOCUMENTSTATUS_ACCEPTED;
   }
 
-  private LuceneDocument buildDocument(String documentURI, RepositoryDocument document) {
+  private LuceneDocument buildDocument(String documentURI, RepositoryDocument document) throws Exception {
     LuceneDocument doc = new LuceneDocument();
 
     doc = LuceneDocument.addField(doc, client.idField(), documentURI, client.fieldsInfo());
@@ -339,8 +339,12 @@ public class LuceneConnector extends org
       StringWriter writer = new StringWriter();
       IOUtils.copy(document.getBinaryStream(), writer, StandardCharsets.UTF_8);
       doc = LuceneDocument.addField(doc, client.contentField(), writer.toString(), client.fieldsInfo());
-    } catch (IOException e) {
-      Logging.connectors.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer", e);
+    } catch (Exception e) {
+      if (e instanceof IOException) {
+        Logging.connectors.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer " + documentURI, e);
+      } else {
+        throw e;
+      }
     }
 
     Iterator<String> it = document.getFields();
@@ -354,7 +358,7 @@ public class LuceneConnector extends org
             doc = LuceneDocument.addField(doc, rdField, value, client.fieldsInfo());
           }
         } catch (IOException e) {
-          Logging.connectors.error("[Getting Field Values]Impossible to read value for metadata " + rdField, e);
+          Logging.connectors.error("[Getting Field Values]Impossible to read value for metadata " + rdField + " " + documentURI, e);
         }
       }
     }
@@ -413,7 +417,7 @@ public class LuceneConnector extends org
     {
       client.optimize();
     } catch (IOException e) {
-      Logging.connectors.error("noteJobComplete fails:", e);
+      Logging.connectors.error("Failed to noteJobComplete:", e);
     }
   }
 

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java?rev=1689239&r1=1689238&r2=1689239&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java Sun Jul  5 11:41:52 2015
@@ -112,7 +112,7 @@ public class LuceneClientTest {
 
   @Test
   public void testGetClientFromManager() throws Exception {
-    String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"getclientfrommager-index";
+    String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"getclientfrommanager-index";
 
     LuceneClient client1 =
       LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
@@ -139,6 +139,7 @@ public class LuceneClientTest {
     client1.close();
     assertThat(client1.isOpen(), is(false));
     assertThat(client2.isOpen(), is(false));
+    assertThat(client1, is(client2));
 
     client3 =
       LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
@@ -272,18 +273,18 @@ public class LuceneClientTest {
       }
       assertThat(client.reader().docFreq(new Term(CONTENT, br)), is(3));
 
-      hits = searcher.search(client.newQuery(ID+":\\/repo\\/003"), 1);
+      hits = searcher.search(client.newQuery("id:\\/repo\\/003"), 1);
       Document storedDocument = searcher.doc(hits.scoreDocs[0].doc);
       assertThat(storedDocument.getField(CONTENT).stringValue(), is("Apache Solr"));
 
-      String rt = "realtime";
+      String nrt = "near-real-time";
       LuceneDocument doc4 = new LuceneDocument()
-        .addStringField(ID, rt, true);
-      client.addOrReplace(rt, doc4);
-      ManifoldCF.sleep(2000L);
-      assertThat(searcher.count(client.newQuery(ID+":"+rt)), is(0));
-      assertThat(client.newSearcher().count(client.newQuery(ID+":"+rt)), is(0));
-      assertThat(client.newRealtimeSearcher().count(client.newQuery(ID+":"+rt)), is(1));
+        .addStringField(ID, nrt, true);
+      client.addOrReplace(nrt, doc4);
+      ManifoldCF.sleep(1500L);
+      assertThat(searcher.count(client.newQuery(ID+":"+nrt)), is(0));
+      assertThat(client.newSearcher().count(client.newQuery(ID+":"+nrt)), is(0));
+      assertThat(client.newRealtimeSearcher().count(client.newQuery(ID+":"+nrt)), is(1));
     }
   }
 
@@ -297,19 +298,17 @@ public class LuceneClientTest {
 
     String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"rd-index";
     try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
-
-      Map<String,Map<String,Object>> fieldsInfo = client.fieldsInfo();
-
       LuceneDocument doc = new LuceneDocument();
-      doc = LuceneDocument.addField(doc, client.idField(), documentURI, fieldsInfo);
+
+      doc = LuceneDocument.addField(doc, client.idField(), documentURI, client.fieldsInfo());
 
       Iterator<String> it = rd.getFields();
       while (it.hasNext()) {
         String rdField = it.next();
-        if (fieldsInfo.containsKey(rdField)) {
+        if (client.fieldsInfo().containsKey(rdField)) {
           String[] values = rd.getFieldAsStrings(rdField);
           for (String value : values) {
-            doc = LuceneDocument.addField(doc, rdField, value, fieldsInfo);
+            doc = LuceneDocument.addField(doc, rdField, value, client.fieldsInfo());
           }
         }
       }