You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/01/30 01:26:39 UTC

svn commit: r1440226 - in /lucene/dev/trunk/solr: ./ contrib/langid/src/java/org/apache/solr/update/processor/ contrib/langid/src/test/org/apache/solr/update/processor/

Author: janhoy
Date: Wed Jan 30 00:26:39 2013
New Revision: 1440226

URL: http://svn.apache.org/viewvc?rev=1440226&view=rev
Log:
SOLR-3967: langid.enforceSchema option checks source field instead of target field

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
    lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1440226&r1=1440225&r2=1440226&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jan 30 00:26:39 2013
@@ -95,6 +95,8 @@ Bug Fixes
 
 * SOLR-4342: Fix DataImportHandler stats to be a prper Map (hossman)
 
+* SOLR-3967: langid.enforceSchema option checks source field instead of target field (janhoy)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java?rev=1440226&r1=1440225&r2=1440226&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java Wed Jan 30 00:26:39 2013
@@ -222,10 +222,6 @@ public abstract class LanguageIdentifier
             log.debug("Mapping field "+fieldName+" using document global language "+fieldLang);
           }
           String mappedOutputField = getMappedField(fieldName, fieldLang);
-          if(enforceSchema && schema.getFieldOrNull(fieldName) == null) {
-            log.warn("Unsuccessful field name mapping to {}, field does not exist, skipping mapping.", mappedOutputField, fieldName);
-            mappedOutputField = fieldName;
-          }
 
           if (mappedOutputField != null) {
             log.debug("Mapping field {} to {}", doc.getFieldValue(docIdField), fieldLang);
@@ -350,17 +346,23 @@ public abstract class LanguageIdentifier
 
   /**
    * Returns the name of the field to map the current contents into, so that they are properly analyzed.  For instance
-   * if the currentField is "text" and the code is "en", the new field would be "text_en".  If such a field doesn't exist,
-   * then null is returned.
+   * if the currentField is "text" and the code is "en", the new field would by default be "text_en".
+   * This method also performs custom regex pattern replace if configured. If enforceSchema=true
+   * and the resulting field name doesn't exist, then null is returned.
    *
    * @param currentField The current field name
    * @param language the language code
-   * @return The new schema field name, based on pattern and replace
+   * @return The new schema field name, based on pattern and replace, or null if illegal
    */
   protected String getMappedField(String currentField, String language) {
     String lc = lcMap.containsKey(language) ? lcMap.get(language) : language;
     String newFieldName = langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
-    log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
+    if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
+      log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);
+      return null;
+    } else {
+      log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
+    }
     return newFieldName;
   }
 

Modified: lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java?rev=1440226&r1=1440225&r2=1440226&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java Wed Jan 30 00:26:39 2013
@@ -93,7 +93,7 @@ public abstract class LanguageIdentifier
     parameters = new ModifiableSolrParams();
     parameters.add("langid.fl", "name");
     parameters.add("langid.map.lcmap", "jp:s zh:cjk ko:cjk");
-    parameters.add("langid.enforceSchema", "true");
+    parameters.set("langid.enforceSchema", "false");
     liProcessor = createLangIdProcessor(parameters);
     
     assertEquals("test_no", liProcessor.getMappedField("test", "no"));
@@ -102,13 +102,17 @@ public abstract class LanguageIdentifier
     assertEquals("test_cjk", liProcessor.getMappedField("test", "zh"));
     assertEquals("test_cjk", liProcessor.getMappedField("test", "ko"));
 
-    // Prove support for other mapping regex
-    parameters.add("langid.map.pattern", "text_(.*?)_field");
-    parameters.add("langid.map.replace", "$1_{lang}Text");
+    // Test that enforceSchema correctly catches illegal field and returns null
+    parameters.set("langid.enforceSchema", "true");
     liProcessor = createLangIdProcessor(parameters);
+    assertEquals(null, liProcessor.getMappedField("inputfield", "sv"));
 
-    assertEquals("title_noText", liProcessor.getMappedField("text_title_field", "no"));
-    assertEquals("body_svText", liProcessor.getMappedField("text_body_field", "sv"));
+    // Prove support for other mapping regex, still with enforceSchema=true
+    parameters.add("langid.map.pattern", "text_(.*?)_field");
+    parameters.add("langid.map.replace", "$1_{lang}_s");
+    liProcessor = createLangIdProcessor(parameters);
+    assertEquals("title_no_s", liProcessor.getMappedField("text_title_field", "no"));
+    assertEquals("body_sv_s", liProcessor.getMappedField("text_body_field", "sv"));
   }
 
   @Test