You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/01/30 01:26:39 UTC
svn commit: r1440226 - in /lucene/dev/trunk/solr: ./
contrib/langid/src/java/org/apache/solr/update/processor/
contrib/langid/src/test/org/apache/solr/update/processor/
Author: janhoy
Date: Wed Jan 30 00:26:39 2013
New Revision: 1440226
URL: http://svn.apache.org/viewvc?rev=1440226&view=rev
Log:
SOLR-3967: langid.enforceSchema option checks source field instead of target field
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1440226&r1=1440225&r2=1440226&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jan 30 00:26:39 2013
@@ -95,6 +95,8 @@ Bug Fixes
* SOLR-4342: Fix DataImportHandler stats to be a proper Map (hossman)
+* SOLR-3967: langid.enforceSchema option checks source field instead of target field (janhoy)
+
Optimizations
----------------------
Modified: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java?rev=1440226&r1=1440225&r2=1440226&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java Wed Jan 30 00:26:39 2013
@@ -222,10 +222,6 @@ public abstract class LanguageIdentifier
log.debug("Mapping field "+fieldName+" using document global language "+fieldLang);
}
String mappedOutputField = getMappedField(fieldName, fieldLang);
- if(enforceSchema && schema.getFieldOrNull(fieldName) == null) {
- log.warn("Unsuccessful field name mapping to {}, field does not exist, skipping mapping.", mappedOutputField, fieldName);
- mappedOutputField = fieldName;
- }
if (mappedOutputField != null) {
log.debug("Mapping field {} to {}", doc.getFieldValue(docIdField), fieldLang);
@@ -350,17 +346,23 @@ public abstract class LanguageIdentifier
/**
* Returns the name of the field to map the current contents into, so that they are properly analyzed. For instance
- * if the currentField is "text" and the code is "en", the new field would be "text_en". If such a field doesn't exist,
- * then null is returned.
+ * if the currentField is "text" and the code is "en", the new field would by default be "text_en".
+ * This method also performs a custom regex pattern replacement if configured. If enforceSchema=true
+ * and the resulting field name doesn't exist, then null is returned.
*
* @param currentField The current field name
* @param language the language code
- * @return The new schema field name, based on pattern and replace
+ * @return The new schema field name, based on pattern and replace, or null if illegal
*/
protected String getMappedField(String currentField, String language) {
String lc = lcMap.containsKey(language) ? lcMap.get(language) : language;
String newFieldName = langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
- log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
+ if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
+ log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);
+ return null;
+ } else {
+ log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
+ }
return newFieldName;
}
Modified: lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java?rev=1440226&r1=1440225&r2=1440226&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java Wed Jan 30 00:26:39 2013
@@ -93,7 +93,7 @@ public abstract class LanguageIdentifier
parameters = new ModifiableSolrParams();
parameters.add("langid.fl", "name");
parameters.add("langid.map.lcmap", "jp:s zh:cjk ko:cjk");
- parameters.add("langid.enforceSchema", "true");
+ parameters.set("langid.enforceSchema", "false");
liProcessor = createLangIdProcessor(parameters);
assertEquals("test_no", liProcessor.getMappedField("test", "no"));
@@ -102,13 +102,17 @@ public abstract class LanguageIdentifier
assertEquals("test_cjk", liProcessor.getMappedField("test", "zh"));
assertEquals("test_cjk", liProcessor.getMappedField("test", "ko"));
- // Prove support for other mapping regex
- parameters.add("langid.map.pattern", "text_(.*?)_field");
- parameters.add("langid.map.replace", "$1_{lang}Text");
+ // Test that enforceSchema correctly catches illegal field and returns null
+ parameters.set("langid.enforceSchema", "true");
liProcessor = createLangIdProcessor(parameters);
+ assertEquals(null, liProcessor.getMappedField("inputfield", "sv"));
- assertEquals("title_noText", liProcessor.getMappedField("text_title_field", "no"));
- assertEquals("body_svText", liProcessor.getMappedField("text_body_field", "sv"));
+ // Prove support for other mapping regex, still with enforceSchema=true
+ parameters.add("langid.map.pattern", "text_(.*?)_field");
+ parameters.add("langid.map.replace", "$1_{lang}_s");
+ liProcessor = createLangIdProcessor(parameters);
+ assertEquals("title_no_s", liProcessor.getMappedField("text_title_field", "no"));
+ assertEquals("body_sv_s", liProcessor.getMappedField("text_body_field", "sv"));
}
@Test