You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by "alaa.abuzaghleh" <al...@gmail.com> on 2014/12/11 22:52:08 UTC
Mutli Lengual Suggester Solr 4.8
I am trying create suggester handler using solr 4.8, everything work fine but
when I try to get suggestion using different language Arabic, or Japanese
for example I got result in mixed language, but I am trying to search only
using Japanese, I got Arabic with that too. the following is my Schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="people_schema" version="1.5">
<fields>
<field name="_version_" type="long" indexed="true" stored="true" />
<field name="id" type="string" indexed="true" stored="true"
required="true" />
<field name="first_name" type="txt_general" indexed="true"
stored="true" multiValued="false" />
<field name="last_name" type="txt_general" indexed="true"
stored="true" multiValued="false" />
<field name="about" type="text_general_edge_ngram" indexed="true"
stored="true" multiValued="false" />
<field name="year_birth" type="tint" indexed="true" stored="true"
multiValued="false" />
<field name="month_birth" type="tint" indexed="true" stored="true"
multiValued="false" />
<field name="day_birth" type="tint" indexed="true" stored="true"
multiValued="false" />
<field name="country" type="string" indexed="true" stored="true"
required="false" multiValued="false" />
<field name="country_tree" type="placetree" indexed="true"
stored="false" multiValued="false" />
<field name="state" type="string" indexed="true" stored="true"
required="false" multiValued="false" />
<field name="state_tree" type="placetree" indexed="true" stored="false"
multiValued="false" />
<field name="city" type="string" indexed="true" stored="true"
required="false" multiValued="false" />
<field name="city_tree" type="placetree" indexed="true" stored="false"
multiValued="false" />
<field name="job" type="string" indexed="true" stored="true"
required="false" multiValued="false" />
<field name="job_tree" type="txt_general" indexed="true" stored="true"
multiValued="false" />
<field name="company" type="string" indexed="true" stored="true"
required="false" multiValued="false" />
<field name="company_tree" type="companytree" indexed="true"
stored="false" multiValued="false" />
<field name="full_name" type="txt_general" indexed="true"
stored="true" multiValued="false" />
<field name="full_name_suggest" type="text_suggest" indexed="true"
stored="true" multiValued="false" />
<field name="full_name_edge" type="text_suggest_edge" indexed="true"
stored="true" multiValued="false" />
<field name="full_name_ngram" type="text_suggest_ngram" indexed="true"
stored="true" multiValued="false" />
<field name="full_name_sort" type="alphaNumericSort" indexed="true"
stored="true" multiValued="false" />
<field name="job_suggest" type="text_suggest" indexed="true"
stored="true" multiValued="false" />
<field name="job_edge" type="text_suggest_edge" indexed="true"
stored="true" multiValued="false" />
<field name="job_ngram" type="text_suggest_ngram" indexed="true"
stored="true" multiValued="false" />
<field name="job_sort" type="alphaNumericSort" indexed="true"
stored="true" multiValued="false" />
<copyField source="full_name" dest="full_name_suggest" />
<copyField source="full_name" dest="full_name_edge" />
<copyField source="full_name" dest="full_name_ngram" />
<copyField source="full_name" dest="full_name_sort" />
<copyField source="job_tree" dest="job_suggest" />
<copyField source="job_tree" dest="job_edge" />
<copyField source="job_tree" dest="job_ngram" />
<copyField source="job_tree" dest="job_sort" />
</fields>
<uniqueKey>id</uniqueKey>
<types>
<fieldType name="string" class="solr.StrField"
sortMissingLast="true" />
<fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true" />
<fieldType name="int" class="solr.TrieIntField"
precisionStep="0" positionIncrementGap="0" />
<fieldType name="float" class="solr.TrieFloatField"
precisionStep="0" positionIncrementGap="0" />
<fieldType name="long" class="solr.TrieLongField"
precisionStep="0" positionIncrementGap="0" />
<fieldType name="double" class="solr.TrieDoubleField"
precisionStep="0" positionIncrementGap="0" />
<fieldType name="tint" class="solr.TrieIntField"
precisionStep="8" positionIncrementGap="0" />
<fieldType name="tfloat" class="solr.TrieFloatField"
precisionStep="8" positionIncrementGap="0" />
<fieldType name="tlong" class="solr.TrieLongField"
precisionStep="8" positionIncrementGap="0" />
<fieldType name="tdouble" class="solr.TrieDoubleField"
precisionStep="8" positionIncrementGap="0" />
<fieldType name="date" class="solr.TrieDateField"
precisionStep="0" positionIncrementGap="0" />
<fieldType name="tdate" class="solr.TrieDateField"
precisionStep="6" positionIncrementGap="0" />
<fieldtype name="binary" class="solr.BinaryField" />
<fieldType name="text_general_edge_ngram" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.LowerCaseTokenizerFactory" />
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2"
maxGramSize="15" side="front" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.LowerCaseTokenizerFactory" />
</analyzer>
</fieldType>
<fieldType name="txt_general" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldtype name="name_phonetic" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false" />
</analyzer>
</fieldtype>
<fieldType name="placetree" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="jobtree" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="companytree" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="text_suggest_ngram" class="solr.TextField">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="20"
minGramSize="1" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ
])"
replacement="" replace="all" />
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ
])"
replacement="" replace="all" />
<filter class="solr.PatternReplaceFilterFactory" pattern="^(.{20})(.*)?"
replacement="$1" replace="all" />
</analyzer>
</fieldType>
<fieldType name="alphaNumericSort" class="solr.TextField"
sortMissingLast="true" omitNorms="true">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.TrimFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory" pattern="^(a |the |les
|la |le |l'|de la |du |des )"
replacement="" replace="all" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z0-9])"
replacement="" replace="all" />
</analyzer>
</fieldType>
<fieldType name="text_suggest_edge" class="solr.TextField">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
<tokenizer class="solr.KeywordTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:-_])"
replacement=" " replace="all" />
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="30"
minGramSize="1" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ
])"
replacement="" replace="all" />
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
<tokenizer class="solr.KeywordTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:-_])"
replacement=" " replace="all" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ
])"
replacement="" replace="all" />
<filter class="solr.PatternReplaceFilterFactory" pattern="^(.{30})(.*)?"
replacement="$1" replace="all" />
</analyzer>
</fieldType>
<fieldType name="text_suggest" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"
splitOnNumerics="1" preserveOriginal="1" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ
])"
replacement=" " replace="all" />
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt" />
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"
splitOnNumerics="0" />
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ
])"
replacement=" " replace="all" />
</analyzer>
</fieldType>
</types>
</schema>
and this is my SolrConfig
<?xml version="1.0" encoding="UTF-8" ?>
<config>
<luceneMatchVersion>4.8</luceneMatchVersion>
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.StandardDirectoryFactory}" />
<dataDir>${solr.core0.data.dir:}</dataDir>
<schemaFactory class="ClassicIndexSchemaFactory" />
<updateHandler class="solr.DirectUpdateHandler2">
<updateLog>
<str name="dir">${solr.core0.data.dir:}</str>
</updateLog>
</updateHandler>
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
</lst>
</requestHandler>
<requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">10</int>
<str name="df">id</str>
</lst>
</requestHandler>
<requestHandler name="/suggest" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="defType">edismax</str>
<str name="rows">10</str>
<str name="fl">full_name,job_tree, company, city, state, country,
first_name, last_name, id</str>
<str name="qf">full_name_suggest^60 full_name_ngram^100.0 job_suggest^30
job_ngram^50.0 </str>
<str name="pf">full_name_edge^100.0 job_edge^50.0</str>
<str name="group">true</str>
<str name="group.field">full_name</str>
<str name="sort">full_name asc</str>
<str name="group.sort">full_name asc</str>
</lst>
</requestHandler>
<requestHandler name="/replication" class="solr.ReplicationHandler"
startup="lazy" />
<requestDispatcher handleSelect="true">
<requestParsers enableRemoteStreaming="false"
multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" />
</requestDispatcher>
<requestHandler name="standard" class="solr.StandardRequestHandler"
default="true" />
<requestHandler name="/analysis/field" startup="lazy"
class="solr.FieldAnalysisRequestHandler" />
<requestHandler name="/update" class="solr.UpdateRequestHandler" />
<requestHandler name="/admin/"
class="org.apache.solr.handler.admin.AdminHandlers" />
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>
</requestHandler>
<admin>
<defaultQuery>solr</defaultQuery>
</admin>
</config>
the following is the result for
(http://localhost:9090/solr/people/suggest?q=%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%82%A2%E3%83%8A%E3%83%AA%E3%82%B9%E3%83%88&wt=json&indent=true)
{
"responseHeader":{
"status":0,
"QTime":8,
"params":{
"indent":"true",
"q":"システムアナリスト",
"wt":"json"}},
"grouped":{
"full_name":{
"matches":2,
"groups":[{
"groupValue":"مسعود",
"doclist":{"numFound":1,"start":0,"docs":[
{
"job_tree":"رسام كاريكاتور",
"last_name":"النغش",
"state":"Amman",
"country":"Jordan",
"city":"Amman",
"id":"fa0a5f94-0497-49f6-9060-ec45c27c0d8e",
"company":"شركة الفنون المتطورة",
"full_name":"مسعود النغش",
"first_name":"مسعود "}]
}},
{
"groupValue":"ね",
"doclist":{"numFound":1,"start":0,"docs":[
{
"job_tree":"システムアナリスト",
"last_name":"シャン",
"state":"Tokyo",
"country":"Japan",
"city":"Tokyo",
"id":"4fdce27b-3a9b-4045-85f3-2d5087d97b50",
"company":"日立",
"full_name":"すね シャン",
"first_name":"すね"}]
}}]}}}
I dont now why it bring the Arabic text with it the result is the same if I
try to search for the Arabic. Any help from you will be highly appreciated.
--
View this message in context: http://lucene.472066.n3.nabble.com/Mutli-Lengual-Suggester-Solr-4-8-tp4173880.html
Sent from the Solr - User mailing list archive at Nabble.com.