You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by Sumit Sen <su...@yahoo.com> on 2012/01/24 22:14:25 UTC

Fw: Problem with SpliBy in Solr 3.4



----- Forwarded Message -----
From: Sumit Sen <su...@yahoo.com>
To: Solr List <so...@lucene.apache.org> 
Sent: Tuesday, January 24, 2012 3:53 PM
Subject: Problem with SpliBy in Solr 3.4


Hi All:

I have a very silly problem. I am using Solr 3.4. I have a data import handler for indexing which is not splitting a field's data by '|' in spite of the following setup.

    <document>
  <entity dataSource="ds-1" name="associate" pk="id" 
    transformer="RegexTransformer" 
    query="Select case  when EMPLID != ' ' then EMPLID END as ID   ,
       case  when FIRST_NAME   != ' ' then FIRST_NAME  END as firstName,
       case  when MIDDLE_NAME  != ' ' then MIDDLE_NAME END as middleName,
       case  when LAST_NAME   != ' ' then LAST_NAME END as familyName,
       case  when FORMER_NAME != ' ' then FORMER_NAME END as middleName,
       case  when EMAIL_ADDRESS  != ' ' then EMAIL_ADDRESS END as businessEmail,
       case  when CITY != ' ' then CITY END as homeCity,
       case  when STATE != ' ' then STATE END  as homeCState,
       case  when ZIP != ' ' then ZIP END         as homeZip,
       case  when COUNTRY_ISO  != ' ' then COUNTRY_ISO END as homeCountry,
       case  when WORK_PHONE  != ' ' then WORK_PHONE END as businessTel,
       (select xlatlongname
       from xlattable
      where fieldname = 'PER_STATUS' and fieldvalue = t1.per_status
       and language_cd = 'ENG') as PER_STATUS,
       case when ORIG_HIRE_DT IS NOT NULL then ORIG_HIRE_DT END as hireDate,
       (select xlatlongname
       from xlattable
      where fieldname = 'SEX' and fieldvalue = t1.sex
       and language_cd = 'ENG') as sex,
       (select xlatlongname
       from xlattable
      where fieldname = 'ETHNIC_GROUP' and fieldvalue = t1.ethnic_group
       and language_cd = 'ENG')           as ethnicityCode,
       case when CITZNS_CNTRY_ISO != ' ' then  CITZNS_CNTRY_ISO END      as citizenship,
       (select xlatlongname
       from xlattable
      where fieldname = 'MAR_STATUS' and fieldvalue = t1.mar_status
       and language_cd = 'ENG')           as marritalStatus,
       case when PREFERRED_LANGUAGE  != ' ' then PREFERRED_LANGUAGE END as primaryLanguageCode,
       case when BUSINESS_TITLE != ' ' then BUSINESS_TITLE END as businessTitle,
       case when TITLE != ' ' then TITLE END     as title,
       case when JOBCODE != ' ' then JOBCODE END            ,
       (select xlatlongname
       from xlattable
      where fieldname = 'EMPL_STATUS' and fieldvalue = t1.empl_status
       and language_cd = 'ENG')           as workLevelStatus,
       case when LOCATION  != ' ' then LOCATION END         ,
       case when CITY_EMPL  != ' ' then CITY_EMPL END       ,
       case when STATE_EMPL  != ' ' then STATE_EMPL END    ,
       case when  COUNTRY_2CHAR  != ' ' then COUNTRY_2CHAR END          ,
       case when ZIP_INTL != ' ' then ZIP_INTL END         ,
      (select xlatlongname
       from xlattable
      where fieldname = 'EMPL_TYPE' and fieldvalue = t1.empl_type
       and language_cd = 'ENG')           as employmenttype,
       case when HOME_DEPARTMENT != ' ' then HOME_DEPARTMENT END   as DEPARTMENT,
       (Select case when name != ' ' then name end from ps_personal_data where employee_oid = t1.REPORTS_TO_AOID) as reportsTo,
       case when t1.ROLE_CODE1 != ' ' then   t1.ROLE_CODE1 end ||'|'||
       case when t1.ROLE_CODE2 != ' ' then  t1.ROLE_CODE2 end  ||'|'|| 
       case when t1.ROLE_CODE3 != ' ' then t1.ROLE_CODE3  end   ||'|'|| 
       case when t1.EE_ROLE_CODE1 != ' ' then t1.EE_ROLE_CODE1 end ||'|'||
       case when t1.EE_ROLE_CODE2 != ' ' then t1.EE_ROLE_CODE2 end ||'|'||
       case when t1.EE_ROLE_CODE3 != ' ' then t1.EE_ROLE_CODE3 end ||'|'|| 
       case when t1.EE_ROLE_CODE4 != ' ' then t1.EE_ROLE_CODE4 end ||'|'||
       case when t1.EE_ROLE_CODE5 != ' ' then t1.EE_ROLE_CODE5 end ||'|'||
       case when t1.EE_ROLE_CODE6 != ' ' then t1.EE_ROLE_CODE6 end as roleCode
   From     PS_BOD_EE_VW t1 where t1.per_status = 'A'">
   <field column = "id" />
   <field column = "title" />
   <field column = "firstName" />
   <field column = "middleName" />
   <field column = "familyName" />
   <field column = "maidenName" />
   <field column = "primaryLanguageCode" />
    ...
    ...
   <field column = "education" />
   <field column = "roleCode" splitBy = "\|" name="roleCode" />
      <field column = "applicationDate" />
    ...
    ...
   <field column = "securityLevel" />
   </entity>
    </document>
</dataConfig>

In schema.xml I have:

   <field name="id" type="string" indexed="true" stored="true" required="true" />
   <field name="title" type="string" indexed="true" stored="true" required="false" />
   <field name="firstName" type="string" indexed="true" stored="true" required="false" />
   <field name="middleName" type="string" indexed="true" stored="true" required="false" />
   <field name="familyName" type="string" indexed="true" stored="true" required="false" />
   <field name="maidenName" type="string" indexed="true" stored="true" required="false" />
   <field name="sex" type="string" indexed="true" stored="true" required="false" />
   <field name="businessTitle" type="string" indexed="true" stored="true" required="false" />
   <field name="workLevelStatus" type="string" indexed="true" stored="true" required="false" />
   <field name="education" type="string" indexed="true" stored="true" required="false" />
   <field name="roleCode" type="text" indexed="true" stored="true" required="false" multiValued="true" />
<field name="homeCity" type="string" indexed="true" stored="true" required="false" />
   <field name="homeState" type="string" indexed="true" stored="true" required="false" />
   <field name="homeZip" type="string" indexed="true" stored="true" required="false" />
   <field name="homeCountry" type="string" indexed="true" stored="true" required="false" />
   <field name="workAddress" type="string" indexed="true" stored="true" required="false" />
   <field name="workCity" type="string" indexed="true" stored="true" required="false" />
   <field name="workState" type="string" indexed="true" stored="true" required="false" />
   <field name="workZip" type="string" indexed="true" stored="true" required="false" />
   <field name="workCountry" type="string" indexed="true" stored="true" required="false" />

Results:
<doc>
 <str name="aoid">G3FRBV113TQ4WV4Y</str>
 <str name="citizenship">US</str>
 <str name="employmenttype">Salaried</str>
 <str name="ethnicityCode">White</str>
 <str name="familyName">last</str>
 <str name="firstName">first</str>
 <str name="fullparttime">Full-Time</str>
 <date name="hireDate">2012-02-11T05:00:00Z</date>
 <str name="homeCity">A</str>
 <str name="homeCountry">US</str>
 <str name="homeZip">30004</str>
 <str name="id">052641</str>
 <str name="marritalStatus">Single</str>
 <str name="payGroup">AX9</str>
 <int name="popularity">0</int>
 <str name="primaryLanguageCode">E</str>
 <str name="reportsTo">Bennett,Chad D</str>
<arr name="roleCode">
 <str>Reporting Manager|||Employee|||||</str>
 </arr>
 <str name="sex">Male</str>
 </doc>
<doc>
 <str name="aoid">G3FRBV113TQ4955G</str>
 <str name="citizenship">US</str>
 <str name="department">124214</str>
 <str name="employmenttype">Hourly</str>
 <str name="ethnicityCode">White</str>
 <str name="familyName">last</str>
 <str name="firstName">first</str>
 <str name="fullparttime">Full-Time</str>
 <date name="hireDate">2012-02-11T05:00:00Z</date>
 <str name="homeCity">Atlanta</str>
 <str name="homeCountry">US</str>
 <str name="homeZip">30004</str>
 <str name="id">052643</str>
 <str name="marritalStatus">Single</str>
 <str name="payGroup">22D</str>
 <int name="popularity">0</int>
 <str name="primaryLanguageCode">E</str>
 <str name="reportsTo">Bell,Derrick</str>
<arr name="roleCode">
 <str>Reporting Manager|||Employee|||||</str>
 </arr>
 <str name="sex">Male</str>
</doc>
Why is the RegexTransformer not splitting the roleCode field?
 
Thanks
Sumit Sen