You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2012/01/25 15:18:07 UTC

svn commit: r1235753 - in /lucene/dev/trunk: dev-tools/eclipse/ dev-tools/maven/ solr/ solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/ solr/contrib/extraction/ solr/contrib/extraction/lib/ solr/contrib/extraction/src/...

Author: janhoy
Date: Wed Jan 25 14:18:06 2012
New Revision: 1235753

URL: http://svn.apache.org/viewvc?rev=1235753&view=rev
Log:
SOLR-2901: Upgrade Solr to Tika 1.0

Added:
    lucene/dev/trunk/solr/contrib/extraction/lib/commons-compress-1.3.jar   (with props)
    lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-1.0.jar   (with props)
    lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-1.0.jar   (with props)
Removed:
    lucene/dev/trunk/solr/contrib/extraction/lib/commons-compress-1.2.jar
    lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-0.10.jar
    lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-0.10.jar
Modified:
    lucene/dev/trunk/dev-tools/eclipse/dot.classpath
    lucene/dev/trunk/dev-tools/maven/pom.xml.template
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
    lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
    lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt
    lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
    lucene/dev/trunk/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java
    lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java

Modified: lucene/dev/trunk/dev-tools/eclipse/dot.classpath
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/eclipse/dot.classpath?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/eclipse/dot.classpath (original)
+++ lucene/dev/trunk/dev-tools/eclipse/dot.classpath Wed Jan 25 14:18:06 2012
@@ -96,7 +96,7 @@
 	<classpathentry kind="lib" path="modules/analysis/morfologik/lib/morfologik-stemming-1.5.2.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-beanutils-1.7.0.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-collections-3.1.jar"/>
-	<classpathentry kind="lib" path="modules/benchmark/lib/commons-compress-1.2.jar"/>
+	<classpathentry kind="lib" path="modules/benchmark/lib/commons-compress-1.3.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-digester-1.7.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-logging-1.0.4.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
@@ -149,8 +149,8 @@
 	<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8-beta4.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/extraction/lib/rome-0.9.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/extraction/lib/tagsoup-1.2.1.jar"/>
-	<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-0.10.jar"/>
-	<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-0.10.jar"/>
+	<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.0.jar"/>
+	<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.0.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-r111.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.0.jar"/>

Modified: lucene/dev/trunk/dev-tools/maven/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/maven/pom.xml.template?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/maven/pom.xml.template (original)
+++ lucene/dev/trunk/dev-tools/maven/pom.xml.template Wed Jan 25 14:18:06 2012
@@ -45,7 +45,7 @@
     <jetty.version>6.1.26</jetty.version>
     <patched.jetty.version>6.1.26-patched-JETTY-1340</patched.jetty.version>
     <slf4j.version>1.6.1</slf4j.version>
-    <tika.version>0.10</tika.version>
+    <tika.version>1.0</tika.version>
   </properties>
   <issueManagement>
     <system>JIRA</system>

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jan 25 14:18:06 2012
@@ -24,7 +24,7 @@ $Id$
 ==================  4.0.0-dev ==================
 Versions of Major Components
 ---------------------
-Apache Tika 0.10
+Apache Tika 1.0
 Carrot2 3.5.0
 Velocity 1.6.4 and Velocity Tools 2.0
 Apache UIMA 2.3.1
@@ -556,6 +556,8 @@ Other Changes
 * SOLR-2718: Add ability to lazy load response writers, defined with startup="lazy".
   (ehatcher)
 
+* SOLR-2901: Upgrade Solr to Tika 1.0 (janhoy)
+
 Build
 ----------------------
 * SOLR-2487: Add build target to package war without slf4j jars (janhoy)

Modified: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java Wed Jan 25 14:18:06 2012
@@ -18,8 +18,8 @@ package org.apache.solr.handler.dataimpo
 
 import com.sun.mail.imap.IMAPMessage;
 
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.utils.ParseUtils;
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -95,6 +95,8 @@ public class MailEntityProcessor extends
               getStringFromContext("processAttachment",null) == null ? "processAttachement":"processAttachment"
             , true);
 
+    tika = new Tika();
+    
     logConfig();
   }
 
@@ -166,7 +168,10 @@ public class MailEntityProcessor extends
       if (!processAttachment || (disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT)))        return;
       InputStream is = part.getInputStream();
       String fileName = part.getFileName();
-      String content = ParseUtils.getStringContent(is, TikaConfig.getDefaultConfig(), ctype.getBaseType().toLowerCase(Locale.ENGLISH));
+      Metadata md = new Metadata();
+      md.set(Metadata.CONTENT_TYPE, ctype.getBaseType().toLowerCase(Locale.ENGLISH));
+      md.set(Metadata.RESOURCE_NAME_KEY, fileName);
+      String content = tika.parseToString(is, md);
       if (disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT)) {
         if (row.get(ATTACHMENT) == null)
           row.put(ATTACHMENT, new ArrayList<String>());
@@ -529,6 +534,8 @@ public class MailEntityProcessor extends
 
   private boolean processAttachment = true;
 
+  private Tika tika;
+  
   // holds the current state
   private Store mailbox;
   private boolean connected = false;

Modified: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Wed Jan 25 14:18:06 2012
@@ -118,9 +118,7 @@ public class TikaEntityProcessor extends
     }
     Parser tikaParser = null;
     if(parser.equals(AUTO_PARSER)){
-      AutoDetectParser parser = new AutoDetectParser();
-      parser.setConfig(tikaConfig);
-      tikaParser = parser;
+      tikaParser = new AutoDetectParser(tikaConfig);
     } else {
       tikaParser = (Parser) context.getSolrCore().getResourceLoader().newInstance(parser);
     }

Modified: lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt Wed Jan 25 14:18:06 2012
@@ -20,7 +20,7 @@ to your Solr Home lib directory.  See ht
 Tika Dependency
 ---------------
 
-Current Version: Tika 0.10 (released 2011-09-30)
+Current Version: Tika 1.0 (released 2011-11-07)
 
 $Id$
 
@@ -34,6 +34,8 @@ $Id$
   This is convenient when Tika's auto detector cannot detect encoding, especially
   the text file is too short to detect encoding. (koji)
 
+* SOLR-2901: Upgrade Solr to Tika 1.0 (janhoy)
+
 ================== Release 3.5.0 ==================
 
 * SOLR-2372: Upgrade Solr to Tika 0.10 (janhoy)

Added: lucene/dev/trunk/solr/contrib/extraction/lib/commons-compress-1.3.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/lib/commons-compress-1.3.jar?rev=1235753&view=auto
==============================================================================
Binary file - no diff available.

Added: lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-1.0.jar?rev=1235753&view=auto
==============================================================================
Binary file - no diff available.

Added: lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-1.0.jar?rev=1235753&view=auto
==============================================================================
Binary file - no diff available.

Modified: lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java Wed Jan 25 14:18:06 2012
@@ -39,6 +39,7 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -138,7 +139,7 @@ public class ExtractingDocumentLoader ex
     if (streamType != null) {
       //Cache?  Parsers are lightweight to construct and thread-safe, so I'm told
       MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ENGLISH));
-      parser = config.getParser(mt);
+      parser = new DefaultParser(config.getMediaTypeRegistry()).getParsers().get(mt);
     } else {
       parser = autoDetectParser;
     }
@@ -151,6 +152,10 @@ public class ExtractingDocumentLoader ex
       if (resourceName != null) {
         metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
       }
+      // Provide stream's content type as hint for auto detection
+      if(stream.getContentType() != null) {
+        metadata.add(Metadata.CONTENT_TYPE, stream.getContentType());
+      }
 
       InputStream inputStream = null;
       try {

Modified: lucene/dev/trunk/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java Wed Jan 25 14:18:06 2012
@@ -18,7 +18,6 @@ package org.apache.solr.handler.extracti
 
 import java.util.ArrayList;
 import java.util.List;
-
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.ContentStream;
@@ -419,7 +418,33 @@ public class ExtractingRequestHandlerTes
     assertU(commit());
     assertQ(req("*:*"), "//result[@numFound=1]");
   }
+  
+  @Test
+  public void testWrongStreamType() throws Exception {
+    ExtractingRequestHandler handler = (ExtractingRequestHandler) h.getCore().getRequestHandler("/update/extract");
+    assertTrue("handler is null and it shouldn't be", handler != null);
+
+    try{
+      // Load plain text specifying another mime type, should fail
+      loadLocal("extraction/version_control.txt", 
+              "literal.id", "one",
+              ExtractingParams.STREAM_TYPE, "application/pdf"
+      );
+      fail("SolrException is expected because wrong parser specified for the file type");
+    }
+    catch(Exception expected){}
 
+    try{
+      // Load plain text specifying non existing mimetype, should fail
+      loadLocal("extraction/version_control.txt", 
+              "literal.id", "one",
+              ExtractingParams.STREAM_TYPE, "foo/bar"
+      );
+      fail("SolrException is expected because nonexsisting parser specified");
+    }
+    catch(Exception expected){}
+  }
+  
   SolrQueryResponse loadLocal(String filename, String... args) throws Exception {
     LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);
     try {

Modified: lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java?rev=1235753&r1=1235752&r2=1235753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java Wed Jan 25 14:18:06 2012
@@ -67,7 +67,7 @@ public abstract class LanguageIdentifier
     assertLang("no", "id", "1no", "name", "Lucene", "subject", "Lucene er et fri/åpen kildekode programvarebibliotek for informasjonsgjenfinning, opprinnelig utviklet i programmeringsspråket Java av Doug Cutting. Lucene støttes av Apache Software Foundation og utgis under Apache-lisensen.");
     assertLang("en", "id", "2en", "name", "Lucene", "subject", "Apache Lucene is a free/open source information retrieval software library, originally created in Java by Doug Cutting. It is supported by the Apache Software Foundation and is released under the Apache Software License.");
     assertLang("sv", "id", "3sv", "name", "Maven", "subject", "Apache Maven är ett verktyg utvecklat av Apache Software Foundation och används inom systemutveckling av datorprogram i programspråket Java. Maven används för att automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven används inom samma område som Apache Ant men dess byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade.");
-    assertLang("es", "id", "4es", "name", "Lucene", "subject", "Lucene es un API de código abierto para recuperación de información, originalmente implementada en Java por Doug Cutting. Está apoyado por el Apache Software Foundation y se distribuye bajo la Apache Software License. Lucene tiene versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, Ruby y PHP.");
+    assertLang("es", "id", "4es", "name", "Español", "subject", "El español, como las otras lenguas romances, es una continuación moderna del latín hablado (denominado latín vulgar), desde el siglo III, que tras el desmembramiento del Imperio romano fue divergiendo de las otras variantes del latín que se hablaban en las distintas provincias del antiguo Imperio, dando lugar mediante una lenta evolución a las distintas lenguas romances. Debido a su propagación por América, el español es, con diferencia, la lengua romance que ha logrado mayor difusión.");
     assertLang("un", "id", "5un", "name", "a", "subject", "b");
     assertLang("th", "id", "6th", "name", "บทความคัดสรรเดือนนี้", "subject", "อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ ระหว่างที่ถูกเยอรมนีเข้าครอบครองในช่วงสงครามโลกครั้งที่สอง");
     assertLang("ru", "id", "7ru", "name", "Lucene", "subject", "The Apache Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, написанная на Java. Может быть использована для поиска в интернете и других областях компьютерной лингвистики (аналитическая философия).");
@@ -76,7 +76,17 @@ public abstract class LanguageIdentifier
     assertLang("nl", "id", "10nl", "name", "Lucene", "subject", "Lucene is een gratis open source, tekst gebaseerde information retrieval API van origine geschreven in Java door Doug Cutting. Het wordt ondersteund door de Apache Software Foundation en is vrijgegeven onder de Apache Software Licentie. Lucene is ook beschikbaar in andere programeertalen zoals Perl, C#, C++, Python, Ruby en PHP.");
     assertLang("it", "id", "11it", "name", "Lucene", "subject", "Lucene è una API gratuita ed open source per il reperimento di informazioni inizialmente implementata in Java da Doug Cutting. È supportata dall'Apache Software Foundation ed è resa disponibile con l'Apache License. Lucene è stata successivamente reimplementata in Perl, C#, C++, Python, Ruby e PHP.");
     assertLang("pt", "id", "12pt", "name", "Lucene", "subject", "Apache Lucene, ou simplesmente Lucene, é um software de busca e uma API de indexação de documentos, escrito na linguagem de programação Java. É um software de código aberto da Apache Software Foundation licenciado através da licença Apache.");
+    // New in Tika1.0
+    assertLang("ca", "id", "13ca", "name", "Catalan", "subject", "El català posseeix dos estàndards principals: el regulat per l'Institut d'Estudis Catalans, o estàndard general, que pren com a base l'ortografia establerta per Pompeu Fabra amb els trets gramaticals i ortogràfics característics del català central; i el regulat per l'Acadèmia Valenciana de la Llengua, estàndard d'àmbit restringit, centrat en l'estandardització del valencià i que pren com a base les Normes de Castelló, és a dir, l'ortografia de Pompeu Fabra però més adaptada a la pronúncia del català occidental i als trets que caracteritzen els dialectes valencians.");
+    assertLang("be", "id", "14be", "name", "Belarusian", "subject", "Наступнай буйной дзяржавай на беларускай зямлі было Вялікае княства Літоўскае, Рускае і Жамойцкае (ВКЛ). Падчас стварэння і пачатковага развіцця гэтай дзяржавы найбуйнейшым і асноўным яе цэнтрам быў Новагародак. Акрамя сучасных земляў Беларусі, у склад гэтай дзяржавы ўваходзілі таксама землі сучаснай Літвы, паўночная частка сучаснай Украіны і частка сучаснай Расіі.");
+    assertLang("eo", "id", "15eo", "name", "Esperanto", "subject", "La vortprovizo de Esperanto devenas plejparte el la okcidenteŭropaj lingvoj, dum ĝia sintakso kaj morfologio montras ankaŭ slavlingvan influon. La morfemoj ne ŝanĝiĝas kaj oni povas ilin preskaŭ senlime kombini, kreante diverssignifajn vortojn, Esperanto do havas multajn kunaĵojn kun la analizaj lingvoj, al kiuj apartenas ekzemple la ĉina; kontraŭe la interna strukturo de Esperanto certagrade respegulas la aglutinajn lingvojn, kiel la japanan, svahilan aŭ turkan.");
+    assertLang("gl", "id", "16gl", "name", "Galician", "subject", "A cifra de falantes medrou axiña durante as décadas seguintes, nun principio no Imperio ruso e na Europa oriental, logo na Europa occidental, América, China e no Xapón. Nos primeiros anos do movemento, os esperantistas mantiñan contacto por correspondencia, pero en 1905 o primeiro Congreso Universal de Esperanto levouse a cabo na cidade francesa de Boulogne-sur-Mer. Dende entón, os congresos mundiais organizáronse nos cinco continentes ano tras ano agás durante as dúas Guerras Mundiais.");
+    assertLang("ro", "id", "17ro", "name", "Romanian", "subject", "La momentul destrămării Uniunii Sovietice și a înlăturării regimului comunist instalat în România (1989), țara a inițiat o serie de reforme economice și politice. După un deceniu de probleme economice, România a introdus noi reforme economice de ordin general (precum cota unică de impozitare, în 2005) și a aderat la Uniunea Europeană la 1 ianuarie 2007.");
+    assertLang("sk", "id", "18sk", "name", "Slovakian", "subject", "Boli vytvorené dva národné parlamenty - Česká národná rada a Slovenská národná rada a spoločný jednokomorový česko-slovenský parlament bol premenovaný z Národného zhromaždenia na Federálne zhromaždenie s dvoma komorami - Snemovňou ľudu a Snemovňu národov.");
+    assertLang("sl", "id", "19sl", "name", "Slovenian", "subject", "Slovenska Wikipedija je različica spletne enciklopedije Wikipedije v slovenskem jeziku. Projekt slovenske Wikipedije se je začel 26. februarja 2002 z ustanovitvijo njene spletne strani, njen pobudnik pa je bil uporabnik Jani Melik.");
+    assertLang("uk", "id", "20uk", "name", "Ukrainian", "subject", "Народно-господарський комплекс країни включає такі види промисловості як важке машинобудування, чорна та кольорова металургія, суднобудування, виробництво автобусів, легкових та вантажних автомобілів, тракторів та іншої сільськогосподарської техніки, тепловозів, верстатів, турбін, авіаційних двигунів та літаків, обладнання для електростанцій, нафто-газової та хімічної промисловості тощо. Крім того, Україна є потужним виробником електроенергії. Україна має розвинуте сільське господарство і займає одне з провідних місць серед експортерів деяких видів сільськогосподарської продукції і продовольства (зокрема, соняшникової олії).");
   }
+    
   
   @Test
   public void testMapFieldName() throws Exception {