You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/07/13 16:23:11 UTC

svn commit: r1690677 - in /jackrabbit/oak/branches/1.0: ./ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-lucene/src/test/java/org/apache/jackrabbit/...

Author: chetanm
Date: Mon Jul 13 14:23:10 2015
New Revision: 1690677

URL: http://svn.apache.org/r1690677
Log:
OAK-2892 - Speed up lucene indexing post migration by pre extracting the text content from binaries

Merge 1690637,1690650, 1690674

Added:
    jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ExtractedTextCache.java
      - copied unchanged from r1690637, jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ExtractedTextCache.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TextExtractionStatsMBean.java
      - copied unchanged from r1690637, jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TextExtractionStatsMBean.java
Modified:
    jackrabbit/oak/branches/1.0/   (props changed)
    jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java

Propchange: jackrabbit/oak/branches/1.0/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon Jul 13 14:23:10 2015
@@ -1,2 +1,2 @@
-/jackrabbit/oak/trunk:1584578,1584602,1584614,1584616,1584709,1584781,1584937,1585297,1585304-1585305,1585420,1585424,1585427,1585448,1585465,1585468,1585486,1585497,1585509,1585647,1585655-1585656,1585661,1585665-1585666,1585669-1585670,1585673,1585680,1585719,1585763,1585770,1585896,1585904,1585907,1585940,1585949,1585951,1585956,1585962-1585963,1586287,1586320,1586364,1586372,1586655,1586836,1587130,1587224,1587399,1587408,1587472,1587485,1587488,1587538,1587580,1587807,1588033,1588042,1588046,1588066,1588201,1589025,1589101,1589137,1589141,1589263,1589440,1589442,1589484,1589488,1589661,1589664,1589682,1589708,1589741,1589748,1589789,1589794,1589850,1589864,1590628,1590660,1590684,1590697,1590701,1590980,1590988,1591101,1591226,1591229,1591293,1591314,1591317,1591362,1591374,1591381,1591438,1591467,1591552,1591704,1591713,1591715,1591723,1591874,1592487,1592512,1592658,1592665,1592677,1592742,1592744,1592787,1592809,1592955,1593036,1593048,1593061,1593133,1593210-1593211,1593231
 ,1593245,1593250,1593294,1593304,1593317,1593342,1593554,1594158-1594164,1594166-1594167,1594169,1594237,1594800,1594808,1594835,1594888,1594987,1595147,1595457,1595856,1596241,1596474,1596534,1596844,1596889,1597569,1597795,1597854,1597860,1598292,1598302,1598352,1598369,1598595,1598631,1598696,1598732,1598797-1598798,1599160,1599299,1599332,1599416,1599434,1599671,1600088,1600935,1601309,1601388,1601578,1601649,1601676,1601757,1601768,1601814,1601833,1601838,1601853,1601878,1601888,1601922,1602156,1602170,1602174,1602179,1602183,1602201,1602207,1602227,1602256,1602261,1602342,1602796-1602797,1602800,1602809,1602853,1602872,1602914,1603155,1603307,1603401,1603441,1603748,1604166,1605030,1605036,1605038,1605292,1605447,1605526,1605670,1605725,1605831,1605852,1606077,1606079,1606087,1606638,1606641,1606644,1606708,1606711,1607031-1607032,1607077,1607127,1607141,1607152,1607185,1607196,1607331,1607362,1607366,1607392,1607526,1607557,1607664,1607737,1608560,1608731,1608783,1609064,1609
 081,1609165,1609214,1609488,1610489,1610592,1610603,1610634,1610658,1610664,1611021,1611041,1611270,1611275,1611277,1611313,1611332,1611584,1612560,1612825,1612848,1612892,1612899,1612993,1613018,1613041,1614032,1614265,1614272,1614344-1614345,1614384-1614385,1614397,1614405-1614406,1614574,1614591,1614593,1614596,1614604,1614689,1614807,1614835,1614891,1615417-1615418,1616182,1616236,1616463,1616719,1617417,1617451,1617463,1617711,1618158,1618613,1618624,1618709,1619222,1619411,1619695,1619800,1619808,1619815,1619823-1619824,1620512,1620581,1620585,1620634,1620898,1620905,1621115,1621123-1621124,1621168,1621192,1621201,1621706,1621962,1622197,1622201,1622207,1622250,1622479,1623364,1623766,1623827,1623949,1623969,1623973,1624216,1624317,1624551,1624559,1624973,1624993-1624994,1625025,1625036,1625158,1625224,1625237,1625299,1625348,1625620,1625916,1625962-1625963,1626021,1626053,1626163,1626168,1626175,1626191,1626265,1626770,1627047,1627052,1627228,1627346,1627470,1627473,1627479,1
 627503,1627586,1627590,1627715,1627731,1628180,1628198,1628262,1628447,1628608,1629688,1629840,1629858,1629917,1630055-1630057,1630156,1630299,1630338,1630773,1631283-1631284,1631333-1631334,1631617-1631619,1631630,1631699,1631704,1631711,1631967-1631969,1631986,1631990,1631999,1632002-1632003,1632017,1632258,1632264,1632270,1632293,1632303,1632592,1632605,1633315,1633389,1633559-1633560,1633562,1633567,1633571,1633598,1633608,1633641,1633687,1633697,1633768,1633783,1634505,1634513,1634774,1634779,1634781,1634792,1634803,1634814,1634816,1634838,1634841,1634852,1634864,1634896,1634898,1635044-1635045,1635060,1635077,1635089,1635102,1635108,1635178,1635218,1635387,1635435,1635518,1635563,1635586,1636336,1636348,1636505,1636585,1636799,1637368,1637382,1637413,1637651,1637815,1638779-1638783,1639260,1639577,1639622,1639963,1639966,1639973,1640134,1640143,1640523,1640555-1640556,1640694-1640695,1640715,1640722-1640723,1640728,1640863-1640872,1641340,1641346,1641350,1641352,1641541,164159
 6-1641599,1641601,1641662,1641671,1641695,1641771,1641802,1641811,1641950,1642031,1642056,1642119,1642285,1642648,1642667,1642954,1642959,1643111,1643178,1643186,1643204,1643287,1643767,1643774,1643982,1644016,1644106,1644366,1644383,1644397-1644398,1644407,1644479,1644547,1644552,1644554,1644588,1644645,1644650,1644654,1644689,1644750,1645421,1645424,1645459,1645585,1645611,1645637,1645646,1645660-1645663,1645888,1645901,1645948,1645966,1645970-1645971,1646014,1646164,1646174,1646469,1646684,1646687,1646726-1646728,1646766,1646795,1646981,1649743,1649803,1650015,1650239,1650529,1650797,1651323,1651382,1651643,1651652,1651730,1651988-1651989,1651996,1652024,1652035,1652058-1652059,1652075,1652127,1652158,1652467,1652965,1652971,1652992,1653207,1653446,1653463,1653484,1653572,1653579,1653591,1653804,1653809,1653813,1653848-1653850,1653882,1654116,1654174,1654743,1654756,1654778,1655028,1655049,1655054-1655055,1655086,1655237,1655248,1655996,1656019,1656027,1656033,1656303,1656394,165
 6400,1656425,1656427,1656453,1656628,1656678,1657128,1657132,1657163,1657188,1657265,1657511,1657766,1657804,1658470,1658977,1658983,1659285,1659483,1659527,1659550,1659578,1659765,1660100,1660154-1660155,1660383-1660384,1660409,1660426,1660676,1660870,1660872,1660897,1660903,1661069,1661122,1661146,1661158,1661226,1661630,1661643,1661645,1662313-1662315,1662323,1662381,1662450,1662456,1663241,1663275,1663288,1663448,1663526,1663528,1663565,1663578,1663666,1663705,1663730,1663753,1663854,1664038,1664184,1664228-1664229,1664231,1664381,1664569,1664947,1664987,1665184,1665257,1665271-1665272,1665274-1665275,1665436,1665604,1665634,1665758,1665835,1665892,1665897,1665910,1665918,1666100,1666102,1666177,1666218,1666220,1666351-1666352,1666381,1666384,1666426,1666491,1666787,1667062,1667184,1667293,1667462,1667498,1667502,1667573,1667590,1667696,1667782,1668160,1668275,1668624,1668641,1668645,1668649,1668665,1668671,1668683,1668688,1668845,1669072,1669096,1669135,1669337,1669361,1669579,
 1669680,1669989,1670030,1670693,1670705,1671489,1671512,1671773,1671787,1671795,1672055,1672277,1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673431,1673436,1673644,1673662-1673663,1673695,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674150,1674780,1675054,1675319,1675332,1675382,1675555,1675566,1676198,1676407,1676458,1676670,1676703,1677579,1677609,1677611,1677774,1677788,1677797,1677939,1677991,1678095-1678096,1678173,1678323,1678758,1678938,1678954,1679144,1679147,1679165,1679191,1679232,1679503,1679961,1680170,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174,1684376,1684442,1684561,1684570,1684618,1684836,1684868,1685023,1685370,1685552,1685589,1685840,1685999,1686097,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,16870
 53,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688090,1688349,1688421,1688436,1688453,1688622,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689774,1689810,1689828,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690634-1690635
+/jackrabbit/oak/trunk:1584578,1584602,1584614,1584616,1584709,1584781,1584937,1585297,1585304-1585305,1585420,1585424,1585427,1585448,1585465,1585468,1585486,1585497,1585509,1585647,1585655-1585656,1585661,1585665-1585666,1585669-1585670,1585673,1585680,1585719,1585763,1585770,1585896,1585904,1585907,1585940,1585949,1585951,1585956,1585962-1585963,1586287,1586320,1586364,1586372,1586655,1586836,1587130,1587224,1587399,1587408,1587472,1587485,1587488,1587538,1587580,1587807,1588033,1588042,1588046,1588066,1588201,1589025,1589101,1589137,1589141,1589263,1589440,1589442,1589484,1589488,1589661,1589664,1589682,1589708,1589741,1589748,1589789,1589794,1589850,1589864,1590628,1590660,1590684,1590697,1590701,1590980,1590988,1591101,1591226,1591229,1591293,1591314,1591317,1591362,1591374,1591381,1591438,1591467,1591552,1591704,1591713,1591715,1591723,1591874,1592487,1592512,1592658,1592665,1592677,1592742,1592744,1592787,1592809,1592955,1593036,1593048,1593061,1593133,1593210-1593211,1593231
 ,1593245,1593250,1593294,1593304,1593317,1593342,1593554,1594158-1594164,1594166-1594167,1594169,1594237,1594800,1594808,1594835,1594888,1594987,1595147,1595457,1595856,1596241,1596474,1596534,1596844,1596889,1597569,1597795,1597854,1597860,1598292,1598302,1598352,1598369,1598595,1598631,1598696,1598732,1598797-1598798,1599160,1599299,1599332,1599416,1599434,1599671,1600088,1600935,1601309,1601388,1601578,1601649,1601676,1601757,1601768,1601814,1601833,1601838,1601853,1601878,1601888,1601922,1602156,1602170,1602174,1602179,1602183,1602201,1602207,1602227,1602256,1602261,1602342,1602796-1602797,1602800,1602809,1602853,1602872,1602914,1603155,1603307,1603401,1603441,1603748,1604166,1605030,1605036,1605038,1605292,1605447,1605526,1605670,1605725,1605831,1605852,1606077,1606079,1606087,1606638,1606641,1606644,1606708,1606711,1607031-1607032,1607077,1607127,1607141,1607152,1607185,1607196,1607331,1607362,1607366,1607392,1607526,1607557,1607664,1607737,1608560,1608731,1608783,1609064,1609
 081,1609165,1609214,1609488,1610489,1610592,1610603,1610634,1610658,1610664,1611021,1611041,1611270,1611275,1611277,1611313,1611332,1611584,1612560,1612825,1612848,1612892,1612899,1612993,1613018,1613041,1614032,1614265,1614272,1614344-1614345,1614384-1614385,1614397,1614405-1614406,1614574,1614591,1614593,1614596,1614604,1614689,1614807,1614835,1614891,1615417-1615418,1616182,1616236,1616463,1616719,1617417,1617451,1617463,1617711,1618158,1618613,1618624,1618709,1619222,1619411,1619695,1619800,1619808,1619815,1619823-1619824,1620512,1620581,1620585,1620634,1620898,1620905,1621115,1621123-1621124,1621168,1621192,1621201,1621706,1621962,1622197,1622201,1622207,1622250,1622479,1623364,1623766,1623827,1623949,1623969,1623973,1624216,1624317,1624551,1624559,1624973,1624993-1624994,1625025,1625036,1625158,1625224,1625237,1625299,1625348,1625620,1625916,1625962-1625963,1626021,1626053,1626163,1626168,1626175,1626191,1626265,1626770,1627047,1627052,1627228,1627346,1627470,1627473,1627479,1
 627503,1627586,1627590,1627715,1627731,1628180,1628198,1628262,1628447,1628608,1629688,1629840,1629858,1629917,1630055-1630057,1630156,1630299,1630338,1630773,1631283-1631284,1631333-1631334,1631617-1631619,1631630,1631699,1631704,1631711,1631967-1631969,1631986,1631990,1631999,1632002-1632003,1632017,1632258,1632264,1632270,1632293,1632303,1632592,1632605,1633315,1633389,1633559-1633560,1633562,1633567,1633571,1633598,1633608,1633641,1633687,1633697,1633768,1633783,1634505,1634513,1634774,1634779,1634781,1634792,1634803,1634814,1634816,1634838,1634841,1634852,1634864,1634896,1634898,1635044-1635045,1635060,1635077,1635089,1635102,1635108,1635178,1635218,1635387,1635435,1635518,1635563,1635586,1636336,1636348,1636505,1636585,1636799,1637368,1637382,1637413,1637651,1637815,1638779-1638783,1639260,1639577,1639622,1639963,1639966,1639973,1640134,1640143,1640523,1640555-1640556,1640694-1640695,1640715,1640722-1640723,1640728,1640863-1640872,1641340,1641346,1641350,1641352,1641541,164159
 6-1641599,1641601,1641662,1641671,1641695,1641771,1641802,1641811,1641950,1642031,1642056,1642119,1642285,1642648,1642667,1642954,1642959,1643111,1643178,1643186,1643204,1643287,1643767,1643774,1643982,1644016,1644106,1644366,1644383,1644397-1644398,1644407,1644479,1644547,1644552,1644554,1644588,1644645,1644650,1644654,1644689,1644750,1645421,1645424,1645459,1645585,1645611,1645637,1645646,1645660-1645663,1645888,1645901,1645948,1645966,1645970-1645971,1646014,1646164,1646174,1646469,1646684,1646687,1646726-1646728,1646766,1646795,1646981,1649743,1649803,1650015,1650239,1650529,1650797,1651323,1651382,1651643,1651652,1651730,1651988-1651989,1651996,1652024,1652035,1652058-1652059,1652075,1652127,1652158,1652467,1652965,1652971,1652992,1653207,1653446,1653463,1653484,1653572,1653579,1653591,1653804,1653809,1653813,1653848-1653850,1653882,1654116,1654174,1654743,1654756,1654778,1655028,1655049,1655054-1655055,1655086,1655237,1655248,1655996,1656019,1656027,1656033,1656303,1656394,165
 6400,1656425,1656427,1656453,1656628,1656678,1657128,1657132,1657163,1657188,1657265,1657511,1657766,1657804,1658470,1658977,1658983,1659285,1659483,1659527,1659550,1659578,1659765,1660100,1660154-1660155,1660383-1660384,1660409,1660426,1660676,1660870,1660872,1660897,1660903,1661069,1661122,1661146,1661158,1661226,1661630,1661643,1661645,1662313-1662315,1662323,1662381,1662450,1662456,1663241,1663275,1663288,1663448,1663526,1663528,1663565,1663578,1663666,1663705,1663730,1663753,1663854,1664038,1664184,1664228-1664229,1664231,1664381,1664569,1664947,1664987,1665184,1665257,1665271-1665272,1665274-1665275,1665436,1665604,1665634,1665758,1665835,1665892,1665897,1665910,1665918,1666100,1666102,1666177,1666218,1666220,1666351-1666352,1666381,1666384,1666426,1666491,1666787,1667062,1667184,1667293,1667462,1667498,1667502,1667573,1667590,1667696,1667782,1668160,1668275,1668624,1668641,1668645,1668649,1668665,1668671,1668683,1668688,1668845,1669072,1669096,1669135,1669337,1669361,1669579,
 1669680,1669989,1670030,1670693,1670705,1671489,1671512,1671773,1671787,1671795,1672055,1672277,1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673431,1673436,1673644,1673662-1673663,1673695,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674150,1674780,1675054,1675319,1675332,1675382,1675555,1675566,1676198,1676407,1676458,1676670,1676703,1677579,1677609,1677611,1677774,1677788,1677797,1677939,1677991,1678095-1678096,1678173,1678323,1678758,1678938,1678954,1679144,1679147,1679165,1679191,1679232,1679503,1679961,1680170,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174,1684376,1684442,1684561,1684570,1684618,1684836,1684868,1685023,1685370,1685552,1685589,1685840,1685999,1686097,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,16870
 53,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688090,1688349,1688421,1688436,1688453,1688622,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689774,1689810,1689828,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690634-1690635,1690637,1690650,1690674
 /jackrabbit/trunk:1345480

Modified: jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java Mon Jul 13 14:23:10 2015
@@ -46,7 +46,7 @@ public class ExtractedText {
 
     public static final ExtractedText ERROR = new ExtractedText(ExtractionResult.ERROR);
 
-    public static final ExtractedText EMPTY = new ExtractedText(ExtractionResult.ERROR, "");
+    public static final ExtractedText EMPTY = new ExtractedText(ExtractionResult.EMPTY, "");
 
     private final ExtractionResult extractionResult;
     private final CharSequence extractedText;

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Mon Jul 13 14:23:10 2015
@@ -34,6 +34,8 @@ import org.apache.jackrabbit.oak.commons
 import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
 import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
 import org.apache.jackrabbit.oak.plugins.index.PathFilter;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
 import org.apache.jackrabbit.oak.plugins.index.lucene.Aggregate.Matcher;
 import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState;
 import org.apache.jackrabbit.oak.plugins.tree.ImmutableTree;
@@ -84,6 +86,7 @@ public class LuceneIndexEditor implement
 
     private static final Logger log =
             LoggerFactory.getLogger(LuceneIndexEditor.class);
+    static final String TEXT_EXTRACTION_ERROR = "TextExtractionError";
 
     private final LuceneIndexEditorContext context;
 
@@ -118,12 +121,14 @@ public class LuceneIndexEditor implement
     private final PathFilter.Result pathFilterResult;
 
     LuceneIndexEditor(NodeState root, NodeBuilder definition,
-        IndexUpdateCallback updateCallback,@Nullable IndexCopier indexCopier) throws CommitFailedException {
+                        IndexUpdateCallback updateCallback,
+                        @Nullable IndexCopier indexCopier,
+                        ExtractedTextCache extractedTextCache) throws CommitFailedException {
         this.parent = null;
         this.name = null;
         this.path = "/";
         this.context = new LuceneIndexEditorContext(root, definition,
-                updateCallback, indexCopier);
+                updateCallback, indexCopier, extractedTextCache);
         this.root = root;
         this.isDeleted = false;
         this.matcherState = MatcherState.NONE;
@@ -548,12 +553,16 @@ public class LuceneIndexEditor implement
         }
 
         for (Blob v : property.getValue(Type.BINARIES)) {
+            String value = parseStringValue(v, metadata, path, property.getName());
+            if (value == null){
+                continue;
+            }
+
             if (nodePath != null){
-                fields.add(newFulltextField(nodePath, parseStringValue(v, metadata, path)));
+                fields.add(newFulltextField(nodePath, value));
             } else {
-                fields.add(newFulltextField(parseStringValue(v, metadata, path)));
+                fields.add(newFulltextField(value));
             }
-
         }
         return fields;
     }
@@ -799,16 +808,24 @@ public class LuceneIndexEditor implement
         return context.getDefinition().getPathFilter().doFiler(concat(getPath(), childNodeName));
     }
 
-    private String parseStringValue(Blob v, Metadata metadata, String path) {
+    private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
+        String text = context.getExtractedTextCache().get(path, propertyName, v, context.isReindex());
+        if (text == null){
+            text = parseStringValue0(v, metadata, path);
+        }
+        return text;
+    }
+
+    private String parseStringValue0(Blob v, Metadata metadata, String path) {
         WriteOutContentHandler handler = new WriteOutContentHandler();
         long start = System.currentTimeMillis();
-        long size = 0;
+        long bytesRead = 0;
         try {
             CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
             try {
                 context.getParser().parse(stream, handler, metadata, new ParseContext());
             } finally {
-                size = stream.getCount();
+                bytesRead = stream.getCount();
                 stream.close();
             }
         } catch (LinkageError e) {
@@ -826,11 +843,15 @@ public class LuceneIndexEditor implement
                         + " worry about. The stack trace is included to"
                         + " help improve the text extraction feature.",
                         getIndexName(), path, t);
-                return "TextExtractionError";
+                context.getExtractedTextCache().put(v, ExtractedText.ERROR);
+                return TEXT_EXTRACTION_ERROR;
             }
         }
         String result = handler.toString();
-        context.recordTextExtractionStats(System.currentTimeMillis() - start, size);
+        if (bytesRead > 0) {
+            context.recordTextExtractionStats(System.currentTimeMillis() - start, bytesRead, result.length());
+        }
+        context.getExtractedTextCache().put(v,  new ExtractedText(ExtractionResult.SUCCESS, result));
         return result;
     }
 

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java Mon Jul 13 14:23:10 2015
@@ -130,19 +130,21 @@ public class LuceneIndexEditorContext {
 
     private final TextExtractionStats textExtractionStats = new TextExtractionStats();
 
+    private final ExtractedTextCache extractedTextCache;
     /**
      * The media types supported by the parser used.
      */
     private Set<MediaType> supportedMediaTypes;
 
     LuceneIndexEditorContext(NodeState root, NodeBuilder definition, IndexUpdateCallback updateCallback,
-                             @Nullable IndexCopier indexCopier) {
+                             @Nullable IndexCopier indexCopier, ExtractedTextCache extractedTextCache) {
         this.definitionBuilder = definition;
         this.indexCopier = indexCopier;
         this.definition = new IndexDefinition(root, definition);
         this.indexedNodes = 0;
         this.updateCallback = updateCallback;
 
+        this.extractedTextCache = extractedTextCache;
         if (this.definition.isOfOldFormat()){
             IndexDefinition.updateDefinition(definition);
         }
@@ -202,6 +204,7 @@ public class LuceneIndexEditorContext {
             PERF_LOGGER.end(start, -1, "Closed IndexWriter for directory {}", definition);
 
             textExtractionStats.log(reindex);
+            textExtractionStats.collectStats(extractedTextCache);
         }
     }
 
@@ -271,8 +274,22 @@ public class LuceneIndexEditorContext {
         return definition;
     }
 
-    public void recordTextExtractionStats(long timeInMillis, long size) {
-        textExtractionStats.addStats(timeInMillis, size);
+    @Deprecated
+    public void recordTextExtractionStats(long timeInMillis, long bytesRead) {
+        //Keeping deprecated method to avoid major version change
+        recordTextExtractionStats(timeInMillis, bytesRead, 0);
+    }
+
+    public void recordTextExtractionStats(long timeInMillis, long bytesRead, int textLength) {
+        textExtractionStats.addStats(timeInMillis, bytesRead, textLength);
+    }
+
+    ExtractedTextCache getExtractedTextCache() {
+        return extractedTextCache;
+    }
+
+    public boolean isReindex() {
+        return reindex;
     }
 
     private static Parser initializeTikaParser(IndexDefinition definition) {
@@ -331,13 +348,15 @@ public class LuceneIndexEditorContext {
          */
         private static final long LOGGING_THRESHOLD = TimeUnit.MINUTES.toMillis(2);
         private int count;
-        private long totalSize;
+        private long totalBytesRead;
         private long totalTime;
+        private long totalTextLength;
 
-        public void addStats(long timeInMillis, long size) {
+        public void addStats(long timeInMillis, long bytesRead, int textLength) {
             count++;
-            totalSize += size;
+            totalBytesRead += bytesRead;
             totalTime += timeInMillis;
+            totalTextLength += textLength;
         }
 
         public void log(boolean reindex) {
@@ -348,6 +367,10 @@ public class LuceneIndexEditorContext {
             }
         }
 
+        public void collectStats(ExtractedTextCache cache){
+            cache.addStats(count, totalTime, totalBytesRead, totalTextLength);
+        }
+
         private boolean isTakingLotsOfTime() {
             return totalTime > LOGGING_THRESHOLD;
         }
@@ -358,8 +381,8 @@ public class LuceneIndexEditorContext {
 
         @Override
         public String toString() {
-            return String.format(" %d (%s, %s)", count,
-                    timeInWords(totalTime), humanReadableByteCount(totalSize));
+            return String.format(" %d (Time Taken %s, Bytes Read %s, Extracted text size %d)", count,
+                    timeInWords(totalTime), humanReadableByteCount(totalBytesRead), totalTextLength);
         }
 
         private static String timeInWords(long millis) {

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java Mon Jul 13 14:23:10 2015
@@ -38,13 +38,20 @@ import org.apache.jackrabbit.oak.spi.sta
  */
 public class LuceneIndexEditorProvider implements IndexEditorProvider {
     private final IndexCopier indexCopier;
+    private final ExtractedTextCache extractedTextCache;
 
     public LuceneIndexEditorProvider() {
         this(null);
     }
 
     public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier) {
+        this(indexCopier, new ExtractedTextCache());
+    }
+
+    public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier,
+                                     ExtractedTextCache extractedTextCache) {
         this.indexCopier = indexCopier;
+        this.extractedTextCache = extractedTextCache;
     }
 
     @Override
@@ -53,7 +60,7 @@ public class LuceneIndexEditorProvider i
             @Nonnull IndexUpdateCallback callback)
             throws CommitFailedException {
         if (TYPE_LUCENE.equals(type)) {
-            return new LuceneIndexEditor(root, definition, callback, indexCopier);
+            return new LuceneIndexEditor(root, definition, callback, indexCopier, extractedTextCache);
         }
         return null;
     }
@@ -61,4 +68,8 @@ public class LuceneIndexEditorProvider i
     IndexCopier getIndexCopier() {
         return indexCopier;
     }
+
+    ExtractedTextCache getExtractedTextCache() {
+        return extractedTextCache;
+    }
 }

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java Mon Jul 13 14:23:10 2015
@@ -48,6 +48,7 @@ import org.apache.jackrabbit.oak.commons
 import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
 import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
 import org.apache.jackrabbit.oak.spi.commit.BackgroundObserver;
 import org.apache.jackrabbit.oak.spi.commit.BackgroundObserverMBean;
 import org.apache.jackrabbit.oak.spi.commit.Observer;
@@ -85,7 +86,7 @@ public class LuceneIndexProviderService
             boolValue = false,
             label = "Enable Debug Logging",
             description = "Enables debug logging in Lucene. After enabling this actual logging can be " +
-            "controlled via changing log level for category 'oak.lucene' to debug")
+                    "controlled via changing log level for category 'oak.lucene' to debug")
     private static final String PROP_DEBUG = "debug";
 
     @Property(
@@ -137,6 +138,13 @@ public class LuceneIndexProviderService
 
     private BackgroundObserver backgroundObserver;
 
+    @Reference(policy = ReferencePolicy.DYNAMIC,
+            cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE,
+            policyOption = ReferencePolicyOption.GREEDY
+    )
+    private volatile PreExtractedTextProvider extractedTextProvider;
+
+
     private IndexCopier indexCopier;
 
     private File indexDir;
@@ -145,6 +153,8 @@ public class LuceneIndexProviderService
 
     private int threadPoolSize;
 
+    private ExtractedTextCache extractedTextCache = new ExtractedTextCache();
+
     @Activate
     private void activate(BundleContext bundleContext, Map<String, ?> config)
             throws NotCompliantMBeanException, IOException {
@@ -223,12 +233,17 @@ public class LuceneIndexProviderService
         LuceneIndexEditorProvider editorProvider;
         if (enableCopyOnWrite){
             initializeIndexCopier(bundleContext, config);
-            editorProvider = new LuceneIndexEditorProvider(indexCopier);
+            editorProvider = new LuceneIndexEditorProvider(indexCopier, extractedTextCache);
             log.info("Enabling CopyOnWrite support. Index files would be copied under {}", indexDir.getAbsolutePath());
         } else {
-            editorProvider = new LuceneIndexEditorProvider();
+            editorProvider = new LuceneIndexEditorProvider(null, extractedTextCache);
         }
         regs.add(bundleContext.registerService(IndexEditorProvider.class.getName(), editorProvider, null));
+        oakRegs.add(registerMBean(whiteboard,
+                TextExtractionStatsMBean.class,
+                editorProvider.getExtractedTextCache().getStatsMBean(),
+                TextExtractionStatsMBean.TYPE,
+                "TextExtraction statistics"));
     }
 
     private IndexTracker createTracker(BundleContext bundleContext, Map<String, ?> config) throws IOException {
@@ -325,6 +340,17 @@ public class LuceneIndexProviderService
         regs.add(bundleContext.registerService(Observer.class.getName(), observer, null));
     }
 
+    private void registerExtractedTextProvider(PreExtractedTextProvider provider){
+        if (extractedTextCache != null){
+            if (provider != null){
+                log.info("Registering PreExtractedTextProvider {} with extracted text cache", provider);
+            } else {
+                log.info("Unregistering PreExtractedTextProvider with extracted text cache");
+            }
+            extractedTextCache.setExtractedTextProvider(provider);
+        }
+    }
+
     protected void bindNodeAggregator(NodeAggregator aggregator) {
         this.nodeAggregator = aggregator;
         initialize();
@@ -334,4 +360,14 @@ public class LuceneIndexProviderService
         this.nodeAggregator = null;
         initialize();
     }
+
+    protected void bindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider){
+        this.extractedTextProvider = preExtractedTextProvider;
+        registerExtractedTextProvider(preExtractedTextProvider);
+    }
+
+    protected void unbindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider){
+        this.extractedTextProvider = null;
+        registerExtractedTextProvider(null);
+    }
 }

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java Mon Jul 13 14:23:10 2015
@@ -19,10 +19,14 @@
 
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
 import org.apache.jackrabbit.oak.spi.commit.BackgroundObserver;
 import org.apache.jackrabbit.oak.spi.commit.Observer;
 import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
@@ -126,9 +130,31 @@ public class LuceneIndexProviderServiceT
         MockOsgi.deactivate(service);
     }
 
+    @Test
+    public void preExtractedTextProvider() throws Exception{
+        MockOsgi.activate(service, context.bundleContext(), getDefaultConfig());
+        LuceneIndexEditorProvider editorProvider =
+                (LuceneIndexEditorProvider) context.getService(IndexEditorProvider.class);
+        assertNull(editorProvider.getExtractedTextCache().getExtractedTextProvider());
+
+        //Mock OSGi does not support components
+        //context.registerService(PreExtractedTextProvider.class, new DummyProvider());
+        service.bindExtractedTextProvider(new DummyProvider());
+
+        assertNotNull(editorProvider.getExtractedTextCache().getExtractedTextProvider());
+    }
+
     private Map<String,Object> getDefaultConfig(){
         Map<String,Object> config = new HashMap<String, Object>();
         config.put("localIndexDir", folder.getRoot().getAbsolutePath());
         return config;
     }
+
+    private static class DummyProvider implements PreExtractedTextProvider {
+
+        @Override
+        public ExtractedText getText(String propertyPath, Blob blob) throws IOException {
+            return null;
+        }
+    }
 }

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1690677&r1=1690676&r2=1690677&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Mon Jul 13 14:23:10 2015
@@ -25,6 +25,7 @@ import java.text.ParseException;
 import java.util.Calendar;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
@@ -51,6 +52,9 @@ import org.apache.jackrabbit.oak.api.Res
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
 import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
 import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
@@ -92,6 +96,7 @@ import static org.hamcrest.CoreMatchers.
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
 import static org.junit.matchers.JUnitMatchers.containsString;
 
 public class LucenePropertyIndexTest extends AbstractQueryTest {
@@ -105,6 +110,8 @@ public class LucenePropertyIndexTest ext
     @Rule
     public TemporaryFolder temporaryFolder = new TemporaryFolder();
 
+    private LuceneIndexEditorProvider editorProvider;
+
     @Override
     protected void createTestIndexNode() throws Exception {
         setTraversalEnabled(false);
@@ -112,13 +119,14 @@ public class LucenePropertyIndexTest ext
 
     @Override
     protected ContentRepository createRepository() {
+        editorProvider = new LuceneIndexEditorProvider(createIndexCopier());
         LuceneIndexProvider provider = new LuceneIndexProvider();
         return new Oak()
                 .with(new InitialContent())
                 .with(new OpenSecurityProvider())
                 .with((QueryIndexProvider) provider)
                 .with((Observer) provider)
-                .with(new LuceneIndexEditorProvider(createIndexCopier()))
+                .with(editorProvider)
                 .with(new PropertyIndexEditorProvider())
                 .with(new NodeTypeIndexProvider())
                 .createContentRepository();
@@ -205,8 +213,8 @@ public class LucenePropertyIndexTest ext
 
         Tree indexWithType = createIndex("test2", of("propa"));
         indexWithType.setProperty(PropertyStates
-            .createProperty(DECLARING_NODE_TYPES, of("nt:unstructured"),
-                Type.STRINGS));
+                .createProperty(DECLARING_NODE_TYPES, of("nt:unstructured"),
+                        Type.STRINGS));
 
         Tree test = root.getTree("/").addChild("test");
         test.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
@@ -237,8 +245,8 @@ public class LucenePropertyIndexTest ext
     public void declaringNodeTypeSingleIndex() throws Exception {
         Tree indexWithType = createIndex("test2", of("propa", "propb"));
         indexWithType.setProperty(PropertyStates
-            .createProperty(DECLARING_NODE_TYPES, of("nt:unstructured"),
-                Type.STRINGS));
+                .createProperty(DECLARING_NODE_TYPES, of("nt:unstructured"),
+                        Type.STRINGS));
 
         Tree test = root.getTree("/").addChild("test");
         test.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
@@ -257,7 +265,7 @@ public class LucenePropertyIndexTest ext
         root.commit();
 
         String propabQuery = "select [jcr:path] from [nt:unstructured] where [propb] = 'baz' and " +
-            "[propa] = 'foo'";
+                "[propa] = 'foo'";
         assertThat(explain(propabQuery), containsString("lucene:test2"));
         assertQuery(propabQuery, asList("/test/a", "/test/b"));
 
@@ -600,11 +608,11 @@ public class LucenePropertyIndexTest ext
         root.commit();
 
         assertQuery("select [jcr:path] from [nt:base] where propa like 'hum%'",
-            asList("/test/a", "/test/c"));
+                asList("/test/a", "/test/c"));
         assertQuery("select [jcr:path] from [nt:base] where propa like '%ty'",
-            asList("/test/a", "/test/b"));
+                asList("/test/a", "/test/b"));
         assertQuery("select [jcr:path] from [nt:base] where propa like '%ump%'",
-            asList("/test/a", "/test/b", "/test/c"));
+                asList("/test/a", "/test/b", "/test/c"));
     }
 
     @Test
@@ -834,10 +842,10 @@ public class LucenePropertyIndexTest ext
         root.commit();
 
         assertOrderedQuery("select [jcr:path] from [nt:base] where [bar] = 'baz' order by [foo]", Lists
-            .newArrayList(Iterables.concat(Lists.newArrayList("/test/a"), getSortedPaths(tuples, OrderDirection.ASC))));
+                .newArrayList(Iterables.concat(Lists.newArrayList("/test/a"), getSortedPaths(tuples, OrderDirection.ASC))));
         assertOrderedQuery("select [jcr:path] from [nt:base] where [bar] = 'baz' order by [foo] DESC", Lists
-            .newArrayList(Iterables.concat(getSortedPaths(tuples, OrderDirection.DESC), Lists.newArrayList("/test/a")
-            )));
+                .newArrayList(Iterables.concat(getSortedPaths(tuples, OrderDirection.DESC), Lists.newArrayList("/test/a")
+                )));
     }
 
     void assertSortedString() throws CommitFailedException {
@@ -893,7 +901,7 @@ public class LucenePropertyIndexTest ext
         root.commit();
 
         assertOrderedQuery("select [jcr:path] from [nt:base] where [bar] = 'baz' order by [foo]",
-            getSortedPaths(tuples, OrderDirection.ASC));
+                getSortedPaths(tuples, OrderDirection.ASC));
         assertOrderedQuery(
                 "select [jcr:path] from [nt:base] where [bar] = 'baz' order by [foo] DESC",
                 getSortedPaths(tuples, OrderDirection.DESC));
@@ -925,8 +933,8 @@ public class LucenePropertyIndexTest ext
 
         // Add the path of property added as timestamp string in the sorted list
         assertOrderedQuery("select [jcr:path] from [nt:base] where [bar] = 'baz' order by [foo]",
-            Lists.newArrayList(Iterables.concat(Lists.newArrayList("/test/n0"),
-                getSortedPaths(tuples, OrderDirection.ASC))));
+                Lists.newArrayList(Iterables.concat(Lists.newArrayList("/test/n0"),
+                        getSortedPaths(tuples, OrderDirection.ASC))));
         // Append the path of property added as timestamp string to the sorted list
         assertOrderedQuery(
                 "select [jcr:path] from [nt:base] where [bar] = 'baz' order by [foo] DESC", Lists
@@ -1010,12 +1018,12 @@ public class LucenePropertyIndexTest ext
 
         // Descending matches with lucene native sort
         String query =
-            "measure select [jcr:path] from [nt:base] where [propa] = 'foo' order by [jcr:score] desc";
+                "measure select [jcr:path] from [nt:base] where [propa] = 'foo' order by [jcr:score] desc";
         assertThat(measureWithLimit(query, SQL2, 1), containsString("scanCount: 1"));
 
         // Ascending needs to be sorted by query engine
         query =
-            "measure select [jcr:path] from [nt:base] where [propa] = 'foo' order by [jcr:score]";
+                "measure select [jcr:path] from [nt:base] where [propa] = 'foo' order by [jcr:score]";
         assertThat(measureWithLimit(query, SQL2, 1), containsString("scanCount: 3"));
     }
 
@@ -1088,8 +1096,8 @@ public class LucenePropertyIndexTest ext
 
     private String measureWithLimit(String query, String lang, int limit) throws ParseException {
         List<? extends ResultRow> result = Lists.newArrayList(
-            qe.executeQuery(query, lang, limit, 0, Maps.<String, PropertyValue>newHashMap(),
-                NO_MAPPINGS).getRows());
+                qe.executeQuery(query, lang, limit, 0, Maps.<String, PropertyValue>newHashMap(),
+                        NO_MAPPINGS).getRows());
 
         String measure = "";
         if (result.size() > 0) {
@@ -1181,6 +1189,41 @@ public class LucenePropertyIndexTest ext
     }
 
     @Test
+    public void preExtractedTextProvider() throws Exception{
+        Tree idx = createFulltextIndex(root.getTree("/"), "test");
+        TestUtil.useV2(idx);
+        root.commit();
+
+        AccessStateProvidingBlob testBlob =
+                new AccessStateProvidingBlob("fox is jumping", "id1");
+
+        MapBasedProvider textProvider = new MapBasedProvider();
+        textProvider.write("id1","lion");
+        editorProvider.getExtractedTextCache().setExtractedTextProvider(textProvider);
+
+        Tree test = root.getTree("/").addChild("test");
+        createFileNode(test, "text", testBlob, "text/plain");
+        root.commit();
+
+        //As its not a reindex case actual blob content would be accessed
+        assertTrue(testBlob.isStreamAccessed());
+        assertQuery("select * from [nt:base] where CONTAINS(*, 'fox ')", asList("/test/text/jcr:content"));
+        assertEquals(0, textProvider.accessCount);
+
+        testBlob.resetState();
+
+        //Lets trigger a reindex
+        root.getTree(idx.getPath()).setProperty(IndexConstants.REINDEX_PROPERTY_NAME, true);
+        root.commit();
+
+        //Now the content should be provided by the PreExtractedTextProvider
+        //and instead of fox its lion!
+        assertFalse(testBlob.isStreamAccessed());
+        assertQuery("select * from [nt:base] where CONTAINS(*, 'lion ')", asList("/test/text/jcr:content"));
+        assertEquals(1, textProvider.accessCount);
+    }
+
+    @Test
     public void relativePropertyAndCursor() throws Exception{
         // Index Definition
         Tree idx = createIndex("test1", of("propa", "propb"));
@@ -1423,6 +1466,7 @@ public class LucenePropertyIndexTest ext
 
     private static class AccessStateProvidingBlob extends ArrayBasedBlob {
         private CountingInputStream stream;
+        private String id;
 
         public AccessStateProvidingBlob(byte[] value) {
             super(value);
@@ -1432,6 +1476,11 @@ public class LucenePropertyIndexTest ext
             this(content.getBytes(Charsets.UTF_8));
         }
 
+        public AccessStateProvidingBlob(String content, String id) {
+            this(content.getBytes(Charsets.UTF_8));
+            this.id = id;
+        }
+
         @Nonnull
         @Override
         public InputStream getNewStream() {
@@ -1453,5 +1502,32 @@ public class LucenePropertyIndexTest ext
             }
             return stream.getCount();
         }
+
+        @Override
+        public String getContentIdentity() {
+            return id;
+        }
+    }
+
+    private static class MapBasedProvider implements PreExtractedTextProvider {
+        final Map<String, ExtractedText> idMap = Maps.newHashMap();
+        int accessCount = 0;
+
+        @Override
+        public ExtractedText getText(String propertyPath, Blob blob) throws IOException {
+            ExtractedText result = idMap.get(blob.getContentIdentity());
+            if (result != null){
+                accessCount++;
+            }
+            return result;
+        }
+
+        public void write(String id, String text){
+            idMap.put(id, new ExtractedText(ExtractionResult.SUCCESS, text));
+        }
+
+        public void reset(){
+            accessCount = 0;
+        }
     }
 }