You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@jena.apache.org by Mikael Pesonen <mi...@lingsoft.fi> on 2020/09/29 14:13:23 UTC
java.lang.StringIndexOutOfBoundsException with Jena Text
Hi
I'm building a new text index with the following command and getting a Java error.
/usr/bin/java -cp ./fuseki-server.jar jena.textindexer
--desc=fuseki_config.ttl
After the command I get 4 files in /home/text/tools/jena_text_index/
_0.fdt
_0.fdx
segments_1
write.lock
Any idea what could cause this?
Error is:
java.lang.StringIndexOutOfBoundsException: String index out of range: 59
at java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
at java.base/java.lang.String.charAt(String.java:711)
at org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
at
org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
at
org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
at
org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
at
org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
at
org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
at
org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
at
org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
at
org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
at
org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
at
org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
at jena.textindexer.exec(textindexer.java:130)
at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
at jena.textindexer.main(textindexer.java:52)
mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
/usr/bin/java -cp ./fuseki-server.jar jena.textindexer
--desc=fuseki_config.ttl
java.lang.StringIndexOutOfBoundsException: String index out of range: 59
at java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
at java.base/java.lang.String.charAt(String.java:711)
at org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
at
org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
at
org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
at
org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
at
org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
at
org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
at
org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
at
org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
at
org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
at
org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
at
org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
at jena.textindexer.exec(textindexer.java:130)
at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
at jena.textindexer.main(textindexer.java:52)
config:
@prefix :<http://localhost/jena_example/#> .
@prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
@prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
@prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
@prefix text:<http://jena.apache.org/text#> .
@prefix skos:<http://www.w3.org/2004/02/skos/core#> .
@prefix fuseki:<http://jena.apache.org/fuseki#> .
@prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
## Example of a TDB dataset and text index
## Initialize TDB
[] ja:loadClass "org.apache.jena.tdb.TDB" .
tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
tdb:GraphTDB rdfs:subClassOf ja:Model .
## Initialize text query
[] ja:loadClass "org.apache.jena.query.text.TextQuery" .
# A TextDataset is a regular dataset with a text index.
text:TextDataset rdfs:subClassOf ja:RDFDataset .
# Lucene index
text:TextIndexLucene rdfs:subClassOf text:TextIndex .
## ---------------------------------------------------------------
# build: java -cp ./fuseki-server.jar jena.textindexer --desc=fuseki_config.ttl
:text_dataset rdf:type text:TextDataset ;
text:dataset :my_dataset ;
text:index <#indexLucene> ;
.
# A TDB dataset used for RDF storage
:my_dataset rdf:type tdb:DatasetTDB ;
tdb:location "/home/text/tools/jena_data/" ;
# tdb:unionDefaultGraph true ; # Optional
.
# Text index description
<#indexLucene> a text:TextIndexLucene ;
text:directory <file:/home/text/tools/jena_text_index/> ;
text:entityMap <#entMap> ;
text:storeValues true ;
text:analyzer [ a text:StandardAnalyzer ] ;
text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
text:queryParser text:AnalyzingQueryParser ;
text:multilingualSupport true ;
.
<#entMap> a text:EntityMap ;
text:defaultField "vcard_fn" ;
text:entityField "uri" ;
text:uidField "uid" ;
text:langField "lang" ;
text:graphField "graph" ;
text:map (
[ text:field "vcard_fn" ; text:predicate vcard:fn ]
[ text:field "altLabel" ; text:predicate skos:altLabel ]
) .
<#service> rdf:type fuseki:Service ;
fuseki:name "/ds" ; # http://host:port/ds-ro
fuseki:serviceQuery "query" ; # SPARQL query service
fuseki:serviceQuery "sparql" ; # SPARQL query service
fuseki:serviceUpdate "update" ; # SPARQL update service
fuseki:serviceUpload "upload" ; # Non-SPARQL upload service
fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph store protocol (read and write)
fuseki:dataset :text_dataset ;
.
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
Added some new fields to index and rebuilt it and now everything works.
Not sure what happened...
On 1.10.2020 0:32, Andy Seaborne wrote:
>
>>> On startup Jena now says
>>>
>>> 2020-09-30 16:47:48,396 main ERROR Reconfiguration failed: No
>>> configuration found for '5bc2b487' at 'null' in 'null'
>>>
>>> if that is somehow related.
>>
>> looks likely.
>
> That message is not coming from Jena (the string "Reconfiguration
> failed" isn't in the codebase)
>
> Andy
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
Newly inserted data is found; only old data that existed before the index
was created cannot be found.
On 1.10.2020 0:32, Andy Seaborne wrote:
>
>>> On startup Jena now says
>>>
>>> 2020-09-30 16:47:48,396 main ERROR Reconfiguration failed: No
>>> configuration found for '5bc2b487' at 'null' in 'null'
>>>
>>> if that is somehow related.
>>
>> looks likely.
>
> That message is not coming from Jena (the string "Reconfiguration
> failed" isn't in the codebase)
>
> Andy
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Andy Seaborne <an...@apache.org>.
>> On startup Jena now says
>>
>> 2020-09-30 16:47:48,396 main ERROR Reconfiguration failed: No
>> configuration found for '5bc2b487' at 'null' in 'null'
>>
>> if that is somehow related.
>
> looks likely.
That message is not coming from Jena (the string "Reconfiguration
failed" isn't in the codebase)
Andy
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
On 30.9.2020 22:53, Andy Seaborne wrote:
>
> On 30/09/2020 15:12, Mikael Pesonen wrote:
>>
>> Okay got the index done:
>>
>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>> --desc=fuseki_config.ttl
>> 16:51:57 INFO textindexer :: 159657 (15965 per second)properties
>> indexed (15965 per second overall)
>> 16:52:07 INFO textindexer :: 349257 (18960 per second)properties
>> indexed (17462 per second overall)
>> 16:52:17 INFO textindexer :: 539238 (18998 per second)properties
>> indexed (17974 per second overall)
>> 16:52:27 INFO textindexer :: 708454 (16921 per second)properties
>> indexed (17711 per second overall)
>> 16:52:37 INFO textindexer :: 888469 (18001 per second)properties
>> indexed (17769 per second overall)
>> 16:52:46 INFO textindexer :: 928952 (15744 per second)
>> properties indexed
>>
>> but I'm getting no results. Tried (with data that should return matches)
>>
>> (?s ?score ?content) text:query (vcard:fn "Some Person" )
>> and
>> ?s text:query "something" .
>>
>>
>> On startup Jena now says
>>
>> 2020-09-30 16:47:48,396 main ERROR Reconfiguration failed: No
>> configuration found for '5bc2b487' at 'null' in 'null'
>>
>> if that is somehow related.
>
> looks likely.
>
> Earlier you showed:
>
> select * where
> {
> graph ?g {
> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
> }
> }
>
> so it may also be a named graph
Entire query is:
SELECT * WHERE
{
GRAPH ?g
{
(?s ?score ?content) text:query (vcard:fn "Some Person" )
# or
# ?s text:query "something" .
}
}
The error occurs like this:
/usr/bin/java
-Dlog4j.configuration=file:/home/text/tools/apache-jena-fuseki-3.16.0/log4j.properties
-Xmx3000M -jar fuseki-server.jar --update --port 3030 --config
/home/text/tools/apache-jena-fuseki-3.16.0/fuseki_config.ttl
2020-10-01 11:57:48,819 main ERROR Reconfiguration failed: No
configuration found for '5bc2b487' at 'null' in 'null'
11:57:49 INFO Server :: Apache Jena Fuseki 3.16.0
11:57:49 INFO Config ::
FUSEKI_HOME=/home/text/tools/apache-jena-fuseki-3.16.0/.
11:57:49 INFO Config ::
FUSEKI_BASE=/home/text/tools/apache-jena-fuseki-3.16.0/run
11:57:49 INFO Config :: Shiro file:
file:///home/text/tools/apache-jena-fuseki-3.16.0/run/shiro.ini
11:57:49 INFO Config :: Configuration file:
/home/text/tools/apache-jena-fuseki-3.16.0/fuseki_config.ttl
11:57:50 INFO Server :: Path = /ds
11:57:50 INFO Server :: System
11:57:50 INFO Server :: Memory: 2.9 GiB
11:57:50 INFO Server :: Java: 14.0.1
11:57:50 INFO Server :: OS: Linux 4.4.0-174-generic amd64
11:57:50 INFO Server :: PID: 12616
11:57:50 INFO Server :: Started 2020/10/01 11:57:50 EEST on
port 3030
> Andy
>
>>
>>
>>
>> On 30.9.2020 15:18, Andy Seaborne wrote:
>>> https://issues.apache.org/jira/browse/JENA-1890 and 1892
>>>
>>> are fixed in 3.16.0
>>>
>>> It's a decode error - the TDB database is intact.
>>>
>>> On 30/09/2020 12:31, Mikael Pesonen wrote:
>>>>
>>>> I figured out the regexp. Seems that we have external data having
>>>> non Ascii URLs that can't be altered. Is there any workaround, for
>>>> example adding text index to selected graphs only?
>>>>
>>>> On 30.9.2020 13:57, Mikael Pesonen wrote:
>>>>>
>>>>> Ah, thanks. Is it possible to find such URIs with a SPARQL query?
>>>>> SPARQL seems not to support \x -notation
>>>>>
>>>>> select * where
>>>>> {
>>>>> graph ?g {
>>>>> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
>>>>> }
>>>>> }
>>>>>
>>>>>
>>>>>
>>>>> On 30.9.2020 13:29, Andy Seaborne wrote:
>>>>>> In the data (probably in a URI) - it's reading the database.
>>>>>>
>>>>>> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>>>>>>
>>>>>>> I couldn't find any non Ascii characters in the config file
>>>>>>> ([^\x00-\x7F]+)...
>>>>>>>
>>>>>>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>>>>>>> Looks like
>>>>>>>>
>>>>>>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>>>>>>
>>>>>>>> Andy
>>>>>>>>
>>>>>>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>>>>>>
>>>>>>>>> Hi
>>>>>>>>>
>>>>>>>>> I'm building a new text index with following command and
>>>>>>>>> getting java error.
>>>>>>>>>
>>>>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>>>>> --desc=fuseki_config.ttl
>>>>>>>>>
>>>>>>>>> After the command I get 4 files in
>>>>>>>>> /home/text/tools/jena_text_index/
>>>>>>>>>
>>>>>>>>> _0.fdt
>>>>>>>>> _0.fdx
>>>>>>>>> segments_1
>>>>>>>>> write.lock
>>>>>>>>>
>>>>>>>>> Any idea what could cause this?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Error is:
>>>>>>>>>
>>>>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>>>>> range: 59
>>>>>>>>> at
>>>>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>>>>
>>>>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>>>>> --desc=fuseki_config.ttl
>>>>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>>>>> range: 59
>>>>>>>>> at
>>>>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>>>>
>>>>>>>>> at
>>>>>>>>> org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>>>>> at
>>>>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>>>>
>>>>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> config:
>>>>>>>>>
>>>>>>>>> @prefix :<http://localhost/jena_example/#> .
>>>>>>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>>>>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>>>>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>>>>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>>>>>>> @prefix text:<http://jena.apache.org/text#> .
>>>>>>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>>>>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>>>>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>>>>>>
>>>>>>>>> ## Example of a TDB dataset and text index
>>>>>>>>> ## Initialize TDB
>>>>>>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>>>>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>>>>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>>>>>>
>>>>>>>>> ## Initialize text query
>>>>>>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>>>>>>> # A TextDataset is a regular dataset with a text index.
>>>>>>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>>>>>>> # Lucene index
>>>>>>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> ##
>>>>>>>>> ---------------------------------------------------------------
>>>>>>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>>>>>>> --desc=fuseki_config.ttl
>>>>>>>>>
>>>>>>>>> :text_dataset rdf:type text:TextDataset ;
>>>>>>>>> text:dataset :my_dataset ;
>>>>>>>>> text:index <#indexLucene> ;
>>>>>>>>> .
>>>>>>>>>
>>>>>>>>> # A TDB dataset used for RDF storage
>>>>>>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>>>>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>>>>>>> # tdb:unionDefaultGraph true ; # Optional
>>>>>>>>> .
>>>>>>>>>
>>>>>>>>> # Text index description
>>>>>>>>> <#indexLucene> a text:TextIndexLucene ;
>>>>>>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>>>>>>> text:entityMap <#entMap> ;
>>>>>>>>> text:storeValues true ;
>>>>>>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>>>>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>>>>>>> text:queryParser text:AnalyzingQueryParser ;
>>>>>>>>> text:multilingualSupport true ;
>>>>>>>>> .
>>>>>>>>>
>>>>>>>>> <#entMap> a text:EntityMap ;
>>>>>>>>> text:defaultField "vcard_fn" ;
>>>>>>>>> text:entityField "uri" ;
>>>>>>>>> text:uidField "uid" ;
>>>>>>>>> text:langField "lang" ;
>>>>>>>>> text:graphField "graph" ;
>>>>>>>>> text:map (
>>>>>>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>>>>>>> [ text:field "altLabel" ; text:predicate
>>>>>>>>> skos:altLabel ]
>>>>>>>>> ) .
>>>>>>>>>
>>>>>>>>> <#service> rdf:type fuseki:Service ;
>>>>>>>>> fuseki:name "/ds" ; #
>>>>>>>>> http://host:port/ds-ro
>>>>>>>>> fuseki:serviceQuery "query" ; # SPARQL query
>>>>>>>>> service
>>>>>>>>> fuseki:serviceQuery "sparql" ; # SPARQL
>>>>>>>>> query service
>>>>>>>>> fuseki:serviceUpdate "update" ; # SPARQL
>>>>>>>>> update service
>>>>>>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>>>>>>> upload service
>>>>>>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL
>>>>>>>>> Graph store protocol (read and write)
>>>>>>>>> fuseki:dataset :text_dataset ;
>>>>>>>>> .
>>>>>>>>>
>>>>>>>
>>>>>
>>>>
>>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Andy Seaborne <an...@apache.org>.
On 30/09/2020 15:12, Mikael Pesonen wrote:
>
> Okay got the index done:
>
> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
> --desc=fuseki_config.ttl
> 16:51:57 INFO textindexer :: 159657 (15965 per second)properties
> indexed (15965 per second overall)
> 16:52:07 INFO textindexer :: 349257 (18960 per second)properties
> indexed (17462 per second overall)
> 16:52:17 INFO textindexer :: 539238 (18998 per second)properties
> indexed (17974 per second overall)
> 16:52:27 INFO textindexer :: 708454 (16921 per second)properties
> indexed (17711 per second overall)
> 16:52:37 INFO textindexer :: 888469 (18001 per second)properties
> indexed (17769 per second overall)
> 16:52:46 INFO textindexer :: 928952 (15744 per second) properties
> indexed
>
> but I'm getting no results. Tried (with data that should return matches)
>
> (?s ?score ?content) text:query (vcard:fn "Some Person" )
> and
> ?s text:query "something" .
>
>
> On startup Jena now says
>
> 2020-09-30 16:47:48,396 main ERROR Reconfiguration failed: No
> configuration found for '5bc2b487' at 'null' in 'null'
>
> if that is somehow related.
looks likely.
Earlier you showed:
select * where
{
graph ?g {
?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
}
}
so it may also be a named graph
Andy
>
>
>
> On 30.9.2020 15:18, Andy Seaborne wrote:
>> https://issues.apache.org/jira/browse/JENA-1890 and 1892
>>
>> are fixed in 3.16.0
>>
>> It's a decode error - the TDB database is intact.
>>
>> On 30/09/2020 12:31, Mikael Pesonen wrote:
>>>
>>> I figured out the regexp. Seems that we have external data having non
>>> Ascii URLs that can't be altered. Is there any workaround, for
>>> example adding text index to selected graphs only?
>>>
>>> On 30.9.2020 13:57, Mikael Pesonen wrote:
>>>>
>>>> Ah, thanks. Is it possible to find such URis with SPARQL query?
>>>> SPARQL seems not to support \x -notation
>>>>
>>>> select * where
>>>> {
>>>> graph ?g {
>>>> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
>>>> }
>>>> }
>>>>
>>>>
>>>>
>>>> On 30.9.2020 13:29, Andy Seaborne wrote:
>>>>> In the data (probably in a URI) - it's reading the database.
>>>>>
>>>>> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>>>>>
>>>>>> I couldn't find any non Ascii characters in the config file
>>>>>> ([^\x00-\x7F]+)...
>>>>>>
>>>>>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>>>>>> Looks like
>>>>>>>
>>>>>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>>>>>
>>>>>>> Andy
>>>>>>>
>>>>>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>>>>>
>>>>>>>> Hi
>>>>>>>>
>>>>>>>> I'm building a new text index with following command and getting
>>>>>>>> java error.
>>>>>>>>
>>>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>>>> --desc=fuseki_config.ttl
>>>>>>>>
>>>>>>>> After the command I get 4 files in
>>>>>>>> /home/text/tools/jena_text_index/
>>>>>>>>
>>>>>>>> _0.fdt
>>>>>>>> _0.fdx
>>>>>>>> segments_1
>>>>>>>> write.lock
>>>>>>>>
>>>>>>>> Any idea what could cause this?
>>>>>>>>
>>>>>>>>
>>>>>>>> Error is:
>>>>>>>>
>>>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>>>> range: 59
>>>>>>>> at
>>>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>>>> at
>>>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>>>> at
>>>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>>>
>>>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>>>> --desc=fuseki_config.ttl
>>>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>>>> range: 59
>>>>>>>> at
>>>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>>>> at
>>>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>>>> at
>>>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>>>
>>>>>>>> at
>>>>>>>> org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>>>> at
>>>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>>>
>>>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>>>
>>>>>>>>
>>>>>>>> config:
>>>>>>>>
>>>>>>>> @prefix :<http://localhost/jena_example/#> .
>>>>>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>>>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>>>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>>>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>>>>>> @prefix text:<http://jena.apache.org/text#> .
>>>>>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>>>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>>>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>>>>>
>>>>>>>> ## Example of a TDB dataset and text index
>>>>>>>> ## Initialize TDB
>>>>>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>>>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>>>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>>>>>
>>>>>>>> ## Initialize text query
>>>>>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>>>>>> # A TextDataset is a regular dataset with a text index.
>>>>>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>>>>>> # Lucene index
>>>>>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>>>>>
>>>>>>>>
>>>>>>>> ## ---------------------------------------------------------------
>>>>>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>>>>>> --desc=fuseki_config.ttl
>>>>>>>>
>>>>>>>> :text_dataset rdf:type text:TextDataset ;
>>>>>>>> text:dataset :my_dataset ;
>>>>>>>> text:index <#indexLucene> ;
>>>>>>>> .
>>>>>>>>
>>>>>>>> # A TDB dataset used for RDF storage
>>>>>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>>>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>>>>>> # tdb:unionDefaultGraph true ; # Optional
>>>>>>>> .
>>>>>>>>
>>>>>>>> # Text index description
>>>>>>>> <#indexLucene> a text:TextIndexLucene ;
>>>>>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>>>>>> text:entityMap <#entMap> ;
>>>>>>>> text:storeValues true ;
>>>>>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>>>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>>>>>> text:queryParser text:AnalyzingQueryParser ;
>>>>>>>> text:multilingualSupport true ;
>>>>>>>> .
>>>>>>>>
>>>>>>>> <#entMap> a text:EntityMap ;
>>>>>>>> text:defaultField "vcard_fn" ;
>>>>>>>> text:entityField "uri" ;
>>>>>>>> text:uidField "uid" ;
>>>>>>>> text:langField "lang" ;
>>>>>>>> text:graphField "graph" ;
>>>>>>>> text:map (
>>>>>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>>>>>> [ text:field "altLabel" ; text:predicate
>>>>>>>> skos:altLabel ]
>>>>>>>> ) .
>>>>>>>>
>>>>>>>> <#service> rdf:type fuseki:Service ;
>>>>>>>> fuseki:name "/ds" ; #
>>>>>>>> http://host:port/ds-ro
>>>>>>>> fuseki:serviceQuery "query" ; # SPARQL
>>>>>>>> query service
>>>>>>>> fuseki:serviceQuery "sparql" ; # SPARQL
>>>>>>>> query service
>>>>>>>> fuseki:serviceUpdate "update" ; # SPARQL
>>>>>>>> update service
>>>>>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>>>>>> upload service
>>>>>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL
>>>>>>>> Graph store protocol (read and write)
>>>>>>>> fuseki:dataset :text_dataset ;
>>>>>>>> .
>>>>>>>>
>>>>>>
>>>>
>>>
>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
Okay got the index done:
/usr/bin/java -cp ./fuseki-server.jar jena.textindexer
--desc=fuseki_config.ttl
16:51:57 INFO textindexer :: 159657 (15965 per second)properties
indexed (15965 per second overall)
16:52:07 INFO textindexer :: 349257 (18960 per second)properties
indexed (17462 per second overall)
16:52:17 INFO textindexer :: 539238 (18998 per second)properties
indexed (17974 per second overall)
16:52:27 INFO textindexer :: 708454 (16921 per second)properties
indexed (17711 per second overall)
16:52:37 INFO textindexer :: 888469 (18001 per second)properties
indexed (17769 per second overall)
16:52:46 INFO textindexer :: 928952 (15744 per second) properties
indexed
but I'm getting no results. Tried (with data that should return matches)
(?s ?score ?content) text:query (vcard:fn "Some Person" )
and
?s text:query "something" .
On startup Jena now says
2020-09-30 16:47:48,396 main ERROR Reconfiguration failed: No
configuration found for '5bc2b487' at 'null' in 'null'
if that is somehow related.
On 30.9.2020 15:18, Andy Seaborne wrote:
> https://issues.apache.org/jira/browse/JENA-1890 and 1892
>
> are fixed in 3.16.0
>
> It's a decode error - the TDB database is intact.
>
> On 30/09/2020 12:31, Mikael Pesonen wrote:
>>
>> I figured out the regexp. Seems that we have external data having non
>> Ascii URLs that can't be altered. Is there any workaround, for
>> example adding text index to selected graphs only?
>>
>> On 30.9.2020 13:57, Mikael Pesonen wrote:
>>>
>>> Ah, thanks. Is it possible to find such URis with SPARQL query?
>>> SPARQL seems not to support \x -notation
>>>
>>> select * where
>>> {
>>> graph ?g {
>>> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
>>> }
>>> }
>>>
>>>
>>>
>>> On 30.9.2020 13:29, Andy Seaborne wrote:
>>>> In the data (probably in a URI) - it's reading the database.
>>>>
>>>> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>>>>
>>>>> I couldn't find any non Ascii characters in the config file
>>>>> ([^\x00-\x7F]+)...
>>>>>
>>>>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>>>>> Looks like
>>>>>>
>>>>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>>>>
>>>>>> Andy
>>>>>>
>>>>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>>>>
>>>>>>> Hi
>>>>>>>
>>>>>>> I'm building a new text index with following command and getting
>>>>>>> java error.
>>>>>>>
>>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>>> --desc=fuseki_config.ttl
>>>>>>>
>>>>>>> After the command I get 4 files in
>>>>>>> /home/text/tools/jena_text_index/
>>>>>>>
>>>>>>> _0.fdt
>>>>>>> _0.fdx
>>>>>>> segments_1
>>>>>>> write.lock
>>>>>>>
>>>>>>> Any idea what could case this?
>>>>>>>
>>>>>>>
>>>>>>> Error is:
>>>>>>>
>>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>>> range: 59
>>>>>>> at
>>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>>> at
>>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>>> at
>>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>>
>>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>>> --desc=fuseki_config.ttl
>>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>>> range: 59
>>>>>>> at
>>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>>> at
>>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>>> at
>>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>>
>>>>>>> at
>>>>>>> org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>>> at
>>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>>
>>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>>
>>>>>>>
>>>>>>> config:
>>>>>>>
>>>>>>> @prefix :<http://localhost/jena_example/#> .
>>>>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>>>>> @prefix text:<http://jena.apache.org/text#> .
>>>>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>>>>
>>>>>>> ## Example of a TDB dataset and text index
>>>>>>> ## Initialize TDB
>>>>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>>>>
>>>>>>> ## Initialize text query
>>>>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>>>>> # A TextDataset is a regular dataset with a text index.
>>>>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>>>>> # Lucene index
>>>>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>>>>
>>>>>>>
>>>>>>> ## ---------------------------------------------------------------
>>>>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>>>>> --desc=fuseki_config.ttl
>>>>>>>
>>>>>>> :text_dataset rdf:type text:TextDataset ;
>>>>>>> text:dataset :my_dataset ;
>>>>>>> text:index <#indexLucene> ;
>>>>>>> .
>>>>>>>
>>>>>>> # A TDB dataset used for RDF storage
>>>>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>>>>> # tdb:unionDefaultGraph true ; # Optional
>>>>>>> .
>>>>>>>
>>>>>>> # Text index description
>>>>>>> <#indexLucene> a text:TextIndexLucene ;
>>>>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>>>>> text:entityMap <#entMap> ;
>>>>>>> text:storeValues true ;
>>>>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>>>>> text:queryParser text:AnalyzingQueryParser ;
>>>>>>> text:multilingualSupport true ;
>>>>>>> .
>>>>>>>
>>>>>>> <#entMap> a text:EntityMap ;
>>>>>>> text:defaultField "vcard_fn" ;
>>>>>>> text:entityField "uri" ;
>>>>>>> text:uidField "uid" ;
>>>>>>> text:langField "lang" ;
>>>>>>> text:graphField "graph" ;
>>>>>>> text:map (
>>>>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>>>>> [ text:field "altLabel" ; text:predicate
>>>>>>> skos:altLabel ]
>>>>>>> ) .
>>>>>>>
>>>>>>> <#service> rdf:type fuseki:Service ;
>>>>>>> fuseki:name "/ds" ; #
>>>>>>> http://host:port/ds-ro
>>>>>>> fuseki:serviceQuery "query" ; # SPARQL
>>>>>>> query service
>>>>>>> fuseki:serviceQuery "sparql" ; # SPARQL
>>>>>>> query service
>>>>>>> fuseki:serviceUpdate "update" ; # SPARQL
>>>>>>> update service
>>>>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>>>>> upload service
>>>>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL
>>>>>>> Graph store protocol (read and write)
>>>>>>> fuseki:dataset :text_dataset ;
>>>>>>> .
>>>>>>>
>>>>>
>>>
>>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Andy Seaborne <an...@apache.org>.
https://issues.apache.org/jira/browse/JENA-1890 and 1892
are fixed in 3.16.0
It's a decode error - the TDB database is intact.
On 30/09/2020 12:31, Mikael Pesonen wrote:
>
> I figured out the regexp. Seems that we have external data having non
> Ascii URLs that can't be altered. Is there any workaround, for example
> adding text index to selected graphs only?
>
> On 30.9.2020 13:57, Mikael Pesonen wrote:
>>
>> Ah, thanks. Is it possible to find such URis with SPARQL query? SPARQL
>> seems not to support \x -notation
>>
>> select * where
>> {
>> graph ?g {
>> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
>> }
>> }
>>
>>
>>
>> On 30.9.2020 13:29, Andy Seaborne wrote:
>>> In the data (probably in a URI) - it's reading the database.
>>>
>>> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>>>
>>>> I couldn't find any non Ascii characters in the config file
>>>> ([^\x00-\x7F]+)...
>>>>
>>>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>>>> Looks like
>>>>>
>>>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>>>
>>>>> Andy
>>>>>
>>>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>>>
>>>>>> Hi
>>>>>>
>>>>>> I'm building a new text index with following command and getting
>>>>>> java error.
>>>>>>
>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>> --desc=fuseki_config.ttl
>>>>>>
>>>>>> After the command I get 4 files in /home/text/tools/jena_text_index/
>>>>>>
>>>>>> _0.fdt
>>>>>> _0.fdx
>>>>>> segments_1
>>>>>> write.lock
>>>>>>
>>>>>> Any idea what could case this?
>>>>>>
>>>>>>
>>>>>> Error is:
>>>>>>
>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>> range: 59
>>>>>> at
>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>> at
>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>
>>>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>> at
>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>
>>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>> at
>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>
>>>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>> at
>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>
>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>>> --desc=fuseki_config.ttl
>>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>>> range: 59
>>>>>> at
>>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>>> at
>>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>>
>>>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>>> at
>>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>>
>>>>>> at
>>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>>
>>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>>> at
>>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>>
>>>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>>> at
>>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>>
>>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>>
>>>>>>
>>>>>> config:
>>>>>>
>>>>>> @prefix :<http://localhost/jena_example/#> .
>>>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>>>> @prefix text:<http://jena.apache.org/text#> .
>>>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>>>
>>>>>> ## Example of a TDB dataset and text index
>>>>>> ## Initialize TDB
>>>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>>>
>>>>>> ## Initialize text query
>>>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>>>> # A TextDataset is a regular dataset with a text index.
>>>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>>>> # Lucene index
>>>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>>>
>>>>>>
>>>>>> ## ---------------------------------------------------------------
>>>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>>>> --desc=fuseki_config.ttl
>>>>>>
>>>>>> :text_dataset rdf:type text:TextDataset ;
>>>>>> text:dataset :my_dataset ;
>>>>>> text:index <#indexLucene> ;
>>>>>> .
>>>>>>
>>>>>> # A TDB dataset used for RDF storage
>>>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>>>> # tdb:unionDefaultGraph true ; # Optional
>>>>>> .
>>>>>>
>>>>>> # Text index description
>>>>>> <#indexLucene> a text:TextIndexLucene ;
>>>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>>>> text:entityMap <#entMap> ;
>>>>>> text:storeValues true ;
>>>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>>>> text:queryParser text:AnalyzingQueryParser ;
>>>>>> text:multilingualSupport true ;
>>>>>> .
>>>>>>
>>>>>> <#entMap> a text:EntityMap ;
>>>>>> text:defaultField "vcard_fn" ;
>>>>>> text:entityField "uri" ;
>>>>>> text:uidField "uid" ;
>>>>>> text:langField "lang" ;
>>>>>> text:graphField "graph" ;
>>>>>> text:map (
>>>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>>>> [ text:field "altLabel" ; text:predicate skos:altLabel ]
>>>>>> ) .
>>>>>>
>>>>>> <#service> rdf:type fuseki:Service ;
>>>>>> fuseki:name "/ds" ; #
>>>>>> http://host:port/ds-ro
>>>>>> fuseki:serviceQuery "query" ; # SPARQL query
>>>>>> service
>>>>>> fuseki:serviceQuery "sparql" ; # SPARQL query
>>>>>> service
>>>>>> fuseki:serviceUpdate "update" ; # SPARQL update
>>>>>> service
>>>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>>>> upload service
>>>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL
>>>>>> Graph store protocol (read and write)
>>>>>> fuseki:dataset :text_dataset ;
>>>>>> .
>>>>>>
>>>>
>>
>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
I figured out the regexp. It seems that we have external data with
non-ASCII URLs that can't be altered. Is there any workaround, for example
adding the text index to selected graphs only?
On 30.9.2020 13:57, Mikael Pesonen wrote:
>
> Ah, thanks. Is it possible to find such URis with SPARQL query? SPARQL
> seems not to support \x -notation
>
> select * where
> {
> graph ?g {
> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
> }
> }
>
>
>
> On 30.9.2020 13:29, Andy Seaborne wrote:
>> In the data (probably in a URI) - it's reading the database.
>>
>> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>>
>>> I couldn't find any non Ascii characters in the config file
>>> ([^\x00-\x7F]+)...
>>>
>>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>>> Looks like
>>>>
>>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>>
>>>> Andy
>>>>
>>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>>
>>>>> Hi
>>>>>
>>>>> I'm building a new text index with following command and getting
>>>>> java error.
>>>>>
>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>> --desc=fuseki_config.ttl
>>>>>
>>>>> After the command I get 4 files in /home/text/tools/jena_text_index/
>>>>>
>>>>> _0.fdt
>>>>> _0.fdx
>>>>> segments_1
>>>>> write.lock
>>>>>
>>>>> Any idea what could case this?
>>>>>
>>>>>
>>>>> Error is:
>>>>>
>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>> range: 59
>>>>> at
>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>> at
>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>
>>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>
>>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>> at
>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>
>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>> --desc=fuseki_config.ttl
>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>> range: 59
>>>>> at
>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>> at
>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>>
>>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>
>>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>> at
>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>
>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>
>>>>>
>>>>> config:
>>>>>
>>>>> @prefix :<http://localhost/jena_example/#> .
>>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>>> @prefix text:<http://jena.apache.org/text#> .
>>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>>
>>>>> ## Example of a TDB dataset and text index
>>>>> ## Initialize TDB
>>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>>
>>>>> ## Initialize text query
>>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>>> # A TextDataset is a regular dataset with a text index.
>>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>>> # Lucene index
>>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>>
>>>>>
>>>>> ## ---------------------------------------------------------------
>>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>>> --desc=fuseki_config.ttl
>>>>>
>>>>> :text_dataset rdf:type text:TextDataset ;
>>>>> text:dataset :my_dataset ;
>>>>> text:index <#indexLucene> ;
>>>>> .
>>>>>
>>>>> # A TDB dataset used for RDF storage
>>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>>> # tdb:unionDefaultGraph true ; # Optional
>>>>> .
>>>>>
>>>>> # Text index description
>>>>> <#indexLucene> a text:TextIndexLucene ;
>>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>>> text:entityMap <#entMap> ;
>>>>> text:storeValues true ;
>>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>>> text:queryParser text:AnalyzingQueryParser ;
>>>>> text:multilingualSupport true ;
>>>>> .
>>>>>
>>>>> <#entMap> a text:EntityMap ;
>>>>> text:defaultField "vcard_fn" ;
>>>>> text:entityField "uri" ;
>>>>> text:uidField "uid" ;
>>>>> text:langField "lang" ;
>>>>> text:graphField "graph" ;
>>>>> text:map (
>>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>>> [ text:field "altLabel" ; text:predicate skos:altLabel ]
>>>>> ) .
>>>>>
>>>>> <#service> rdf:type fuseki:Service ;
>>>>> fuseki:name "/ds" ; #
>>>>> http://host:port/ds-ro
>>>>> fuseki:serviceQuery "query" ; # SPARQL query
>>>>> service
>>>>> fuseki:serviceQuery "sparql" ; # SPARQL query
>>>>> service
>>>>> fuseki:serviceUpdate "update" ; # SPARQL update
>>>>> service
>>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>>> upload service
>>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL
>>>>> Graph store protocol (read and write)
>>>>> fuseki:dataset :text_dataset ;
>>>>> .
>>>>>
>>>
>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Andy Seaborne <an...@apache.org>.
https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
Use \\ not \ because the pattern is inside a SPARQL string literal.
And the filter needs to look for characters outside 0-255
select * where
{
?s ?p ?o filter( regex(str(?s), "[\\u0100-\\uFFFF]"))
}
Andy
On 30/09/2020 11:57, Mikael Pesonen wrote:
>
> Ah, thanks. Is it possible to find such URis with SPARQL query? SPARQL
> seems not to support \x -notation
>
> select * where
> {
> graph ?g {
> ?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
> }
> }
>
>
>
> On 30.9.2020 13:29, Andy Seaborne wrote:
>> In the data (probably in a URI) - it's reading the database.
>>
>> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>>
>>> I couldn't find any non Ascii characters in the config file
>>> ([^\x00-\x7F]+)...
>>>
>>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>>> Looks like
>>>>
>>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>>
>>>> Andy
>>>>
>>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>>
>>>>> Hi
>>>>>
>>>>> I'm building a new text index with following command and getting
>>>>> java error.
>>>>>
>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>> --desc=fuseki_config.ttl
>>>>>
>>>>> After the command I get 4 files in /home/text/tools/jena_text_index/
>>>>>
>>>>> _0.fdt
>>>>> _0.fdx
>>>>> segments_1
>>>>> write.lock
>>>>>
>>>>> Any idea what could case this?
>>>>>
>>>>>
>>>>> Error is:
>>>>>
>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>> range: 59
>>>>> at
>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>> at
>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>
>>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>> at
>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>
>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>>> --desc=fuseki_config.ttl
>>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>>> range: 59
>>>>> at
>>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>>> at
>>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>>
>>>>> at
>>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>>
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>>> at
>>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>>
>>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>>> at
>>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>>
>>>>> at jena.textindexer.exec(textindexer.java:130)
>>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>>> at jena.textindexer.main(textindexer.java:52)
>>>>>
>>>>>
>>>>> config:
>>>>>
>>>>> @prefix :<http://localhost/jena_example/#> .
>>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>>> @prefix text:<http://jena.apache.org/text#> .
>>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>>
>>>>> ## Example of a TDB dataset and text index
>>>>> ## Initialize TDB
>>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>>
>>>>> ## Initialize text query
>>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>>> # A TextDataset is a regular dataset with a text index.
>>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>>> # Lucene index
>>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>>
>>>>>
>>>>> ## ---------------------------------------------------------------
>>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>>> --desc=fuseki_config.ttl
>>>>>
>>>>> :text_dataset rdf:type text:TextDataset ;
>>>>> text:dataset :my_dataset ;
>>>>> text:index <#indexLucene> ;
>>>>> .
>>>>>
>>>>> # A TDB dataset used for RDF storage
>>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>>> # tdb:unionDefaultGraph true ; # Optional
>>>>> .
>>>>>
>>>>> # Text index description
>>>>> <#indexLucene> a text:TextIndexLucene ;
>>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>>> text:entityMap <#entMap> ;
>>>>> text:storeValues true ;
>>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>>> text:queryParser text:AnalyzingQueryParser ;
>>>>> text:multilingualSupport true ;
>>>>> .
>>>>>
>>>>> <#entMap> a text:EntityMap ;
>>>>> text:defaultField "vcard_fn" ;
>>>>> text:entityField "uri" ;
>>>>> text:uidField "uid" ;
>>>>> text:langField "lang" ;
>>>>> text:graphField "graph" ;
>>>>> text:map (
>>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>>> [ text:field "altLabel" ; text:predicate skos:altLabel ]
>>>>> ) .
>>>>>
>>>>> <#service> rdf:type fuseki:Service ;
>>>>> fuseki:name "/ds" ; #
>>>>> http://host:port/ds-ro
>>>>> fuseki:serviceQuery "query" ; # SPARQL query
>>>>> service
>>>>> fuseki:serviceQuery "sparql" ; # SPARQL query
>>>>> service
>>>>> fuseki:serviceUpdate "update" ; # SPARQL update
>>>>> service
>>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>>> upload service
>>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph
>>>>> store protocol (read and write)
>>>>> fuseki:dataset :text_dataset ;
>>>>> .
>>>>>
>>>
>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
Ah, thanks. Is it possible to find such URIs with a SPARQL query? SPARQL
does not seem to support the \x notation:
select * where
{
graph ?g {
?s ?p ?o filter(regex(str(?s), "[\x00-\x7F]"))
}
}
On 30.9.2020 13:29, Andy Seaborne wrote:
> In the data (probbaly in a URI) - it's reading the database.
>
> On 30/09/2020 10:36, Mikael Pesonen wrote:
>>
>> I couldn't find any non Ascii characters in the config file
>> ([^\x00-\x7F]+)...
>>
>> On 30.9.2020 0:48, Andy Seaborne wrote:
>>> Looks like
>>>
>>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>>
>>> Andy
>>>
>>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>>
>>>> Hi
>>>>
>>>> I'm building a new text index with following command and getting
>>>> java error.
>>>>
>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>> --desc=fuseki_config.ttl
>>>>
>>>> After the command I get 4 files in /home/text/tools/jena_text_index/
>>>>
>>>> _0.fdt
>>>> _0.fdx
>>>> segments_1
>>>> write.lock
>>>>
>>>> Any idea what could case this?
>>>>
>>>>
>>>> Error is:
>>>>
>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>> range: 59
>>>> at
>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>> at
>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>> at
>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>
>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>> at
>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>
>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>> at
>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>
>>>> at jena.textindexer.exec(textindexer.java:130)
>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>> at jena.textindexer.main(textindexer.java:52)
>>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>>> --desc=fuseki_config.ttl
>>>> java.lang.StringIndexOutOfBoundsException: String index out of
>>>> range: 59
>>>> at
>>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>>> at java.base/java.lang.String.charAt(String.java:711)
>>>> at
>>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>>> at
>>>> org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>>
>>>> at
>>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>>
>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>>> at
>>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>>
>>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>>> at
>>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>>>
>>>> at jena.textindexer.exec(textindexer.java:130)
>>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>>> at jena.textindexer.main(textindexer.java:52)
>>>>
>>>>
>>>> config:
>>>>
>>>> @prefix :<http://localhost/jena_example/#> .
>>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>>> @prefix text:<http://jena.apache.org/text#> .
>>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>>
>>>> ## Example of a TDB dataset and text index
>>>> ## Initialize TDB
>>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>>
>>>> ## Initialize text query
>>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>>> # A TextDataset is a regular dataset with a text index.
>>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>>> # Lucene index
>>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>>
>>>>
>>>> ## ---------------------------------------------------------------
>>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>>> --desc=fuseki_config.ttl
>>>>
>>>> :text_dataset rdf:type text:TextDataset ;
>>>> text:dataset :my_dataset ;
>>>> text:index <#indexLucene> ;
>>>> .
>>>>
>>>> # A TDB dataset used for RDF storage
>>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>>> tdb:location "/home/text/tools/jena_data/" ;
>>>> # tdb:unionDefaultGraph true ; # Optional
>>>> .
>>>>
>>>> # Text index description
>>>> <#indexLucene> a text:TextIndexLucene ;
>>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>>> text:entityMap <#entMap> ;
>>>> text:storeValues true ;
>>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>>> text:queryParser text:AnalyzingQueryParser ;
>>>> text:multilingualSupport true ;
>>>> .
>>>>
>>>> <#entMap> a text:EntityMap ;
>>>> text:defaultField "vcard_fn" ;
>>>> text:entityField "uri" ;
>>>> text:uidField "uid" ;
>>>> text:langField "lang" ;
>>>> text:graphField "graph" ;
>>>> text:map (
>>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>>> [ text:field "altLabel" ; text:predicate skos:altLabel ]
>>>> ) .
>>>>
>>>> <#service> rdf:type fuseki:Service ;
>>>> fuseki:name "/ds" ; #
>>>> http://host:port/ds-ro
>>>> fuseki:serviceQuery "query" ; # SPARQL query
>>>> service
>>>> fuseki:serviceQuery "sparql" ; # SPARQL query
>>>> service
>>>> fuseki:serviceUpdate "update" ; # SPARQL update
>>>> service
>>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>>> upload service
>>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph
>>>> store protocol (read and write)
>>>> fuseki:dataset :text_dataset ;
>>>> .
>>>>
>>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Andy Seaborne <an...@apache.org>.
In the data (probably in a URI) - it's reading the database.
On 30/09/2020 10:36, Mikael Pesonen wrote:
>
> I couldn't find any non Ascii characters in the config file
> ([^\x00-\x7F]+)...
>
> On 30.9.2020 0:48, Andy Seaborne wrote:
>> Looks like
>>
>> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>>
>> Andy
>>
>> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>>
>>> Hi
>>>
>>> I'm building a new text index with following command and getting java
>>> error.
>>>
>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>> --desc=fuseki_config.ttl
>>>
>>> After the command I get 4 files in /home/text/tools/jena_text_index/
>>>
>>> _0.fdt
>>> _0.fdx
>>> segments_1
>>> write.lock
>>>
>>> Any idea what could case this?
>>>
>>>
>>> Error is:
>>>
>>> java.lang.StringIndexOutOfBoundsException: String index out of range: 59
>>> at
>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>> at java.base/java.lang.String.charAt(String.java:711)
>>> at
>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>> at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>
>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>> at
>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>
>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>> at
>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>> at jena.textindexer.exec(textindexer.java:130)
>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>> at jena.textindexer.main(textindexer.java:52)
>>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>>> --desc=fuseki_config.ttl
>>> java.lang.StringIndexOutOfBoundsException: String index out of range: 59
>>> at
>>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>>> at java.base/java.lang.String.charAt(String.java:711)
>>> at
>>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>>> at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>>
>>> at
>>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>>
>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>>> at
>>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>>>
>>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>>> at
>>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>>> at jena.textindexer.exec(textindexer.java:130)
>>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>>> at jena.textindexer.main(textindexer.java:52)
>>>
>>>
>>> config:
>>>
>>> @prefix :<http://localhost/jena_example/#> .
>>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>>> @prefix text:<http://jena.apache.org/text#> .
>>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>>
>>> ## Example of a TDB dataset and text index
>>> ## Initialize TDB
>>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>>
>>> ## Initialize text query
>>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>>> # A TextDataset is a regular dataset with a text index.
>>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>>> # Lucene index
>>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>>
>>>
>>> ## ---------------------------------------------------------------
>>> # build: java -cp ./fuseki-server.jar jena.textindexer
>>> --desc=fuseki_config.ttl
>>>
>>> :text_dataset rdf:type text:TextDataset ;
>>> text:dataset :my_dataset ;
>>> text:index <#indexLucene> ;
>>> .
>>>
>>> # A TDB dataset used for RDF storage
>>> :my_dataset rdf:type tdb:DatasetTDB ;
>>> tdb:location "/home/text/tools/jena_data/" ;
>>> # tdb:unionDefaultGraph true ; # Optional
>>> .
>>>
>>> # Text index description
>>> <#indexLucene> a text:TextIndexLucene ;
>>> text:directory <file:/home/text/tools/jena_text_index/> ;
>>> text:entityMap <#entMap> ;
>>> text:storeValues true ;
>>> text:analyzer [ a text:StandardAnalyzer ] ;
>>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>>> text:queryParser text:AnalyzingQueryParser ;
>>> text:multilingualSupport true ;
>>> .
>>>
>>> <#entMap> a text:EntityMap ;
>>> text:defaultField "vcard_fn" ;
>>> text:entityField "uri" ;
>>> text:uidField "uid" ;
>>> text:langField "lang" ;
>>> text:graphField "graph" ;
>>> text:map (
>>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>>> [ text:field "altLabel" ; text:predicate skos:altLabel ]
>>> ) .
>>>
>>> <#service> rdf:type fuseki:Service ;
>>> fuseki:name "/ds" ; # http://host:port/ds-ro
>>> fuseki:serviceQuery "query" ; # SPARQL query
>>> service
>>> fuseki:serviceQuery "sparql" ; # SPARQL query
>>> service
>>> fuseki:serviceUpdate "update" ; # SPARQL update
>>> service
>>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>>> upload service
>>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph
>>> store protocol (read and write)
>>> fuseki:dataset :text_dataset ;
>>> .
>>>
>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Mikael Pesonen <mi...@lingsoft.fi>.
I couldn't find any non-ASCII characters in the config file
([^\x00-\x7F]+)...
On 30.9.2020 0:48, Andy Seaborne wrote:
> Looks like
>
> https://issues.apache.org/jira/browse/JENA-1892 , 1890
>
> Andy
>
> On 29/09/2020 15:13, Mikael Pesonen wrote:
>>
>> Hi
>>
>> I'm building a new text index with following command and getting java
>> error.
>>
>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>> --desc=fuseki_config.ttl
>>
>> After the command I get 4 files in /home/text/tools/jena_text_index/
>>
>> _0.fdt
>> _0.fdx
>> segments_1
>> write.lock
>>
>> Any idea what could case this?
>>
>>
>> Error is:
>>
>> java.lang.StringIndexOutOfBoundsException: String index out of range: 59
>> at
>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>> at java.base/java.lang.String.charAt(String.java:711)
>> at
>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>> at
>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>> at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>
>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>> at
>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>> at
>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>> at jena.textindexer.exec(textindexer.java:130)
>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>> at jena.textindexer.main(textindexer.java:52)
>> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
>> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
>> --desc=fuseki_config.ttl
>> java.lang.StringIndexOutOfBoundsException: String index out of range: 59
>> at
>> java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
>> at java.base/java.lang.String.charAt(String.java:711)
>> at
>> org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
>> at
>> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
>> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
>> at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>>
>> at
>> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>>
>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
>> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
>> at
>> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
>> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
>> at
>> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
>> at jena.textindexer.exec(textindexer.java:130)
>> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
>> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
>> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
>> at jena.textindexer.main(textindexer.java:52)
>>
>>
>> config:
>>
>> @prefix :<http://localhost/jena_example/#> .
>> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
>> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
>> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
>> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
>> @prefix text:<http://jena.apache.org/text#> .
>> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
>> @prefix fuseki:<http://jena.apache.org/fuseki#> .
>> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>>
>> ## Example of a TDB dataset and text index
>> ## Initialize TDB
>> [] ja:loadClass "org.apache.jena.tdb.TDB" .
>> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
>> tdb:GraphTDB rdfs:subClassOf ja:Model .
>>
>> ## Initialize text query
>> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
>> # A TextDataset is a regular dataset with a text index.
>> text:TextDataset rdfs:subClassOf ja:RDFDataset .
>> # Lucene index
>> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>>
>>
>> ## ---------------------------------------------------------------
>> # build: java -cp ./fuseki-server.jar jena.textindexer
>> --desc=fuseki_config.ttl
>>
>> :text_dataset rdf:type text:TextDataset ;
>> text:dataset :my_dataset ;
>> text:index <#indexLucene> ;
>> .
>>
>> # A TDB dataset used for RDF storage
>> :my_dataset rdf:type tdb:DatasetTDB ;
>> tdb:location "/home/text/tools/jena_data/" ;
>> # tdb:unionDefaultGraph true ; # Optional
>> .
>>
>> # Text index description
>> <#indexLucene> a text:TextIndexLucene ;
>> text:directory <file:/home/text/tools/jena_text_index/> ;
>> text:entityMap <#entMap> ;
>> text:storeValues true ;
>> text:analyzer [ a text:StandardAnalyzer ] ;
>> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
>> text:queryParser text:AnalyzingQueryParser ;
>> text:multilingualSupport true ;
>> .
>>
>> <#entMap> a text:EntityMap ;
>> text:defaultField "vcard_fn" ;
>> text:entityField "uri" ;
>> text:uidField "uid" ;
>> text:langField "lang" ;
>> text:graphField "graph" ;
>> text:map (
>> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
>> [ text:field "altLabel" ; text:predicate skos:altLabel ]
>> ) .
>>
>> <#service> rdf:type fuseki:Service ;
>> fuseki:name "/ds" ; # http://host:port/ds-ro
>> fuseki:serviceQuery "query" ; # SPARQL query
>> service
>> fuseki:serviceQuery "sparql" ; # SPARQL query
>> service
>> fuseki:serviceUpdate "update" ; # SPARQL update
>> service
>> fuseki:serviceUpload "upload" ; # Non-SPARQL
>> upload service
>> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph
>> store protocol (read and write)
>> fuseki:dataset :text_dataset ;
>> .
>>
Re: java.lang.StringIndexOutOfBoundsException with Jena Text
Posted by Andy Seaborne <an...@apache.org>.
Looks like
https://issues.apache.org/jira/browse/JENA-1892 , 1890
Andy
On 29/09/2020 15:13, Mikael Pesonen wrote:
>
> Hi
>
> I'm building a new text index with following command and getting java
> error.
>
> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
> --desc=fuseki_config.ttl
>
> After the command I get 4 files in /home/text/tools/jena_text_index/
>
> _0.fdt
> _0.fdx
> segments_1
> write.lock
>
> Any idea what could case this?
>
>
> Error is:
>
> java.lang.StringIndexOutOfBoundsException: String index out of range: 59
> at java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
> at java.base/java.lang.String.charAt(String.java:711)
> at org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
> at
> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
> at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
> at
> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>
> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
> at
> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
> at
> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
> at jena.textindexer.exec(textindexer.java:130)
> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
> at jena.textindexer.main(textindexer.java:52)
> mikael@insight-dev:/home/text/tools/apache-jena-fuseki-3.14.0$
> /usr/bin/java -cp ./fuseki-server.jar jena.textindexer
> --desc=fuseki_config.ttl
> java.lang.StringIndexOutOfBoundsException: String index out of range: 59
> at java.base/java.lang.StringLatin1.charAt(StringLatin1.java:48)
> at java.base/java.lang.String.charAt(String.java:711)
> at org.apache.jena.atlas.lib.StrUtils.decodeHex(StrUtils.java:212)
> at
> org.apache.jena.tdb.store.nodetable.NodecSSE.decode(NodecSSE.java:121)
> at org.apache.jena.tdb.lib.NodeLib.decode(NodeLib.java:120)
> at org.apache.jena.tdb.lib.NodeLib.fetchDecode(NodeLib.java:97)
> at
> org.apache.jena.tdb.store.nodetable.NodeTableNative.readNodeFromTable(NodeTableNative.java:182)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableNative._retrieveNodeByNodeId(NodeTableNative.java:108)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableNative.getNodeForNodeId(NodeTableNative.java:67)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableCache._retrieveNodeByNodeId(NodeTableCache.java:128)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableCache.getNodeForNodeId(NodeTableCache.java:82)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableWrapper.getNodeForNodeId(NodeTableWrapper.java:50)
>
> at
> org.apache.jena.tdb.store.nodetable.NodeTableInline.getNodeForNodeId(NodeTableInline.java:67)
>
> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:126)
> at org.apache.jena.tdb.lib.TupleLib.quad(TupleLib.java:120)
> at
> org.apache.jena.tdb.lib.TupleLib.lambda$convertToQuads$3(TupleLib.java:59)
> at org.apache.jena.atlas.iterator.Iter$2.next(Iter.java:352)
> at
> org.apache.jena.atlas.iterator.IteratorCons.next(IteratorCons.java:104)
> at jena.textindexer.exec(textindexer.java:130)
> at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
> at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
> at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
> at jena.textindexer.main(textindexer.java:52)
>
>
> config:
>
> @prefix :<http://localhost/jena_example/#> .
> @prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
> @prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#> .
> @prefix tdb:<http://jena.hpl.hp.com/2008/tdb#> .
> @prefix ja:<http://jena.hpl.hp.com/2005/11/Assembler#> .
> @prefix text:<http://jena.apache.org/text#> .
> @prefix skos:<http://www.w3.org/2004/02/skos/core#> .
> @prefix fuseki:<http://jena.apache.org/fuseki#> .
> @prefix vcard:<http://www.w3.org/2006/vcard/ns#> .
>
> ## Example of a TDB dataset and text index
> ## Initialize TDB
> [] ja:loadClass "org.apache.jena.tdb.TDB" .
> tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
> tdb:GraphTDB rdfs:subClassOf ja:Model .
>
> ## Initialize text query
> [] ja:loadClass "org.apache.jena.query.text.TextQuery" .
> # A TextDataset is a regular dataset with a text index.
> text:TextDataset rdfs:subClassOf ja:RDFDataset .
> # Lucene index
> text:TextIndexLucene rdfs:subClassOf text:TextIndex .
>
>
> ## ---------------------------------------------------------------
> # build: java -cp ./fuseki-server.jar jena.textindexer
> --desc=fuseki_config.ttl
>
> :text_dataset rdf:type text:TextDataset ;
> text:dataset :my_dataset ;
> text:index <#indexLucene> ;
> .
>
> # A TDB dataset used for RDF storage
> :my_dataset rdf:type tdb:DatasetTDB ;
> tdb:location "/home/text/tools/jena_data/" ;
> # tdb:unionDefaultGraph true ; # Optional
> .
>
> # Text index description
> <#indexLucene> a text:TextIndexLucene ;
> text:directory <file:/home/text/tools/jena_text_index/> ;
> text:entityMap <#entMap> ;
> text:storeValues true ;
> text:analyzer [ a text:StandardAnalyzer ] ;
> text:queryAnalyzer [ a text:KeywordAnalyzer ] ;
> text:queryParser text:AnalyzingQueryParser ;
> text:multilingualSupport true ;
> .
>
> <#entMap> a text:EntityMap ;
> text:defaultField "vcard_fn" ;
> text:entityField "uri" ;
> text:uidField "uid" ;
> text:langField "lang" ;
> text:graphField "graph" ;
> text:map (
> [ text:field "vcard_fn" ; text:predicate vcard:fn ]
> [ text:field "altLabel" ; text:predicate skos:altLabel ]
> ) .
>
> <#service> rdf:type fuseki:Service ;
> fuseki:name "/ds" ; # http://host:port/ds-ro
> fuseki:serviceQuery "query" ; # SPARQL query service
> fuseki:serviceQuery "sparql" ; # SPARQL query service
> fuseki:serviceUpdate "update" ; # SPARQL update service
> fuseki:serviceUpload "upload" ; # Non-SPARQL upload
> service
> fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph
> store protocol (read and write)
> fuseki:dataset :text_dataset ;
> .
>