You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@jena.apache.org by Laura Morales <la...@mail.com> on 2017/04/08 23:02:59 UTC

Bad character in IRI (space)

This is the head of my RDF/XML file "data.rdf":
 

<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:cc="http://web.resource.org/cc/"
   xmlns:dcam="http://purl.org/dc/dcam/"
   xmlns:dcterms="http://purl.org/dc/terms/"
   xmlns:marcrel="http://www.loc.gov/loc.terms/relators/"
   xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xml:base="http://www.gutenberg.org/">
  <cc:Work rdf:about="feeds/catalog.rdf">
    <cc:license rdf:resource="http://www.gnu.org/licenses/gpl.html"/>
  </cc:Work>
  <pgterms:ebook rdf:about="ebooks/0">
    <dcterms:issued>None</dcterms:issued>
    <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
    <dcterms:license rdf:resource="license"/>
    <dcterms:publisher>Project Gutenberg</dcterms:publisher>
    <dcterms:rights>Public domain in the USA.</dcterms:rights>
    <dcterms:type>
      <rdf:Description>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
        <rdf:value>Text</rdf:value>
      </rdf:Description>
    </dcterms:type>
  </pgterms:ebook>
</rdf:RDF>
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xml:base="http://www.gutenberg.org/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
  xmlns:dcterms="http://purl.org/dc/terms/"
  xmlns:dcam="http://purl.org/dc/dcam/"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
  xmlns:cc="http://web.resource.org/cc/"
>
  <cc:Work rdf:about="">
    <rdfs:comment>Archives containing the RDF files for *all* our books can be downloaded at
            http://www.gutenberg.org/wiki/Gutenberg:Feeds#The_Complete_Project_Gutenberg_Catalog</rdfs:comment>
    <cc:license rdf:resource="https://creativecommons.org/publicdomain/zero/1.0/"/>
  </cc:Work>
  <pgterms:ebook rdf:about="ebooks/10000">
    <pgterms:bookshelf>
      <rdf:Description rdf:nodeID="N707cb91bad8c4547ab2a4fc44d5a3aae">
        <dcam:memberOf rdf:resource="2009/pgterms/Bookshelf"/>
        <rdf:value>Best Books Ever Listings</rdf:value>
      </rdf:Description>
    </pgterms:bookshelf>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/10000.epub.images">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">34727</dcterms:extent>
         ........
 

But for whatever reason it seems to be invalid:
 

$ ./riot --check data.rdf
00:56:18 ERROR riot                 :: [line: 27, col: 6 ] The processing instruction target matching "[xX][mM][lL]" is not allowed.
 

$ ./tdbloader --loc=... data.rdf
00:57:51 INFO  loader               :: -- Start triples data phase
00:57:51 INFO  loader               :: ** Load empty triples table
00:57:51 INFO  loader               :: -- Start quads data phase
00:57:51 INFO  loader               :: ** Load empty quads table
00:57:51 INFO  loader               :: Load: data.rdf -- 2017/04/09 00:57:51 CEST
00:57:51 ERROR riot                 :: [line: 27, col: 6 ] The processing instruction target matching "[xX][mM][lL]" is not allowed.
org.apache.jena.riot.RiotException: [line: 27, col: 6 ] The processing instruction target matching "[xX][mM][lL]" is not allowed.
    at org.apache.jena.riot.system.ErrorHandlerFactory$ErrorHandlerStd.fatal(ErrorHandlerFactory.java:136)
    at org.apache.jena.riot.lang.LangRDFXML$ErrorHandlerBridge.fatalError(LangRDFXML.java:238)
    at org.apache.jena.rdfxml.xmlinput.impl.ARPSaxErrorHandler.fatalError(ARPSaxErrorHandler.java:47)
    at org.apache.jena.rdfxml.xmlinput.impl.XMLHandler.warning(XMLHandler.java:199)
    at org.apache.jena.rdfxml.xmlinput.impl.XMLHandler.fatalError(XMLHandler.java:229)
    at org.apache.xerces.util.ErrorHandlerWrapper.fatalError(Unknown Source)
    at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
    at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
    at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
    at org.apache.xerces.impl.XMLScanner.reportFatalError(Unknown Source)
    at org.apache.xerces.impl.XMLScanner.scanPIData(Unknown Source)
    at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanPIData(Unknown Source)
    at org.apache.xerces.impl.XMLScanner.scanPI(Unknown Source)
    at org.apache.xerces.impl.XMLDocumentScannerImpl$TrailingMiscDispatcher.dispatch(Unknown Source)
    at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
    at org.apache.xerces.parsers.DTDConfiguration.parse(Unknown Source)
    at org.apache.xerces.parsers.DTDConfiguration.parse(Unknown Source)
    at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
    at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source)
    at org.apache.jena.rdfxml.xmlinput.impl.RDFXMLParser.parse(RDFXMLParser.java:150)
    at org.apache.jena.rdfxml.xmlinput.ARP.load(ARP.java:118)
    at org.apache.jena.riot.lang.LangRDFXML.parse(LangRDFXML.java:134)
    at org.apache.jena.riot.RDFParserRegistry$ReaderRIOTLang.read(RDFParserRegistry.java:179)
    at org.apache.jena.riot.RDFDataMgr.process(RDFDataMgr.java:859)
    at org.apache.jena.riot.RDFDataMgr.parse(RDFDataMgr.java:667)
    at org.apache.jena.riot.RDFDataMgr.parse(RDFDataMgr.java:637)
    at org.apache.jena.riot.RDFDataMgr.parse(RDFDataMgr.java:626)
    at org.apache.jena.tdb.store.bulkloader.BulkLoader.loadQuads$(BulkLoader.java:143)
    at org.apache.jena.tdb.store.bulkloader.BulkLoader.loadDataset(BulkLoader.java:109)
    at org.apache.jena.tdb.TDBLoader.loadDataset$(TDBLoader.java:261)
    at org.apache.jena.tdb.TDBLoader.loadDataset(TDBLoader.java:193)
    at org.apache.jena.tdb.TDBLoader.load(TDBLoader.java:74)
    at tdb.tdbloader.loadQuads(tdbloader.java:133)
    at tdb.tdbloader.exec(tdbloader.java:101)
    at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
    at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
    at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
    at tdb.tdbloader.main(tdbloader.java:48)
 
 
The RDF file seems to be in order, and I don't understand what's going on. What am I doing wrong?

Fw: Bad character in IRI (space)

Posted by Laura Morales <la...@mail.com>.
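Never mind, please excuse me for this email. I was stupidly concatenating multiple XML files and parsing the whole bunch as a single file, which clearly can't work: an XML document may contain only one XML declaration (at the very start) and one root element, so the second "<?xml ...?>" after the first "</rdf:RDF>" is what triggers the 'processing instruction target matching "[xX][mM][lL]" is not allowed' error. Each file has to be parsed separately.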
 
 
---------------------------------------------------------
Sent: Sunday, April 09, 2017 at 1:02 AM
From: "Laura Morales" <la...@mail.com>
To: jena-users-ml <us...@jena.apache.org>
Subject: Bad character in IRI (space)
This is the head of my XML/RDF file "data.rdf"
 

<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
   xmlns:cc="http://web.resource.org/cc/"
   xmlns:dcam="http://purl.org/dc/dcam/"
   xmlns:dcterms="http://purl.org/dc/terms/"
   xmlns:marcrel="http://www.loc.gov/loc.terms/relators/"
   xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xml:base="http://www.gutenberg.org/">
  <cc:Work rdf:about="feeds/catalog.rdf">
    <cc:license rdf:resource="http://www.gnu.org/licenses/gpl.html"/>
  </cc:Work>
  <pgterms:ebook rdf:about="ebooks/0">
    <dcterms:issued>None</dcterms:issued>
    <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
    <dcterms:license rdf:resource="license"/>
    <dcterms:publisher>Project Gutenberg</dcterms:publisher>
    <dcterms:rights>Public domain in the USA.</dcterms:rights>
    <dcterms:type>
      <rdf:Description>
        <dcam:memberOf rdf:resource="http://purl.org/dc/terms/DCMIType"/>
        <rdf:value>Text</rdf:value>
      </rdf:Description>
    </dcterms:type>
  </pgterms:ebook>
</rdf:RDF>
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xml:base="http://www.gutenberg.org/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
  xmlns:dcterms="http://purl.org/dc/terms/"
  xmlns:dcam="http://purl.org/dc/dcam/"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
  xmlns:cc="http://web.resource.org/cc/"
>
  <cc:Work rdf:about="">
    <rdfs:comment>Archives containing the RDF files for *all* our books can be downloaded at
            http://www.gutenberg.org/wiki/Gutenberg:Feeds#The_Complete_Project_Gutenberg_Catalog</rdfs:comment>
    <cc:license rdf:resource="https://creativecommons.org/publicdomain/zero/1.0/"/>
  </cc:Work>
  <pgterms:ebook rdf:about="ebooks/10000">
    <pgterms:bookshelf>
      <rdf:Description rdf:nodeID="N707cb91bad8c4547ab2a4fc44d5a3aae">
        <dcam:memberOf rdf:resource="2009/pgterms/Bookshelf"/>
        <rdf:value>Best Books Ever Listings</rdf:value>
      </rdf:Description>
    </pgterms:bookshelf>
    <dcterms:hasFormat>
      <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/10000.epub.images">
        <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">34727</dcterms:extent>
         ........
 

But for whatever reason it seems invalid....
 

$ ./riot --check data.rdf
00:56:18 ERROR riot                 :: [line: 27, col: 6 ] The processing instruction target matching "[xX][mM][lL]" is not allowed.
 

$ ./tdbloader --loc=... data.rdf
00:57:51 INFO  loader               :: -- Start triples data phase
00:57:51 INFO  loader               :: ** Load empty triples table
00:57:51 INFO  loader               :: -- Start quads data phase
00:57:51 INFO  loader               :: ** Load empty quads table
00:57:51 INFO  loader               :: Load: data.rdf -- 2017/04/09 00:57:51 CEST
00:57:51 ERROR riot                 :: [line: 27, col: 6 ] The processing instruction target matching "[xX][mM][lL]" is not allowed.
org.apache.jena.riot.RiotException: [line: 27, col: 6 ] The processing instruction target matching "[xX][mM][lL]" is not allowed.
    at org.apache.jena.riot.system.ErrorHandlerFactory$ErrorHandlerStd.fatal(ErrorHandlerFactory.java:136)
    at org.apache.jena.riot.lang.LangRDFXML$ErrorHandlerBridge.fatalError(LangRDFXML.java:238)
    at org.apache.jena.rdfxml.xmlinput.impl.ARPSaxErrorHandler.fatalError(ARPSaxErrorHandler.java:47)
    at org.apache.jena.rdfxml.xmlinput.impl.XMLHandler.warning(XMLHandler.java:199)
    at org.apache.jena.rdfxml.xmlinput.impl.XMLHandler.fatalError(XMLHandler.java:229)
    at org.apache.xerces.util.ErrorHandlerWrapper.fatalError(Unknown Source)
    at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
    at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
    at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
    at org.apache.xerces.impl.XMLScanner.reportFatalError(Unknown Source)
    at org.apache.xerces.impl.XMLScanner.scanPIData(Unknown Source)
    at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanPIData(Unknown Source)
    at org.apache.xerces.impl.XMLScanner.scanPI(Unknown Source)
    at org.apache.xerces.impl.XMLDocumentScannerImpl$TrailingMiscDispatcher.dispatch(Unknown Source)
    at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
    at org.apache.xerces.parsers.DTDConfiguration.parse(Unknown Source)
    at org.apache.xerces.parsers.DTDConfiguration.parse(Unknown Source)
    at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
    at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source)
    at org.apache.jena.rdfxml.xmlinput.impl.RDFXMLParser.parse(RDFXMLParser.java:150)
    at org.apache.jena.rdfxml.xmlinput.ARP.load(ARP.java:118)
    at org.apache.jena.riot.lang.LangRDFXML.parse(LangRDFXML.java:134)
    at org.apache.jena.riot.RDFParserRegistry$ReaderRIOTLang.read(RDFParserRegistry.java:179)
    at org.apache.jena.riot.RDFDataMgr.process(RDFDataMgr.java:859)
    at org.apache.jena.riot.RDFDataMgr.parse(RDFDataMgr.java:667)
    at org.apache.jena.riot.RDFDataMgr.parse(RDFDataMgr.java:637)
    at org.apache.jena.riot.RDFDataMgr.parse(RDFDataMgr.java:626)
    at org.apache.jena.tdb.store.bulkloader.BulkLoader.loadQuads$(BulkLoader.java:143)
    at org.apache.jena.tdb.store.bulkloader.BulkLoader.loadDataset(BulkLoader.java:109)
    at org.apache.jena.tdb.TDBLoader.loadDataset$(TDBLoader.java:261)
    at org.apache.jena.tdb.TDBLoader.loadDataset(TDBLoader.java:193)
    at org.apache.jena.tdb.TDBLoader.load(TDBLoader.java:74)
    at tdb.tdbloader.loadQuads(tdbloader.java:133)
    at tdb.tdbloader.exec(tdbloader.java:101)
    at jena.cmd.CmdMain.mainMethod(CmdMain.java:93)
    at jena.cmd.CmdMain.mainRun(CmdMain.java:58)
    at jena.cmd.CmdMain.mainRun(CmdMain.java:45)
    at tdb.tdbloader.main(tdbloader.java:48)
 
 
the RDF file seems in order, I don't understand what's going on.... what am I doing wrong?