You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Günter Kukies <gu...@heuft.com> on 2003/02/21 09:17:14 UTC

Why is document.get("contents") null?

Hello,

Why is document.get("contents"); null ?

Thanks,

Günter


/**
 * Extracts the text of the PDF read from {@code is} and adds it to
 * {@code document} as the tokenized, indexed "contents" field.
 *
 * <p>The field is created from a Reader. Per the Lucene documentation for
 * {@code Field.Text(String, Reader)}, such a field is tokenized and indexed but
 * not stored, so {@code document.get("contents")} returns {@code null} for it.
 * Use the String overload of {@code Field.Text} instead if the text must be
 * retrievable from the document.
 *
 * @param servlet          servlet whose log receives the diagnostic output
 * @param document         Lucene document that receives the "contents" field
 * @param is               stream positioned at the start of the PDF; always
 *                         closed before this method returns
 * @param documentLocation location of the PDF, used only in log and error messages
 * @throws IOException if the PDF cannot be read or parsed, or if it is encrypted
 *                     and cannot be decrypted with the empty password
 */
private static void addContent(PortalServlet servlet, Document document, InputStream is, String documentLocation ) throws IOException {
        try {

            PDFParser parser = new PDFParser( is );
            parser.parse();

            COSDocument pdfDocument = parser.getDocument();

            if( pdfDocument.isEncrypted() ) {
                DecryptDocument decryptor = new DecryptDocument( pdfDocument );
                /* Just try using the default (empty) password and move on. */
                decryptor.decryptDocument( "" );
            }

            /* Collect the extracted text in memory. An explicit charset is used for
               both encoding and decoding so that no characters are lost to the
               platform default encoding. */
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter( out, "UTF-8" );
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.writeText( pdfDocument, writer );
            /* OutputStreamWriter buffers encoded bytes; without this flush the tail
               of the text (or all of it) may be missing from the byte array. */
            writer.flush();

            byte[] contents = out.toByteArray();
            InputStreamReader input = new InputStreamReader( new ByteArrayInputStream( contents ), "UTF-8" );
            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed (but not stored).
            document.add(Field.Text("contents", input ));
            // Log the text that was actually extracted for indexing instead of
            // running the stripper over the whole document a second time.
            servlet.log("documentstripper: "+new String( contents, "UTF-8" ));
            servlet.log("documentLocation: "+documentLocation);
            // document.get("contents") is expected to print null here: the field is
            // Reader-valued and therefore has no stored string value.
            servlet.log("contents: "+input+" doc: "+document.get("contents"));
            servlet.log("document: "+document);

        }
        catch( CryptographyException e ) {
            IOException wrapped = new IOException( "Error decrypting document(" + documentLocation + "): " + e );
            wrapped.initCause( e ); // keep the original stack trace for diagnosis
            throw wrapped;
        }
        catch( InvalidPasswordException e ) {
            IOException wrapped = new IOException( "Error: The document(" + documentLocation + ") is encrypted and will not be indexed." );
            wrapped.initCause( e ); // keep the original stack trace for diagnosis
            throw wrapped;
        }
        finally {
            if( is != null ) {
                is.close();
            }
        }
    }