You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tika.apache.org by "Dan Coldrick (Jira)" <ji...@apache.org> on 2022/04/27 20:10:00 UTC

[jira] [Comment Edited] (TIKA-3742) Advice around DGN7 parser and whether to add to TIKA

    [ https://issues.apache.org/jira/browse/TIKA-3742?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17529023#comment-17529023 ] 

Dan Coldrick edited comment on TIKA-3742 at 4/27/22 8:09 PM:
-------------------------------------------------------------

 
{code:java}
package org.apache.tika.parser.dgn;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Set;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class DGN7Parser extends AbstractParser {    

private static final long serialVersionUID = 7609445358323296566L;    

Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.image("vnd.dgn; version=7"));    

@Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }    @Override
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
            throws IOException, TikaException, SAXException {
        File file = new File("G:/temp/Drawing.dgn");
        try (OutputStream outputStream = new FileOutputStream(file)) {
            IOUtils.copy(stream, outputStream);
        }
        Runtime rt = Runtime.getRuntime();
        String[] commands = {"C:\\Users\\monkm\\DGN\\dgndump.exe","-r","10000", "G:\\temp\\Drawing.dgn"};
        Process proc = rt.exec(commands);        

        BufferedReader stdInput = new BufferedReader(new 
             InputStreamReader(proc.getInputStream()));        
        BufferedReader stdError = new BufferedReader(new 
             InputStreamReader(proc.getErrorStream()));
        
        ArrayList<String> ar = new ArrayList<String>();

        String s = null;
        while ((s = stdInput.readLine()) != null) {
            if(s.startsWith("  string = \"")) {
                ar.add(s.substring(12, s.length()-1).trim());
            }
            System.out.println(s);
        }
            System.out.println(ar);
        while ((s = stdError.readLine()) != null) {
            System.out.println(s);
        }
    }}
  {code}
 

 


was (Author: monkmachine):
 
{code:java}
package org.apache.tika.parser.dgn;import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Set;import org.apache.commons.compress.utils.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;public 

class DGN7Parser extends AbstractParser {    
    // Earlier revision of the prototype DGN7 parser: it writes the input to a
    // fixed file, runs the external dgndump.exe on that file and prints what
    // the tool reports. Nothing is sent to the ContentHandler or Metadata.

private static final long serialVersionUID = 7609445358323296566L;    

// The one media type this parser reports as supported.
// NOTE(review): declared as a non-static, non-final, package-private field.
Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.image("vnd.dgn; version=7"));    

// Returns SUPPORTED_TYPES; the context argument is not consulted.
@Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }    @Override
    // Copies the stream to G:/temp/Drawing.dgn, runs dgndump.exe on that file and
    // prints its stdout, the extracted strings and then its stderr to System.out.
    // The handler, metadata and context arguments are not used.
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
            throws IOException, TikaException, SAXException {
        // Hard-coded, machine-specific location; the file is overwritten on every
        // call and is not deleted afterwards.
        File file = new File("G:/temp/Drawing.dgn");
        try (OutputStream outputStream = new FileOutputStream(file)) {
            IOUtils.copy(stream, outputStream);
        }
        Runtime rt = Runtime.getRuntime();
        // Command line: the dgndump executable, "-r 10000", and the file written above.
        // NOTE(review): the meaning of "-r 10000" is not shown here; check the dgndump usage text.
        String[] commands = {"C:\\Users\\monkm\\DGN\\dgndump.exe","-r","10000", "G:\\temp\\Drawing.dgn"};
        Process proc = rt.exec(commands);        

        // Both readers use the platform default charset and are never closed.
        BufferedReader stdInput = new BufferedReader(new 
             InputStreamReader(proc.getInputStream()));        
        BufferedReader stdError = new BufferedReader(new 
             InputStreamReader(proc.getErrorStream()));
        
        // Text taken from the dump's `  string = "..."` lines.
        ArrayList<String> ar = new ArrayList<String>();

        String s = null;
        // Drain stdout completely, echoing every line.
        // NOTE(review): stderr is not read until this loop ends, so this may block
        // if the child fills its stderr pipe first; confirm and drain both concurrently.
        while ((s = stdInput.readLine()) != null) {
            if(s.startsWith("  string = \"")) {
                // 12 is the length of the prefix; the last character is dropped on the
                // assumption that it is the closing quote.
                // NOTE(review): a line equal to the bare prefix makes substring(12, 11) throw.
                ar.add(s.substring(12, s.length()-1).trim());
            }
            System.out.println(s);
        }
            System.out.println(ar);
        // Echo whatever the tool wrote to stderr. The exit code is never checked
        // and the process is never waited on.
        while ((s = stdError.readLine()) != null) {
            System.out.println(s);
        }
    }}
  {code}
 

 

> Advice around DGN7 parser and whether to add to TIKA
> ----------------------------------------------------
>
>                 Key: TIKA-3742
>                 URL: https://issues.apache.org/jira/browse/TIKA-3742
>             Project: Tika
>          Issue Type: Task
>          Components: parser
>            Reporter: Dan Coldrick
>            Priority: Minor
>         Attachments: DGN.zip, ExampleOutput.txt
>
>
> Hi [~tallison] & Whoever else. 
> I managed to compile the C/C++ library [http://dgnlib.maptools.org/] for DGN7, which produces a dgndump.exe that will dump all the data from the DGN. From my initial testing it looks pretty good. 
> Do you think it would be worth adding this to Tika, or should I just keep it as a custom parser outside the main source code? It's under the MIT license. I've attached the exe (zipped), a copy of the output from the dump, and my very dirty test code that calls the exe (I was only interested in the strings, so at the moment it only pulls those into a string array to check that the correct data is being extracted).



--
This message was sent by Atlassian Jira
(v8.20.7#820007)