You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:42 UTC

[36/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/module/advanced/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/module/advanced/internal/Activator.java b/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/module/advanced/internal/Activator.java
index cc22347..1695859 100644
--- a/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/module/advanced/internal/Activator.java
+++ b/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/module/advanced/internal/Activator.java
@@ -1,36 +1,36 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.module.advanced.internal;
-
-import org.apache.tika.osgi.TikaAbstractBundleActivator;
-import org.osgi.framework.BundleContext;
-
-public class Activator extends TikaAbstractBundleActivator {
-
-    @Override
-    public void start(BundleContext context) throws Exception {
-
-        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
-
-    }
-
-    @Override
-    public void stop(BundleContext context) throws Exception {
-
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.advanced.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator {
+
+    @Override
+    public void start(BundleContext context) throws Exception {
+
+        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+    }
+
+    @Override
+    public void stop(BundleContext context) throws Exception {
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-cad-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/pom.xml b/tika-parser-modules/tika-parser-cad-module/pom.xml
index 6e7efb6..a9f8f31 100644
--- a/tika-parser-modules/tika-parser-cad-module/pom.xml
+++ b/tika-parser-modules/tika-parser-cad-module/pom.xml
@@ -1,56 +1,56 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
-  license agreements. See the NOTICE file distributed with this work for additional 
-  information regarding copyright ownership. The ASF licenses this file to 
-  you under the Apache License, Version 2.0 (the "License"); you may not use 
-  this file except in compliance with the License. You may obtain a copy of 
-  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
-  by applicable law or agreed to in writing, software distributed under the 
-  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
-  OF ANY KIND, either express or implied. See the License for the specific 
-  language governing permissions and limitations under the License. -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.tika</groupId>
-    <artifactId>tika-parser-modules</artifactId>
-    <version>2.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>tika-parser-cad-module</artifactId>
-  <name>Apache Tika parser CAD module</name>
-  <url>http://tika.apache.org/</url>
-  
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    
-    <dependency>
-      <groupId>commons-lang</groupId>
-      <artifactId>commons-lang</artifactId>
-      <version>2.6</version>
-    </dependency>
-
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-text-module</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-  
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  you under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-parser-modules</artifactId>
+    <version>2.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>tika-parser-cad-module</artifactId>
+  <name>Apache Tika parser CAD module</name>
+  <url>http://tika.apache.org/</url>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+      <version>2.6</version>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-text-module</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/module/cad/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/module/cad/internal/Activator.java b/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/module/cad/internal/Activator.java
index 4a23b73..29a099c 100644
--- a/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/module/cad/internal/Activator.java
+++ b/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/module/cad/internal/Activator.java
@@ -1,36 +1,36 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.module.cad.internal;
-
-import org.apache.tika.osgi.TikaAbstractBundleActivator;
-import org.osgi.framework.BundleContext;
-
-public class Activator extends TikaAbstractBundleActivator {
-
-    @Override
-    public void start(BundleContext context) throws Exception {
-
-        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
-
-    }
-
-    @Override
-    public void stop(BundleContext context) throws Exception {
-
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.cad.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator {
+
+    @Override
+    public void start(BundleContext context) throws Exception {
+
+        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+    }
+
+    @Override
+    public void stop(BundleContext context) throws Exception {
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGParser.java b/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
index 3f29c1f..875c4ee 100644
--- a/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
+++ b/tika-parser-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
@@ -1,356 +1,356 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.dwg;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.io.StringUtil;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.EndianUtils;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Property;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * DWG (CAD Drawing) parser. This is a very basic parser, which just
- *  looks for bits of the headers.
- * Note that we use Apache POI for various parts of the processing, as
- *  lots of the low level string/int/short concepts are the same.
- */
-public class DWGParser extends AbstractParser {
-
-    /** Serial version UID */
-    private static final long serialVersionUID = -7744232583079169119L;
-
-    private static MediaType TYPE = MediaType.image("vnd.dwg");
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return Collections.singleton(TYPE);
-    }
-
-    /** The order of the fields in the header */
-    private static final Property[] HEADER_PROPERTIES_ENTRIES = {
-        TikaCoreProperties.TITLE, 
-        TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION,
-        TikaCoreProperties.CREATOR,
-        TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT,
-        TikaCoreProperties.COMMENTS,
-        TikaCoreProperties.MODIFIER,
-        null, // Unknown?
-        TikaCoreProperties.RELATION, // Hyperlink
-    };
-
-    /** For the 2000 file, they're indexed */
-    private static final Property[] HEADER_2000_PROPERTIES_ENTRIES = {
-       null, 
-       TikaCoreProperties.RELATION, // 0x01
-       TikaCoreProperties.TITLE,    // 0x02
-       TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION,  // 0x03
-       TikaCoreProperties.CREATOR,   // 0x04
-       null,
-       TikaCoreProperties.COMMENTS,// 0x06 
-       TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT,    // 0x07
-       TikaCoreProperties.MODIFIER, // 0x08
-   };
-
-    private static final String HEADER_2000_PROPERTIES_MARKER_STR =
-            "DWGPROPS COOKIE";
-
-    private static final byte[] HEADER_2000_PROPERTIES_MARKER =
-            new byte[HEADER_2000_PROPERTIES_MARKER_STR.length()];
-
-    static {
-        StringUtil.putCompressedUnicode(
-                HEADER_2000_PROPERTIES_MARKER_STR,
-                HEADER_2000_PROPERTIES_MARKER, 0);
-    }
-
-    /** 
-     * How far to skip after the last standard property, before
-     *  we find any custom properties that might be there.
-     */
-    private static final int CUSTOM_PROPERTIES_SKIP = 20;
-    
-    /** 
-     * The value of padding bytes other than 0 in some DWG files.
-     */
-    private static final int[] CUSTOM_PROPERTIES_ALT_PADDING_VALUES = new int[] {0x2, 0, 0, 0};
-
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, TikaException, SAXException {
-        // First up, which version of the format are we handling?
-        byte[] header = new byte[128];
-        IOUtils.readFully(stream, header);
-        String version = new String(header, 0, 6, "US-ASCII");
-
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-
-        if (version.equals("AC1015")) {
-            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
-            if (skipTo2000PropertyInfoSection(stream, header)) {
-                get2000Props(stream,metadata,xhtml);
-            }
-        } else if (version.equals("AC1018")) {
-            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
-            if (skipToPropertyInfoSection(stream, header)) {
-                get2004Props(stream,metadata,xhtml);
-            }
-        } else if (version.equals("AC1021") || version.equals("AC1024")) {
-            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
-            if (skipToPropertyInfoSection(stream, header)) {
-                get2007and2010Props(stream,metadata,xhtml);
-            }
-        } else {
-            throw new TikaException(
-                    "Unsupported AutoCAD drawing version: " + version);
-        }
-
-        xhtml.endDocument();
-    }
-
-    /**
-     * Stored as US-ASCII
-     */
-    private void get2004Props(
-            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
-            throws IOException, TikaException, SAXException {
-       // Standard properties
-        for (int i = 0; i < HEADER_PROPERTIES_ENTRIES.length; i++) {
-            String headerValue = read2004String(stream);
-            handleHeader(i, headerValue, metadata, xhtml);
-        }
-
-        // Custom properties
-        int customCount = skipToCustomProperties(stream);
-        for (int i = 0; i < customCount; i++) {
-           String propName = read2004String(stream);
-           String propValue = read2004String(stream);
-           if(propName.length() > 0 && propValue.length() > 0) {
-              metadata.add(propName, propValue);
-           }
-        }
-    }
-
-    private String read2004String(InputStream stream) throws IOException, TikaException {
-       int stringLen = EndianUtils.readUShortLE(stream);
-
-       byte[] stringData = new byte[stringLen];
-       IOUtils.readFully(stream, stringData);
-
-       // Often but not always null terminated
-       if (stringData[stringLen-1] == 0) {
-           stringLen--;
-       }
-       String value = StringUtil.getFromCompressedUnicode(stringData, 0, stringLen);
-       return value;
-    }
-
-    /**
-     * Stored as UCS2, so 16 bit "unicode"
-     */
-    private void get2007and2010Props(
-            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
-            throws IOException, TikaException, SAXException {
-        // Standard properties
-        for (int i = 0; i < HEADER_PROPERTIES_ENTRIES.length; i++) {
-            String headerValue = read2007and2010String(stream);
-            handleHeader(i, headerValue, metadata, xhtml);
-        }
-
-        // Custom properties
-        int customCount = skipToCustomProperties(stream);
-        for (int i = 0; i < customCount; i++) {
-           String propName = read2007and2010String(stream);
-           String propValue = read2007and2010String(stream);
-           if(propName.length() > 0 && propValue.length() > 0) {
-              metadata.add(propName, propValue);
-           }
-        }
-    }
-
-    private String read2007and2010String(InputStream stream) throws IOException, TikaException {
-       int stringLen = EndianUtils.readUShortLE(stream);
-
-       byte[] stringData = new byte[stringLen * 2];
-       IOUtils.readFully(stream, stringData);
-       String value = StringUtil.getFromUnicodeLE(stringData);
-
-       // Some strings are null terminated
-       if(value.charAt(value.length()-1) == 0) {
-           value = value.substring(0, value.length()-1);
-       }
-
-       return value;
-    }
-
-    private void get2000Props(
-            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
-            throws IOException, TikaException, SAXException {
-        int propCount = 0;
-        while(propCount < 30) {
-            int propIdx = EndianUtils.readUShortLE(stream);
-            int length = EndianUtils.readUShortLE(stream);
-            int valueType = stream.read();
-            
-            if(propIdx == 0x28) {
-               // This one seems not to follow the pattern
-               length = 0x19;
-            } else if(propIdx == 90) {
-               // We think this means the end of properties
-               break;
-            }
-
-            byte[] value = new byte[length];
-            IOUtils.readFully(stream, value);
-            if(valueType == 0x1e) {
-                // Normal string, good
-                String val = StringUtil.getFromCompressedUnicode(value, 0, length);
-                
-                // Is it one we can look up by index?
-                if(propIdx < HEADER_2000_PROPERTIES_ENTRIES.length) {
-                   metadata.add(HEADER_2000_PROPERTIES_ENTRIES[propIdx], val);
-                   xhtml.element("p", val);
-                } else if(propIdx == 0x012c) {
-                   int splitAt = val.indexOf('='); 
-                   if(splitAt > -1) {
-                      String propName = val.substring(0, splitAt);
-                      String propVal = val.substring(splitAt+1);
-                      metadata.add(propName, propVal);
-                   }
-                }
-            } else {
-                // No idea...
-            }
-            
-            propCount++;
-        }
-    }
-
-    private void handleHeader(
-            int headerNumber, String value, Metadata metadata,
-            XHTMLContentHandler xhtml) throws SAXException {
-        if(value == null || value.length() == 0) {
-            return;
-        }
-
-        Property headerProp = HEADER_PROPERTIES_ENTRIES[headerNumber];
-        if(headerProp != null) {
-            metadata.set(headerProp, value);
-        }
-
-        xhtml.element("p", value);
-    }
-
-    /**
-     * Grab the offset, then skip there
-     */
-    private boolean skipToPropertyInfoSection(InputStream stream, byte[] header)
-            throws IOException, TikaException {
-        // The offset is stored in the header from 0x20 onwards
-        long offsetToSection = EndianUtils.getLongLE(header, 0x20);
-        
-        // Sanity check the offset. Some files seem to use a different format,
-        //  and the offset isn't available at 0x20. Until we can work out how
-        //  to find the offset in those files, skip them if detected
-        if (offsetToSection > 0xa00000l) {
-           // Header should never be more than 10mb into the file, something is wrong
-           offsetToSection = 0;
-        }
-        
-        // Work out how far to skip, and sanity check
-        long toSkip = offsetToSection - header.length;
-        if(offsetToSection == 0){
-            return false;
-        }        
-        while (toSkip > 0) {
-            byte[] skip = new byte[Math.min((int) toSkip, 0x4000)];
-            IOUtils.readFully(stream, skip);
-            toSkip -= skip.length;
-        }
-        return true;
-    }
-
-    /**
-     * We think it can be anywhere...
-     */
-    private boolean skipTo2000PropertyInfoSection(InputStream stream, byte[] header)
-            throws IOException {
-       int val = 0;
-       while(val != -1) {
-          val = stream.read();
-          if(val == HEADER_2000_PROPERTIES_MARKER[0]) {
-             boolean going = true;
-             for(int i=1; i<HEADER_2000_PROPERTIES_MARKER.length && going; i++) {
-                val = stream.read();
-                if(val != HEADER_2000_PROPERTIES_MARKER[i]) going = false;
-             }
-             if(going) {
-                // Bingo, found it
-                return true;
-             }
-          }
-       }
-       return false;
-    }
-
-    private int skipToCustomProperties(InputStream stream) 
-            throws IOException, TikaException {
-       // There should be 4 zero bytes or CUSTOM_PROPERTIES_ALT_PADDING_VALUES next
-       byte[] padding = new byte[4];
-       IOUtils.readFully(stream, padding);
-       if((padding[0] == 0 && padding[1] == 0 &&
-             padding[2] == 0 && padding[3] == 0) ||
-             (padding[0] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[0] && 
-               padding[1] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[1] &&
-               padding[2] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[2] &&
-               padding[3] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[3])) {
-           
-          // Looks hopeful, skip on
-          padding = new byte[CUSTOM_PROPERTIES_SKIP];
-          IOUtils.readFully(stream, padding);
-          
-          // We should now have the count
-          int count = EndianUtils.readUShortLE(stream);
-          
-          // Sanity check it
-          if(count > 0 && count < 0x7f) {
-             // Looks plausible
-             return count;
-          } else {
-             // No properties / count is too high to trust
-             return 0;
-          }
-       } else {
-          // No padding. That probably means no custom props
-          return 0;
-       }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.dwg;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.io.StringUtil;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.EndianUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * DWG (CAD Drawing) parser. This is a very basic parser, which just
+ *  looks for bits of the headers.
+ * Note that we use Apache POI for various parts of the processing, as
+ *  lots of the low level string/int/short concepts are the same.
+ */
+public class DWGParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -7744232583079169119L;
+
+    private static MediaType TYPE = MediaType.image("vnd.dwg");
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return Collections.singleton(TYPE);
+    }
+
+    /** The order of the fields in the header */
+    private static final Property[] HEADER_PROPERTIES_ENTRIES = {
+        TikaCoreProperties.TITLE, 
+        TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION,
+        TikaCoreProperties.CREATOR,
+        TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT,
+        TikaCoreProperties.COMMENTS,
+        TikaCoreProperties.MODIFIER,
+        null, // Unknown?
+        TikaCoreProperties.RELATION, // Hyperlink
+    };
+
+    /** For the 2000 file, they're indexed */
+    private static final Property[] HEADER_2000_PROPERTIES_ENTRIES = {
+       null, 
+       TikaCoreProperties.RELATION, // 0x01
+       TikaCoreProperties.TITLE,    // 0x02
+       TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION,  // 0x03
+       TikaCoreProperties.CREATOR,   // 0x04
+       null,
+       TikaCoreProperties.COMMENTS,// 0x06 
+       TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT,    // 0x07
+       TikaCoreProperties.MODIFIER, // 0x08
+   };
+
+    private static final String HEADER_2000_PROPERTIES_MARKER_STR =
+            "DWGPROPS COOKIE";
+
+    private static final byte[] HEADER_2000_PROPERTIES_MARKER =
+            new byte[HEADER_2000_PROPERTIES_MARKER_STR.length()];
+
+    static {
+        StringUtil.putCompressedUnicode(
+                HEADER_2000_PROPERTIES_MARKER_STR,
+                HEADER_2000_PROPERTIES_MARKER, 0);
+    }
+
+    /** 
+     * How far to skip after the last standard property, before
+     *  we find any custom properties that might be there.
+     */
+    private static final int CUSTOM_PROPERTIES_SKIP = 20;
+    
+    /** 
+     * The value of padding bytes other than 0 in some DWG files.
+     */
+    private static final int[] CUSTOM_PROPERTIES_ALT_PADDING_VALUES = new int[] {0x2, 0, 0, 0};
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, TikaException, SAXException {
+        // First up, which version of the format are we handling?
+        byte[] header = new byte[128];
+        IOUtils.readFully(stream, header);
+        String version = new String(header, 0, 6, "US-ASCII");
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        if (version.equals("AC1015")) {
+            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
+            if (skipTo2000PropertyInfoSection(stream, header)) {
+                get2000Props(stream,metadata,xhtml);
+            }
+        } else if (version.equals("AC1018")) {
+            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
+            if (skipToPropertyInfoSection(stream, header)) {
+                get2004Props(stream,metadata,xhtml);
+            }
+        } else if (version.equals("AC1021") || version.equals("AC1024")) {
+            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
+            if (skipToPropertyInfoSection(stream, header)) {
+                get2007and2010Props(stream,metadata,xhtml);
+            }
+        } else {
+            throw new TikaException(
+                    "Unsupported AutoCAD drawing version: " + version);
+        }
+
+        xhtml.endDocument();
+    }
+
+    /**
+     * Reads the property section of the AC1018 layout, where strings are
+     * stored as US-ASCII: one string per entry of
+     * HEADER_PROPERTIES_ENTRIES, followed by the custom name/value pairs.
+     * Custom pairs with an empty name or an empty value are dropped.
+     */
+    private void get2004Props(
+            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
+            throws IOException, TikaException, SAXException {
+        // Standard properties, one string per known header slot
+        int headerIdx = 0;
+        while (headerIdx < HEADER_PROPERTIES_ENTRIES.length) {
+            handleHeader(headerIdx, read2004String(stream), metadata, xhtml);
+            headerIdx++;
+        }
+
+        // Custom properties, stored as alternating name and value strings
+        int remaining = skipToCustomProperties(stream);
+        while (remaining-- > 0) {
+            String propName = read2004String(stream);
+            String propValue = read2004String(stream);
+            if (propName.isEmpty() || propValue.isEmpty()) {
+                continue;
+            }
+            metadata.add(propName, propValue);
+        }
+    }
+
+    /**
+     * Reads one length-prefixed string: an unsigned little-endian 16 bit
+     * byte count followed by that many bytes, decoded with
+     * StringUtil.getFromCompressedUnicode. A single trailing zero byte,
+     * if present, is not included in the result.
+     *
+     * @param stream source positioned at the length prefix
+     * @return the decoded string; empty when the stored length is zero
+     */
+    private String read2004String(InputStream stream) throws IOException, TikaException {
+        int stringLen = EndianUtils.readUShortLE(stream);
+        if (stringLen == 0) {
+            // No payload at all: checking the "last byte" below would index -1
+            return "";
+        }
+
+        byte[] stringData = new byte[stringLen];
+        IOUtils.readFully(stream, stringData);
+
+        // Often but not always null terminated
+        if (stringData[stringLen - 1] == 0) {
+            stringLen--;
+        }
+        return StringUtil.getFromCompressedUnicode(stringData, 0, stringLen);
+    }
+
+    /**
+     * Reads the property section of the AC1021 / AC1024 layout, where
+     * strings are stored as UCS2 (16 bit "unicode"): one string per entry
+     * of HEADER_PROPERTIES_ENTRIES, followed by the custom name/value
+     * pairs. Custom pairs with an empty name or an empty value are dropped.
+     */
+    private void get2007and2010Props(
+            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
+            throws IOException, TikaException, SAXException {
+        // Standard properties, one string per known header slot
+        int headerIdx = 0;
+        while (headerIdx < HEADER_PROPERTIES_ENTRIES.length) {
+            handleHeader(
+                    headerIdx, read2007and2010String(stream), metadata, xhtml);
+            headerIdx++;
+        }
+
+        // Custom properties, stored as alternating name and value strings
+        int remaining = skipToCustomProperties(stream);
+        while (remaining-- > 0) {
+            String propName = read2007and2010String(stream);
+            String propValue = read2007and2010String(stream);
+            if (propName.isEmpty() || propValue.isEmpty()) {
+                continue;
+            }
+            metadata.add(propName, propValue);
+        }
+    }
+
+    /**
+     * Reads one length-prefixed 16 bit string: an unsigned little-endian
+     * 16 bit character count followed by two bytes per character, decoded
+     * with StringUtil.getFromUnicodeLE. A single trailing NUL character,
+     * if present, is removed.
+     *
+     * @param stream source positioned at the length prefix
+     * @return the decoded string; empty when the stored length is zero
+     */
+    private String read2007and2010String(InputStream stream) throws IOException, TikaException {
+        int stringLen = EndianUtils.readUShortLE(stream);
+
+        byte[] stringData = new byte[stringLen * 2];
+        IOUtils.readFully(stream, stringData);
+        String value = StringUtil.getFromUnicodeLE(stringData);
+
+        // Some strings are null terminated. Guard against an empty value,
+        //  for which charAt(length - 1) would be charAt(-1) and throw
+        int lastIdx = value.length() - 1;
+        if (lastIdx >= 0 && value.charAt(lastIdx) == 0) {
+            value = value.substring(0, lastIdx);
+        }
+
+        return value;
+    }
+
+    /**
+     * Reads up to 30 property records of the AC1015 layout. Each record is
+     * an unsigned little-endian 16 bit index, an unsigned little-endian
+     * 16 bit length, one type byte, and then the value bytes.
+     * <ul>
+     *  <li>index 90 ends the scan (believed to mark the end of properties)</li>
+     *  <li>index 0x28 does not follow the pattern; its length is forced to 0x19</li>
+     *  <li>only values of type 0x1e are decoded, as strings; others are
+     *      read and discarded</li>
+     * </ul>
+     * A string whose index falls inside HEADER_2000_PROPERTIES_ENTRIES is
+     * added under that entry and emitted as a paragraph. A string with
+     * index 0x012c is split at its first '=' into a custom name and value.
+     */
+    private void get2000Props(
+            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
+            throws IOException, TikaException, SAXException {
+        for (int propCount = 0; propCount < 30; propCount++) {
+            int propIdx = EndianUtils.readUShortLE(stream);
+            int length = EndianUtils.readUShortLE(stream);
+            int valueType = stream.read();
+
+            if (propIdx == 90) {
+                // We think this means the end of properties
+                break;
+            }
+            if (propIdx == 0x28) {
+                // This one seems not to follow the pattern
+                length = 0x19;
+            }
+
+            // The value is always consumed, even when it is not understood
+            byte[] value = new byte[length];
+            IOUtils.readFully(stream, value);
+            if (valueType != 0x1e) {
+                // Not a normal string, no idea what it holds
+                continue;
+            }
+
+            String val = StringUtil.getFromCompressedUnicode(value, 0, length);
+            if (propIdx < HEADER_2000_PROPERTIES_ENTRIES.length) {
+                // One we can look up by index
+                metadata.add(HEADER_2000_PROPERTIES_ENTRIES[propIdx], val);
+                xhtml.element("p", val);
+            } else if (propIdx == 0x012c) {
+                // Custom property, stored as a single name=value string
+                int splitAt = val.indexOf('=');
+                if (splitAt >= 0) {
+                    metadata.add(
+                            val.substring(0, splitAt),
+                            val.substring(splitAt + 1));
+                }
+            }
+        }
+    }
+
+    /**
+     * Records one standard header value. Null or empty values are ignored.
+     * Otherwise the value is set on the metadata under
+     * HEADER_PROPERTIES_ENTRIES[headerNumber] when that slot is non-null,
+     * and is always emitted as an XHTML paragraph.
+     *
+     * @param headerNumber index into HEADER_PROPERTIES_ENTRIES
+     * @param value        the string read for that header, may be null
+     */
+    private void handleHeader(
+            int headerNumber, String value, Metadata metadata,
+            XHTMLContentHandler xhtml) throws SAXException {
+        boolean hasValue = value != null && value.length() > 0;
+        if (hasValue) {
+            Property mapped = HEADER_PROPERTIES_ENTRIES[headerNumber];
+            if (mapped != null) {
+                metadata.set(mapped, value);
+            }
+            xhtml.element("p", value);
+        }
+    }
+
+    /**
+     * Grab the offset of the property section from the header, then skip
+     * the stream forward to it.
+     *
+     * @param stream source positioned directly after the header bytes
+     * @param header the header bytes already consumed from the stream
+     * @return true if the stream now sits at the property section, false
+     *         if the stored offset is not usable and nothing was skipped
+     */
+    private boolean skipToPropertyInfoSection(InputStream stream, byte[] header)
+            throws IOException, TikaException {
+        // The offset is stored in the header from 0x20 onwards
+        long offsetToSection = EndianUtils.getLongLE(header, 0x20);
+
+        // Sanity check the offset. Some files seem to use a different format,
+        //  and the offset isn't available at 0x20. Until we can work out how
+        //  to find the offset in those files, skip them if detected.
+        // An offset of zero, a negative one, or one pointing inside the
+        //  header we have already consumed cannot be reached by skipping
+        //  forward, so it is rejected as well
+        if (offsetToSection < header.length) {
+            return false;
+        }
+        if (offsetToSection > 0xa00000L) {
+            // Header should never be more than 10mb into the file, something is wrong
+            return false;
+        }
+
+        // Skip forward in bounded chunks
+        long toSkip = offsetToSection - header.length;
+        while (toSkip > 0) {
+            byte[] skip = new byte[(int) Math.min(toSkip, 0x4000L)];
+            IOUtils.readFully(stream, skip);
+            toSkip -= skip.length;
+        }
+        return true;
+    }
+
+    /**
+     * We think it can be anywhere...
+     */
+    private boolean skipTo2000PropertyInfoSection(InputStream stream, byte[] header)
+            throws IOException {
+       int val = 0;
+       while(val != -1) {
+          val = stream.read();
+          if(val == HEADER_2000_PROPERTIES_MARKER[0]) {
+             boolean going = true;
+             for(int i=1; i<HEADER_2000_PROPERTIES_MARKER.length && going; i++) {
+                val = stream.read();
+                if(val != HEADER_2000_PROPERTIES_MARKER[i]) going = false;
+             }
+             if(going) {
+                // Bingo, found it
+                return true;
+             }
+          }
+       }
+       return false;
+    }
+
+    /**
+     * Moves the stream to the custom properties and reports how many
+     * name/value pairs follow. Four bytes are read first; they must be
+     * either all zero or equal to CUSTOM_PROPERTIES_ALT_PADDING_VALUES.
+     * If so, CUSTOM_PROPERTIES_SKIP further bytes are skipped and an
+     * unsigned little-endian 16 bit count is read.
+     *
+     * @return the count when it is between 1 and 0x7e inclusive, otherwise
+     *         0 (no recognised padding, no properties, or a count too
+     *         high to trust)
+     */
+    private int skipToCustomProperties(InputStream stream)
+            throws IOException, TikaException {
+        // There should be 4 zero bytes or CUSTOM_PROPERTIES_ALT_PADDING_VALUES next
+        byte[] padding = new byte[4];
+        IOUtils.readFully(stream, padding);
+
+        boolean zeroPadded =
+                padding[0] == 0 && padding[1] == 0
+                && padding[2] == 0 && padding[3] == 0;
+        boolean recognised = zeroPadded
+                || (padding[0] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[0]
+                    && padding[1] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[1]
+                    && padding[2] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[2]
+                    && padding[3] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[3]);
+        if (!recognised) {
+            // No padding. That probably means no custom props
+            return 0;
+        }
+
+        // Looks hopeful, skip on
+        byte[] skipped = new byte[CUSTOM_PROPERTIES_SKIP];
+        IOUtils.readFully(stream, skipped);
+
+        // We should now have the count, sanity check it before trusting it
+        int count = EndianUtils.readUShortLE(stream);
+        boolean plausible = count > 0 && count < 0x7f;
+        return plausible ? count : 0;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-code-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-code-module/pom.xml b/tika-parser-modules/tika-parser-code-module/pom.xml
index cf59c0e..5d33f82 100644
--- a/tika-parser-modules/tika-parser-code-module/pom.xml
+++ b/tika-parser-modules/tika-parser-code-module/pom.xml
@@ -1,69 +1,69 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
-  license agreements. See the NOTICE file distributed with this work for additional 
-  information regarding copyright ownership. The ASF licenses this file to 
-  you under the Apache License, Version 2.0 (the "License"); you may not use 
-  this file except in compliance with the License. You may obtain a copy of 
-  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
-  by applicable law or agreed to in writing, software distributed under the 
-  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
-  OF ANY KIND, either express or implied. See the License for the specific 
-  language governing permissions and limitations under the License. -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.tika</groupId>
-    <artifactId>tika-parser-modules</artifactId>
-    <version>2.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>tika-parser-code-module</artifactId>
-  <name>Apache Tika parser code module</name>
-  <url>http://tika.apache.org/</url>
-  
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.ow2.asm</groupId>
-      <artifactId>asm</artifactId>
-      <version>5.0.4</version>
-    </dependency>
-    <dependency>
-      <groupId>org.codelibs</groupId>
-      <artifactId>jhighlight</artifactId>
-      <version>1.0.2</version>
-    </dependency>
-    <dependency>
-      <groupId>org.ccil.cowan.tagsoup</groupId>
-      <artifactId>tagsoup</artifactId>
-      <version>1.2.1</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>${commons.io.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-text-module</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-  
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  you under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-parser-modules</artifactId>
+    <version>2.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>tika-parser-code-module</artifactId>
+  <name>Apache Tika parser code module</name>
+  <url>http://tika.apache.org/</url>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.ow2.asm</groupId>
+      <artifactId>asm</artifactId>
+      <version>5.0.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.codelibs</groupId>
+      <artifactId>jhighlight</artifactId>
+      <version>1.0.2</version>
+    </dependency>
+    <dependency>
+      <groupId>org.ccil.cowan.tagsoup</groupId>
+      <artifactId>tagsoup</artifactId>
+      <version>1.2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons.io.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-text-module</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/module/code/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/module/code/internal/Activator.java b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/module/code/internal/Activator.java
index 040618d..095e643 100644
--- a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/module/code/internal/Activator.java
+++ b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/module/code/internal/Activator.java
@@ -1,36 +1,36 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.module.code.internal;
-
-import org.apache.tika.osgi.TikaAbstractBundleActivator;
-import org.osgi.framework.BundleContext;
-
-public class Activator extends TikaAbstractBundleActivator {
-
-    @Override
-    public void start(BundleContext context) throws Exception {
-
-        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
-
-    }
-
-    @Override
-    public void stop(BundleContext context) throws Exception {
-
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.code.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator {
+
+    @Override
+    public void start(BundleContext context) throws Exception {
+
+        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+    }
+
+    @Override
+    public void stop(BundleContext context) throws Exception {
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/ClassParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/ClassParser.java b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/ClassParser.java
index 48f8cbf..481046f 100644
--- a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/ClassParser.java
+++ b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/ClassParser.java
@@ -1,54 +1,54 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.asm;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Parser for Java .class files.
- */
-public class ClassParser extends AbstractParser {
-
-    /** Serial version UID */
-    private static final long serialVersionUID = -3531388963354454357L;
-
-    private static final Set<MediaType> SUPPORTED_TYPES =
-        Collections.singleton(MediaType.application("java-vm"));
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-        new XHTMLClassVisitor(handler, metadata).parse(stream);
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for Java .class files.
+ */
+public class ClassParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -3531388963354454357L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(MediaType.application("java-vm"));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        new XHTMLClassVisitor(handler, metadata).parse(stream);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
index 03deb43..c8ea317 100644
--- a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
+++ b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
@@ -1,323 +1,323 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.asm;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.objectweb.asm.AnnotationVisitor;
-import org.objectweb.asm.Attribute;
-import org.objectweb.asm.ClassReader;
-import org.objectweb.asm.ClassVisitor;
-import org.objectweb.asm.FieldVisitor;
-import org.objectweb.asm.MethodVisitor;
-import org.objectweb.asm.Opcodes;
-import org.objectweb.asm.Type;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Class visitor that generates XHTML SAX events to describe the
- * contents of the visited class.
- */
-class XHTMLClassVisitor extends ClassVisitor {
-
-    private final XHTMLContentHandler xhtml;
-
-    private final Metadata metadata;
-
-    private Type type;
-
-    private String packageName;
-
-    public XHTMLClassVisitor(ContentHandler handler, Metadata metadata) {
-        super(Opcodes.ASM5);
-        this.xhtml = new XHTMLContentHandler(handler, metadata);
-        this.metadata = metadata;
-    }
-
-    public void parse(InputStream stream)
-            throws TikaException, SAXException, IOException {
-        try {
-            ClassReader reader = new ClassReader(stream);
-            reader.accept(this, ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE);
-        } catch (RuntimeException e) {
-            if (e.getCause() instanceof SAXException) {
-                throw (SAXException) e.getCause();
-            } else {
-                throw new TikaException("Failed to parse a Java class", e);
-            }
-        }
-    }
-
-    public void visit(
-            int version, int access, String name, String signature,
-            String superName, String[] interfaces) {
-        type = Type.getObjectType(name);
-
-        String className = type.getClassName();
-        int dot = className.lastIndexOf('.');
-        if (dot != -1) {
-            packageName = className.substring(0, dot);
-            className = className.substring(dot + 1);
-        }
-
-        metadata.set(TikaCoreProperties.TITLE, className);
-        metadata.set(Metadata.RESOURCE_NAME_KEY, className + ".class");
-
-        try {
-            xhtml.startDocument();
-            xhtml.startElement("pre");
-
-            if (packageName != null) {
-                writeKeyword("package");
-                xhtml.characters(" " + packageName + ";\n");
-            }
-
-            writeAccess(access);
-            if (isSet(access, Opcodes.ACC_INTERFACE)) {
-                writeKeyword("interface");
-                writeSpace();
-                writeType(type);
-                writeSpace();
-                writeInterfaces("extends", interfaces);
-            } else if (isSet(access, Opcodes.ACC_ENUM)) {
-                writeKeyword("enum");
-                writeSpace();
-                writeType(type);
-                writeSpace();
-            } else {
-                writeKeyword("class");
-                writeSpace();
-                writeType(type);
-                writeSpace();
-                if (superName != null) {
-                    Type superType = Type.getObjectType(superName);
-                    if (!superType.getClassName().equals("java.lang.Object")) {
-                        writeKeyword("extends");
-                        writeSpace();
-                        writeType(superType);
-                        writeSpace();
-                    }
-                }
-                writeInterfaces("implements", interfaces);
-            }
-            xhtml.characters("{\n");
-        } catch (SAXException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    private void writeInterfaces(String keyword, String[] interfaces)
-            throws SAXException {
-        if (interfaces != null && interfaces.length > 0) {
-            writeKeyword(keyword);
-            String separator = " ";
-            for (String iface : interfaces) {
-                xhtml.characters(separator);
-                writeType(Type.getObjectType(iface));
-                separator = ", ";
-            }
-            writeSpace();
-        }
-    }
-
-    public void visitEnd() {
-        try {
-            xhtml.characters("}\n");
-            xhtml.endElement("pre");
-            xhtml.endDocument();
-        } catch (SAXException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    /**
-     * Ignored.
-     */
-    public void visitOuterClass(String owner, String name, String desc) {
-    }
-
-    /**
-     * Ignored.
-     */
-    public void visitSource(String source, String debug) {
-    }
-
-
-    /**
-     * Ignored.
-     */
-    public AnnotationVisitor visitAnnotation(String desc, boolean visible) {
-        return null;
-    }
-
-    /**
-     * Ignored.
-     */
-    public void visitAttribute(Attribute attr) {
-    }
-
-    /**
-     * Ignored.
-     */
-    public void visitInnerClass(
-            String name, String outerName, String innerName, int access) {
-    }
-
-    /**
-     * Visits a field.
-     */
-    public FieldVisitor visitField(
-            int access, String name, String desc, String signature,
-            Object value) {
-        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
-            try {
-                xhtml.characters("    ");
-                writeAccess(access);
-                writeType(Type.getType(desc));
-                writeSpace();
-                writeIdentifier(name);
-
-                if (isSet(access, Opcodes.ACC_STATIC) && value != null) {
-                    xhtml.characters(" = ");
-                    xhtml.characters(value.toString());
-                }
-
-                writeSemicolon();
-                writeNewline();
-            } catch (SAXException e) {
-                throw new RuntimeException(e);
-            }
-        }
-
-        return null;
-    }
-
-    /**
-     * Visits a method.
-     */
-    public MethodVisitor visitMethod(
-            int access, String name, String desc, String signature,
-            String[] exceptions) {
-        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
-            try {
-                xhtml.characters("    ");
-                writeAccess(access);
-                writeType(Type.getReturnType(desc));
-                writeSpace();
-                if ("<init>".equals(name)) {
-                    writeType(type);
-                } else {
-                    writeIdentifier(name);
-                }
-
-                xhtml.characters("(");
-                String separator = "";
-                for (Type arg : Type.getArgumentTypes(desc)) {
-                    xhtml.characters(separator);
-                    writeType(arg);
-                    separator = ", ";
-                }
-                xhtml.characters(")");
-
-                if (exceptions != null && exceptions.length > 0) {
-                    writeSpace();
-                    writeKeyword("throws");
-                    separator = " ";
-                    for (String exception : exceptions) {
-                        xhtml.characters(separator);
-                        writeType(Type.getObjectType(exception));
-                        separator = ", ";
-                    }
-                }
-
-                writeSemicolon();
-                writeNewline();
-            } catch (SAXException e) {
-                throw new RuntimeException(e);
-            }
-        }
-
-        return null;
-    }
-
-    private void writeIdentifier(String identifier) throws SAXException {
-        xhtml.startElement("span", "class", "java-identifier");
-        xhtml.characters(identifier);
-        xhtml.endElement("span");
-    }
-
-    private void writeKeyword(String keyword) throws SAXException {
-        xhtml.startElement("span", "class", "java-keyword");
-        xhtml.characters(keyword);
-        xhtml.endElement("span");
-    }
-
-    private void writeSemicolon() throws SAXException {
-        xhtml.characters(";");
-    }
-
-    private void writeSpace() throws SAXException {
-        xhtml.characters(" ");
-    }
-
-    private void writeNewline() throws SAXException {
-        xhtml.characters("\n");
-    }
-
-    private void writeAccess(int access) throws SAXException {
-        writeAccess(access, Opcodes.ACC_PRIVATE, "private");
-        writeAccess(access, Opcodes.ACC_PROTECTED, "protected");
-        writeAccess(access, Opcodes.ACC_PUBLIC, "public");
-        writeAccess(access, Opcodes.ACC_STATIC, "static");
-        writeAccess(access, Opcodes.ACC_FINAL, "final");
-        writeAccess(access, Opcodes.ACC_ABSTRACT, "abstract");
-        writeAccess(access, Opcodes.ACC_SYNCHRONIZED, "synchronized");
-        writeAccess(access, Opcodes.ACC_TRANSIENT, "transient");
-        writeAccess(access, Opcodes.ACC_VOLATILE, "volatile");
-        writeAccess(access, Opcodes.ACC_NATIVE, "native");
-    }
-
-    private void writeAccess(int access, int code, String keyword)
-            throws SAXException {
-        if (isSet(access, code)) {
-            writeKeyword(keyword);
-            xhtml.characters(" ");
-        }
-    }
-
-    private void writeType(Type type) throws SAXException {
-        String name = type.getClassName();
-        if (name.startsWith(packageName + ".")) {
-            xhtml.characters(name.substring(packageName.length() + 1));
-        } else if (name.startsWith("java.lang.")) {
-            xhtml.characters(name.substring("java.lang.".length()));
-        } else {
-            xhtml.characters(name);
-        }
-    }
-
-    private static boolean isSet(int value, int flag) {
-        return (value & flag) != 0;
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.objectweb.asm.AnnotationVisitor;
+import org.objectweb.asm.Attribute;
+import org.objectweb.asm.ClassReader;
+import org.objectweb.asm.ClassVisitor;
+import org.objectweb.asm.FieldVisitor;
+import org.objectweb.asm.MethodVisitor;
+import org.objectweb.asm.Opcodes;
+import org.objectweb.asm.Type;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Class visitor that generates XHTML SAX events to describe the
+ * contents of the visited class.
+ */
+class XHTMLClassVisitor extends ClassVisitor {
+
+    private final XHTMLContentHandler xhtml;
+
+    private final Metadata metadata;
+
+    private Type type;
+
+    private String packageName;
+
+    public XHTMLClassVisitor(ContentHandler handler, Metadata metadata) {
+        super(Opcodes.ASM5);
+        this.xhtml = new XHTMLContentHandler(handler, metadata);
+        this.metadata = metadata;
+    }
+
+    public void parse(InputStream stream)
+            throws TikaException, SAXException, IOException {
+        try {
+            ClassReader reader = new ClassReader(stream);
+            reader.accept(this, ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE);
+        } catch (RuntimeException e) {
+            if (e.getCause() instanceof SAXException) {
+                throw (SAXException) e.getCause();
+            } else {
+                throw new TikaException("Failed to parse a Java class", e);
+            }
+        }
+    }
+
+    public void visit(
+            int version, int access, String name, String signature,
+            String superName, String[] interfaces) {
+        type = Type.getObjectType(name);
+
+        String className = type.getClassName();
+        int dot = className.lastIndexOf('.');
+        if (dot != -1) {
+            packageName = className.substring(0, dot);
+            className = className.substring(dot + 1);
+        }
+
+        metadata.set(TikaCoreProperties.TITLE, className);
+        metadata.set(Metadata.RESOURCE_NAME_KEY, className + ".class");
+
+        try {
+            xhtml.startDocument();
+            xhtml.startElement("pre");
+
+            if (packageName != null) {
+                writeKeyword("package");
+                xhtml.characters(" " + packageName + ";\n");
+            }
+
+            writeAccess(access);
+            if (isSet(access, Opcodes.ACC_INTERFACE)) {
+                writeKeyword("interface");
+                writeSpace();
+                writeType(type);
+                writeSpace();
+                writeInterfaces("extends", interfaces);
+            } else if (isSet(access, Opcodes.ACC_ENUM)) {
+                writeKeyword("enum");
+                writeSpace();
+                writeType(type);
+                writeSpace();
+            } else {
+                writeKeyword("class");
+                writeSpace();
+                writeType(type);
+                writeSpace();
+                if (superName != null) {
+                    Type superType = Type.getObjectType(superName);
+                    if (!superType.getClassName().equals("java.lang.Object")) {
+                        writeKeyword("extends");
+                        writeSpace();
+                        writeType(superType);
+                        writeSpace();
+                    }
+                }
+                writeInterfaces("implements", interfaces);
+            }
+            xhtml.characters("{\n");
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private void writeInterfaces(String keyword, String[] interfaces)
+            throws SAXException {
+        if (interfaces != null && interfaces.length > 0) {
+            writeKeyword(keyword);
+            String separator = " ";
+            for (String iface : interfaces) {
+                xhtml.characters(separator);
+                writeType(Type.getObjectType(iface));
+                separator = ", ";
+            }
+            writeSpace();
+        }
+    }
+
+    public void visitEnd() {
+        try {
+            xhtml.characters("}\n");
+            xhtml.endElement("pre");
+            xhtml.endDocument();
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitOuterClass(String owner, String name, String desc) {
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitSource(String source, String debug) {
+    }
+
+
+    /**
+     * Ignored.
+     */
+    public AnnotationVisitor visitAnnotation(String desc, boolean visible) {
+        return null;
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitAttribute(Attribute attr) {
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitInnerClass(
+            String name, String outerName, String innerName, int access) {
+    }
+
+    /**
+     * Visits a field.
+     */
+    public FieldVisitor visitField(
+            int access, String name, String desc, String signature,
+            Object value) {
+        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
+            try {
+                xhtml.characters("    ");
+                writeAccess(access);
+                writeType(Type.getType(desc));
+                writeSpace();
+                writeIdentifier(name);
+
+                if (isSet(access, Opcodes.ACC_STATIC) && value != null) {
+                    xhtml.characters(" = ");
+                    xhtml.characters(value.toString());
+                }
+
+                writeSemicolon();
+                writeNewline();
+            } catch (SAXException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Visits a method.
+     */
+    public MethodVisitor visitMethod(
+            int access, String name, String desc, String signature,
+            String[] exceptions) {
+        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
+            try {
+                xhtml.characters("    ");
+                writeAccess(access);
+                writeType(Type.getReturnType(desc));
+                writeSpace();
+                if ("<init>".equals(name)) {
+                    writeType(type);
+                } else {
+                    writeIdentifier(name);
+                }
+
+                xhtml.characters("(");
+                String separator = "";
+                for (Type arg : Type.getArgumentTypes(desc)) {
+                    xhtml.characters(separator);
+                    writeType(arg);
+                    separator = ", ";
+                }
+                xhtml.characters(")");
+
+                if (exceptions != null && exceptions.length > 0) {
+                    writeSpace();
+                    writeKeyword("throws");
+                    separator = " ";
+                    for (String exception : exceptions) {
+                        xhtml.characters(separator);
+                        writeType(Type.getObjectType(exception));
+                        separator = ", ";
+                    }
+                }
+
+                writeSemicolon();
+                writeNewline();
+            } catch (SAXException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return null;
+    }
+
+    private void writeIdentifier(String identifier) throws SAXException {
+        xhtml.startElement("span", "class", "java-identifier");
+        xhtml.characters(identifier);
+        xhtml.endElement("span");
+    }
+
+    private void writeKeyword(String keyword) throws SAXException {
+        xhtml.startElement("span", "class", "java-keyword");
+        xhtml.characters(keyword);
+        xhtml.endElement("span");
+    }
+
+    private void writeSemicolon() throws SAXException {
+        xhtml.characters(";");
+    }
+
+    private void writeSpace() throws SAXException {
+        xhtml.characters(" ");
+    }
+
+    private void writeNewline() throws SAXException {
+        xhtml.characters("\n");
+    }
+
+    private void writeAccess(int access) throws SAXException {
+        writeAccess(access, Opcodes.ACC_PRIVATE, "private");
+        writeAccess(access, Opcodes.ACC_PROTECTED, "protected");
+        writeAccess(access, Opcodes.ACC_PUBLIC, "public");
+        writeAccess(access, Opcodes.ACC_STATIC, "static");
+        writeAccess(access, Opcodes.ACC_FINAL, "final");
+        writeAccess(access, Opcodes.ACC_ABSTRACT, "abstract");
+        writeAccess(access, Opcodes.ACC_SYNCHRONIZED, "synchronized");
+        writeAccess(access, Opcodes.ACC_TRANSIENT, "transient");
+        writeAccess(access, Opcodes.ACC_VOLATILE, "volatile");
+        writeAccess(access, Opcodes.ACC_NATIVE, "native");
+    }
+
+    private void writeAccess(int access, int code, String keyword)
+            throws SAXException {
+        if (isSet(access, code)) {
+            writeKeyword(keyword);
+            xhtml.characters(" ");
+        }
+    }
+
+    private void writeType(Type type) throws SAXException {
+        String name = type.getClassName();
+        if (name.startsWith(packageName + ".")) {
+            xhtml.characters(name.substring(packageName.length() + 1));
+        } else if (name.startsWith("java.lang.")) {
+            xhtml.characters(name.substring("java.lang.".length()));
+        } else {
+            xhtml.characters(name);
+        }
+    }
+
+    private static boolean isSet(int value, int flag) {
+        return (value & flag) != 0;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
index 63e4bf6..d17bde7 100644
--- a/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
+++ b/tika-parser-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
@@ -1,142 +1,142 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.code;
-
-import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.CPP;
-import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.GROOVY;
-import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.JAVA;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.nio.charset.Charset;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.input.CloseShieldInputStream;
-import org.apache.tika.config.ServiceLoader;
-import org.apache.tika.detect.AutoDetectReader;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.ccil.cowan.tagsoup.HTMLSchema;
-import org.ccil.cowan.tagsoup.Schema;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-import com.uwyn.jhighlight.renderer.Renderer;
-import com.uwyn.jhighlight.renderer.XhtmlRendererFactory;
-/**
- * Generic Source code parser for Java, Groovy, C++.
- * Aware: This parser uses JHightlight library (https://github.com/codelibs/jhighlight) under CDDL/LGPL dual license
- *
- * @author Hong-Thai.Nguyen
- * @since 1.6
- */
-public class SourceCodeParser implements Parser {
-
-  private static final long serialVersionUID = -4543476498190054160L;
-
-  private static final Pattern authorPattern = Pattern.compile("(?im)@author (.*) *$");
-
-  private static final Map<MediaType, String> TYPES_TO_RENDERER = new HashMap<MediaType, String>() {
-    private static final long serialVersionUID = -741976157563751152L;
-    {
-      put(MediaType.text("x-c++src"), CPP);
-      put(MediaType.text("x-java-source"), JAVA);
-      put(MediaType.text("x-groovy"), GROOVY);
-    }
-  };
-
-  private static final ServiceLoader LOADER = new ServiceLoader(SourceCodeParser.class.getClassLoader());
-  
-  //Parse the HTML document
-  private static final Schema HTML_SCHEMA = new HTMLSchema();
-  
-  @Override
-  public Set<MediaType> getSupportedTypes(ParseContext context) {
-    return TYPES_TO_RENDERER.keySet();
-  }
-
-  @Override
-  public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
-      throws IOException, SAXException, TikaException {
-
-    try (AutoDetectReader reader = new AutoDetectReader(
-            new CloseShieldInputStream(stream), metadata,
-            context.get(ServiceLoader.class, LOADER))) {
-      Charset charset = reader.getCharset();
-      String mediaType = metadata.get(Metadata.CONTENT_TYPE);
-      String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
-      if (mediaType != null && name != null) {
-        MediaType type = MediaType.parse(mediaType);
-        metadata.set(Metadata.CONTENT_TYPE, type.toString());
-        metadata.set(Metadata.CONTENT_ENCODING, charset.name());
-
-        StringBuilder out = new StringBuilder();
-        String line;
-        int nbLines =  0;
-        while ((line = reader.readLine()) != null) {
-            out.append(line + System.getProperty("line.separator"));
-            String author = parserAuthor(line);
-            if (author != null) {
-              metadata.add(TikaCoreProperties.CREATOR, author);
-            }
-            nbLines ++;
-        }
-        metadata.set("LoC", String.valueOf(nbLines));
-        Renderer renderer = getRenderer(type.toString());
-
-        String codeAsHtml = renderer.highlight(name, out.toString(), charset.name(), false);
-
-        Schema schema = context.get(Schema.class, HTML_SCHEMA);
-
-        org.ccil.cowan.tagsoup.Parser parser = new org.ccil.cowan.tagsoup.Parser();
-        parser.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, schema);
-        parser.setContentHandler(handler);
-        parser.parse(new InputSource(new StringReader(codeAsHtml)));
-      }
-    }
-
-  }
-
-  private Renderer getRenderer(String mimeType) {
-    MediaType mt = MediaType.parse(mimeType);
-    String type = TYPES_TO_RENDERER.get(mt);
-    if (type == null) {
-      throw new RuntimeException("unparseable content type " + mimeType);
-    }
-    return XhtmlRendererFactory.getRenderer(type);
-  }
-
-
-  private String parserAuthor(String line) {
-    Matcher m = authorPattern.matcher(line);
-    if (m.find()) {
-      return m.group(1).trim();
-    }
-
-    return null;
-  }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.code;
+
+import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.CPP;
+import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.GROOVY;
+import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.JAVA;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.ccil.cowan.tagsoup.HTMLSchema;
+import org.ccil.cowan.tagsoup.Schema;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import com.uwyn.jhighlight.renderer.Renderer;
+import com.uwyn.jhighlight.renderer.XhtmlRendererFactory;
+/**
+ * Generic Source code parser for Java, Groovy, C++.
+ * Aware: This parser uses JHightlight library (https://github.com/codelibs/jhighlight) under CDDL/LGPL dual license
+ *
+ * @author Hong-Thai.Nguyen
+ * @since 1.6
+ */
+public class SourceCodeParser implements Parser {
+
+  private static final long serialVersionUID = -4543476498190054160L;
+
+  private static final Pattern authorPattern = Pattern.compile("(?im)@author (.*) *$");
+
+  private static final Map<MediaType, String> TYPES_TO_RENDERER = new HashMap<MediaType, String>() {
+    private static final long serialVersionUID = -741976157563751152L;
+    {
+      put(MediaType.text("x-c++src"), CPP);
+      put(MediaType.text("x-java-source"), JAVA);
+      put(MediaType.text("x-groovy"), GROOVY);
+    }
+  };
+
+  private static final ServiceLoader LOADER = new ServiceLoader(SourceCodeParser.class.getClassLoader());
+  
+  //Parse the HTML document
+  private static final Schema HTML_SCHEMA = new HTMLSchema();
+  
+  @Override
+  public Set<MediaType> getSupportedTypes(ParseContext context) {
+    return TYPES_TO_RENDERER.keySet();
+  }
+
+  @Override
+  public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+      throws IOException, SAXException, TikaException {
+
+    try (AutoDetectReader reader = new AutoDetectReader(
+            new CloseShieldInputStream(stream), metadata,
+            context.get(ServiceLoader.class, LOADER))) {
+      Charset charset = reader.getCharset();
+      String mediaType = metadata.get(Metadata.CONTENT_TYPE);
+      String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+      if (mediaType != null && name != null) {
+        MediaType type = MediaType.parse(mediaType);
+        metadata.set(Metadata.CONTENT_TYPE, type.toString());
+        metadata.set(Metadata.CONTENT_ENCODING, charset.name());
+
+        StringBuilder out = new StringBuilder();
+        String line;
+        int nbLines =  0;
+        while ((line = reader.readLine()) != null) {
+            out.append(line + System.getProperty("line.separator"));
+            String author = parserAuthor(line);
+            if (author != null) {
+              metadata.add(TikaCoreProperties.CREATOR, author);
+            }
+            nbLines ++;
+        }
+        metadata.set("LoC", String.valueOf(nbLines));
+        Renderer renderer = getRenderer(type.toString());
+
+        String codeAsHtml = renderer.highlight(name, out.toString(), charset.name(), false);
+
+        Schema schema = context.get(Schema.class, HTML_SCHEMA);
+
+        org.ccil.cowan.tagsoup.Parser parser = new org.ccil.cowan.tagsoup.Parser();
+        parser.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, schema);
+        parser.setContentHandler(handler);
+        parser.parse(new InputSource(new StringReader(codeAsHtml)));
+      }
+    }
+
+  }
+
+  private Renderer getRenderer(String mimeType) {
+    MediaType mt = MediaType.parse(mimeType);
+    String type = TYPES_TO_RENDERER.get(mt);
+    if (type == null) {
+      throw new RuntimeException("unparseable content type " + mimeType);
+    }
+    return XhtmlRendererFactory.getRenderer(type);
+  }
+
+
+  private String parserAuthor(String line) {
+    Matcher m = authorPattern.matcher(line);
+    if (m.find()) {
+      return m.group(1).trim();
+    }
+
+    return null;
+  }
+}