You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/11/19 12:53:21 UTC

[tika] branch main updated: Simplify some call of String operation (#383)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 7ed64a5  Simplify some call of String operation (#383)
7ed64a5 is described below

commit 7ed64a5bd8e564bbe8fce0948a71c1db249fa2fb
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Thu Nov 19 20:53:12 2020 +0800

    Simplify some call of String operation (#383)
    
    * Simplify some calls to StringBuilder#append
    
    * Simplify some calls to String#substring
    
    * Remove unnecessary calls to #toString
---
 tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java   | 2 +-
 .../src/main/java/org/apache/tika/sax/DIFContentHandler.java      | 2 +-
 .../apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java  | 4 ++--
 .../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java   | 2 +-
 .../org/apache/tika/parser/microsoft/AbstractListManager.java     | 2 +-
 .../org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java  | 2 +-
 .../main/java/org/apache/tika/parser/microsoft/ListManager.java   | 2 +-
 .../parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java | 2 +-
 .../main/java/org/apache/tika/parser/dif/DIFContentHandler.java   | 2 +-
 .../src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java | 2 +-
 .../java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java  | 2 +-
 .../apache/tika/parser/geoinfo/GeographicInformationParser.java   | 8 ++++----
 12 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java b/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java
index e63fea5..e352e35 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java
@@ -46,7 +46,7 @@ public class CleanPhoneText {
             if (start > 0) {
                 prefix = text.substring(start-1, start);
             }
-            if (digits.substring(0, 2).equals("82") && prefix.equals("*")) {
+            if (digits.startsWith("82") && prefix.equals("*")) {
                 // this number overlaps with a *82 sequence
                 idx += 2;
             } else {
diff --git a/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java
index df29e5a..7881dd6 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java
@@ -53,7 +53,7 @@ public class DIFContentHandler extends DefaultHandler {
 	@Override
 	public void characters(char[] ch, int start, int length)
 			throws SAXException {
-		String value = (new String(ch, start, length)).toString();
+		String value = new String(ch, start, length);
 		this.dataStack.push(value);
 
 		if (this.treeStack.peek().equals("Entry_Title")) {
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
index 8a314d8..781760d 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
@@ -171,7 +171,7 @@ public class ProbabilisticMimeDetectionTestWithTika {
         try {
             Metadata metadata = new Metadata();
             // String mime = this.proDetector.detect(in, metadata).toString();
-            String mime = tika.detect(in, metadata).toString();
+            String mime = tika.detect(in, metadata);
             assertEquals(
                     urlOrFileName + " is not properly detected: detected.",
                     expected, mime);
@@ -179,7 +179,7 @@ public class ProbabilisticMimeDetectionTestWithTika {
             // Add resource name and test again
             metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, urlOrFileName);
             // mime = this.proDetector.detect(in, metadata).toString();
-            mime = tika.detect(in, metadata).toString();
+            mime = tika.detect(in, metadata);
             assertEquals(urlOrFileName
                     + " is not properly detected after adding resource name.",
                     expected, mime);
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index ae8e924..f9bceff 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -150,7 +150,7 @@ public class TesseractOCRParserTest extends TikaTest {
         String contents = runOCR(resource, nonOCRContains, numMetadatas,
                 BasicContentHandlerFactory.HANDLER_TYPE.TEXT, TesseractOCRConfig.OUTPUT_TYPE.TXT);
         if (canRun()) {
-        	if(resource.substring(resource.lastIndexOf('.'), resource.length()).equals(".jpg")) {
+            if(resource.substring(resource.lastIndexOf('.')).equals(".jpg")) {
         		assertContains("Apache", contents);
         	} else {
         		assertContains("Happy New Year 2003!", contents);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java
index 2c02dfc..26dc0a0 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java
@@ -112,7 +112,7 @@ public abstract class AbstractListManager {
             Matcher m = LEVEL_INTERPOLATOR.matcher(lvlText);
             int last = 0;
             while (m.find()) {
-                sb.append(lvlText.substring(last, m.start()));
+                sb.append(lvlText, last, m.start());
                 String lvlString = m.group(1);
                 int lvlNum = -1;
                 try {
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index 2b8a603..86bf8f1 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -238,7 +238,7 @@ abstract class AbstractPOIFSExtractor {
                     }
 
                     // Record what we can do about it
-                    metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
+                    metadata.set(Metadata.CONTENT_TYPE, mediaType.getType());
                     metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, rName + extension);
                 } catch (Exception e) {
                     EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
index 6c5a5f1..a19adc6 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
@@ -150,7 +150,7 @@ public class ListManager extends AbstractListManager {
                 //silently stop
                 break;
             }
-            sb.append(numberText.substring(last, offset - 1));
+            sb.append(numberText, last, offset - 1);
             //need to add one because newer format
             //adds one.  In .doc, this was the array index;
             //but in .docx, this is the level number
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
index 0629610..f387999 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
@@ -371,7 +371,7 @@ public class SXSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
                     xhtml.startElement("b");
                     boolean authorExists = false;
                     if (authorString != null) {
-                        xhtml.characters(authorString.toString());
+                        xhtml.characters(authorString);
                         authorExists = true;
                     }
                     if (authorExists && authorInitials != null) {
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java b/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java
index cc11316..c30f05f 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java
+++ b/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java
@@ -53,7 +53,7 @@ public class DIFContentHandler extends DefaultHandler {
 	@Override
 	public void characters(char[] ch, int start, int length)
 			throws SAXException {
-		String value = (new String(ch, start, length)).toString();
+		String value = new String(ch, start, length);
 		this.dataStack.push(value);
 
 		if (this.treeStack.peek().equals("Entry_Title")) {
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java
index 82fd39b..7deb427 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java
+++ b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java
@@ -42,7 +42,7 @@ public class HwpV5ParserTest extends MultiThreadedTikaTest {
             assertEquals("Apache Tika", metadata.get(TikaCoreProperties.TITLE));
             assertEquals("SooMyung Lee", metadata.get(TikaCoreProperties.CREATOR));
 
-            assertContains("Apache Tika", result.xml.toString());
+            assertContains("Apache Tika", result.xml);
         }
     }
 
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java
index 3a0774c..10c78e2 100644
--- a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java
@@ -85,7 +85,7 @@ public class RegexNERecogniser implements NERecogniser {
                     continue;
                 }
                 String type = line.substring(0, delim).trim();
-                String patternStr = line.substring(delim+1, line.length()).trim();
+                String patternStr = line.substring(delim+1).trim();
                 patterns.put(type, Pattern.compile(patternStr));
                 entityTypes.add(type);
             }
diff --git a/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java b/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
index 0d46ac7..d4b2a10 100644
--- a/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
+++ b/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
@@ -147,7 +147,7 @@ public class GeographicInformationParser extends AbstractParser{
                 xhtmlContentHandler.startElement("h3");
                 xhtmlContentHandler.newline();
                 xhtmlContentHandler.characters("CitedResponsiblePartyRole " + r.getRole().toString());
-                xhtmlContentHandler.characters("CitedResponsiblePartyName " + r.getIndividualName().toString());
+                xhtmlContentHandler.characters("CitedResponsiblePartyName " + r.getIndividualName());
                 xhtmlContentHandler.endElement("h3");
                 xhtmlContentHandler.newline();
             }
@@ -240,7 +240,7 @@ public class GeographicInformationParser extends AbstractParser{
                 if(r.getRole()!=null)
                     metadata.add("CitedResponsiblePartyRole ",r.getRole().toString());
                 if(r.getIndividualName()!=null)
-                    metadata.add("CitedResponsiblePartyName ",r.getIndividualName().toString());
+                    metadata.add("CitedResponsiblePartyName ", r.getIndividualName());
                 if(r.getOrganisationName()!=null)
                     metadata.add("CitedResponsiblePartyOrganizationName ", r.getOrganisationName().toString());
                 if(r.getPositionName()!=null)
@@ -248,7 +248,7 @@ public class GeographicInformationParser extends AbstractParser{
 
                 if(r.getContactInfo()!=null){
                     for(String s:r.getContactInfo().getAddress().getElectronicMailAddresses()) {
-                        metadata.add("CitedResponsiblePartyEMail ",s.toString());
+                        metadata.add("CitedResponsiblePartyEMail ", s);
                     }
                 }
             }
@@ -314,7 +314,7 @@ public class GeographicInformationParser extends AbstractParser{
 
                     if(g instanceof DefaultGeographicDescription){
                         if(((DefaultGeographicDescription) g).getGeographicIdentifier()!=null && ((DefaultGeographicDescription) g).getGeographicIdentifier().getCode()!=null )
-                            metadata.add("GeographicIdentifierCode ",((DefaultGeographicDescription) g).getGeographicIdentifier().getCode().toString());
+                            metadata.add("GeographicIdentifierCode ", ((DefaultGeographicDescription) g).getGeographicIdentifier().getCode());
                         if(((DefaultGeographicDescription) g).getGeographicIdentifier()!=null && ((DefaultGeographicDescription) g).getGeographicIdentifier().getAuthority()!=null && ((DefaultGeographicDescription) g).getGeographicIdentifier().getAuthority().getTitle()!=null )
                         metadata.add("GeographicIdentifierAuthorityTitle ",((DefaultGeographicDescription) g).getGeographicIdentifier().getAuthority().getTitle().toString());