You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/11/19 12:53:21 UTC
[tika] branch main updated: Simplify some call of String operation (#383)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 7ed64a5 Simplify some call of String operation (#383)
7ed64a5 is described below
commit 7ed64a5bd8e564bbe8fce0948a71c1db249fa2fb
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Thu Nov 19 20:53:12 2020 +0800
Simplify some call of String operation (#383)
* Simplify some call of StringBuilder#append
* Simplify some call of String#substring
* Remove unnecessary call of #toString
---
tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java | 2 +-
.../src/main/java/org/apache/tika/sax/DIFContentHandler.java | 2 +-
.../apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java | 4 ++--
.../java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java | 2 +-
.../org/apache/tika/parser/microsoft/AbstractListManager.java | 2 +-
.../org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java | 2 +-
.../main/java/org/apache/tika/parser/microsoft/ListManager.java | 2 +-
.../parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java | 2 +-
.../main/java/org/apache/tika/parser/dif/DIFContentHandler.java | 2 +-
.../src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java | 2 +-
.../java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java | 2 +-
.../apache/tika/parser/geoinfo/GeographicInformationParser.java | 8 ++++----
12 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java b/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java
index e63fea5..e352e35 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/CleanPhoneText.java
@@ -46,7 +46,7 @@ public class CleanPhoneText {
if (start > 0) {
prefix = text.substring(start-1, start);
}
- if (digits.substring(0, 2).equals("82") && prefix.equals("*")) {
+ if (digits.startsWith("82") && prefix.equals("*")) {
// this number overlaps with a *82 sequence
idx += 2;
} else {
diff --git a/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java
index df29e5a..7881dd6 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/DIFContentHandler.java
@@ -53,7 +53,7 @@ public class DIFContentHandler extends DefaultHandler {
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
- String value = (new String(ch, start, length)).toString();
+ String value = new String(ch, start, length);
this.dataStack.push(value);
if (this.treeStack.peek().equals("Entry_Title")) {
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
index 8a314d8..781760d 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
@@ -171,7 +171,7 @@ public class ProbabilisticMimeDetectionTestWithTika {
try {
Metadata metadata = new Metadata();
// String mime = this.proDetector.detect(in, metadata).toString();
- String mime = tika.detect(in, metadata).toString();
+ String mime = tika.detect(in, metadata);
assertEquals(
urlOrFileName + " is not properly detected: detected.",
expected, mime);
@@ -179,7 +179,7 @@ public class ProbabilisticMimeDetectionTestWithTika {
// Add resource name and test again
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, urlOrFileName);
// mime = this.proDetector.detect(in, metadata).toString();
- mime = tika.detect(in, metadata).toString();
+ mime = tika.detect(in, metadata);
assertEquals(urlOrFileName
+ " is not properly detected after adding resource name.",
expected, mime);
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index ae8e924..f9bceff 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -150,7 +150,7 @@ public class TesseractOCRParserTest extends TikaTest {
String contents = runOCR(resource, nonOCRContains, numMetadatas,
BasicContentHandlerFactory.HANDLER_TYPE.TEXT, TesseractOCRConfig.OUTPUT_TYPE.TXT);
if (canRun()) {
- if(resource.substring(resource.lastIndexOf('.'), resource.length()).equals(".jpg")) {
+ if(resource.substring(resource.lastIndexOf('.')).equals(".jpg")) {
assertContains("Apache", contents);
} else {
assertContains("Happy New Year 2003!", contents);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java
index 2c02dfc..26dc0a0 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractListManager.java
@@ -112,7 +112,7 @@ public abstract class AbstractListManager {
Matcher m = LEVEL_INTERPOLATOR.matcher(lvlText);
int last = 0;
while (m.find()) {
- sb.append(lvlText.substring(last, m.start()));
+ sb.append(lvlText, last, m.start());
String lvlString = m.group(1);
int lvlNum = -1;
try {
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index 2b8a603..86bf8f1 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -238,7 +238,7 @@ abstract class AbstractPOIFSExtractor {
}
// Record what we can do about it
- metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
+ metadata.set(Metadata.CONTENT_TYPE, mediaType.getType());
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, rName + extension);
} catch (Exception e) {
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
index 6c5a5f1..a19adc6 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
@@ -150,7 +150,7 @@ public class ListManager extends AbstractListManager {
//silently stop
break;
}
- sb.append(numberText.substring(last, offset - 1));
+ sb.append(numberText, last, offset - 1);
//need to add one because newer format
//adds one. In .doc, this was the array index;
//but in .docx, this is the level number
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
index 0629610..f387999 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
@@ -371,7 +371,7 @@ public class SXSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
xhtml.startElement("b");
boolean authorExists = false;
if (authorString != null) {
- xhtml.characters(authorString.toString());
+ xhtml.characters(authorString);
authorExists = true;
}
if (authorExists && authorInitials != null) {
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java b/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java
index cc11316..c30f05f 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java
+++ b/tika-parser-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/dif/DIFContentHandler.java
@@ -53,7 +53,7 @@ public class DIFContentHandler extends DefaultHandler {
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
- String value = (new String(ch, start, length)).toString();
+ String value = new String(ch, start, length);
this.dataStack.push(value);
if (this.treeStack.peek().equals("Entry_Title")) {
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java
index 82fd39b..7deb427 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java
+++ b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/hwp/HwpV5ParserTest.java
@@ -42,7 +42,7 @@ public class HwpV5ParserTest extends MultiThreadedTikaTest {
assertEquals("Apache Tika", metadata.get(TikaCoreProperties.TITLE));
assertEquals("SooMyung Lee", metadata.get(TikaCoreProperties.CREATOR));
- assertContains("Apache Tika", result.xml.toString());
+ assertContains("Apache Tika", result.xml);
}
}
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java
index 3a0774c..10c78e2 100644
--- a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/regex/RegexNERecogniser.java
@@ -85,7 +85,7 @@ public class RegexNERecogniser implements NERecogniser {
continue;
}
String type = line.substring(0, delim).trim();
- String patternStr = line.substring(delim+1, line.length()).trim();
+ String patternStr = line.substring(delim+1).trim();
patterns.put(type, Pattern.compile(patternStr));
entityTypes.add(type);
}
diff --git a/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java b/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
index 0d46ac7..d4b2a10 100644
--- a/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
+++ b/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
@@ -147,7 +147,7 @@ public class GeographicInformationParser extends AbstractParser{
xhtmlContentHandler.startElement("h3");
xhtmlContentHandler.newline();
xhtmlContentHandler.characters("CitedResponsiblePartyRole " + r.getRole().toString());
- xhtmlContentHandler.characters("CitedResponsiblePartyName " + r.getIndividualName().toString());
+ xhtmlContentHandler.characters("CitedResponsiblePartyName " + r.getIndividualName());
xhtmlContentHandler.endElement("h3");
xhtmlContentHandler.newline();
}
@@ -240,7 +240,7 @@ public class GeographicInformationParser extends AbstractParser{
if(r.getRole()!=null)
metadata.add("CitedResponsiblePartyRole ",r.getRole().toString());
if(r.getIndividualName()!=null)
- metadata.add("CitedResponsiblePartyName ",r.getIndividualName().toString());
+ metadata.add("CitedResponsiblePartyName ", r.getIndividualName());
if(r.getOrganisationName()!=null)
metadata.add("CitedResponsiblePartyOrganizationName ", r.getOrganisationName().toString());
if(r.getPositionName()!=null)
@@ -248,7 +248,7 @@ public class GeographicInformationParser extends AbstractParser{
if(r.getContactInfo()!=null){
for(String s:r.getContactInfo().getAddress().getElectronicMailAddresses()) {
- metadata.add("CitedResponsiblePartyEMail ",s.toString());
+ metadata.add("CitedResponsiblePartyEMail ", s);
}
}
}
@@ -314,7 +314,7 @@ public class GeographicInformationParser extends AbstractParser{
if(g instanceof DefaultGeographicDescription){
if(((DefaultGeographicDescription) g).getGeographicIdentifier()!=null && ((DefaultGeographicDescription) g).getGeographicIdentifier().getCode()!=null )
- metadata.add("GeographicIdentifierCode ",((DefaultGeographicDescription) g).getGeographicIdentifier().getCode().toString());
+ metadata.add("GeographicIdentifierCode ", ((DefaultGeographicDescription) g).getGeographicIdentifier().getCode());
if(((DefaultGeographicDescription) g).getGeographicIdentifier()!=null && ((DefaultGeographicDescription) g).getGeographicIdentifier().getAuthority()!=null && ((DefaultGeographicDescription) g).getGeographicIdentifier().getAuthority().getTitle()!=null )
metadata.add("GeographicIdentifierAuthorityTitle ",((DefaultGeographicDescription) g).getGeographicIdentifier().getAuthority().getTitle().toString());