You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@creadur.apache.org by se...@apache.org on 2013/05/19 04:24:19 UTC
svn commit: r1484209 - /creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java
Author: sebb
Date: Sun May 19 02:24:19 2013
New Revision: 1484209
URL: http://svn.apache.org/r1484209
Log:
RAT-138: RAT runs very slowly on some input.
Ensure the buffer grows only as large as is needed to allow a match.
This speeds up processing considerably.
Modified:
creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java
Modified: creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java
URL: http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java?rev=1484209&r1=1484208&r2=1484209&view=diff
==============================================================================
--- creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java (original)
+++ creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java Sun May 19 02:24:19 2013
@@ -38,7 +38,14 @@ import org.apache.rat.api.MetaData.Datum
public class FullTextMatchingLicense extends BaseLicense
implements IHeaderMatcher {
+ // Number of match characters assumed to be present on first line
+ private static final int DEFAULT_INITIAL_LINE_LENGTH = 20;
+
private String fullText;
+
+ private String firstLine;
+
+ private boolean seenFirstLine = false;
private final StringBuilder buffer = new StringBuilder();
@@ -54,25 +61,60 @@ public class FullTextMatchingLicense ext
}
public final void setFullText(String text) {
+ int offset = text.indexOf('\n');
+ if (offset == -1) {
+ offset = Math.min(DEFAULT_INITIAL_LINE_LENGTH, text.length());
+ }
+ firstLine = prune(text.substring(0, offset)).toLowerCase(Locale.ENGLISH);
fullText = prune(text).toLowerCase(Locale.ENGLISH);
+ init();
}
public final boolean hasFullText() {
return fullText != null;
}
- // TODO this is still quite inefficient if the match does not occur near the start of the buffer
- // see RAT-138
public boolean match(Document subject, String line) throws RatHeaderAnalysisException {
- buffer.append(prune(line).toLowerCase(Locale.ENGLISH));
- if (buffer.toString().contains(fullText)) {
- reportOnLicense(subject);
- return true;
+ final String inputToMatch = prune(line).toLowerCase(Locale.ENGLISH);
+ if (seenFirstLine) { // Accumulate more input
+ buffer.append(inputToMatch);
+ } else {
+ int offset = inputToMatch.indexOf(firstLine);
+ if (offset >= 0) {
+ // we have a match, save the text starting with the match
+ buffer.append(inputToMatch.substring(offset));
+ seenFirstLine = true;
+ // Drop out to check whether full text is matched
+ } else {
+ // we assume that the first line must appear in a single line
+ return false; // no more to do here
+ }
+ }
+
+ if (buffer.length() >= fullText.length()) { // we have enough data to match
+ if (buffer.toString().contains(fullText)) {
+ reportOnLicense(subject);
+ return true; // we found a match
+ } else { // buffer contains first line but does not contain full text
+ // It's possible that the buffer contains the first line again
+ int offset = buffer.substring(1).toString().indexOf(firstLine);
+ if (offset >= 0) { // first line found again
+ buffer.delete(0,offset); // reset buffer to the new start
+ } else { // buffer does not even contain first line, so cannot be used to match full text
+ init();
+ }
+ }
}
return false;
}
public void reset() {
+ init();
+ }
+
+ // This is called indirectly from a ctor so must be final or private
+ private void init() {
buffer.setLength(0);
+ seenFirstLine = false;
}
}
\ No newline at end of file