You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/03/30 21:08:20 UTC
lucene-solr:branch_6x: SOLR-8903: Move SolrJ DateUtil to
contrib/extraction as ExtractionDateUtil. And removed obsolete methods.
(cherry picked from commit 5e5fd66)
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 72f5eac2c -> 44e0ac385
SOLR-8903: Move SolrJ DateUtil to contrib/extraction as ExtractionDateUtil.
And removed obsolete methods.
(cherry picked from commit 5e5fd66)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/44e0ac38
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/44e0ac38
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/44e0ac38
Branch: refs/heads/branch_6x
Commit: 44e0ac38567e19465ebf74a160064b8a642ec6b6
Parents: 72f5eac
Author: David Smiley <ds...@apache.org>
Authored: Wed Mar 30 15:00:29 2016 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Wed Mar 30 15:07:52 2016 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 6 +
.../extraction/ExtractingRequestHandler.java | 23 +-
.../handler/extraction/ExtractionDateUtil.java | 178 +++++++++++++
.../handler/extraction/SolrContentHandler.java | 12 +-
.../extraction/TestExtractionDateUtil.java | 61 +++++
.../solr/morphlines/cell/SolrCellBuilder.java | 18 +-
.../morphlines/cell/SolrCellMorphlineTest.java | 4 +-
.../org/apache/solr/common/util/DateUtil.java | 259 -------------------
.../apache/solr/common/util/TestDateUtil.java | 35 ---
9 files changed, 270 insertions(+), 326 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0397e7a..2c5fc28 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -168,6 +168,9 @@ Upgrading from Solr 5.x
When there is a non-zero number of milliseconds, it is padded with zeros to 3 digits. Negative year (BC) dates are
now possible. Parsing: It is now an error to supply a portion of the date out of its range, like 67 seconds.
+* SolrJ no longer includes DateUtil. If for some reason you need to format or parse dates, simply use Instant.toString()
+ and Instant.parse().
+
Detailed Change List
----------------------
@@ -525,6 +528,9 @@ Other Changes
now parse (and format) dates with a leading '+' or '-' (BC dates or dates > 4 digit year).
[value] and ms() and contrib/analytics now parse with date math. (David Smiley)
+* SOLR-8903: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil. Obsolete methods were removed.
+ (David Smiley)
+
================== 5.5.1 ==================
Bug Fixes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
index af70c62..82fe633 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
@@ -17,31 +17,30 @@
package org.apache.solr.handler.extraction;
+import java.io.File;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.util.DateUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.ContentStreamHandlerBase;
+import org.apache.solr.handler.loader.ContentStreamLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.security.PermissionNameProvider;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.util.plugin.SolrCoreAware;
-import org.apache.solr.handler.ContentStreamHandlerBase;
-import org.apache.solr.handler.loader.ContentStreamLoader;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.mime.MimeTypeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.File;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-
/**
* Handler for rich documents like PDF or Word or any other file format that Tika handles that need the text to be extracted
@@ -59,7 +58,7 @@ public class ExtractingRequestHandler extends ContentStreamHandlerBase implement
protected ParseContextConfig parseContextConfig;
- protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
+ protected Collection<String> dateFormats = ExtractionDateUtil.DEFAULT_DATE_FORMATS;
protected SolrContentHandlerFactory factory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
new file mode 100644
index 0000000..b7ccf82
--- /dev/null
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.TimeZone;
+
+
+/**
+ * This class has some code from HttpClient DateUtil.
+ */
+public class ExtractionDateUtil {
+ //start HttpClient
+ /**
+ * Date format pattern used to parse HTTP date headers in RFC 1123 format.
+ */
+ public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss zzz";
+
+ /**
+ * Date format pattern used to parse HTTP date headers in RFC 1036 format.
+ */
+ public static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
+
+ /**
+ * Date format pattern used to parse HTTP date headers in ANSI C
+ * <code>asctime()</code> format.
+ */
+ public static final String PATTERN_ASCTIME = "EEE MMM d HH:mm:ss yyyy";
+ //These are included for back compat
+ private static final Collection<String> DEFAULT_HTTP_CLIENT_PATTERNS = Arrays.asList(
+ PATTERN_ASCTIME, PATTERN_RFC1036, PATTERN_RFC1123);
+
+ private static final Date DEFAULT_TWO_DIGIT_YEAR_START;
+
+ static {
+ Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
+ calendar.set(2000, Calendar.JANUARY, 1, 0, 0);
+ DEFAULT_TWO_DIGIT_YEAR_START = calendar.getTime();
+ }
+
+ private static final TimeZone GMT = TimeZone.getTimeZone("GMT");
+
+ //end HttpClient
+
+ //---------------------------------------------------------------------------------------
+
+ /**
+ * Differs from {@link DateTimeFormatter#ISO_INSTANT} in that it's lenient.
+ */
+ public static final DateTimeFormatter ISO_8601_PARSER = new DateTimeFormatterBuilder()
+ .parseCaseInsensitive().parseLenient().appendInstant().toFormatter(Locale.ROOT);
+
+ /**
+ * A suite of default date formats that can be parsed, and thus transformed to the Solr specific format
+ */
+ public static final Collection<String> DEFAULT_DATE_FORMATS = new ArrayList<>();
+
+ static {
+ DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss'Z'");
+ DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss");
+ DEFAULT_DATE_FORMATS.add("yyyy-MM-dd");
+ DEFAULT_DATE_FORMATS.add("yyyy-MM-dd hh:mm:ss");
+ DEFAULT_DATE_FORMATS.add("yyyy-MM-dd HH:mm:ss");
+ DEFAULT_DATE_FORMATS.add("EEE MMM d hh:mm:ss z yyyy");
+ DEFAULT_DATE_FORMATS.addAll(DEFAULT_HTTP_CLIENT_PATTERNS);
+ }
+
+ /**
+ * Parses the given date string, first as a lenient ISO-8601 instant when it ends in 'Z' and
+ * otherwise (or if that fails) by trying each of the {@link #DEFAULT_DATE_FORMATS} in order.
+ *
+ * @param d The input date to parse
+ * @return The parsed {@link java.util.Date}
+ * @throws java.text.ParseException If the input can't be parsed
+ */
+ public static Date parseDate(String d) throws ParseException {
+ return parseDate(d, DEFAULT_DATE_FORMATS);
+ }
+
+ public static Date parseDate(String d, Collection<String> fmts) throws ParseException {
+ if (d.length() > 0 && d.charAt(d.length() - 1) == 'Z') {
+ try {
+ return new Date(ISO_8601_PARSER.parse(d, Instant::from).toEpochMilli());
+ } catch (Exception e) {
+ //ignore; perhaps we can parse with one of the formats below...
+ }
+ }
+ return parseDate(d, fmts, null);
+ }
+
+ /**
+ * Slightly modified from org.apache.commons.httpclient.util.DateUtil.parseDate
+ * <p>
+ * Parses the date value using the given date formats.
+ *
+ * @param dateValue the date value to parse
+ * @param dateFormats the date formats to use
+ * @param startDate During parsing, two digit years will be placed in the range
+ * <code>startDate</code> to <code>startDate + 100 years</code>. This value may
+ * be <code>null</code>. When <code>null</code> is given as a parameter, year
+ * <code>2000</code> will be used.
+ * @return the parsed date
+ * @throws ParseException if none of the dateFormats could parse the dateValue
+ */
+ public static Date parseDate(
+ String dateValue,
+ Collection<String> dateFormats,
+ Date startDate
+ ) throws ParseException {
+
+ if (dateValue == null) {
+ throw new IllegalArgumentException("dateValue is null");
+ }
+ if (dateFormats == null) {
+ dateFormats = DEFAULT_HTTP_CLIENT_PATTERNS;
+ }
+ if (startDate == null) {
+ startDate = DEFAULT_TWO_DIGIT_YEAR_START;
+ }
+ // trim single quotes around date if present
+ // see issue #5279
+ if (dateValue.length() > 1
+ && dateValue.startsWith("'")
+ && dateValue.endsWith("'")
+ ) {
+ dateValue = dateValue.substring(1, dateValue.length() - 1);
+ }
+
+ //TODO upgrade to Java 8 DateTimeFormatter. But how to deal with the GMT as a default?
+ SimpleDateFormat dateParser = null;
+ Iterator formatIter = dateFormats.iterator();
+
+ while (formatIter.hasNext()) {
+ String format = (String) formatIter.next();
+ if (dateParser == null) {
+ dateParser = new SimpleDateFormat(format, Locale.ENGLISH);
+ dateParser.setTimeZone(GMT);
+ dateParser.set2DigitYearStart(startDate);
+ } else {
+ dateParser.applyPattern(format);
+ }
+ try {
+ return dateParser.parse(dateValue);
+ } catch (ParseException pe) {
+ // ignore this exception, we will try the next format
+ }
+ }
+
+ // we were unable to parse the date
+ throw new ParseException("Unable to parse the date " + dateValue, 0);
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
index 442e64c..7779451 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
@@ -17,7 +17,6 @@
package org.apache.solr.handler.extraction;
import java.lang.invoke.MethodHandles;
-import java.text.DateFormat;
import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Collections;
@@ -31,7 +30,6 @@ import java.util.Set;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.DateUtil;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieDateField;
@@ -83,7 +81,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
private Set<String> literalFieldNames = null;
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
- this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
+ this(metadata, params, schema, ExtractionDateUtil.DEFAULT_DATE_FORMATS);
}
@@ -317,7 +315,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
/**
* Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}
* <p>
- * This implementation only formats dates using the {@link org.apache.solr.common.util.DateUtil}.
+ * This implementation only formats dates using the {@link ExtractionDateUtil}.
*
* @param val The value to transform
* @param schFld The {@link org.apache.solr.schema.SchemaField}
@@ -328,10 +326,8 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
if (schFld != null && schFld.getType() instanceof TrieDateField) {
//try to transform the date
try {
- Date date = DateUtil.parseDate(val, dateFormats);
- DateFormat df = DateUtil.getThreadLocalDateFormat();
- result = df.format(date);
-
+ Date date = ExtractionDateUtil.parseDate(val, dateFormats); // may throw
+ result = date.toInstant().toString();//ISO format
} catch (Exception e) {
// Let the specific fieldType handle errors
// throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid value: " + val + " for field: " + schFld, e);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
new file mode 100644
index 0000000..9632cb9
--- /dev/null
+++ b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+import java.text.ParseException;
+import java.util.Date;
+import java.util.Locale;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestExtractionDateUtil extends LuceneTestCase {
+
+ public void testISO8601() throws Exception {
+ // dates with atypical years
+ assertParseFormatEquals("0001-01-01T01:01:01Z", null);
+ assertParseFormatEquals("+12021-12-01T03:03:03Z", null);
+
+ assertParseFormatEquals("0000-04-04T04:04:04Z", null); // note: 0 AD is also known as 1 BC
+
+ // dates with negative years (BC)
+ assertParseFormatEquals("-0005-05-05T05:05:05Z", null);
+ assertParseFormatEquals("-2021-12-01T04:04:04Z", null);
+ assertParseFormatEquals("-12021-12-01T02:02:02Z", null);
+
+ // dates that only parse thanks to lenient mode of DateTimeFormatter
+ assertParseFormatEquals("10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z"); // missing '+' 5 digit year
+ assertParseFormatEquals("995-1-2T3:4:5Z", "0995-01-02T03:04:05Z"); // wasn't 0 padded
+ }
+
+ private static void assertParseFormatEquals(String inputStr, String expectedStr) throws ParseException {
+ if (expectedStr == null) {
+ expectedStr = inputStr;
+ }
+ Date inputDate = ExtractionDateUtil.parseDate(inputStr);
+ String resultStr = inputDate.toInstant().toString();
+ assertEquals("d:" + inputDate.getTime(), expectedStr, resultStr);
+ }
+
+ public void testParseDate() throws ParseException {
+ assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
+ }
+
+ private static void assertParsedDate(long ts, String dateStr) throws ParseException {
+ long parsed = ExtractionDateUtil.parseDate(dateStr).getTime();
+ assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)", ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
index a5c73bc..00045b2 100644
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
+++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
@@ -29,13 +29,19 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.io.Closeables;
+import com.typesafe.config.Config;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.DateUtil;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.handler.extraction.ExtractingParams;
+import org.apache.solr.handler.extraction.ExtractionDateUtil;
import org.apache.solr.handler.extraction.SolrContentHandler;
import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
import org.apache.solr.morphlines.solr.SolrLocator;
@@ -50,7 +56,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
-
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineCompilationException;
@@ -63,13 +68,6 @@ import org.kitesdk.morphline.stdio.AbstractParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
-import com.google.common.io.Closeables;
-import com.typesafe.config.Config;
-
/**
* Command that pipes the first attachment of a record into one of the given Tika parsers, then maps
* the Tika output back to a record using SolrCell.
@@ -151,7 +149,7 @@ public final class SolrCellBuilder implements CommandBuilder {
cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
}
- this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(DateUtil.DEFAULT_DATE_FORMATS));
+ this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(ExtractionDateUtil.DEFAULT_DATE_FORMATS));
String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
Class<? extends SolrContentHandlerFactory> factoryClass;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
index 9a43b42..2cdbd3c 100644
--- a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
+++ b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
@@ -25,7 +25,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.lucene.util.Constants;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.common.util.DateUtil;
+import org.apache.solr.handler.extraction.ExtractionDateUtil;
import org.apache.solr.handler.extraction.SolrContentHandler;
import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
import org.apache.solr.schema.IndexSchema;
@@ -270,7 +270,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
// which will cause the ContentHandler to be invoked.
metadata.set(fieldName, getFoobarWithNonChars());
StripNonCharSolrContentHandlerFactory contentHandlerFactory =
- new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
+ new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS);
IndexSchema schema = h.getCore().getLatestSchema();
SolrContentHandler contentHandler =
contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
deleted file mode 100644
index 9bf7a2b..0000000
--- a/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.common.util;
-import java.io.IOException;
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Calendar;
-import java.util.Collection;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.TimeZone;
-
-
-/**
- * This class has some code from HttpClient DateUtil.
- */
-public class DateUtil {
- //start HttpClient
- /**
- * Date format pattern used to parse HTTP date headers in RFC 1123 format.
- */
- public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss zzz";
-
- /**
- * Date format pattern used to parse HTTP date headers in RFC 1036 format.
- */
- public static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
-
- /**
- * Date format pattern used to parse HTTP date headers in ANSI C
- * <code>asctime()</code> format.
- */
- public static final String PATTERN_ASCTIME = "EEE MMM d HH:mm:ss yyyy";
- //These are included for back compat
- private static final Collection<String> DEFAULT_HTTP_CLIENT_PATTERNS = Arrays.asList(
- PATTERN_ASCTIME, PATTERN_RFC1036, PATTERN_RFC1123);
-
- private static final Date DEFAULT_TWO_DIGIT_YEAR_START;
-
- static {
- Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
- calendar.set(2000, Calendar.JANUARY, 1, 0, 0);
- DEFAULT_TWO_DIGIT_YEAR_START = calendar.getTime();
- }
-
- private static final TimeZone GMT = TimeZone.getTimeZone("GMT");
-
- //end HttpClient
-
- //---------------------------------------------------------------------------------------
-
- /**
- * A suite of default date formats that can be parsed, and thus transformed to the Solr specific format
- */
- public static final Collection<String> DEFAULT_DATE_FORMATS = new ArrayList<>();
-
- static {
- DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss'Z'");
- DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss");
- DEFAULT_DATE_FORMATS.add("yyyy-MM-dd");
- DEFAULT_DATE_FORMATS.add("yyyy-MM-dd hh:mm:ss");
- DEFAULT_DATE_FORMATS.add("yyyy-MM-dd HH:mm:ss");
- DEFAULT_DATE_FORMATS.add("EEE MMM d hh:mm:ss z yyyy");
- DEFAULT_DATE_FORMATS.addAll(DEFAULT_HTTP_CLIENT_PATTERNS);
- }
-
- /**
- * Returns a formatter that can be use by the current thread if needed to
- * convert Date objects to the Internal representation.
- *
- * @param d The input date to parse
- * @return The parsed {@link java.util.Date}
- * @throws java.text.ParseException If the input can't be parsed
- */
- public static Date parseDate(String d) throws ParseException {
- return parseDate(d, DEFAULT_DATE_FORMATS);
- }
-
- public static Date parseDate(String d, Collection<String> fmts) throws ParseException {
- // 2007-04-26T08:05:04Z
- if (d.endsWith("Z") && d.length() > 20) {
- return getThreadLocalDateFormat().parse(d);
- }
- return parseDate(d, fmts, null);
- }
-
- /**
- * Slightly modified from org.apache.commons.httpclient.util.DateUtil.parseDate
- * <p>
- * Parses the date value using the given date formats.
- *
- * @param dateValue the date value to parse
- * @param dateFormats the date formats to use
- * @param startDate During parsing, two digit years will be placed in the range
- * <code>startDate</code> to <code>startDate + 100 years</code>. This value may
- * be <code>null</code>. When <code>null</code> is given as a parameter, year
- * <code>2000</code> will be used.
- * @return the parsed date
- * @throws ParseException if none of the dataFormats could parse the dateValue
- */
- public static Date parseDate(
- String dateValue,
- Collection<String> dateFormats,
- Date startDate
- ) throws ParseException {
-
- if (dateValue == null) {
- throw new IllegalArgumentException("dateValue is null");
- }
- if (dateFormats == null) {
- dateFormats = DEFAULT_HTTP_CLIENT_PATTERNS;
- }
- if (startDate == null) {
- startDate = DEFAULT_TWO_DIGIT_YEAR_START;
- }
- // trim single quotes around date if present
- // see issue #5279
- if (dateValue.length() > 1
- && dateValue.startsWith("'")
- && dateValue.endsWith("'")
- ) {
- dateValue = dateValue.substring(1, dateValue.length() - 1);
- }
-
- SimpleDateFormat dateParser = null;
- Iterator formatIter = dateFormats.iterator();
-
- while (formatIter.hasNext()) {
- String format = (String) formatIter.next();
- if (dateParser == null) {
- dateParser = new SimpleDateFormat(format, Locale.ENGLISH);
- dateParser.setTimeZone(GMT);
- dateParser.set2DigitYearStart(startDate);
- } else {
- dateParser.applyPattern(format);
- }
- try {
- return dateParser.parse(dateValue);
- } catch (ParseException pe) {
- // ignore this exception, we will try the next format
- }
- }
-
- // we were unable to parse the date
- throw new ParseException("Unable to parse the date " + dateValue, 0);
- }
-
-
- /**
- * Returns a formatter that can be use by the current thread if needed to
- * convert Date objects to the Internal representation.
- *
- * @return The {@link java.text.DateFormat} for the current thread
- */
- public static DateFormat getThreadLocalDateFormat() {
- return fmtThreadLocal.get();
- }
-
- public static TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static ThreadLocalDateFormat fmtThreadLocal = new ThreadLocalDateFormat();
-
- private static class ThreadLocalDateFormat extends ThreadLocal<DateFormat> {
- DateFormat proto;
-
- public ThreadLocalDateFormat() {
- super();
- //2007-04-26T08:05:04Z
- SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
- tmp.setTimeZone(UTC);
- proto = tmp;
- }
-
- @Override
- protected DateFormat initialValue() {
- return (DateFormat) proto.clone();
- }
- }
-
- /** Formats the date and returns the calendar instance that was used (which may be reused) */
- public static Calendar formatDate(Date date, Calendar cal, Appendable out) throws IOException {
- // using a stringBuilder for numbers can be nice since
- // a temporary string isn't used (it's added directly to the
- // builder's buffer.
-
- StringBuilder sb = out instanceof StringBuilder ? (StringBuilder)out : new StringBuilder();
- if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
- cal.setTime(date);
-
- int i = cal.get(Calendar.YEAR);
- sb.append(i);
- sb.append('-');
- i = cal.get(Calendar.MONTH) + 1; // 0 based, so add 1
- if (i<10) sb.append('0');
- sb.append(i);
- sb.append('-');
- i=cal.get(Calendar.DAY_OF_MONTH);
- if (i<10) sb.append('0');
- sb.append(i);
- sb.append('T');
- i=cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
- if (i<10) sb.append('0');
- sb.append(i);
- sb.append(':');
- i=cal.get(Calendar.MINUTE);
- if (i<10) sb.append('0');
- sb.append(i);
- sb.append(':');
- i=cal.get(Calendar.SECOND);
- if (i<10) sb.append('0');
- sb.append(i);
- i=cal.get(Calendar.MILLISECOND);
- if (i != 0) {
- sb.append('.');
- if (i<100) sb.append('0');
- if (i<10) sb.append('0');
- sb.append(i);
-
- // handle canonical format specifying fractional
- // seconds shall not end in '0'. Given the slowness of
- // integer div/mod, simply checking the last character
- // is probably the fastest way to check.
- int lastIdx = sb.length()-1;
- if (sb.charAt(lastIdx)=='0') {
- lastIdx--;
- if (sb.charAt(lastIdx)=='0') {
- lastIdx--;
- }
- sb.setLength(lastIdx+1);
- }
-
- }
- sb.append('Z');
-
- if (out != sb)
- out.append(sb);
-
- return cal;
- }
-
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44e0ac38/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java b/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
deleted file mode 100644
index 1eab4f5..0000000
--- a/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.common.util;
-
-import java.text.ParseException;
-import java.util.Locale;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestDateUtil extends LuceneTestCase {
-
- public void testParseDate() throws ParseException {
- assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
- }
-
- private static void assertParsedDate(long ts, String dateStr) throws ParseException {
- long parsed = DateUtil.parseDate(dateStr).getTime();
- assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)", ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
- }
-
-}