You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2018/05/22 16:06:03 UTC
commons-io git commit: [IO-577] Add readers to filter out given
characters: CharacterSetFilterReader and CharacterFilterReader.
Repository: commons-io
Updated Branches:
refs/heads/master bc10af423 -> 7791a851c
[IO-577] Add readers to filter out given characters:
CharacterSetFilterReader and CharacterFilterReader.
Project: http://git-wip-us.apache.org/repos/asf/commons-io/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-io/commit/7791a851
Tree: http://git-wip-us.apache.org/repos/asf/commons-io/tree/7791a851
Diff: http://git-wip-us.apache.org/repos/asf/commons-io/diff/7791a851
Branch: refs/heads/master
Commit: 7791a851c4a303f47743b156007fb19c6ed8ed81
Parents: bc10af4
Author: Gary Gregory <ga...@gmail.com>
Authored: Tue May 22 10:05:59 2018 -0600
Committer: Gary Gregory <ga...@gmail.com>
Committed: Tue May 22 10:05:59 2018 -0600
----------------------------------------------------------------------
src/changes/changes.xml | 3 +
.../io/input/AbstractCharacterFilterReader.java | 74 +++++++++++
.../commons/io/input/CharacterFilterReader.java | 48 +++++++
.../io/input/CharacterSetFilterReader.java | 55 ++++++++
.../io/input/CharacterFilterReaderTest.java | 72 ++++++++++
.../io/input/CharacterSetFilterReaderTest.java | 131 +++++++++++++++++++
6 files changed, 383 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 9f6fd85..f296eb0 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -65,6 +65,9 @@ The <action> type attribute can be add,update,fix,remove.
<action issue="IO-572" dev="ggregory" type="update" due-to="Pranet Verma">
Refactor duplicate code in org.apache.commons.io.FileUtils.
</action>
+ <action issue="IO-577" dev="ggregory" type="add" due-to="Gary Gregory">
+ Add readers to filter out given characters: CharacterSetFilterReader and CharacterFilterReader.
+ </action>
</release>
<release version="2.6" date="2017-10-15" description="Java 7 required, Java 9 supported.">
http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java b/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java
new file mode 100644
index 0000000..8deeebe
--- /dev/null
+++ b/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A filter reader that removes characters from the stream it wraps. Subclasses decide which characters are removed
+ * by implementing {@link #filter(int)}.
+ */
+public abstract class AbstractCharacterFilterReader extends FilterReader {
+
+    /**
+     * Constructs a new reader.
+     *
+     * @param reader
+     *            the reader to filter
+     */
+    protected AbstractCharacterFilterReader(final Reader reader) {
+        super(reader);
+    }
+
+    /**
+     * Reads single characters from the wrapped reader until one is found that is not filtered out.
+     *
+     * @return the next character that {@link #filter(int)} keeps, or -1 if the end of the stream has been reached.
+     * @throws IOException
+     *             if the wrapped reader fails.
+     */
+    @Override
+    public int read() throws IOException {
+        int ch;
+        do {
+            ch = in.read();
+            // The end-of-stream marker is never offered to filter(): a filter that matched -1 would otherwise make
+            // this loop spin forever on an exhausted stream.
+        } while (ch != -1 && filter(ch));
+        return ch;
+    }
+
+    /**
+     * Returns true if the given character should be filtered out, false to keep the character.
+     *
+     * @param ch
+     *            the character to test; this class only passes characters actually read, never -1.
+     * @return true if the given character should be filtered out, false to keep the character.
+     */
+    protected abstract boolean filter(int ch);
+
+    /**
+     * Reads characters into a portion of an array and removes the filtered characters in place, so that the kept
+     * characters are contiguous starting at {@code off}.
+     * <p>
+     * Only one read is issued to the wrapped reader per call, so the result may be 0 when every character read
+     * was filtered out; that does not signal the end of the stream.
+     * </p>
+     *
+     * @param cbuf
+     *            the destination buffer.
+     * @param off
+     *            the offset in {@code cbuf} at which to start storing characters.
+     * @param len
+     *            the maximum number of characters to read from the wrapped reader.
+     * @return the number of kept characters stored in {@code cbuf}, or -1 if the end of the stream has been
+     *         reached.
+     * @throws IOException
+     *             if the wrapped reader fails.
+     */
+    @Override
+    public int read(final char[] cbuf, final int off, final int len) throws IOException {
+        final int read = super.read(cbuf, off, len);
+        if (read == -1) {
+            return -1;
+        }
+        // Compact the kept characters towards 'off'; 'writePos' never overtakes 'readPos'.
+        int writePos = off;
+        final int end = off + read;
+        for (int readPos = off; readPos < end; readPos++) {
+            final char ch = cbuf[readPos];
+            // Test the character itself (not the count returned by the read above).
+            if (!filter(ch)) {
+                cbuf[writePos++] = ch;
+            }
+        }
+        return writePos - off;
+    }
+}
http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java b/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java
new file mode 100644
index 0000000..092e0f5
--- /dev/null
+++ b/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.Reader;
+
+/**
+ * A filter reader that drops every occurrence of one character, given as an <code>int</code> code point; handy for
+ * stripping a known junk character from CSV files, for example. When only a single character has to go, prefer this
+ * class to a {@link CharacterSetFilterReader}: the check is a plain <code>int</code> comparison. Several
+ * {@link CharacterFilterReader}s can be nested to remove more than one character.
+ */
+public class CharacterFilterReader extends AbstractCharacterFilterReader {
+
+    /** The one character that is removed from the stream. */
+    private final int filteredChar;
+
+    /**
+     * Constructs a new reader.
+     *
+     * @param reader
+     *            the reader to filter.
+     * @param skip
+     *            the character to filter out.
+     */
+    public CharacterFilterReader(final Reader reader, final int skip) {
+        super(reader);
+        filteredChar = skip;
+    }
+
+    /**
+     * Tests whether the given character is the one this reader removes.
+     *
+     * @param ch
+     *            the character to test.
+     * @return true if {@code ch} equals the character given at construction time, false otherwise.
+     */
+    @Override
+    protected boolean filter(final int ch) {
+        return filteredChar == ch;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java b/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java
new file mode 100644
index 0000000..2810a40
--- /dev/null
+++ b/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.Reader;
+import java.util.Collections;
+import java.util.Set;
+
+/**
+ * A filter reader that drops every character contained in a given set of <code>int</code> code points; handy for
+ * stripping known junk characters from CSV files, for example.
+ * <p>
+ * Each character tested is boxed to an <code>Integer</code> for the set lookup. The Integer cache can be enlarged
+ * with a system property, see {@link Integer}.
+ * </p>
+ */
+public class CharacterSetFilterReader extends AbstractCharacterFilterReader {
+
+    /** Read-only view of the characters to remove; never null. */
+    private final Set<Integer> skipSet;
+
+    /**
+     * Constructs a new reader.
+     *
+     * @param reader
+     *            the reader to filter.
+     * @param skip
+     *            the set of characters to filter out; {@code null} is treated as an empty set.
+     */
+    public CharacterSetFilterReader(final Reader reader, final Set<Integer> skip) {
+        super(reader);
+        if (skip == null) {
+            // No set given: nothing is filtered.
+            this.skipSet = Collections.emptySet();
+        } else {
+            // Wraps the caller's set (no copy is made) so this class cannot modify it.
+            this.skipSet = Collections.unmodifiableSet(skip);
+        }
+    }
+
+    /**
+     * Tests whether the given character is a member of the set this reader removes.
+     *
+     * @param ch
+     *            the character to test.
+     * @return true if the set contains {@code ch}, false otherwise.
+     */
+    @Override
+    protected boolean filter(final int ch) {
+        // Boxing via Integer.valueOf() benefits from the Integer cache, which a system property can enlarge.
+        final Integer boxed = Integer.valueOf(ch);
+        return skipSet.contains(boxed);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java b/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java
new file mode 100644
index 0000000..fda062d
--- /dev/null
+++ b/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests {@link CharacterFilterReader} through its single-character {@code read()} method.
+ */
+public class CharacterFilterReaderTest {
+
+    /** Empty input yields end of stream immediately, whatever the filtered character is. */
+    @Test
+    public void testInputSize0FilterSize1() throws IOException {
+        final StringReader input = new StringReader("");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'A')) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A one-character input consisting of the filtered character reads as empty. */
+    @Test
+    public void testInputSize1FilterSize1() throws IOException {
+        final StringReader input = new StringReader("a");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** Consecutive filtered characters are all skipped. */
+    @Test
+    public void testInputSize2FilterSize1FilterAll() throws IOException {
+        final StringReader input = new StringReader("aa");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A filtered character at the start of the input is skipped. */
+    @Test
+    public void testInputSize2FilterSize1FilterFirst() throws IOException {
+        final StringReader input = new StringReader("ab");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) {
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A filtered character at the end of the input is skipped. */
+    @Test
+    public void testInputSize2FilterSize1FilterLast() throws IOException {
+        final StringReader input = new StringReader("ab");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'b')) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java b/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java
new file mode 100644
index 0000000..f1a2dc0
--- /dev/null
+++ b/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests {@link CharacterSetFilterReader} through its single-character {@code read()} method.
+ */
+public class CharacterSetFilterReaderTest {
+
+    /** Empty input with an empty filter set yields end of stream. */
+    @Test
+    public void testInputSize0FilterSize0() throws IOException {
+        final StringReader input = new StringReader("");
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, new HashSet<Integer>(0))) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** Empty input with a non-empty filter set yields end of stream. */
+    @Test
+    public void testInputSize0FilterSize1() throws IOException {
+        final StringReader input = new StringReader("");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A null filter set is accepted by the constructor. */
+    @Test
+    public void testInputSize0NullFilter() throws IOException {
+        final StringReader input = new StringReader("");
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, null)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A one-character input consisting of a filtered character reads as empty. */
+    @Test
+    public void testInputSize1FilterSize1() throws IOException {
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        // The reader under test is managed by try-with-resources, like in every other test of this class.
+        try (StringReader input = new StringReader("a");
+                CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** Consecutive filtered characters are all skipped. */
+    @Test
+    public void testInputSize2FilterSize1FilterAll() throws IOException {
+        final StringReader input = new StringReader("aa");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A filtered character at the start of the input is skipped. */
+    @Test
+    public void testInputSize2FilterSize1FilterFirst() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** A filtered character at the end of the input is skipped. */
+    @Test
+    public void testInputSize2FilterSize1FilterLast() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('b'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** With a two-element set, only the element present in the input (the first character) is removed. */
+    @Test
+    public void testInputSize2FilterSize2FilterFirst() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        codePoints.add(Integer.valueOf('y'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** With a two-element set, only the element present in the input (the last character) is removed. */
+    @Test
+    public void testInputSize2FilterSize2FilterLast() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('x'));
+        codePoints.add(Integer.valueOf('b'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    /** When no input character is in the set, the input passes through unchanged up to end of stream. */
+    @Test
+    public void testInputSize2FilterSize2FilterNone() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('x'));
+        codePoints.add(Integer.valueOf('y'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+}