You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2018/05/22 16:06:03 UTC

commons-io git commit: [IO-577] Add readers to filter out given characters: CharacterSetFilterReader and CharacterFilterReader.

Repository: commons-io
Updated Branches:
  refs/heads/master bc10af423 -> 7791a851c


[IO-577] Add readers to filter out given characters:
CharacterSetFilterReader and CharacterFilterReader.

Project: http://git-wip-us.apache.org/repos/asf/commons-io/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-io/commit/7791a851
Tree: http://git-wip-us.apache.org/repos/asf/commons-io/tree/7791a851
Diff: http://git-wip-us.apache.org/repos/asf/commons-io/diff/7791a851

Branch: refs/heads/master
Commit: 7791a851c4a303f47743b156007fb19c6ed8ed81
Parents: bc10af4
Author: Gary Gregory <ga...@gmail.com>
Authored: Tue May 22 10:05:59 2018 -0600
Committer: Gary Gregory <ga...@gmail.com>
Committed: Tue May 22 10:05:59 2018 -0600

----------------------------------------------------------------------
 src/changes/changes.xml                         |   3 +
 .../io/input/AbstractCharacterFilterReader.java |  74 +++++++++++
 .../commons/io/input/CharacterFilterReader.java |  48 +++++++
 .../io/input/CharacterSetFilterReader.java      |  55 ++++++++
 .../io/input/CharacterFilterReaderTest.java     |  72 ++++++++++
 .../io/input/CharacterSetFilterReaderTest.java  | 131 +++++++++++++++++++
 6 files changed, 383 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 9f6fd85..f296eb0 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -65,6 +65,9 @@ The <action> type attribute can be add,update,fix,remove.
       <action issue="IO-572" dev="ggregory" type="update" due-to="Pranet Verma">
         Refactor duplicate code in org.apache.commons.io.FileUtils.
       </action>
+      <action issue="IO-577" dev="ggregory" type="add" due-to="Gary Gregory">
+        Add readers to filter out given characters: CharacterSetFilterReader and CharacterFilterReader.
+      </action>
     </release>
 
     <release version="2.6" date="2017-10-15" description="Java 7 required, Java 9 supported.">

http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java b/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java
new file mode 100644
index 0000000..8deeebe
--- /dev/null
+++ b/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A filter reader that removes characters from the underlying stream; subclasses decide which characters to remove.
+ */
+public abstract class AbstractCharacterFilterReader extends FilterReader {
+
+    /**
+     * Constructs a new reader.
+     *
+     * @param reader the reader to filter
+     */
+    protected AbstractCharacterFilterReader(final Reader reader) {
+        super(reader);
+    }
+
+    /** Reads the next character that is not filtered out, or returns -1 at the end of the stream. */
+    @Override
+    public int read() throws IOException {
+        int ch;
+        do {
+            ch = in.read();
+        } while (ch != -1 && filter(ch)); // stop at EOF even if the filter would match -1
+        return ch;
+    }
+
+    /**
+     * Returns true if the given character should be filtered out, false to keep the character.
+     *
+     * @param ch the character to test.
+     * @return true if the given character should be filtered out, false to keep the character.
+     */
+    protected abstract boolean filter(int ch);
+
+    /** Reads into cbuf keeping only unfiltered characters; returns the number kept (may be 0), or -1 at EOF. */
+    @Override
+    public int read(final char[] cbuf, final int off, final int len) throws IOException {
+        final int read = super.read(cbuf, off, len);
+        if (read == -1) {
+            return -1;
+        }
+        int pos = off - 1; // index of the last character kept so far
+        for (int readPos = off; readPos < off + read; readPos++) {
+            if (filter(cbuf[readPos])) { // test the character itself, not the number of characters read
+                continue;
+            }
+            pos++;
+            if (pos < readPos) {
+                cbuf[pos] = cbuf[readPos];
+            }
+        }
+        return pos - off + 1;
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java b/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java
new file mode 100644
index 0000000..092e0f5
--- /dev/null
+++ b/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.Reader;
+
+/**
+ * A filter reader that filters out a given character represented as an <code>int</code> code point, handy to remove
+ * known junk characters from CSV files for example. This class is the most efficient way to filter out a single
+ * character, as opposed to using a {@link CharacterSetFilterReader}. You can also nest {@link CharacterFilterReader}s.
+ */
+public class CharacterFilterReader extends AbstractCharacterFilterReader {
+
+    /** The single character value that is dropped from the stream. */
+    private final int filteredChar;
+
+    /**
+     * Creates a reader that drops every occurrence of one character.
+     *
+     * @param reader the underlying reader whose content is filtered.
+     * @param skip the character to filter out.
+     */
+    public CharacterFilterReader(final Reader reader, final int skip) {
+        super(reader);
+        this.filteredChar = skip;
+    }
+
+    /** Returns true only when {@code ch} equals the character given at construction. */
+    @Override
+    protected boolean filter(final int ch) {
+        return filteredChar == ch;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java b/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java
new file mode 100644
index 0000000..2810a40
--- /dev/null
+++ b/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.Reader;
+import java.util.Collections;
+import java.util.Set;
+
+/**
+ * A filter reader that removes a given set of characters represented as <code>int</code> code points, handy to remove
+ * known junk characters from CSV files for example.
+ * <p>
+ * This class must convert each <code>int</code> read to an <code>Integer</code>. You can increase the Integer cache
+ * with a system property, see {@link Integer}.
+ * </p>
+ */
+public class CharacterSetFilterReader extends AbstractCharacterFilterReader {
+
+    /** Read-only view of the characters to drop; the empty set when the caller passed null. */
+    private final Set<Integer> skipSet;
+
+    /**
+     * Creates a reader that drops every character contained in the given set.
+     *
+     * @param reader the underlying reader whose content is filtered.
+     * @param skip the set of characters to filter out, null is treated as an empty set.
+     */
+    public CharacterSetFilterReader(final Reader reader, final Set<Integer> skip) {
+        super(reader);
+        // A null set means nothing is filtered; otherwise expose the caller's set through a read-only view.
+        this.skipSet = skip != null ? Collections.unmodifiableSet(skip) : Collections.<Integer>emptySet();
+    }
+
+    @Override
+    protected boolean filter(final int ch) {
+        // Note WRT Integer.valueOf(): You can increase the Integer cache with a system property, see {@link Integer}.
+        final Integer boxed = Integer.valueOf(ch);
+        return skipSet.contains(boxed);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java b/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java
new file mode 100644
index 0000000..fda062d
--- /dev/null
+++ b/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests {@link CharacterFilterReader}. */
+public class CharacterFilterReaderTest {
+
+    @Test
+    public void testInputSize0FilterSize1() throws IOException {
+        // Empty input: the very first read must already report end of stream.
+        final StringReader input = new StringReader("");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'A')) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize1FilterSize1() throws IOException {
+        try (StringReader input = new StringReader("a");
+                CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize1FilterAll() throws IOException {
+        final StringReader input = new StringReader("aa");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize1FilterFirst() throws IOException {
+        final StringReader input = new StringReader("ab");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) {
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize1FilterLast() throws IOException {
+        final StringReader input = new StringReader("ab");
+        try (CharacterFilterReader reader = new CharacterFilterReader(input, 'b')) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java b/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java
new file mode 100644
index 0000000..f1a2dc0
--- /dev/null
+++ b/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests {@link CharacterSetFilterReader}. */
+public class CharacterSetFilterReaderTest {
+
+    @Test
+    public void testInputSize0FilterSize0() throws IOException {
+        final StringReader input = new StringReader("");
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, new HashSet<Integer>(0))) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize0FilterSize1() throws IOException {
+        final StringReader input = new StringReader("");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize0NullFilter() throws IOException {
+        final StringReader input = new StringReader("");
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, null)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize1FilterSize1() throws IOException {
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(new StringReader("a"), codePoints)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize1FilterAll() throws IOException {
+        final StringReader input = new StringReader("aa");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize1FilterFirst() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize1FilterLast() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('b'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize2FilterFirst() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('a'));
+        codePoints.add(Integer.valueOf('y'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('b', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize2FilterLast() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('x'));
+        codePoints.add(Integer.valueOf('b'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals(-1, reader.read());
+        }
+    }
+
+    @Test
+    public void testInputSize2FilterSize2FilterNone() throws IOException {
+        final StringReader input = new StringReader("ab");
+        final HashSet<Integer> codePoints = new HashSet<>();
+        codePoints.add(Integer.valueOf('x'));
+        codePoints.add(Integer.valueOf('y'));
+        try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) {
+            Assert.assertEquals('a', reader.read());
+            Assert.assertEquals('b', reader.read());
+        }
+    }
+}