You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@netbeans.apache.org by ma...@apache.org on 2018/11/03 22:32:31 UTC
[incubator-netbeans] 02/03: Improve entity parsing, supporting multi-line entity definitions
This is an automated email from the ASF dual-hosted git repository.
matthiasblaesing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-netbeans.git
commit 85e8b4781b4782b7091ea450ecd94ccc033a782e
Author: Matthias Bläsing <mb...@doppel-helix.eu>
AuthorDate: Sun Oct 14 17:03:25 2018 +0200
Improve entity parsing, supporting multi-line entity definitions
---
ide/schema2beans/nbproject/project.properties | 2 +-
.../modules/schema2beansdev/DocDefParser.java | 6 +-
.../modules/schema2beansdev/EntityParser.java | 151 +++++++--------------
ide/schema2beans/test/unit/data/TestEntity.dtd | 57 ++++++++
.../tests/SecondaryTest/testEntityParser.pass | 55 ++++++++
.../test/unit/src/tests/SecondaryTest.java | 74 ++++++++++
6 files changed, 237 insertions(+), 108 deletions(-)
diff --git a/ide/schema2beans/nbproject/project.properties b/ide/schema2beans/nbproject/project.properties
index 75849c7..5e0c56d 100644
--- a/ide/schema2beans/nbproject/project.properties
+++ b/ide/schema2beans/nbproject/project.properties
@@ -18,4 +18,4 @@
is.autoload=true
cp.extra=\
${ant.core.lib}
-javac.source=1.6
+javac.source=1.8
diff --git a/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/DocDefParser.java b/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/DocDefParser.java
index 7c919e2..1acb44f 100644
--- a/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/DocDefParser.java
+++ b/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/DocDefParser.java
@@ -82,8 +82,10 @@ public class DocDefParser extends GeneralParser implements SchemaParser {
if (schemaIn == null) {
schemaIn = new FileInputStream(filename);
}
- EntityParser entityParser = new EntityParser(new InputStreamReader(schemaIn));
- entityParser.parse();
+ EntityParser entityParser = new EntityParser();
+ try (Reader r = new InputStreamReader(schemaIn)) {
+ entityParser.parse(r);
+ }
reader = entityParser.getReader();
}
diff --git a/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/EntityParser.java b/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/EntityParser.java
index 2a5fb52..b0a0449 100644
--- a/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/EntityParser.java
+++ b/ide/schema2beans/src/org/netbeans/modules/schema2beansdev/EntityParser.java
@@ -21,6 +21,8 @@ package org.netbeans.modules.schema2beansdev;
import java.io.*;
import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* EntityParser.java - parses the DTD file for entity declarations and creates new Reader
* that replaces the entity references with values
@@ -29,56 +31,59 @@ import java.util.*;
* @author mkuchtiak
*/
public class EntityParser {
- private java.util.Map entityMap;
- private final String text;
- public EntityParser(Reader reader) throws IOException {
- StringWriter w = new StringWriter();
+ private static final Pattern ENTITY_PATTERN = Pattern.compile("<!ENTITY\\s+%\\s+(\\S+)\\s+\"([^\"]*)\"\\s*>");
+ private static final Pattern ENTITY_USE_PATTERN = Pattern.compile("%([\\S;]+);");
+
+ private final Map<String,String> entityMap = new HashMap<>();
+ private String remainingText = "";
+
+ public EntityParser() throws IOException {
+ }
+
+ /**
+ * Parses file for ENTITY declaration, creates map with entities
+ */
+ public void parse(Reader reader) throws IOException {
+ StringBuilder w = new StringBuilder();
char[] buf = new char[4096];
int read;
while ((read = reader.read(buf)) != -1) {
- w.write(buf, 0, read);
+ w.append(buf, 0, read);
}
- this.text = w.toString();
- entityMap = new java.util.HashMap();
- }
- /** Parses file for ENTITY declaration, creates map with entities
- */
- public void parse() throws IOException {
- BufferedReader br = new BufferedReader(new StringReader(text));
- String line = null;
- while ((line=br.readLine())!=null) {
- int startPos = line.indexOf("<!ENTITY ");
- if (startPos>=0) addEntity(br,line.substring(startPos+9));
+
+ String originalText = w.toString();
+
+ StringBuffer buffer = new StringBuffer(originalText.length());
+ Matcher entityMatcher = ENTITY_PATTERN.matcher(originalText);
+ while(entityMatcher.find()) {
+ addEntity(entityMatcher);
+ entityMatcher.appendReplacement(buffer, "");
+ }
+ entityMatcher.appendTail(buffer);
+
+ StringBuffer buffer2 = new StringBuffer(originalText.length());
+ Matcher entityReplacementMatcher = ENTITY_USE_PATTERN.matcher(buffer);
+ while(entityReplacementMatcher.find()) {
+ String entity = entityReplacementMatcher.group(1);
+ if(entityMap.containsKey(entity)) {
+ entityReplacementMatcher.appendReplacement(buffer2, entityMap.get(entity));
+ }
}
- br.close();
+ entityReplacementMatcher.appendTail(buffer2);
+
+ remainingText = buffer2.toString();
}
-
- private void addEntity(BufferedReader br, String line) throws IOException {
- StringTokenizer tok = new StringTokenizer(line);
- if (!tok.hasMoreTokens()) return;
- String percentage = tok.nextToken();
- if (!"%".equals(percentage)) return; //incorrect ENTITY declaration (missing %)
- if (!tok.hasMoreTokens()) return; //incorrect ENTITY declaration (missing entity name)
-
- // cut the first part including entity key
- String key = tok.nextToken();
- int valueStartPos = line.indexOf(key)+key.length();
- String rest = line.substring(valueStartPos);
-
- // looking for starting quotes
- valueStartPos = rest.indexOf("\"");
- if (valueStartPos<0) return;
-
- // looking for entity value
- rest = rest.substring(valueStartPos+1);
- String value = resolveValue (rest,br);
- // write ENTITY into map
+ private void addEntity(Matcher m) throws IOException {
+ String key = m.group(1);
+ String value = m.group(2);
+
+ // write ENTITY into map
if (value!=null) {
int refStart = value.indexOf("%");
int refEnd = value.indexOf(";");
if (refStart>=0 && refEnd>refStart) { //references other entity
- String entityKey = value.substring(refStart+1,refEnd);
+ String entityKey = value.substring(refStart+1,refEnd);
String val = (String)entityMap.get(entityKey);
if (val!=null) {
String newValue = value.substring(0,refStart)+val+value.substring(refEnd+1);
@@ -92,76 +97,12 @@ public class EntityParser {
}
}
}
-
- private String resolveValue(String lineRest, BufferedReader br) throws IOException {
- // looking for closing quotes
- int index = lineRest.indexOf("\"");
- if (index>=0) return lineRest.substring(0,index);
- // value across multiple lines
- StringBuffer buf = new StringBuffer(lineRest);
- buf.append("\n");
- int ch=br.read();
- while ( ch!=(int)'"' && ch!=(int)'>' && ch!=-1 ) {
- buf.append((char)ch);
- ch=br.read();
- }
- return buf.toString();
- }
-
- private boolean containsBlank(String s) {
- for (int i=0;i<s.length();i++) {
- if (' '==s.charAt(i)) return true;
- }
- return false;
- }
/** Creates a StringReader that removes all ENTITY declarations
* and replaces entity references with corresponding values
*/
public Reader getReader() throws IOException {
- StringBuffer buf = new StringBuffer();
- BufferedReader br = new BufferedReader(new StringReader(text));
- String line = null;
- while ((line=br.readLine())!=null) {
- // removing line(s) with entity declaration
- if (line.indexOf("<!ENTITY ")>=0) line = removeEntityDeclaration(line,br);
- // searches for entity reference and replace it with value
- int pos = line.indexOf("%");
- if (pos>=0) {
- StringTokenizer tok = new StringTokenizer(line.substring(pos),";%");
- while (tok.hasMoreTokens()) {
- String key = tok.nextToken();
- if (key.length()>0 && !containsBlank(key)) {
- String value = (String)entityMap.get(key);
- if (value!=null) line = line.replaceAll("%"+key+";",value);
- }
- }
- }
- if (line.length()>0) buf.append(line);
- }
- br.close();
- return new StringReader(buf.toString());
- }
-
- /** Removing line(s) containing ENTITY declaration
- */
- private String removeEntityDeclaration(String line,BufferedReader br) throws IOException {
- int start = line.indexOf("<!ENTITY ");
- StringBuffer buf = new StringBuffer();
- if (start>0) buf.append(line.substring(0, start));
- int endPos = line.indexOf(">", start);
- if (endPos>0) {
- buf.append(line.substring(endPos+1));
- return buf.toString();
- }
- String ln=null;
- while (endPos<0 && (ln=br.readLine())!=null) {
- endPos = ln.indexOf(">");
- if (endPos>=0) {
- buf.append(ln.substring(endPos+1));
- }
- }
- return buf.toString();
+ return new StringReader(remainingText);
}
-
+
}
diff --git a/ide/schema2beans/test/unit/data/TestEntity.dtd b/ide/schema2beans/test/unit/data/TestEntity.dtd
new file mode 100644
index 0000000..bdfd755
--- /dev/null
+++ b/ide/schema2beans/test/unit/data/TestEntity.dtd
@@ -0,0 +1,57 @@
+<!-- The following DTD is a simple view of a book:
+book
+ index
+ +
+ word
+ #PCDATA
+ page
+ #PCDATA
+ chapter+
+ comment?
+ #PCDATA
+ paragraph*
+ #PCDATA
+ summary?
+ #PCDATA
+
+ good
+ EMPTY
+-->
+
+<!ENTITY % yesno
+ "(yes, no)"
+>
+
+<!ELEMENT summary (#PCDATA)>
+
+<!ELEMENT book (summary?, chapter+, index+, available)>
+
+<!ELEMENT available EMPTY>
+
+<!ELEMENT index (word, ref+)>
+
+<!ELEMENT ref (page, line)>
+
+<!ELEMENT word (#PCDATA)>
+
+<!ELEMENT line (#PCDATA)>
+
+<!ELEMENT page (#PCDATA)>
+
+<!ELEMENT chapter (comment?, paragraph*)>
+
+<!ELEMENT paragraph (#PCDATA)>
+
+<!ELEMENT comment (#PCDATA)>
+
+<!ATTLIST book good %yesno; 'yes'>
+<!ATTLIST summary length CDATA #REQUIRED>
+<!ATTLIST summary lang CDATA #IMPLIED>
+<!ATTLIST summary size CDATA #FIXED '12'>
+<!ATTLIST chapter title CDATA #IMPLIED>
+<!ATTLIST index color CDATA #IMPLIED>
+<!ATTLIST index cross-ref %yesno; "no">
+<!ATTLIST index glossary CDATA #FIXED "nope">
+<!ATTLIST word freq CDATA #IMPLIED>
+
+
diff --git a/ide/schema2beans/test/unit/data/goldenfiles/tests/SecondaryTest/testEntityParser.pass b/ide/schema2beans/test/unit/data/goldenfiles/tests/SecondaryTest/testEntityParser.pass
new file mode 100644
index 0000000..89e141c
--- /dev/null
+++ b/ide/schema2beans/test/unit/data/goldenfiles/tests/SecondaryTest/testEntityParser.pass
@@ -0,0 +1,55 @@
+<!-- The following DTD is a simple view of a book:
+book
+ index
+ +
+ word
+ #PCDATA
+ page
+ #PCDATA
+ chapter+
+ comment?
+ #PCDATA
+ paragraph*
+ #PCDATA
+ summary?
+ #PCDATA
+
+ good
+ EMPTY
+-->
+
+
+
+<!ELEMENT summary (#PCDATA)>
+
+<!ELEMENT book (summary?, chapter+, index+, available)>
+
+<!ELEMENT available EMPTY>
+
+<!ELEMENT index (word, ref+)>
+
+<!ELEMENT ref (page, line)>
+
+<!ELEMENT word (#PCDATA)>
+
+<!ELEMENT line (#PCDATA)>
+
+<!ELEMENT page (#PCDATA)>
+
+<!ELEMENT chapter (comment?, paragraph*)>
+
+<!ELEMENT paragraph (#PCDATA)>
+
+<!ELEMENT comment (#PCDATA)>
+
+<!ATTLIST book good (yes, no) 'yes'>
+<!ATTLIST summary length CDATA #REQUIRED>
+<!ATTLIST summary lang CDATA #IMPLIED>
+<!ATTLIST summary size CDATA #FIXED '12'>
+<!ATTLIST chapter title CDATA #IMPLIED>
+<!ATTLIST index color CDATA #IMPLIED>
+<!ATTLIST index cross-ref (yes, no) "no">
+<!ATTLIST index glossary CDATA #FIXED "nope">
+<!ATTLIST word freq CDATA #IMPLIED>
+
+
diff --git a/ide/schema2beans/test/unit/src/tests/SecondaryTest.java b/ide/schema2beans/test/unit/src/tests/SecondaryTest.java
new file mode 100644
index 0000000..08c672e
--- /dev/null
+++ b/ide/schema2beans/test/unit/src/tests/SecondaryTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package tests;
+
+import java.io.*;
+import java.lang.reflect.Field;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import org.netbeans.junit.*;
+
+import org.netbeans.modules.schema2beansdev.*;
+
+public class SecondaryTest extends NbTestCase {
+
+ public SecondaryTest(java.lang.String testName) {
+ super(testName);
+ }
+
+ public void testEntityParser() throws Exception {
+ File schemaFile = new File(getDataDir(), "TestEntity.dtd");
+ try (InputStream dtdIn = new FileInputStream(schemaFile);
+ Reader reader = new InputStreamReader(dtdIn, StandardCharsets.ISO_8859_1);
+ InputStream goldenStream = new FileInputStream(getGoldenFile());
+ Reader goldenReader = new InputStreamReader(goldenStream, StandardCharsets.ISO_8859_1)) {
+ EntityParser ep = new EntityParser();
+
+ ep.parse(reader);
+
+ Field entityMapField = EntityParser.class.getDeclaredField("entityMap");
+ entityMapField.setAccessible(true);
+
+ Map entityMap = (Map) entityMapField.get(ep);
+
+ assertEquals(1, entityMap.size());
+
+ StringBuilder testOutput = new StringBuilder();
+
+ try (Reader r = ep.getReader()) {
+ int read = 0;
+ char[] buffer = new char[4096];
+ while ( (read = r.read(buffer)) >= 0) {
+ testOutput.append(buffer, 0, read);
+ }
+ }
+
+ StringBuilder reference = new StringBuilder();
+ {
+ int read = 0;
+ char[] buffer = new char[4096];
+ while ( (read = goldenReader.read(buffer)) >= 0) {
+ reference.append(buffer, 0, read);
+ }
+ }
+
+ assertEquals(reference.toString(), testOutput.toString());
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@netbeans.apache.org
For additional commands, e-mail: commits-help@netbeans.apache.org
For further information about the NetBeans mailing lists, visit:
https://cwiki.apache.org/confluence/display/NETBEANS/Mailing+lists