You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by nd...@apache.org on 2021/12/15 15:59:20 UTC
[tika] 01/01: port the TIKA-3446 work from the 2.x branch.
This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch TIKA-3446-1.x-port
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 434ec584104aae238a32c0882411a389949296ac
Author: Nicholas DiPiazza <ni...@lucidworks.com>
AuthorDate: Wed Dec 15 09:59:04 2021 -0600
port the TIKA-3446 work from the 2.x branch.
---
.../tika/parser/microsoft/onenote/CompactID.java | 10 +-
.../tika/parser/microsoft/onenote/Error.java | 12 +-
.../parser/microsoft/onenote/ExtendedGUID.java | 37 +-
.../microsoft/onenote/FileChunkReference.java | 21 +-
.../microsoft/onenote/FileDataStoreObject.java | 1 +
.../tika/parser/microsoft/onenote/FileNode.java | 105 ++--
.../parser/microsoft/onenote/FileNodeList.java | 1 +
.../microsoft/onenote/FileNodeListHeader.java | 35 +-
.../tika/parser/microsoft/onenote/FileNodePtr.java | 10 +-
.../microsoft/onenote/FileNodePtrBackPush.java | 1 +
.../parser/microsoft/onenote/FileNodeUnion.java | 34 +-
.../microsoft/onenote/FndStructureConstants.java | 44 +-
.../apache/tika/parser/microsoft/onenote/GUID.java | 80 +--
.../tika/parser/microsoft/onenote/IndentUtil.java | 7 +-
.../tika/parser/microsoft/onenote/Int24.java | 1 +
.../apache/tika/parser/microsoft/onenote/JCID.java | 45 +-
.../microsoft/onenote/JCIDPropertySetTypeEnum.java | 76 ++-
.../onenote/ObjectDeclarationWithRefCount.java | 27 +-
.../onenote/ObjectDeclarationWithRefCountBody.java | 3 +-
.../onenote/ObjectSpaceObjectPropSet.java | 14 +-
...ctSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java | 6 +-
.../onenote/OneNoteDirectFileResource.java | 11 +-
.../parser/microsoft/onenote/OneNoteDocument.java | 14 +-
.../parser/microsoft/onenote/OneNoteHeader.java | 36 +-
.../onenote/OneNoteLegacyDumpStrings.java | 52 +-
.../parser/microsoft/onenote/OneNoteParser.java | 188 ++++---
.../microsoft/onenote/OneNotePropertyEnum.java | 213 +++-----
.../microsoft/onenote/OneNotePropertyId.java | 14 +-
.../tika/parser/microsoft/onenote/OneNotePtr.java | 521 ++++++++++---------
.../microsoft/onenote/OneNoteTreeWalker.java | 210 ++++----
.../onenote/OneNoteTreeWalkerOptions.java | 17 +-
.../parser/microsoft/onenote/PropertyIDType.java | 8 +-
.../tika/parser/microsoft/onenote/PropertySet.java | 38 +-
.../parser/microsoft/onenote/PropertyValue.java | 20 +-
.../tika/parser/microsoft/onenote/Revision.java | 24 +-
.../parser/microsoft/onenote/RevisionManifest.java | 1 +
.../microsoft/onenote/RootObjectReference.java | 3 +-
.../IFSSHTTPBSerializable.java} | 26 +-
.../onenote/fsshttpb/MSOneStorePackage.java | 307 ++++++++++++
.../onenote/fsshttpb/MSOneStoreParser.java | 199 ++++++++
.../exception/DataElementParseErrorException.java} | 19 +-
.../onenote/fsshttpb/property/ArrayNumber.java | 52 ++
.../fsshttpb/property/EightBytesOfData.java | 52 ++
.../onenote/fsshttpb/property/FourBytesOfData.java | 51 ++
.../property/IProperty.java} | 42 +-
.../property/NoData.java} | 41 +-
.../onenote/fsshttpb/property/OneByteOfData.java | 49 ++
.../property/PrtArrayOfPropertyValues.java | 77 +++
.../PrtFourBytesOfLengthFollowedByData.java | 68 +++
.../onenote/fsshttpb/property/TwoBytesOfData.java | 52 ++
.../streamobj/CellManifestCurrentRevision.java | 72 +++
.../streamobj/CellManifestDataElementData.java | 64 +++
.../onenote/fsshttpb/streamobj/DataElement.java | 189 +++++++
.../fsshttpb/streamobj/DataElementData.java | 46 ++
.../fsshttpb/streamobj/DataElementHash.java | 80 +++
.../fsshttpb/streamobj/DataElementPackage.java | 81 +++
.../onenote/fsshttpb/streamobj/DataHashObject.java | 103 ++++
.../onenote/fsshttpb/streamobj/DataSizeObject.java | 71 +++
.../streamobj/EncryptionObject.java} | 20 +-
.../streamobj/FileDataObject.java} | 19 +-
.../fsshttpb/streamobj/IntermediateNodeObject.java | 114 +++++
.../onenote/fsshttpb/streamobj/JCIDObject.java | 45 ++
.../onenote/fsshttpb/streamobj/LeafNodeObject.java | 257 ++++++++++
.../onenote/fsshttpb/streamobj/NodeObject.java | 45 ++
.../fsshttpb/streamobj/ObjectGroupData.java | 115 +++++
.../streamobj/ObjectGroupDataElementData.java | 286 +++++++++++
.../streamobj/ObjectGroupDeclarations.java | 112 +++++
.../fsshttpb/streamobj/ObjectGroupMetadata.java | 83 +++
.../streamobj/ObjectGroupMetadataDeclarations.java | 97 ++++
.../ObjectGroupObjectBLOBDataDeclaration.java | 103 ++++
.../fsshttpb/streamobj/ObjectGroupObjectData.java | 83 +++
.../ObjectGroupObjectDataBLOBReference.java | 86 ++++
.../streamobj/ObjectGroupObjectDeclare.java | 103 ++++
.../onenote/fsshttpb/streamobj/PropertySet.java | 136 +++++
.../fsshttpb/streamobj/PropertySetObject.java | 46 ++
.../fsshttpb/streamobj/RevisionManifest.java | 75 +++
.../streamobj/RevisionManifestDataElementData.java | 109 ++++
.../RevisionManifestObjectGroupReferences.java | 85 ++++
.../streamobj/RevisionManifestRootDeclare.java | 77 +++
.../streamobj/RevisionStoreObject.java} | 40 +-
.../streamobj/RevisionStoreObjectGroup.java | 118 +++++
.../fsshttpb/streamobj/SignatureObject.java | 82 +++
.../streamobj/StorageIndexCellMapping.java | 84 ++++
.../streamobj/StorageIndexDataElementData.java | 119 +++++
.../streamobj/StorageIndexManifestMapping.java | 76 +++
.../streamobj/StorageIndexRevisionMapping.java | 83 +++
.../streamobj/StorageManifestDataElementData.java | 96 ++++
.../streamobj/StorageManifestRootDeclare.java | 79 +++
.../streamobj/StorageManifestSchemaGUID.java | 77 +++
.../onenote/fsshttpb/streamobj/StreamObject.java | 329 ++++++++++++
.../streamobj/StreamObjectHeaderEnd.java} | 19 +-
.../streamobj/StreamObjectHeaderEnd16bit.java | 117 +++++
.../streamobj/StreamObjectHeaderEnd8bit.java | 121 +++++
.../streamobj/StreamObjectHeaderStart.java | 94 ++++
.../streamobj/StreamObjectHeaderStart16bit.java | 144 ++++++
.../streamobj/StreamObjectHeaderStart32bit.java | 145 ++++++
.../streamobj/StreamObjectParseErrorException.java | 54 ++
.../streamobj/StreamObjectTypeHeaderEnd.java | 175 +++++++
.../streamobj/StreamObjectTypeHeaderStart.java | 457 +++++++++++++++++
.../fsshttpb/streamobj/basic/AdapterHelper.java | 65 +++
.../streamobj/basic/AlternativePackaging.java | 81 +++
.../fsshttpb/streamobj/basic/BasicObject.java | 79 +++
.../fsshttpb/streamobj/basic/BinaryItem.java | 89 ++++
.../onenote/fsshttpb/streamobj/basic/CellID.java | 127 +++++
.../fsshttpb/streamobj/basic/CellIDArray.java | 101 ++++
.../fsshttpb/streamobj/basic/Compact64bitInt.java | 224 +++++++++
.../fsshttpb/streamobj/basic/CompactID.java | 59 +++
.../fsshttpb/streamobj/basic/DataElementType.java | 90 ++++
.../streamobj/basic/DataNodeObjectData.java | 52 ++
.../fsshttpb/streamobj/basic/ExGUIDArray.java | 118 +++++
.../onenote/fsshttpb/streamobj/basic/ExGuid.java | 201 ++++++++
.../fsshttpb/streamobj/basic/HeaderCell.java | 61 +++
.../onenote/fsshttpb/streamobj/basic/JCID.java | 75 +++
.../fsshttpb/streamobj/basic/PropertyID.java | 65 +++
.../fsshttpb/streamobj/basic/PropertyType.java | 110 ++++
.../fsshttpb/streamobj/basic/RequestTypes.java | 73 +++
.../fsshttpb/streamobj/basic/SerialNumber.java | 112 +++++
.../fsshttpb/streamobj/basic/ZipHeader.java | 47 ++
.../streamobj/chunking/AbstractChunking.java} | 42 +-
.../streamobj/chunking/ChunkingFactory.java | 120 +++++
.../streamobj/chunking/ChunkingMethod.java} | 25 +-
.../streamobj/chunking/RDCAnalysisChunking.java | 250 +++++++++
.../streamobj/chunking/SimpleChunking.java | 89 ++++
.../streamobj/chunking/ZipFilesChunking.java | 222 ++++++++
.../streamobj/space/ObjectSpaceObjectPropSet.java | 87 ++++
.../space/ObjectSpaceObjectStreamHeader.java | 63 +++
.../space/ObjectSpaceObjectStreamOfContextIDs.java | 72 +++
.../space/ObjectSpaceObjectStreamOfOIDs.java | 73 +++
.../space/ObjectSpaceObjectStreamOfOSIDs.java | 72 +++
.../microsoft/onenote/fsshttpb/unsigned/UByte.java | 312 ++++++++++++
.../onenote/fsshttpb/unsigned/UInteger.java | 381 ++++++++++++++
.../microsoft/onenote/fsshttpb/unsigned/ULong.java | 289 +++++++++++
.../microsoft/onenote/fsshttpb/unsigned/UMath.java | 113 +++++
.../unsigned/UNumber.java} | 30 +-
.../onenote/fsshttpb/unsigned/UShort.java | 191 +++++++
.../onenote/fsshttpb/unsigned/Unsigned.java | 193 +++++++
.../microsoft/onenote/fsshttpb/util/Bit.java | 54 ++
.../onenote/fsshttpb/util/BitConverter.java | 137 +++++
.../microsoft/onenote/fsshttpb/util/BitReader.java | 197 ++++++++
.../microsoft/onenote/fsshttpb/util/BitWriter.java | 125 +++++
.../util/ByteUtil.java} | 36 +-
.../onenote/fsshttpb/util/DataElementUtils.java | 557 +++++++++++++++++++++
.../util/GuidUtil.java} | 16 +-
.../fsshttpb/util/LittleEndianBitConverter.java | 177 +++++++
.../fsshttpb/util/SequenceNumberGenerator.java | 93 ++++
.../util/UuidUtils.java} | 23 +-
.../microsoft/onenote/OneNoteParserTest.java | 167 +++---
...OrEarlier1.one => testOneNote2007OrEarlier.one} | Bin
.../test-documents/testOneNote2007OrEarlier2.one | Bin 36786 -> 0 bytes
.../test-documents/testOneNoteFromOffice365-2.one | Bin 0 -> 69986 bytes
.../test-documents/testOneNoteFromOffice365.one | Bin 0 -> 29387 bytes
.../test-documents/testOneNoteNonAscii.one | Bin 13528 -> 0 bytes
152 files changed, 13177 insertions(+), 1144 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/CompactID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/CompactID.java
index bc7378b..df61e59 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/CompactID.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/CompactID.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class CompactID {
@@ -49,12 +50,7 @@ class CompactID {
}
public String getCompactIDString() {
- return new StringBuilder()
- .append(guid)
- .append(", index=")
- .append(guidIndex)
- .append(", n=")
- .append((int) n)
- .toString();
+ return new StringBuilder().append(guid).append(", index=").append(guidIndex).append(", n=")
+ .append((int) n).toString();
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java
index 1239231..8277ebf 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java
@@ -14,16 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
public enum Error {
- OK,
- SEGV,
- RESERVED_NONZERO,
- UNKNOWN_ENUM,
- INVALID_CONSTANT,
- STRING_TOO_SHORT,
- HEX_OUT_OF_RANGE,
- COMPACT_ID_MISSING,
- UNKNOWN_GUID,
+ OK, SEGV, RESERVED_NONZERO, UNKNOWN_ENUM, INVALID_CONSTANT, STRING_TOO_SHORT, HEX_OUT_OF_RANGE,
+ COMPACT_ID_MISSING, UNKNOWN_GUID,
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java
index 2b46de2..812250c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java
@@ -14,24 +14,37 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Locale;
import java.util.Objects;
-class ExtendedGUID implements Comparable<ExtendedGUID> {
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+
+public class ExtendedGUID implements Comparable<ExtendedGUID> {
GUID guid;
long n;
+ public ExtendedGUID() {
+
+ }
+
public ExtendedGUID(GUID guid, long n) {
this.guid = guid;
this.n = n;
}
+ public static ExtendedGUID nil() {
+ return new ExtendedGUID(GUID.nil(), 0);
+ }
+
@Override
public int compareTo(ExtendedGUID other) {
if (other.guid.equals(guid)) {
- new Long(n).compareTo(other.n);
+ return Long.compare(n, other.n);
}
return guid.compareTo(other.guid);
}
@@ -45,8 +58,7 @@ class ExtendedGUID implements Comparable<ExtendedGUID> {
return false;
}
ExtendedGUID that = (ExtendedGUID) o;
- return n == that.n &&
- Objects.equals(guid, that.guid);
+ return n == that.n && Objects.equals(guid, that.guid);
}
@Override
@@ -54,10 +66,6 @@ class ExtendedGUID implements Comparable<ExtendedGUID> {
return Objects.hash(guid, n);
}
- public static ExtendedGUID nil() {
- return new ExtendedGUID(GUID.nil(), 0);
- }
-
@Override
public String toString() {
return String.format(Locale.US, "%s [%d]", guid, n);
@@ -84,4 +92,17 @@ class ExtendedGUID implements Comparable<ExtendedGUID> {
this.n = n;
return this;
}
+
+ /**
+ * This method is used to convert the element of ExtendedGUID object into a byte List.
+ *
+ * @return Return the byte list which store the byte information of ExtendedGUID
+ */
+ public List<Byte> SerializeToByteList() {
+ List<Byte> byteList = new ArrayList<>(guid.toByteArray());
+ for (byte b : BitConverter.getBytes(n)) {
+ byteList.add(b);
+ }
+ return byteList;
+ }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileChunkReference.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileChunkReference.java
index 04d1cb1..046106d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileChunkReference.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileChunkReference.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
import java.util.Objects;
@@ -23,17 +24,21 @@ import java.util.Objects;
* <p>
* Each file chunk reference contains an <pre>stp</pre> field and a <pre>cb</pre> field.
* <p>
- * The <pre>stp</pre> field is a stream pointer that specifies the offset, in bytes, from the beginning of the file where the referenced
+ * The <pre>stp</pre> field is a stream pointer that specifies the offset, in bytes, from the
+ * beginning of the file where the referenced
* data is located.
* <p>
- * The <pre>cb</pre> field specifies the size, in bytes, of the referenced data. The sizes, in bytes, of the
+ * The <pre>cb</pre> field specifies the size, in bytes, of the referenced data. The sizes, in
+ * bytes, of the
* stp and cb fields are specified by the structures in this section.
* <p>
* There are some Special values:
* <p>
- * fcrNil - Specifies a file chunk reference where all bits of the stp field are set to 1, and all bits of the cb field are set to zero.
+ * fcrNil - Specifies a file chunk reference where all bits of the stp field are set to 1, and
+ * all bits of the cb field are set to zero.
* <p>
- * fcrZero - Specifies a file chunk reference where all bits of the stp and cb fields are set to zero.
+ * fcrZero - Specifies a file chunk reference where all bits of the stp and cb fields are set to
+ * zero.
*/
class FileChunkReference {
@@ -55,10 +60,7 @@ class FileChunkReference {
@Override
public String toString() {
- return "FileChunkReference{" +
- "stp=" + stp +
- ", cb=" + cb +
- '}';
+ return "FileChunkReference{" + "stp=" + stp + ", cb=" + cb + '}';
}
@Override
@@ -70,8 +72,7 @@ class FileChunkReference {
return false;
}
FileChunkReference that = (FileChunkReference) o;
- return stp == that.stp &&
- cb == that.cb;
+ return stp == that.stp && cb == that.cb;
}
@Override
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileDataStoreObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileDataStoreObject.java
index f48019b..ee7af09 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileDataStoreObject.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileDataStoreObject.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class FileDataStoreObject {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNode.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNode.java
index f27e877..5481af7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNode.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNode.java
@@ -14,45 +14,56 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import org.apache.tika.exception.TikaMemoryLimitException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+package org.apache.tika.parser.microsoft.onenote;
import java.io.IOException;
import java.util.Objects;
+import org.apache.tika.exception.TikaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* A FileNode structure is the basic unit for holding and referencing data in the file.
* FileNode structures are organized into file node lists
* <p>
- * A FileNode structure is divided into header fields and a data field, fnd. The header fields specify what type of FileNode structure it
+ * A FileNode structure is divided into header fields and a data field, fnd. The header fields
+ * specify what type of FileNode structure it
* is,
* and what format the fnd field is in.
* <p>
- * The fnd field can be empty, or it can contain data directly, or it can contain a reference to another block of the file by
+ * The fnd field can be empty, or it can contain data directly, or it can contain a reference to
+ * another block of the file by
* byte position and byte count, or it can contain both data and a reference.
*/
class FileNode {
private static final Logger LOG = LoggerFactory.getLogger(FileNode.class);
/**
- * An unsigned integer that specifies the type of this FileNode structure. The meaning of this value is specified by the fnd field.
+ * An unsigned integer that specifies the type of this FileNode structure. The meaning of
+ * this value is specified by the fnd field.
*/
long id;
long size;
/**
- * An unsigned integer that specifies whether the structure specified by fnd contains a FileNodeChunkReference structure.
- * 0 - This FileNode structure does not reference other data. The data structure specified by fnd MUST NOT contain a
+ * An unsigned integer that specifies whether the structure specified by fnd contains a
+ * FileNodeChunkReference structure.
+ * 0 - This FileNode structure does not reference other data. The data structure specified
+ * by fnd MUST NOT contain a
* FileNodeChunkReference structure. The StpFormat and CbFormat fields MUST be ignored.
- * 1 - This FileNode structure contains a reference to data. The first field in the data structure specified by an fnd field MUST be a
- * FileNodeChunkReference structure that specifies the location and size of the referenced data.
- * The type of the FileNodeChunkReference structure is specified by the StpFormat and CbFormat fields.
+ * 1 - This FileNode structure contains a reference to data. The first field in the data
+ * structure specified by an fnd field MUST be a
+ * FileNodeChunkReference structure that specifies the location and size of the referenced
+ * data.
+ * The type of the FileNodeChunkReference structure is specified by the StpFormat and
+ * CbFormat fields.
* 2 - This FileNode structure contains a reference to a file node list.
- * The first field in the data structure specified by the fnd field MUST be a FileNodeChunkReference structure that specifies the
- * location and size of a file node list. The type of the FileNodeChunkReference is specified by the StpFormat and CbFormat fields.
+ * The first field in the data structure specified by the fnd field MUST be a
+ * FileNodeChunkReference structure that specifies the
+ * location and size of a file node list. The type of the FileNodeChunkReference is
+ * specified by the StpFormat and CbFormat fields.
*/
long baseType;
@@ -96,28 +107,25 @@ class FileNode {
return false;
}
FileNode fileNode = (FileNode) o;
- return id == fileNode.id &&
- size == fileNode.size &&
- baseType == fileNode.baseType &&
- isFileData == fileNode.isFileData &&
- Objects.equals(gosid, fileNode.gosid) &&
- Objects.equals(gctxid, fileNode.gctxid) &&
- Objects.equals(fileDataStoreReference, fileNode.fileDataStoreReference) &&
- Objects.equals(ref, fileNode.ref) &&
- Objects.equals(propertySet, fileNode.propertySet) &&
- Objects.equals(childFileNodeList, fileNode.childFileNodeList) &&
- Objects.equals(subType, fileNode.subType);
+ return id == fileNode.id && size == fileNode.size && baseType == fileNode.baseType &&
+ isFileData == fileNode.isFileData && Objects.equals(gosid, fileNode.gosid) &&
+ Objects.equals(gctxid, fileNode.gctxid) &&
+ Objects.equals(fileDataStoreReference, fileNode.fileDataStoreReference) &&
+ Objects.equals(ref, fileNode.ref) &&
+ Objects.equals(propertySet, fileNode.propertySet) &&
+ Objects.equals(childFileNodeList, fileNode.childFileNodeList) &&
+ Objects.equals(subType, fileNode.subType);
}
@Override
public int hashCode() {
- return Objects.hash(id, size, baseType, gosid, gctxid, fileDataStoreReference, ref, propertySet, isFileData, childFileNodeList,
- subType);
+ return Objects.hash(id, size, baseType, gosid, gctxid, fileDataStoreReference, ref,
+ propertySet, isFileData, childFileNodeList, subType);
}
public boolean hasGctxid() {
- return id == FndStructureConstants.RevisionRoleAndContextDeclarationFND
- || id == FndStructureConstants.RevisionManifestStart7FND;
+ return id == FndStructureConstants.RevisionRoleAndContextDeclarationFND ||
+ id == FndStructureConstants.RevisionManifestStart7FND;
}
public long getId() {
@@ -219,10 +227,12 @@ class FileNode {
return this;
}
- public void print(OneNoteDocument document, OneNotePtr pointer, int indentLevel) throws IOException, TikaMemoryLimitException {
+ public void print(OneNoteDocument document, OneNotePtr pointer, int indentLevel)
+ throws IOException, TikaException {
boolean shouldPrintHeader = FndStructureConstants.nameOf(id).contains("ObjectDec");
if (gosid.equals(ExtendedGUID.nil()) && shouldPrintHeader) {
- LOG.debug("{}[beg {}]:{}", IndentUtil.getIndent(indentLevel + 1), FndStructureConstants.nameOf(id), gosid);
+ LOG.debug("{}[beg {}]:{}", IndentUtil.getIndent(indentLevel + 1),
+ FndStructureConstants.nameOf(id), gosid);
}
propertySet.print(document, pointer, indentLevel + 1);
if (!childFileNodeList.children.isEmpty()) {
@@ -233,25 +243,26 @@ class FileNode {
child.print(document, pointer, indentLevel + 1);
}
}
- if (id == FndStructureConstants.RevisionRoleDeclarationFND
- || id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
+ if (id == FndStructureConstants.RevisionRoleDeclarationFND ||
+ id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
LOG.debug("{}[Revision Role {}]", IndentUtil.getIndent(indentLevel + 1),
- subType.revisionRoleDeclaration.revisionRole);
+ subType.revisionRoleDeclaration.revisionRole);
}
- if (id == FndStructureConstants.RevisionManifestStart4FND || id == FndStructureConstants.RevisionManifestStart6FND
- || id == FndStructureConstants.RevisionManifestStart7FND) {
+ if (id == FndStructureConstants.RevisionManifestStart4FND ||
+ id == FndStructureConstants.RevisionManifestStart6FND ||
+ id == FndStructureConstants.RevisionManifestStart7FND) {
LOG.debug("{}[revisionRole {}]", IndentUtil.getIndent(indentLevel + 1),
- subType.revisionManifest.revisionRole);
+ subType.revisionManifest.revisionRole);
}
- if ((gctxid != ExtendedGUID.nil() || id == FndStructureConstants.RevisionManifestStart7FND)
- && shouldPrintHeader) {
+ if ((!gctxid.equals(ExtendedGUID.nil()) ||
+ id == FndStructureConstants.RevisionManifestStart7FND) && shouldPrintHeader) {
LOG.debug("{}[gctxid {}]", IndentUtil.getIndent(indentLevel + 1), gctxid);
}
- if (gosid != ExtendedGUID.nil() && shouldPrintHeader) {
- LOG.debug("{}[end {}]:{}", IndentUtil.getIndent(indentLevel + 1), FndStructureConstants.nameOf(id),
- gosid);
+ if (!gosid.equals(ExtendedGUID.nil()) && shouldPrintHeader) {
+ LOG.debug("{}[end {}]:{}", IndentUtil.getIndent(indentLevel + 1),
+ FndStructureConstants.nameOf(id), gosid);
}
}
@@ -267,12 +278,8 @@ class FileNode {
@Override
public String toString() {
- return new StringBuilder().append("FileNodeID=0x")
- .append(Long.toHexString(id))
- .append(", gosid=")
- .append(gosid)
- .append(", baseType=0x")
- .append(Long.toHexString(baseType))
- .toString();
+ return new StringBuilder().append("FileNodeID=0x").append(Long.toHexString(id))
+ .append(", gosid=").append(gosid).append(", baseType=0x")
+ .append(Long.toHexString(baseType)).toString();
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
index aa01c18..1d678a0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
import java.util.ArrayList;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeListHeader.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeListHeader.java
index 1f8ee22..a0bd3c7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeListHeader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeListHeader.java
@@ -14,9 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.exception.TikaException;
class FileNodeListHeader {
public static final long UNIT_MAGIC_CONSTANT = 0xA4567AB1F5F7F4C4L;
@@ -29,23 +31,28 @@ class FileNodeListHeader {
*
* @param position Position of the file where this header starts.
* @param uintMagic An unsigned integer; MUST be "0xA4567AB1F5F7F4C4"
- * @param fileNodeListId An unsigned integer that specifies the identity of the file node list
- * this fragment belongs to. MUST be equal to or greater than 0x00000010. The pair of
- * FileNodeListID and nFragmentSequence fields MUST be unique relative to other
- * FileNodeListFragment structures in the file.
+ * @param fileNodeListId An unsigned integer that specifies the identity of
+ * the file node list this fragment belongs to. MUST be equal to or
+ * greater than 0x00000010. The pair of
+ * FileNodeListID and nFragmentSequence fields MUST be unique
+ * relative to other FileNodeListFragment structures in the file.
* @param nFragmentSequence An unsigned integer that specifies the index of the fragment in the
- * file node list containing the fragment. The nFragmentSequence field of the first fragment in a
- * given file node list MUST be 0 and the nFragmentSequence fields of all subsequent fragments in
+ * file node list containing the fragment. The nFragmentSequence
+ * field of the first fragment in a given file node list MUST be 0
+ * and the nFragmentSequence fields of all subsequent fragments in
* this list MUST be sequential.
*/
- public FileNodeListHeader(long position, long uintMagic, long fileNodeListId, long nFragmentSequence) {
+ public FileNodeListHeader(long position, long uintMagic, long fileNodeListId,
+ long nFragmentSequence) throws TikaException {
if (uintMagic != UNIT_MAGIC_CONSTANT) {
- throw new RuntimeException("unitMagic must always be: 0x" + Long.toHexString(UNIT_MAGIC_CONSTANT));
+ throw new TikaException(
+ "unitMagic must always be: 0x" + Long.toHexString(UNIT_MAGIC_CONSTANT));
}
this.position = position;
this.fileNodeListId = fileNodeListId;
if (fileNodeListId < 0x00000010) {
- throw new RuntimeException("FileNodeListHeader.fileNodeListId MUST be equal to or greater than 0x00000010");
+ throw new TikaException("FileNodeListHeader.fileNodeListId MUST be equal " +
+ "to or greater than 0x00000010");
}
this.nFragmentSequence = nFragmentSequence;
}
@@ -78,15 +85,13 @@ class FileNodeListHeader {
}
public String getPositionHex() {
- return "0x" + StringUtils.leftPad(Long.toHexString(position), 8, "0");
+ return "0x" + StringUtils.leftPad(Long.toHexString(position), 8, '0');
}
@Override
public String toString() {
- return "FileNodeListHeader{" +
- "position=" + "0x" + StringUtils.leftPad(Long.toHexString(position), 8, "0") +
- ", fileNodeListId=" + fileNodeListId +
- ", nFragmentSequence=" + nFragmentSequence +
- '}';
+ return "FileNodeListHeader{" + "position=" + "0x" +
+ StringUtils.leftPad(Long.toHexString(position), 8, '0') + ", fileNodeListId=" +
+ fileNodeListId + ", nFragmentSequence=" + nFragmentSequence + '}';
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtr.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtr.java
index a0e9e25..27cd96c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtr.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtr.java
@@ -14,11 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
import java.util.ArrayList;
import java.util.List;
+import org.apache.tika.exception.TikaException;
+
/**
* Stores a list that represents how to get to the file node in the data structure.
*/
@@ -44,17 +47,18 @@ class FileNodePtr {
* <p>
* For example 0, 4, 15 would mean
* <p>
- * document.root.children.get(0).childFileNodeList.children.get(4).childFileNodeList.children.get(15)
+ * document.root.children.get(0).childFileNodeList.children.get(4).
+ * childFileNodeList.children.get(15)
*
* @param document
* @return
*/
- public FileNode dereference(OneNoteDocument document) {
+ public FileNode dereference(OneNoteDocument document) throws TikaException {
if (nodeListPositions.isEmpty()) {
return null;
}
if (nodeListPositions.get(0) >= document.root.children.size()) {
- throw new RuntimeException("Exceeded root child size");
+ throw new TikaException("Exceeded root child size");
}
FileNode cur = document.root.children.get(nodeListPositions.get(0));
for (int i = 1, ie = nodeListPositions.size(); i < ie; ++i) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
index b79ef8a..393de25 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class FileNodePtrBackPush {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeUnion.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeUnion.java
index 169c394..7092b61 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeUnion.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeUnion.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class FileNodeUnion {
@@ -24,9 +25,12 @@ class FileNodeUnion {
GlobalIdTableEntryFNDX globalIdTableEntryFNDX = new GlobalIdTableEntryFNDX();
GlobalIdTableEntry2FNDX globalIdTableEntry2FNDX = new GlobalIdTableEntry2FNDX();
GlobalIdTableEntry3FNDX globalIdTableEntry3FNDX = new GlobalIdTableEntry3FNDX();
- ObjectRevisionWithRefCountFNDX objectRevisionWithRefCountFNDX = new ObjectRevisionWithRefCountFNDX();
- ObjectInfoDependencyOverrides objectInfoDependencyOverrides = new ObjectInfoDependencyOverrides();
- ObjectDeclarationWithRefCount objectDeclarationWithRefCount = new ObjectDeclarationWithRefCount();
+ ObjectRevisionWithRefCountFNDX objectRevisionWithRefCountFNDX =
+ new ObjectRevisionWithRefCountFNDX();
+ ObjectInfoDependencyOverrides objectInfoDependencyOverrides =
+ new ObjectInfoDependencyOverrides();
+ ObjectDeclarationWithRefCount objectDeclarationWithRefCount =
+ new ObjectDeclarationWithRefCount();
RootObjectReference rootObjectReference = new RootObjectReference();
FileDataStoreObjectReference fileDataStoreObjectReference = new FileDataStoreObjectReference();
@@ -34,7 +38,8 @@ class FileNodeUnion {
return revisionManifestListStart;
}
- public FileNodeUnion setRevisionManifestListStart(RevisionManifestListStart revisionManifestListStart) {
+ public FileNodeUnion setRevisionManifestListStart(
+ RevisionManifestListStart revisionManifestListStart) {
this.revisionManifestListStart = revisionManifestListStart;
return this;
}
@@ -52,7 +57,8 @@ class FileNodeUnion {
return revisionRoleDeclaration;
}
- public FileNodeUnion setRevisionRoleDeclaration(RevisionRoleDeclaration revisionRoleDeclaration) {
+ public FileNodeUnion setRevisionRoleDeclaration(
+ RevisionRoleDeclaration revisionRoleDeclaration) {
this.revisionRoleDeclaration = revisionRoleDeclaration;
return this;
}
@@ -79,7 +85,8 @@ class FileNodeUnion {
return globalIdTableEntry2FNDX;
}
- public FileNodeUnion setGlobalIdTableEntry2FNDX(GlobalIdTableEntry2FNDX globalIdTableEntry2FNDX) {
+ public FileNodeUnion setGlobalIdTableEntry2FNDX(
+ GlobalIdTableEntry2FNDX globalIdTableEntry2FNDX) {
this.globalIdTableEntry2FNDX = globalIdTableEntry2FNDX;
return this;
}
@@ -88,7 +95,8 @@ class FileNodeUnion {
return globalIdTableEntry3FNDX;
}
- public FileNodeUnion setGlobalIdTableEntry3FNDX(GlobalIdTableEntry3FNDX globalIdTableEntry3FNDX) {
+ public FileNodeUnion setGlobalIdTableEntry3FNDX(
+ GlobalIdTableEntry3FNDX globalIdTableEntry3FNDX) {
this.globalIdTableEntry3FNDX = globalIdTableEntry3FNDX;
return this;
}
@@ -97,7 +105,8 @@ class FileNodeUnion {
return objectRevisionWithRefCountFNDX;
}
- public FileNodeUnion setObjectRevisionWithRefCountFNDX(ObjectRevisionWithRefCountFNDX objectRevisionWithRefCountFNDX) {
+ public FileNodeUnion setObjectRevisionWithRefCountFNDX(
+ ObjectRevisionWithRefCountFNDX objectRevisionWithRefCountFNDX) {
this.objectRevisionWithRefCountFNDX = objectRevisionWithRefCountFNDX;
return this;
}
@@ -106,7 +115,8 @@ class FileNodeUnion {
return objectInfoDependencyOverrides;
}
- public FileNodeUnion setObjectInfoDependencyOverrides(ObjectInfoDependencyOverrides objectInfoDependencyOverrides) {
+ public FileNodeUnion setObjectInfoDependencyOverrides(
+ ObjectInfoDependencyOverrides objectInfoDependencyOverrides) {
this.objectInfoDependencyOverrides = objectInfoDependencyOverrides;
return this;
}
@@ -115,7 +125,8 @@ class FileNodeUnion {
return objectDeclarationWithRefCount;
}
- public FileNodeUnion setObjectDeclarationWithRefCount(ObjectDeclarationWithRefCount objectDeclarationWithRefCount) {
+ public FileNodeUnion setObjectDeclarationWithRefCount(
+ ObjectDeclarationWithRefCount objectDeclarationWithRefCount) {
this.objectDeclarationWithRefCount = objectDeclarationWithRefCount;
return this;
}
@@ -133,7 +144,8 @@ class FileNodeUnion {
return fileDataStoreObjectReference;
}
- public FileNodeUnion setFileDataStoreObjectReference(FileDataStoreObjectReference fileDataStoreObjectReference) {
+ public FileNodeUnion setFileDataStoreObjectReference(
+ FileDataStoreObjectReference fileDataStoreObjectReference) {
this.fileDataStoreObjectReference = fileDataStoreObjectReference;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FndStructureConstants.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FndStructureConstants.java
index 88f543e..74fb6cb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FndStructureConstants.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FndStructureConstants.java
@@ -25,10 +25,6 @@ package org.apache.tika.parser.microsoft.onenote;
* The value of each constant corresponds to the FileNodeID property for the file node.
*/
final class FndStructureConstants {
- private FndStructureConstants() {
- // no op
- }
-
static final long ObjectSpaceManifestRootFND = 0x04;
static final long ObjectSpaceManifestListReferenceFND = 0x08;
static final long ObjectSpaceManifestListStartFND = 0x0c;
@@ -44,24 +40,6 @@ final class FndStructureConstants {
static final long GlobalIdTableEntry2FNDX = 0x25;
static final long GlobalIdTableEntry3FNDX = 0x26;
static final long GlobalIdTableEndFNDX = 0x28;
-
- public static final class CanRevise {
- private CanRevise() {
- // no op
- }
-
- static final long ObjectDeclarationWithRefCountFNDX = 0x2d;
- static final long ObjectDeclarationWithRefCount2FNDX = 0x2e;
- static final long ObjectRevisionWithRefCountFNDX = 0x041;
- static final long ObjectRevisionWithRefCount2FNDX = 0x42;
- static final long ObjectDeclaration2RefCountFND = 0x0A4;
- static final long ObjectDeclaration2LargeRefCountFND = 0xA5;
- static final long ReadOnlyObjectDeclaration2RefCountFND = 0xc4;
- static final long ReadOnlyObjectDeclaration2LargeRefCountFND = 0xc5;
- static final long ObjectDeclarationFileData3RefCountFND = 0x72;
- static final long ObjectDeclarationFileData3LargeRefCountFND = 0x73;
- }
-
static final long RootObjectReference2FNDX = 0x59;
static final long RootObjectReference3FND = 0x5a; // each root object must have a differe
static final long RevisionRoleDeclarationFND = 0x5c;
@@ -75,9 +53,12 @@ final class FndStructureConstants {
static final long ObjectGroupStartFND = 0xb4;
static final long ObjectGroupEndFND = 0xb8;
static final long HashedChunkDescriptor2FND = 0xc2;
-
static final long ChunkTerminatorFND = 0xff;
+ private FndStructureConstants() {
+ // no op
+ }
+
static String nameOf(long type) {
switch (new Long(type).intValue()) {
case (int) ObjectSpaceManifestRootFND:
@@ -163,4 +144,21 @@ final class FndStructureConstants {
return "UnknownFND";
}
}
+
+ public static final class CanRevise {
+ static final long ObjectDeclarationWithRefCountFNDX = 0x2d;
+ static final long ObjectDeclarationWithRefCount2FNDX = 0x2e;
+ static final long ObjectRevisionWithRefCountFNDX = 0x041;
+ static final long ObjectRevisionWithRefCount2FNDX = 0x42;
+ static final long ObjectDeclaration2RefCountFND = 0x0A4;
+ static final long ObjectDeclaration2LargeRefCountFND = 0xA5;
+ static final long ReadOnlyObjectDeclaration2RefCountFND = 0xc4;
+ static final long ReadOnlyObjectDeclaration2LargeRefCountFND = 0xc5;
+ static final long ObjectDeclarationFileData3RefCountFND = 0x72;
+ static final long ObjectDeclarationFileData3LargeRefCountFND = 0x73;
+
+ private CanRevise() {
+ // no op
+ }
+ }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java
index 371e328..27f016b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java
@@ -14,42 +14,71 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
-import org.apache.commons.lang3.StringUtils;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import java.util.Locale;
-class GUID implements Comparable<GUID> {
+import org.apache.commons.lang3.StringUtils;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+
+public class GUID implements Comparable<GUID> {
int[] guid;
+ public GUID(int[] guid) {
+ this.guid = guid;
+ }
+
/**
- * Converts a GUID of format: {AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE} (in bytes) to a GUID object.
+ * Converts a GUID of format: {AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE} (in bytes) to a GUID
+ * object.
*
- * @param guid The bytes that contain string in UTF-16 format of {AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE}
+ * @param guid The bytes that contain a UTF-16LE string of the form
+ * {AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE}
* @return GUID object parsed from guid bytes.
*/
public static GUID fromCurlyBraceUTF16Bytes(byte[] guid) {
int[] intGuid = new int[16];
String utf16Str = new String(guid, StandardCharsets.UTF_16LE).replaceAll("\\{", "")
- .replaceAll("-", "").replaceAll("}", "");
+ .replaceAll("-", "").replaceAll("}", "");
for (int i = 0; i < utf16Str.length(); i += 2) {
- intGuid[i / 2] = Integer.parseUnsignedInt("" + utf16Str.charAt(i) + utf16Str.charAt(i + 1), 16);
+ intGuid[i / 2] =
+ Integer.parseUnsignedInt("" + utf16Str.charAt(i) + utf16Str.charAt(i + 1), 16);
}
return new GUID(intGuid);
}
+ public static int memcmp(int[] b1, int[] b2, int sz) {
+ for (int i = 0; i < sz; i++) {
+ if (b1[i] != b2[i]) {
+ if ((b1[i] >= 0 && b2[i] >= 0) || (b1[i] < 0 && b2[i] < 0)) {
+ return b1[i] - b2[i];
+ }
+ if (b1[i] < 0 && b2[i] >= 0) {
+ return 1;
+ }
+ if (b2[i] < 0 && b1[i] >= 0) {
+ return -1;
+ }
+ }
+ }
+ return 0;
+ }
+
+ public static GUID nil() {
+ return new GUID(new int[16]);
+ }
+
@Override
public int compareTo(GUID o) {
return memcmp(guid, o.guid, 16);
}
- public GUID(int[] guid) {
- this.guid = guid;
- }
-
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -67,23 +96,6 @@ class GUID implements Comparable<GUID> {
return Arrays.hashCode(guid);
}
- public static int memcmp(int b1[], int b2[], int sz) {
- for (int i = 0; i < sz; i++) {
- if (b1[i] != b2[i]) {
- if ((b1[i] >= 0 && b2[i] >= 0) || (b1[i] < 0 && b2[i] < 0)) {
- return b1[i] - b2[i];
- }
- if (b1[i] < 0 && b2[i] >= 0) {
- return 1;
- }
- if (b2[i] < 0 && b1[i] >= 0) {
- return -1;
- }
- }
- }
- return 0;
- }
-
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -111,10 +123,6 @@ class GUID implements Comparable<GUID> {
return sb.toString().toUpperCase(Locale.US);
}
- public static GUID nil() {
- return new GUID(new int[16]);
- }
-
public int[] getGuid() {
return guid;
}
@@ -127,4 +135,14 @@ class GUID implements Comparable<GUID> {
public String getGuidString() {
return guid.toString();
}
+
+ public List<Byte> toByteArray() {
+ List<Byte> byteList = new ArrayList<>();
+ for (int nextInt : guid) {
+ for (byte b : BitConverter.getBytes(nextInt)) {
+ byteList.add(b);
+ }
+ }
+ return byteList;
+ }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/IndentUtil.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/IndentUtil.java
index 50c381f..bb6d516 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/IndentUtil.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/IndentUtil.java
@@ -14,14 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class IndentUtil {
public static String getIndent(int indentLevel) {
- String retval = "";
+ StringBuilder retval = new StringBuilder();
for (int i = 0; i < indentLevel; ++i) {
- retval += " ";
+ retval.append(" ");
}
- return retval;
+ return retval.toString();
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Int24.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Int24.java
index 8fd7133..034948a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Int24.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Int24.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class Int24 {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
index 745ff55..26daccb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCID.java
@@ -14,11 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
+import org.apache.tika.exception.TikaException;
+
/**
- * The JCID structure specifies the type of object and the type of data the object contains. A JCID structure can be
- * considered to be an unsigned integer of size four bytes as specified by property set and
+ * The JCID structure specifies the type of object and the type of data the object contains.
+ * A JCID structure can be considered to be an unsigned integer of size four bytes as specified
+ * by property set and
* file data object.
*
* <pre>[0,15] - the index</pre>
@@ -31,17 +35,19 @@ package org.apache.tika.parser.microsoft.onenote;
* <p>
* index (2 bytes): An unsigned integer that specifies the type of object.
* <p>
- * A - IsBinary (1 bit): Specifies whether the object contains encryption data transmitted over the File Synchronization via SOAP over
- * HTTP Protocol, as specified in [MS-FSSHTTP].
+ * A - IsBinary (1 bit): Specifies whether the object contains encryption data transmitted over
+ * the File Synchronization via SOAP over HTTP Protocol, as specified in [MS-FSSHTTP].
* <p>
* B - IsPropertySet (1 bit): Specifies whether the object contains a property set.
* <p>
* C - IsGraphNode (1 bit): Undefined and MUST be ignored.
* <p>
- * D - IsFileData (1 bit): Specifies whether the object is a file data object. If the value of IsFileData is "true", then the values of
- * the IsBinary, IsPropertySet, IsGraphNode, and IsReadOnly fields MUST all be false.
+ * D - IsFileData (1 bit): Specifies whether the object is a file data object. If the value of
+ * IsFileData is "true", then the values of the IsBinary, IsPropertySet, IsGraphNode, and
+ * IsReadOnly fields MUST all be false.
* <p>
- * E - IsReadOnly (1 bit): Specifies whether the object's data MUST NOT be changed when the object is revised.
+ * E - IsReadOnly (1 bit): Specifies whether the object's data MUST NOT be changed when the
+ * object is revised.
* <p>
* reserved (11 bits): MUST be zero, and MUST be ignored.
*/
@@ -55,16 +61,18 @@ class JCID {
boolean isReadOnly;
/**
- * If the value of the JCID.IsPropertySet field is "true" or if only JCID.index is specified, then the data
- * for the Object Space Object structure MUST be an ObjectSpaceObjectPropSet structure.
+ * If the value of the JCID.IsPropertySet field is "true" or if only JCID.index is specified,
+ * then the data for the Object Space Object structure MUST be an ObjectSpaceObjectPropSet
+ * structure.
*
* @return true if is ObjectSpaceObjectPropSet. false otherwise.
*/
public boolean isObjectSpaceObjectPropSet() {
- return isPropertySet || !isBinary && !isGraphNode && !isFileData && !isReadOnly && index > 0;
+ return isPropertySet ||
+ !isBinary && !isGraphNode && !isFileData && !isReadOnly && index > 0;
}
- public void loadFrom32BitIndex(long fullIndex) {
+ public void loadFrom32BitIndex(long fullIndex) throws TikaException {
jcid = fullIndex;
index = fullIndex & 0xffff;
isBinary = ((fullIndex >> 16) & 1) == 1;
@@ -73,21 +81,16 @@ class JCID {
isFileData = ((fullIndex >> 19) & 1) == 1;
isReadOnly = ((fullIndex >> 20) & 1) == 1;
if ((fullIndex >> 21) != 0) {
- throw new RuntimeException("RESERVED_NONZERO");
+ throw new TikaException("RESERVED_NONZERO");
}
}
@Override
public String toString() {
- return "JCID{" +
- "jcid=" + JCIDPropertySetTypeEnum.of(jcid) + " (0x" + Long.toHexString(jcid) + ")" +
- ", index=" + index +
- ", isBinary=" + isBinary +
- ", isPropertySet=" + isPropertySet +
- ", isGraphNode=" + isGraphNode +
- ", isFileData=" + isFileData +
- ", isReadOnly=" + isReadOnly +
- '}';
+ return "JCID{" + "jcid=" + JCIDPropertySetTypeEnum.of(jcid) + " (0x" +
+ Long.toHexString(jcid) + ")" + ", index=" + index + ", isBinary=" + isBinary +
+ ", isPropertySet=" + isPropertySet + ", isGraphNode=" + isGraphNode +
+ ", isFileData=" + isFileData + ", isReadOnly=" + isReadOnly + '}';
}
public long getJcid() {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
index 4b30da0..6617bad 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/JCIDPropertySetTypeEnum.java
@@ -25,55 +25,39 @@ import java.util.Map;
* specification.
*/
enum JCIDPropertySetTypeEnum {
- jcidReadOnlyPersistablePropertyContainerForAuthor(0x00120001),
- jcidPersistablePropertyContainerForTOC(0x00020001),
- jcidPersistablePropertyContainerForTOCSection(0x00020001),
- jcidSectionNode(0x00060007),
- jcidPageSeriesNode(0x00060008),
- jcidPageNode(0x0006000B),
- jcidOutlineNode(0x0006000C),
- jcidOutlineElementNode(0x0006000D),
- jcidRichTextOENode(0x0006000E),
- jcidImageNode(0x00060011),
- jcidNumberListNode(0x00060012),
- jcidOutlineGroup(0x00060019),
- jcidTableNode(0x00060022),
- jcidTableRowNode(0x00060023),
- jcidTableCellNode(0x00060024),
- jcidTitleNode(0x0006002C),
- jcidPageMetaData(0x00020030),
- jcidSectionMetaData(0x00020031),
- jcidEmbeddedFileNode(0x00060035),
- jcidPageManifestNode(0x00060037),
- jcidConflictPageMetaData(0x00020038),
- jcidVersionHistoryContent(0x0006003C),
- jcidVersionProxy(0x0006003D),
- jcidNoteTagSharedDefinitionContainer(0x00120043),
- jcidRevisionMetaData(0x00020044),
- jcidVersionHistoryMetaData(0x00020046),
- jcidParagraphStyleObject(0x0012004D),
- jcidParagraphStyleObjectForText(0x0012004D),
- unknown(0x0);
-
- private long jcid;
-
- JCIDPropertySetTypeEnum(long jcid) {
- this.jcid = jcid;
- }
+ jcidReadOnlyPersistablePropertyContainerForAuthor(0x00120001),
+ jcidPersistablePropertyContainerForTOC(0x00020001),
+ jcidPersistablePropertyContainerForTOCSection(0x00020001), jcidSectionNode(0x00060007),
+ jcidPageSeriesNode(0x00060008), jcidPageNode(0x0006000B), jcidOutlineNode(0x0006000C),
+ jcidOutlineElementNode(0x0006000D), jcidRichTextOENode(0x0006000E), jcidImageNode(0x00060011),
+ jcidNumberListNode(0x00060012), jcidOutlineGroup(0x00060019), jcidTableNode(0x00060022),
+ jcidTableRowNode(0x00060023), jcidTableCellNode(0x00060024), jcidTitleNode(0x0006002C),
+ jcidPageMetaData(0x00020030), jcidSectionMetaData(0x00020031), jcidEmbeddedFileNode(0x00060035),
+ jcidPageManifestNode(0x00060037), jcidConflictPageMetaData(0x00020038),
+ jcidVersionHistoryContent(0x0006003C), jcidVersionProxy(0x0006003D),
+ jcidNoteTagSharedDefinitionContainer(0x00120043), jcidRevisionMetaData(0x00020044),
+ jcidVersionHistoryMetaData(0x00020046), jcidParagraphStyleObject(0x0012004D),
+ jcidParagraphStyleObjectForText(0x0012004D), unknown(0x0);
+
+ private static final Map<Long, JCIDPropertySetTypeEnum> BY_ID = new HashMap<>();
+
+ static {
+ for (JCIDPropertySetTypeEnum e : values()) {
+ BY_ID.put(e.jcid, e);
+ }
+ }
- private static final Map<Long, JCIDPropertySetTypeEnum> BY_ID = new HashMap<>();
+ private final long jcid;
- static {
- for (JCIDPropertySetTypeEnum e : values()) {
- BY_ID.put(e.jcid, e);
+ JCIDPropertySetTypeEnum(long jcid) {
+ this.jcid = jcid;
}
- }
- public static JCIDPropertySetTypeEnum of(Long id) {
- JCIDPropertySetTypeEnum result = BY_ID.get(id);
- if (result == null) {
- return unknown;
+ public static JCIDPropertySetTypeEnum of(Long id) {
+ JCIDPropertySetTypeEnum result = BY_ID.get(id);
+ if (result == null) {
+ return unknown;
+ }
+ return result;
}
- return result;
- }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCount.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCount.java
index f3831e5..e6f1771 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCount.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCount.java
@@ -21,20 +21,6 @@ class ObjectDeclarationWithRefCount {
ObjectSpaceObjectPropSet objectRef;
ObjectDeclarationWithRefCountBody body = new ObjectDeclarationWithRefCountBody();
long cRef;
-
- public static class ReadOnly {
- byte[] md5;
-
- public byte[] getMd5() {
- return md5;
- }
-
- public ReadOnly setMd5(byte[] md5) {
- this.md5 = md5;
- return this;
- }
- }
-
ReadOnly readOnly = new ReadOnly();
public ObjectSpaceObjectPropSet getObjectRef() {
@@ -72,4 +58,17 @@ class ObjectDeclarationWithRefCount {
this.readOnly = readOnly;
return this;
}
+
+ public static class ReadOnly {
+ byte[] md5;
+
+ public byte[] getMd5() {
+ return md5;
+ }
+
+ public ReadOnly setMd5(byte[] md5) {
+ this.md5 = md5;
+ return this;
+ }
+ }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCountBody.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCountBody.java
index 476aeb5..7b7edf5 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCountBody.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectDeclarationWithRefCountBody.java
@@ -66,7 +66,8 @@ class ObjectDeclarationWithRefCountBody {
return file_data_store_reference;
}
- public ObjectDeclarationWithRefCountBody setFile_data_store_reference(boolean file_data_store_reference) {
+ public ObjectDeclarationWithRefCountBody setFile_data_store_reference(
+ boolean file_data_store_reference) {
this.file_data_store_reference = file_data_store_reference;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectPropSet.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectPropSet.java
index d555fc9..bd6f28a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectPropSet.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectPropSet.java
@@ -16,10 +16,13 @@
*/
package org.apache.tika.parser.microsoft.onenote;
-class ObjectSpaceObjectPropSet {
- ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs oids = new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
- ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs osids = new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
- ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs contextIDs = new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
+public class ObjectSpaceObjectPropSet {
+ ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs oids =
+ new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
+ ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs osids =
+ new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
+ ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs contextIDs =
+ new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
PropertySet body = new PropertySet();
public ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs getOids() {
@@ -44,7 +47,8 @@ class ObjectSpaceObjectPropSet {
return contextIDs;
}
- public ObjectSpaceObjectPropSet setContextIDs(ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs contextIDs) {
+ public ObjectSpaceObjectPropSet setContextIDs(
+ ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs contextIDs) {
this.contextIDs = contextIDs;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java
index 458b69a..eb53fb7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs.java
@@ -38,7 +38,8 @@ class ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs {
return extendedStreamsPresent;
}
- public ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs setExtendedStreamsPresent(long extendedStreamsPresent) {
+ public ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs setExtendedStreamsPresent(
+ long extendedStreamsPresent) {
this.extendedStreamsPresent = extendedStreamsPresent;
return this;
}
@@ -47,7 +48,8 @@ class ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs {
return osidsStreamNotPresent;
}
- public ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs setOsidsStreamNotPresent(long osidsStreamNotPresent) {
+ public ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs setOsidsStreamNotPresent(
+ long osidsStreamNotPresent) {
this.osidsStreamNotPresent = osidsStreamNotPresent;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDirectFileResource.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDirectFileResource.java
index 475b680..d1d71ea 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDirectFileResource.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDirectFileResource.java
@@ -24,16 +24,17 @@ import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
/**
- * This is copied mostly from the {@link org.apache.tika.parser.mp4.DirectFileReadDataSource}.
+ * This is copied mostly from the
+ * former org.apache.tika.parser.mp4.DirectFileReadDataSource
* <p>
- * Implements a simple way to encapsulate a {@link org.apache.tika.io.TikaInputStream} that you will have to seek,read,repeat
- * while parsing OneNote contents.
+ * Implements a simple way to encapsulate a {@link org.apache.tika.io.TikaInputStream} that you
+ * will have to seek,read,repeat while parsing OneNote contents.
*/
class OneNoteDirectFileResource implements Closeable {
private static final int TRANSFER_SIZE = 8192;
- private RandomAccessFile raf;
+ private final RandomAccessFile raf;
public OneNoteDirectFileResource(File f) throws IOException {
this.raf = new RandomAccessFile(f, "r");
@@ -84,4 +85,4 @@ class OneNoteDirectFileResource implements Closeable {
raf.close();
}
-}
\ No newline at end of file
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
index 83402a2..a949b07 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
@@ -14,15 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import org.apache.commons.lang3.tuple.Pair;
+package org.apache.tika.parser.microsoft.onenote;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.lang3.tuple.Pair;
+
class OneNoteDocument {
OneNoteHeader header;
List<ExtendedGUID> revisionListOrder = new ArrayList<>();
@@ -60,7 +61,8 @@ class OneNoteDocument {
currentRevision = fn.gosid;
}
- public void registerAdditionalRevisionRole(ExtendedGUID gosid, long revisionRole, ExtendedGUID gctxid) {
+ public void registerAdditionalRevisionRole(ExtendedGUID gosid, long revisionRole,
+ ExtendedGUID gctxid) {
revisionRoleMap.put(gosid, Pair.of(revisionRole, gctxid));
}
@@ -86,7 +88,8 @@ class OneNoteDocument {
return revisionManifestLists;
}
- public OneNoteDocument setRevisionManifestLists(Map<ExtendedGUID, FileNodePtr> revisionManifestLists) {
+ public OneNoteDocument setRevisionManifestLists(
+ Map<ExtendedGUID, FileNodePtr> revisionManifestLists) {
this.revisionManifestLists = revisionManifestLists;
return this;
}
@@ -113,7 +116,8 @@ class OneNoteDocument {
return revisionRoleMap;
}
- public OneNoteDocument setRevisionRoleMap(Map<ExtendedGUID, Pair<Long, ExtendedGUID>> revisionRoleMap) {
+ public OneNoteDocument setRevisionRoleMap(
+ Map<ExtendedGUID, Pair<Long, ExtendedGUID>> revisionRoleMap) {
this.revisionRoleMap = revisionRoleMap;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
index a6dc733..d93979f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteHeader.java
@@ -14,9 +14,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import org.apache.tika.exception.TikaException;
+package org.apache.tika.parser.microsoft.onenote;
import java.io.Serializable;
@@ -60,14 +59,15 @@ class OneNoteHeader implements Serializable {
long buildNumberLastWroteToFile;
long buildNumberOldestWritten;
long buildNumberNewestWritten;
+ private boolean legacyOrAlternativePackaging;
/**
- * Determine if this OneNote file pre-dates the open specs published by
- * microsoft.
+ * Determine if this file is saved in the OnPrem OneNote 2013 or greater spec.
+ *
* @return True if file is based on the MS-ONE and MS-ONESTORE specs. False otherwise.
*/
- public boolean isLegacy() {
- return !GUID.nil().equals(guidLegacyFileVersion);
+ public boolean isMsOneStoreFormat() {
+ return GUID.nil().equals(guidLegacyFileVersion);
}
public GUID getGuidFileType() {
@@ -119,7 +119,8 @@ class OneNoteHeader implements Serializable {
return ffvOldestCodeThatHasWrittenToThisFile;
}
- public OneNoteHeader setFfvOldestCodeThatHasWrittenToThisFile(long ffvOldestCodeThatHasWrittenToThisFile) {
+ public OneNoteHeader setFfvOldestCodeThatHasWrittenToThisFile(
+ long ffvOldestCodeThatHasWrittenToThisFile) {
this.ffvOldestCodeThatHasWrittenToThisFile = ffvOldestCodeThatHasWrittenToThisFile;
return this;
}
@@ -128,7 +129,8 @@ class OneNoteHeader implements Serializable {
return ffvNewestCodeThatHasWrittenToThisFile;
}
- public OneNoteHeader setFfvNewestCodeThatHasWrittenToThisFile(long ffvNewestCodeThatHasWrittenToThisFile) {
+ public OneNoteHeader setFfvNewestCodeThatHasWrittenToThisFile(
+ long ffvNewestCodeThatHasWrittenToThisFile) {
this.ffvNewestCodeThatHasWrittenToThisFile = ffvNewestCodeThatHasWrittenToThisFile;
return this;
}
@@ -137,7 +139,8 @@ class OneNoteHeader implements Serializable {
return ffvOldestCodeThatMayReadThisFile;
}
- public OneNoteHeader setFfvOldestCodeThatMayReadThisFile(long ffvOldestCodeThatMayReadThisFile) {
+ public OneNoteHeader setFfvOldestCodeThatMayReadThisFile(
+ long ffvOldestCodeThatMayReadThisFile) {
this.ffvOldestCodeThatMayReadThisFile = ffvOldestCodeThatMayReadThisFile;
return this;
}
@@ -191,7 +194,8 @@ class OneNoteHeader implements Serializable {
return fcrLegacyFileNodeListRoot;
}
- public OneNoteHeader setFcrLegacyFileNodeListRoot(FileChunkReference fcrLegacyFileNodeListRoot) {
+ public OneNoteHeader setFcrLegacyFileNodeListRoot(
+ FileChunkReference fcrLegacyFileNodeListRoot) {
this.fcrLegacyFileNodeListRoot = fcrLegacyFileNodeListRoot;
return this;
}
@@ -200,7 +204,8 @@ class OneNoteHeader implements Serializable {
return cbLegacyFreeSpaceInFreeChunkList;
}
- public OneNoteHeader setCbLegacyFreeSpaceInFreeChunkList(long cbLegacyFreeSpaceInFreeChunkList) {
+ public OneNoteHeader setCbLegacyFreeSpaceInFreeChunkList(
+ long cbLegacyFreeSpaceInFreeChunkList) {
this.cbLegacyFreeSpaceInFreeChunkList = cbLegacyFreeSpaceInFreeChunkList;
return this;
}
@@ -402,4 +407,13 @@ class OneNoteHeader implements Serializable {
this.buildNumberNewestWritten = buildNumberNewestWritten;
return this;
}
+
+ public boolean isLegacyOrAlternativePackaging() {
+ return legacyOrAlternativePackaging;
+ }
+
+ public OneNoteHeader setLegacyOrAlternativePackaging(boolean legacyOrAlternativePackaging) {
+ this.legacyOrAlternativePackaging = legacyOrAlternativePackaging;
+ return this;
+ }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java
index bdcff02..a1bebf0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteLegacyDumpStrings.java
@@ -16,21 +16,23 @@
*/
package org.apache.tika.parser.microsoft.onenote;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.SAXException;
-
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+
/**
- * OneNote versions before OneNote 2010 do not have a published OpenSpec document, and the older formats are drastically
+ * OneNote versions before OneNote 2010 do not have a published OpenSpec document, and the older
+ * formats are drastically
* incompatible with the later OpenSpecs.
- * Therefore, we resort to scraping out useful ASCII and UTF16LE strings using a similar algorithm used by the GNU "strings"
+ * Therefore, we resort to scraping out useful ASCII and UTF16LE strings using an algorithm
+ * similar to the one used by the GNU "strings"
* program.
- *
+ * <p>
* This is only needed for OneNote versions prior to 2010.
*/
class OneNoteLegacyDumpStrings {
@@ -44,13 +46,15 @@ class OneNoteLegacyDumpStrings {
OneNoteDirectFileResource oneNoteDirectFileResource;
XHTMLContentHandler xhtml;
- public OneNoteLegacyDumpStrings(OneNoteDirectFileResource oneNoteDirectFileResource, XHTMLContentHandler xhtml) {
+ public OneNoteLegacyDumpStrings(OneNoteDirectFileResource oneNoteDirectFileResource,
+ XHTMLContentHandler xhtml) {
this.oneNoteDirectFileResource = oneNoteDirectFileResource;
this.xhtml = xhtml;
}
/**
* Dump all "useful" Ascii and UTF16LE strings found in the file to the XHTMLContentHandler.
+ *
* @throws TikaException
* @throws SAXException
*/
@@ -60,7 +64,8 @@ class OneNoteLegacyDumpStrings {
}
/**
- * Based on GNU "strings" implementation. Pulls out ascii text segments and writes them to the XHTMLContentHandler.
+ * Based on the GNU "strings" implementation. Pulls out ASCII text segments and writes them to
+ * the XHTMLContentHandler.
*/
private void dumpAscii() throws SAXException, TikaException {
try {
@@ -73,7 +78,7 @@ class OneNoteLegacyDumpStrings {
if (sz - pos < BUFFER_SIZE) {
nextBufferSize = sz - pos;
}
- ByteBuffer byteBuffer = ByteBuffer.allocate((int)nextBufferSize);
+ ByteBuffer byteBuffer = ByteBuffer.allocate((int) nextBufferSize);
oneNoteDirectFileResource.read(byteBuffer);
for (long i = 0; i < nextBufferSize - 1; ++i) {
int b = byteBuffer.get((int) i);
@@ -94,8 +99,10 @@ class OneNoteLegacyDumpStrings {
throw new TikaException("Could not extract text from legacy OneNote document", e);
}
}
+
/**
- * Based on GNU "strings" implementation. Pulls out UTF16 LE text segments and writes them to the XHTMLContentHandler.
+ * Based on GNU "strings" implementation. Pulls out UTF16 LE text segments and writes them to
+ * the XHTMLContentHandler.
*/
private void dumpUtf16LE() throws SAXException, TikaException {
try {
@@ -114,15 +121,15 @@ class OneNoteLegacyDumpStrings {
if (sz - pos < bufSize) {
nextBufferSize = sz - pos;
}
- ByteBuffer byteBuffer = ByteBuffer.allocate((int)nextBufferSize);
+ ByteBuffer byteBuffer = ByteBuffer.allocate((int) nextBufferSize);
oneNoteDirectFileResource.read(byteBuffer);
+ for (long i = 0; i < nextBufferSize - 1; i++) {
+ int c1 = byteBuffer.get((int) i) & 0xff;
+ int c2 = byteBuffer.get((int) i + 1);
- for (long i = 0; i < nextBufferSize - 1; ++i) {
- int c1 = byteBuffer.get((int)i);
- int c2 = byteBuffer.get((int)i+1);
- if (c1 == 0x00 && c2 >= 0x20 && c2 < 0x7F) {
+ if (c2 == 0x00 && c1 >= 0x20) { // add this back? && c1 < 0x7F) {
++i;
- os.write(c2);
+ os.write(c1);
} else {
if (os.size() >= MIN_STRING_LENGTH) {
writeIfUseful(os);
@@ -138,14 +145,17 @@ class OneNoteLegacyDumpStrings {
throw new TikaException("Could not extract text from legacy OneNote document", e);
}
}
+
/**
- * Writes a buffer of output characters if the (num alpha chars in the buffer) / (number of chars in the buffer) >
+ * Writes a buffer of output characters if the (num alpha chars in the buffer) / (number of
+ * chars in the buffer) >
* ACCEPTABLE_ALPHA_TO_OTHER_CHAR_RATIO.
+ *
* @param os Byte array output stream containing the buffer.
*/
private void writeIfUseful(ByteArrayOutputStream os) throws SAXException {
- String str = new String(os.toByteArray(), StandardCharsets.US_ASCII);
- String [] spl = str.split(" ");
+ String str = new String(os.toByteArray(), StandardCharsets.ISO_8859_1);
+ String[] spl = str.split(" ");
if (spl.length > 1) {
int numAlpha = 0;
for (int i = 0; i < str.length(); ++i) {
@@ -160,4 +170,4 @@ class OneNoteLegacyDumpStrings {
}
}
}
-}
\ No newline at end of file
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
index 6c27505..69e4e2c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
@@ -14,31 +14,36 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
+import java.io.IOException;
+import java.io.InputStream;
+import java.time.Instant;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.MSOneStorePackage;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.MSOneStoreParser;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.AlternativePackaging;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.time.Instant;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
/**
* OneNote tika parser capable of parsing Microsoft OneNote files.
* <p>
@@ -46,20 +51,23 @@ import java.util.Set;
*/
public class OneNoteParser extends AbstractParser {
- private static final Map<MediaType, List<String>> typesMap = new HashMap<>();
+ public static final String ONE_NOTE_PREFIX = "onenote:";
+ private static final Map<MediaType, List<String>> TYPES_MAP = new HashMap<>();
/**
* Serial version UID
*/
private static final long serialVersionUID = -5504243905998074168L;
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.unmodifiableSet(TYPES_MAP.keySet());
static {
// All types should be 4 bytes long, space padded as needed
- typesMap.put(MediaType.application("onenote; format=one"), Arrays.asList("ONE "));
+ TYPES_MAP.put(MediaType.application("onenote; format=one"),
+ Collections.singletonList("ONE "));
// TODO - add onetoc and other onenote mime types
}
- private static final Set<MediaType> SUPPORTED_TYPES =
- Collections.unmodifiableSet(typesMap.keySet());
+ private final OneNoteTreeWalkerOptions options = new OneNoteTreeWalkerOptions();
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
@@ -67,105 +75,157 @@ public class OneNoteParser extends AbstractParser {
}
@Override
- public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
+ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+ ParseContext context) throws IOException, SAXException, TikaException {
+ byte[] oneStoreFileBytes = IOUtils.toByteArray(stream);
try (TemporaryResources temporaryResources = new TemporaryResources();
- TikaInputStream tikaInputStream = TikaInputStream.get(stream, temporaryResources);
- OneNoteDirectFileResource oneNoteDirectFileResource = new OneNoteDirectFileResource(tikaInputStream.getFile())) {
-
- temporaryResources.addResource(oneNoteDirectFileResource);
+ TikaInputStream tikaInputStream = TikaInputStream.get(oneStoreFileBytes);
+ OneNoteDirectFileResource oneNoteDirectFileResource = new OneNoteDirectFileResource(
+ tikaInputStream.getFile())) {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
- OneNoteDocument oneNoteDocument = createOneNoteDocumentFromDirectFileResource(oneNoteDirectFileResource);
-
- if (!oneNoteDocument.header.isLegacy()) {
- metadata.set("buildNumberCreated", "0x" + Long.toHexString(oneNoteDocument.header.buildNumberCreated));
- metadata.set("buildNumberLastWroteToFile", "0x" + Long.toHexString(oneNoteDocument.header.buildNumberLastWroteToFile));
- metadata.set("buildNumberNewestWritten", "0x" + Long.toHexString(oneNoteDocument.header.buildNumberNewestWritten));
- metadata.set("buildNumberOldestWritten", "0x" + Long.toHexString(oneNoteDocument.header.buildNumberOldestWritten));
- metadata.set("cbExpectedFileLength", "0x" + Long.toHexString(oneNoteDocument.header.cbExpectedFileLength));
- metadata.set("cbFreeSpaceInFreeChunkList", "0x" + Long.toHexString(oneNoteDocument.header.cbFreeSpaceInFreeChunkList));
- metadata.set("cbLegacyExpectedFileLength", "0x" + Long.toHexString(oneNoteDocument.header.cbLegacyExpectedFileLength));
- metadata.set("cbLegacyFreeSpaceInFreeChunkList",
- "0x" + Long.toHexString(oneNoteDocument.header.cbLegacyFreeSpaceInFreeChunkList));
- metadata.set("crcName", "0x" + Long.toHexString(oneNoteDocument.header.crcName));
- metadata.set("cTransactionsInLog", "0x" + Long.toHexString(oneNoteDocument.header.cTransactionsInLog));
- metadata.set("ffvLastCodeThatWroteToThisFile", "0x" + Long.toHexString(oneNoteDocument.header.ffvLastCodeThatWroteToThisFile));
- metadata.set("ffvNewestCodeThatHasWrittenToThisFile", "0x" + Long.toHexString(oneNoteDocument.header.ffvNewestCodeThatHasWrittenToThisFile));
- metadata.set("ffvOldestCodeThatMayReadThisFile", "0x" + Long.toHexString(oneNoteDocument.header.ffvOldestCodeThatMayReadThisFile));
- metadata.set("ffvOldestCodeThatHasWrittenToThisFile", "0x" + Long.toHexString(oneNoteDocument.header.ffvOldestCodeThatHasWrittenToThisFile));
- metadata.set("grfDebugLogFlags", "0x" + Long.toHexString(oneNoteDocument.header.grfDebugLogFlags));
- metadata.set("nFileVersionGeneration", "0x" + Long.toHexString(oneNoteDocument.header.nFileVersionGeneration));
- metadata.set("rgbPlaceholder", "0x" + Long.toHexString(oneNoteDocument.header.rgbPlaceholder));
+ temporaryResources.addResource(oneNoteDirectFileResource);
+ OneNoteDocument oneNoteDocument =
+ createOneNoteDocumentFromDirectFileResource(oneNoteDirectFileResource);
+
+ OneNoteHeader header = oneNoteDocument.header;
+
+ if (header.isMsOneStoreFormat()) {
+ metadata.set(ONE_NOTE_PREFIX + "buildNumberCreated",
+ "0x" + Long.toHexString(oneNoteDocument.header.buildNumberCreated));
+ metadata.set(ONE_NOTE_PREFIX + "buildNumberLastWroteToFile",
+ "0x" + Long.toHexString(oneNoteDocument.header.buildNumberLastWroteToFile));
+ metadata.set(ONE_NOTE_PREFIX + "buildNumberNewestWritten",
+ "0x" + Long.toHexString(oneNoteDocument.header.buildNumberNewestWritten));
+ metadata.set(ONE_NOTE_PREFIX + "buildNumberOldestWritten",
+ "0x" + Long.toHexString(oneNoteDocument.header.buildNumberOldestWritten));
+ metadata.set(ONE_NOTE_PREFIX + "cbExpectedFileLength",
+ "0x" + Long.toHexString(oneNoteDocument.header.cbExpectedFileLength));
+ metadata.set(ONE_NOTE_PREFIX + "cbFreeSpaceInFreeChunkList",
+ "0x" + Long.toHexString(oneNoteDocument.header.cbFreeSpaceInFreeChunkList));
+ metadata.set(ONE_NOTE_PREFIX + "cbLegacyExpectedFileLength",
+ "0x" + Long.toHexString(oneNoteDocument.header.cbLegacyExpectedFileLength));
+ metadata.set(ONE_NOTE_PREFIX + "cbLegacyFreeSpaceInFreeChunkList", "0x" +
+ Long.toHexString(oneNoteDocument.header.cbLegacyFreeSpaceInFreeChunkList));
+ metadata.set(ONE_NOTE_PREFIX + "crcName", "0x" + Long.toHexString(oneNoteDocument.header.crcName));
+ metadata.set(ONE_NOTE_PREFIX + "cTransactionsInLog",
+ "0x" + Long.toHexString(oneNoteDocument.header.cTransactionsInLog));
+ metadata.set(ONE_NOTE_PREFIX + "ffvLastCodeThatWroteToThisFile", "0x" +
+ Long.toHexString(oneNoteDocument.header.ffvLastCodeThatWroteToThisFile));
+ metadata.set(ONE_NOTE_PREFIX + "ffvNewestCodeThatHasWrittenToThisFile", "0x" + Long.toHexString(
+ oneNoteDocument.header.ffvNewestCodeThatHasWrittenToThisFile));
+ metadata.set(ONE_NOTE_PREFIX + "ffvOldestCodeThatMayReadThisFile", "0x" +
+ Long.toHexString(oneNoteDocument.header.ffvOldestCodeThatMayReadThisFile));
+ metadata.set(ONE_NOTE_PREFIX + "ffvOldestCodeThatHasWrittenToThisFile", "0x" + Long.toHexString(
+ oneNoteDocument.header.ffvOldestCodeThatHasWrittenToThisFile));
+ metadata.set(ONE_NOTE_PREFIX + "grfDebugLogFlags",
+ "0x" + Long.toHexString(oneNoteDocument.header.grfDebugLogFlags));
+ metadata.set(ONE_NOTE_PREFIX + "nFileVersionGeneration",
+ "0x" + Long.toHexString(oneNoteDocument.header.nFileVersionGeneration));
+ metadata.set(ONE_NOTE_PREFIX + "rgbPlaceholder",
+ "0x" + Long.toHexString(oneNoteDocument.header.rgbPlaceholder));
Pair<Long, ExtendedGUID> roleAndContext = Pair.of(1L, ExtendedGUID.nil());
- OneNoteTreeWalker oneNoteTreeWalker = new OneNoteTreeWalker(
- new OneNoteTreeWalkerOptions(), oneNoteDocument,
- oneNoteDirectFileResource, xhtml, metadata, context, roleAndContext);
+ OneNoteTreeWalker oneNoteTreeWalker =
+ new OneNoteTreeWalker(options, oneNoteDocument, oneNoteDirectFileResource,
+ xhtml, metadata, context, roleAndContext);
oneNoteTreeWalker.walkTree();
if (!oneNoteTreeWalker.getAuthors().isEmpty()) {
- metadata.set(Property.externalTextBag("authors"), oneNoteTreeWalker.getAuthors().toArray(new String[] {}));
+ metadata.set(TikaCoreProperties.CREATOR,
+ oneNoteTreeWalker.getAuthors().toArray(new String[]{}));
}
if (!oneNoteTreeWalker.getMostRecentAuthors().isEmpty()) {
- metadata.set(Property.externalTextBag("mostRecentAuthors"), oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[] {}));
+ metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "mostRecentAuthors"),
+ oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[]{}));
}
if (!oneNoteTreeWalker.getOriginalAuthors().isEmpty()) {
- metadata.set(Property.externalTextBag("originalAuthors"), oneNoteTreeWalker.getOriginalAuthors().toArray(new String[] {}));
+ metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "originalAuthors"),
+ oneNoteTreeWalker.getOriginalAuthors().toArray(new String[]{}));
}
- if (!Instant.MAX.equals(oneNoteTreeWalker.getCreationTimestamp())) {
- metadata.set("creationTimestamp", String.valueOf(oneNoteTreeWalker.getCreationTimestamp()));
+ if (!Instant.MAX.equals(
+ Instant.ofEpochMilli(oneNoteTreeWalker.getCreationTimestamp()))) {
+ metadata.set(ONE_NOTE_PREFIX + "creationTimestamp",
+ String.valueOf(oneNoteTreeWalker.getCreationTimestamp()));
}
if (!Instant.MIN.equals(oneNoteTreeWalker.getLastModifiedTimestamp())) {
- metadata.set("lastModifiedTimestamp", String.valueOf(oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli()));
+ metadata.set(ONE_NOTE_PREFIX + "lastModifiedTimestamp", String.valueOf(
+ oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli()));
}
if (oneNoteTreeWalker.getLastModified() > Long.MIN_VALUE) {
- metadata.set("lastModified", String.valueOf(oneNoteTreeWalker.getLastModified()));
+ metadata.set(TikaCoreProperties.MODIFIED,
+ String.valueOf(oneNoteTreeWalker.getLastModified()));
+ }
+ } else if (header.isLegacyOrAlternativePackaging()) {
+ try {
+ AlternativePackaging alternatePackageOneStoreFile = new AlternativePackaging();
+ alternatePackageOneStoreFile.doDeserializeFromByteArray(oneStoreFileBytes, 0);
+
+ MSOneStoreParser onenoteParser = new MSOneStoreParser();
+ MSOneStorePackage pkg =
+ onenoteParser.parse(alternatePackageOneStoreFile.dataElementPackage);
+
+ pkg.walkTree(options, metadata, xhtml);
+ } catch (Exception e) {
+ OneNoteLegacyDumpStrings dumpStrings =
+ new OneNoteLegacyDumpStrings(oneNoteDirectFileResource, xhtml);
+ dumpStrings.dump();
}
} else {
- OneNoteLegacyDumpStrings dumpStrings = new OneNoteLegacyDumpStrings(oneNoteDirectFileResource, xhtml);
- dumpStrings.dump();
+ throw new TikaException("Invalid OneStore document - could not parse headers");
}
xhtml.endDocument();
}
+
+
}
/**
* Create a OneNoteDocument object.
* <p>
- * This won't actually have the binary data of any of the sections, but it's more of a metadata structure that contains
- * the general structure of the container and contains offset positions of where to find the binary data we care about.
+ * The result does not actually hold the binary data of the sections. It is more of a
+ * metadata structure: it describes the general structure of the container and
+ * records the offset positions at which the binary data we care about can be
+ * found.
* <p>
* OneNote files are of format:
* <p>
- * The header (section 2.3.1 in MS-ONESTORE) is the first 1024 bytes of the file. It contains references to the other structures in the
+ * The header (section 2.3.1 in MS-ONESTORE) is the first 1024 bytes of the file. It contains
+ * references to the other structures in the
* file as well as metadata about the file.
- * The free chunk list (section 2.3.2 in MS-ONESTORE) defines where there are free spaces in the file where data can be written.
- * The transaction log (section 2.3.3 in MS-ONESTORE) stores the state and length of each file node list (section 2.4 in MS-ONESTORE)
+ * The free chunk list (section 2.3.2 in MS-ONESTORE) defines where there are free spaces in
+ * the file where data can be written.
+ * The transaction log (section 2.3.3 in MS-ONESTORE) stores the state and length of each
+ * file node list (section 2.4 in MS-ONESTORE)
* in the file.
- * The hashed chunk list (section 2.3.4 in MS-ONESTORE) stores read-only objects in the file that can be referenced by multiple
+ * The hashed chunk list (section 2.3.4 in MS-ONESTORE) stores read-only objects in the file
+ * that can be referenced by multiple
* revisions (section 2.1.8 in MS-ONESTORE).
- * The root file node list (section 2.1.14 in MS-ONESTORE) is the file node list that is the root of the tree of all file node lists in
+ * The root file node list (section 2.1.14 in MS-ONESTORE) is the file node list that is the
+ * root of the tree of all file node lists in
* the file.
* <p>
* In this method we first parse the header.
* <p>
* After parsing the header, this results in header.fcrFileNodeListRoot that points to the first
*
- * @param oneNoteDirectFileResource A random access file resource used as the source of the content.
- * @return A parsed one note document. This document does not contain any of the binary data, rather it just contains
+ * @param oneNoteDirectFileResource A random access file resource used as the source of the
+ * content.
+ * @return A parsed OneNote document. This document does not contain any of the binary data;
+ * rather, it contains only
* the data pointers and metadata.
* @throws IOException Will throw IOException in typical IO issue situations.
*/
- public OneNoteDocument createOneNoteDocumentFromDirectFileResource(OneNoteDirectFileResource oneNoteDirectFileResource) throws IOException, TikaException {
+ public OneNoteDocument createOneNoteDocumentFromDirectFileResource(
+ OneNoteDirectFileResource oneNoteDirectFileResource) throws IOException, TikaException {
OneNoteDocument oneNoteDocument = new OneNoteDocument();
OneNotePtr oneNotePtr = new OneNotePtr(oneNoteDocument, oneNoteDirectFileResource);
// First parse out the header.
oneNoteDocument.header = oneNotePtr.deserializeHeader();
- if (!oneNoteDocument.header.isLegacy()) {
+ if (oneNoteDocument.header.isMsOneStoreFormat()) {
// Now that we parsed the header, the "root file node list"
oneNotePtr.reposition(oneNoteDocument.header.fcrFileNodeListRoot);
FileNodePtr curPath = new FileNodePtr();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyEnum.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyEnum.java
index c47a5f7..f676b55 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyEnum.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyEnum.java
@@ -14,163 +14,70 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
import java.util.HashMap;
import java.util.Map;
@SuppressWarnings("unused")
-enum OneNotePropertyEnum {
- LayoutTightLayout(0x08001C00),
- PageWidth(0x14001C01),
- PageHeight(0x14001C02),
- OutlineElementChildLevel(0x0C001C03),
- Bold(0x08001C04),
- Italic(0x08001C05),
- Underline(0x08001C06),
- Strikethrough(0x08001C07),
- Superscript(0x08001C08),
- Subscript(0x08001C09),
- Font(0x1C001C0A),
- FontSize(0x10001C0B),
- FontColor(0x14001C0C),
- Highlight(0x14001C0D),
- RgOutlineIndentDistance(0x1C001C12),
- BodyTextAlignment(0x0C001C13),
- OffsetFromParentHoriz(0x14001C14),
- OffsetFromParentVert(0x14001C15),
- NumberListFormat(0x1C001C1A),
- LayoutMaxWidth(0x14001C1B),
- LayoutMaxHeight(0x14001C1C),
- ContentChildNodesOfOutlineElement(0x24001C1F),
- ContentChildNodesOfPageManifest(0x24001C1F),
- ElementChildNodesOfSection(0x24001C20),
- ElementChildNodesOfPage(0x24001C20),
- ElementChildNodesOfTitle(0x24001C20),
- ElementChildNodesOfOutline(0x24001C20),
- ElementChildNodesOfOutlineElement(0x24001C20),
- ElementChildNodesOfTable(0x24001C20),
- ElementChildNodesOfTableRow(0x24001C20),
- ElementChildNodesOfTableCell(0x24001C20),
- ElementChildNodesOfVersionHistory(0x24001C20),
- EnableHistory(0x08001E1E),
- RichEditTextUnicode(0x1C001C22),
- ListNodes(0x24001C26),
- NotebookManagementEntityGuid(0x1C001C30),
- OutlineElementRTL(0x08001C34),
- LanguageID(0x14001C3B),
- LayoutAlignmentInParent(0x14001C3E),
- PictureContainer(0x20001C3F),
- PageMarginTop(0x14001C4C),
- PageMarginBottom(0x14001C4D),
- PageMarginLeft(0x14001C4E),
- PageMarginRight(0x14001C4F),
- ListFont(0x1C001C52),
- TopologyCreationTimeStamp(0x18001C65),
- LayoutAlignmentSelf(0x14001C84),
- IsTitleTime(0x08001C87),
- IsBoilerText(0x08001C88),
- PageSize(0x14001C8B),
- PortraitPage(0x08001C8E),
- EnforceOutlineStructure(0x08001C91),
- EditRootRTL(0x08001C92),
- CannotBeSelected(0x08001CB2),
- IsTitleText(0x08001CB4),
- IsTitleDate(0x08001CB5),
- ListRestart(0x14001CB7),
- IsLayoutSizeSetByUser(0x08001CBD),
- ListSpacingMu(0x14001CCB),
- LayoutOutlineReservedWidth(0x14001CDB),
- LayoutResolveChildCollisions(0x08001CDC),
- IsReadOnly(0x08001CDE),
- LayoutMinimumOutlineWidth(0x14001CEC),
- LayoutCollisionPriority(0x14001CF1),
- CachedTitleString(0x1C001CF3),
- DescendantsCannotBeMoved(0x08001CF9),
- RichEditTextLangID(0x10001CFE),
- LayoutTightAlignment(0x08001CFF),
- Charset(0x0C001D01),
- CreationTimeStamp(0x14001D09),
- Deletable(0x08001D0C),
- ListMSAAIndex(0x10001D0E),
- IsBackground(0x08001D13),
- IRecordMedia(0x14001D24),
- CachedTitleStringFromPage(0x1C001D3C),
- RowCount(0x14001D57),
- ColumnCount(0x14001D58),
- TableBordersVisible(0x08001D5E),
- StructureElementChildNodes(0x24001D5F),
- ChildGraphSpaceElementNodes(0x2C001D63),
- TableColumnWidths(0x1C001D66),
- Author(0x1C001D75),
- LastModifiedTimeStamp(0x18001D77),
- AuthorOriginal(0x20001D78),
- AuthorMostRecent(0x20001D79),
- LastModifiedTime(0x14001D7A),
- IsConflictPage(0x08001D7C),
- TableColumnsLocked(0x1C001D7D),
- SchemaRevisionInOrderToRead(0x14001D82),
- IsConflictObjectForRender(0x08001D96),
- EmbeddedFileContainer(0x20001D9B),
- EmbeddedFileName(0x1C001D9C),
- SourceFilepath(0x1C001D9D),
- ConflictingUserName(0x1C001D9E),
- ImageFilename(0x1C001DD7),
- IsConflictObjectForSelection(0x08001DDB),
- PageLevel(0x14001DFF),
- TextRunIndex(0x1C001E12),
- TextRunFormatting(0x24001E13),
- Hyperlink(0x08001E14),
- UnderlineType(0x0C001E15),
- Hidden(0x08001E16),
- HyperlinkProtected(0x08001E19),
- TextRunIsEmbeddedObject(0x08001E22),
- ImageAltText(0x1C001E58),
- MathFormatting(0x08003401),
- ParagraphStyle(0x2000342C),
- ParagraphSpaceBefore(0x1400342E),
- ParagraphSpaceAfter(0x1400342F),
- ParagraphLineSpacingExact(0x14003430),
- MetaDataObjectsAboveGraphSpace(0x24003442),
- TextRunDataObject(0x24003458),
- TextRunData(0x40003499),
- ParagraphStyleId(0x1C00345A),
- HasVersionPages(0x08003462),
- ActionItemType(0x10003463),
- NoteTagShape(0x10003464),
- NoteTagHighlightColor(0x14003465),
- NoteTagTextColor(0x14003466),
- NoteTagPropertyStatus(0x14003467),
- NoteTagLabel(0x1C003468),
- NoteTagCreated(0x1400346E),
- NoteTagCompleted(0x1400346F),
- NoteTagDefinitionOid(0x20003488),
- NoteTagStates(0x04003489),
- ActionItemStatus(0x10003470),
- ActionItemSchemaVersion(0x0C003473),
- ReadingOrderRTL(0x08003476),
- ParagraphAlignment(0x0C003477),
- VersionHistoryGraphSpaceContextNodes(0x3400347B),
- DisplayedPageNumber(0x14003480),
- SectionDisplayName(0x1C00349B),
- NextStyle(0x1C00348A),
- WebPictureContainer14(0x200034C8),
- ImageUploadState(0x140034CB),
- TextExtendedAscii(0x1C003498),
- PictureWidth(0x140034CD),
- PictureHeight(0x140034CE),
- PageMarginOriginX(0x14001D0F),
- PageMarginOriginY(0x14001D10),
- WzHyperlinkUrl(0x1C001E20),
- TaskTagDueDate(0x1400346B),
+public enum OneNotePropertyEnum {
+ LayoutTightLayout(0x08001C00), PageWidth(0x14001C01), PageHeight(0x14001C02),
+ OutlineElementChildLevel(0x0C001C03), Bold(0x08001C04), Italic(0x08001C05),
+ Underline(0x08001C06), Strikethrough(0x08001C07), Superscript(0x08001C08),
+ Subscript(0x08001C09), Font(0x1C001C0A), FontSize(0x10001C0B), FontColor(0x14001C0C),
+ Highlight(0x14001C0D), RgOutlineIndentDistance(0x1C001C12), BodyTextAlignment(0x0C001C13),
+ OffsetFromParentHoriz(0x14001C14), OffsetFromParentVert(0x14001C15),
+ NumberListFormat(0x1C001C1A), LayoutMaxWidth(0x14001C1B), LayoutMaxHeight(0x14001C1C),
+ ContentChildNodesOfOutlineElement(0x24001C1F), ContentChildNodesOfPageManifest(0x24001C1F),
+ ElementChildNodesOfSection(0x24001C20), ElementChildNodesOfPage(0x24001C20),
+ ElementChildNodesOfTitle(0x24001C20), ElementChildNodesOfOutline(0x24001C20),
+ ElementChildNodesOfOutlineElement(0x24001C20), ElementChildNodesOfTable(0x24001C20),
+ ElementChildNodesOfTableRow(0x24001C20), ElementChildNodesOfTableCell(0x24001C20),
+ ElementChildNodesOfVersionHistory(0x24001C20), EnableHistory(0x08001E1E),
+ RichEditTextUnicode(0x1C001C22), ListNodes(0x24001C26),
+ NotebookManagementEntityGuid(0x1C001C30), OutlineElementRTL(0x08001C34), LanguageID(0x14001C3B),
+ LayoutAlignmentInParent(0x14001C3E), PictureContainer(0x20001C3F), PageMarginTop(0x14001C4C),
+ PageMarginBottom(0x14001C4D), PageMarginLeft(0x14001C4E), PageMarginRight(0x14001C4F),
+ ListFont(0x1C001C52), TopologyCreationTimeStamp(0x18001C65), LayoutAlignmentSelf(0x14001C84),
+ IsTitleTime(0x08001C87), IsBoilerText(0x08001C88), PageSize(0x14001C8B),
+ PortraitPage(0x08001C8E), EnforceOutlineStructure(0x08001C91), EditRootRTL(0x08001C92),
+ CannotBeSelected(0x08001CB2), IsTitleText(0x08001CB4), IsTitleDate(0x08001CB5),
+ ListRestart(0x14001CB7), IsLayoutSizeSetByUser(0x08001CBD), ListSpacingMu(0x14001CCB),
+ LayoutOutlineReservedWidth(0x14001CDB), LayoutResolveChildCollisions(0x08001CDC),
+ IsReadOnly(0x08001CDE), LayoutMinimumOutlineWidth(0x14001CEC),
+ LayoutCollisionPriority(0x14001CF1), CachedTitleString(0x1C001CF3),
+ DescendantsCannotBeMoved(0x08001CF9), RichEditTextLangID(0x10001CFE),
+ LayoutTightAlignment(0x08001CFF), Charset(0x0C001D01), CreationTimeStamp(0x14001D09),
+ Deletable(0x08001D0C), ListMSAAIndex(0x10001D0E), IsBackground(0x08001D13),
+ IRecordMedia(0x14001D24), CachedTitleStringFromPage(0x1C001D3C), RowCount(0x14001D57),
+ ColumnCount(0x14001D58), TableBordersVisible(0x08001D5E),
+ StructureElementChildNodes(0x24001D5F), ChildGraphSpaceElementNodes(0x2C001D63),
+ TableColumnWidths(0x1C001D66), Author(0x1C001D75), LastModifiedTimeStamp(0x18001D77),
+ AuthorOriginal(0x20001D78), AuthorMostRecent(0x20001D79), LastModifiedTime(0x14001D7A),
+ IsConflictPage(0x08001D7C), TableColumnsLocked(0x1C001D7D),
+ SchemaRevisionInOrderToRead(0x14001D82), IsConflictObjectForRender(0x08001D96),
+ EmbeddedFileContainer(0x20001D9B), EmbeddedFileName(0x1C001D9C), SourceFilepath(0x1C001D9D),
+ ConflictingUserName(0x1C001D9E), ImageFilename(0x1C001DD7),
+ IsConflictObjectForSelection(0x08001DDB), PageLevel(0x14001DFF), TextRunIndex(0x1C001E12),
+ TextRunFormatting(0x24001E13), Hyperlink(0x08001E14), UnderlineType(0x0C001E15),
+ Hidden(0x08001E16), HyperlinkProtected(0x08001E19), TextRunIsEmbeddedObject(0x08001E22),
+ ImageAltText(0x1C001E58), MathFormatting(0x08003401), ParagraphStyle(0x2000342C),
+ ParagraphSpaceBefore(0x1400342E), ParagraphSpaceAfter(0x1400342F),
+ ParagraphLineSpacingExact(0x14003430), MetaDataObjectsAboveGraphSpace(0x24003442),
+ TextRunDataObject(0x24003458), TextRunData(0x40003499), ParagraphStyleId(0x1C00345A),
+ HasVersionPages(0x08003462), ActionItemType(0x10003463), NoteTagShape(0x10003464),
+ NoteTagHighlightColor(0x14003465), NoteTagTextColor(0x14003466),
+ NoteTagPropertyStatus(0x14003467), NoteTagLabel(0x1C003468), NoteTagCreated(0x1400346E),
+ NoteTagCompleted(0x1400346F), NoteTagDefinitionOid(0x20003488), NoteTagStates(0x04003489),
+ ActionItemStatus(0x10003470), ActionItemSchemaVersion(0x0C003473), ReadingOrderRTL(0x08003476),
+ ParagraphAlignment(0x0C003477), VersionHistoryGraphSpaceContextNodes(0x3400347B),
+ DisplayedPageNumber(0x14003480), SectionDisplayName(0x1C00349B), NextStyle(0x1C00348A),
+ WebPictureContainer14(0x200034C8), ImageUploadState(0x140034CB), TextExtendedAscii(0x1C003498),
+ PictureWidth(0x140034CD), PictureHeight(0x140034CE), PageMarginOriginX(0x14001D0F),
+ PageMarginOriginY(0x14001D10), WzHyperlinkUrl(0x1C001E20), TaskTagDueDate(0x1400346B),
Unknown(0x00000000);
- private long id;
-
- OneNotePropertyEnum(long id) {
- this.id = id;
- }
-
private static final Map<Long, OneNotePropertyEnum> BY_ID = new HashMap<>();
static {
@@ -179,6 +86,12 @@ enum OneNotePropertyEnum {
}
}
+ private final long id;
+
+ OneNotePropertyEnum(long id) {
+ this.id = id;
+ }
+
public static OneNotePropertyEnum of(Long id) {
OneNotePropertyEnum result = BY_ID.get(id);
if (result == null) {
@@ -202,7 +115,7 @@ enum OneNotePropertyEnum {
inlineBool = ((pid >> 31) & 0x1) > 0; // set the bool value from header
} else {
if (((pid >> 31) & 0x1) > 0) {
- throw new RuntimeException("Reserved non-zero");
+ throw new IllegalArgumentException("Reserved non-zero");
}
}
return inlineBool;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyId.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyId.java
index 661b03e..6491a93 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyId.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePropertyId.java
@@ -14,8 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
+import org.apache.tika.exception.TikaException;
+
class OneNotePropertyId {
OneNotePropertyEnum propertyEnum;
long pid;
@@ -25,7 +28,7 @@ class OneNotePropertyId {
public OneNotePropertyId() {
}
- public OneNotePropertyId(long pid) {
+ public OneNotePropertyId(long pid) throws TikaException {
this.pid = pid;
propertyEnum = OneNotePropertyEnum.of(pid);
type = pid >> 26 & 0x1f;
@@ -34,7 +37,7 @@ class OneNotePropertyId {
inlineBool = ((pid >> 31) & 0x1) > 0; // set the bool value from header
} else {
if (((pid >> 31) & 0x1) > 0) {
- throw new RuntimeException("Reserved non-zero");
+ throw new TikaException("Reserved non-zero");
}
}
}
@@ -77,10 +80,7 @@ class OneNotePropertyId {
@Override
public String toString() {
- return "{" + propertyEnum +
- ", pid=0x" + Long.toHexString(pid) +
- ", type=0x" + Long.toHexString(type) +
- ", inlineBool=" + inlineBool +
- '}';
+ return "{" + propertyEnum + ", pid=0x" + Long.toHexString(pid) + ", type=0x" +
+ Long.toHexString(type) + ", inlineBool=" + inlineBool + '}';
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
index 0dc13f3..ce0d20b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java
@@ -17,13 +17,6 @@
package org.apache.tika.parser.microsoft.onenote;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.io.EndianUtils;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.exception.TikaMemoryLimitException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
@@ -33,63 +26,35 @@ import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.io.EndianUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
- * This is the main class used during parsing. This will contain an offset position and end position for reading bytes
- * from the byte stream.
+ * This is the main class used during parsing. This will contain an offset position and end
+ * position for reading bytes from the byte stream.
* <p>
- * It contains all the deserialize methods used to read the different data elements from a one note file.
+ * It contains all the deserialize methods used to read the different data elements from a one
+ * note file.
* <p>
- * You can construct a new one note pointer and it will reposition the byte channel and will read until
+ * You can construct a new one note pointer and it will reposition the byte channel and will
+ * read until
*/
class OneNotePtr {
- private static final Logger LOG = LoggerFactory.getLogger(OneNoteParser.class);
-
public static final long FOOTER_CONST = 0x8BC215C38233BA4BL;
public static final String UNKNOWN = "unknown";
- private static final byte[] IFNDF = new byte[] {
- 60, 0, 105, 0, 102, 0, 110, 0, 100, 0, 102, 0, 62, 0
- };
-
- private static final GUID FILE_DATA_STORE_OBJ_HEADER = new GUID(new int[] {
- 0xBD,
- 0xE3,
- 0x16,
- 0xE7,
- 0x26,
- 0x65,
- 0x45,
- 0x11,
- 0xA4,
- 0xC4,
- 0x8D,
- 0x4D,
- 0x0B,
- 0x7A,
- 0x9E,
- 0xAC
- });
-
- private static final GUID FILE_DATA_STORE_OBJ_FOOTER = new GUID(new int[] {
- 0x71,
- 0xFB,
- 0xA7,
- 0x22,
- 0x0F,
- 0x79,
- 0x4A,
- 0x0B,
- 0xBB,
- 0x13,
- 0x89,
- 0x92,
- 0x56,
- 0x42,
- 0x6B,
- 0x24});
-
public static final int IFNDF_GUID_LENGTH = 38; // 36 char guid with a { and a } char.
public static final int NUM_RESERVED_BYTES_AT_END_OF_HEADER = 728;
+ private static final Logger LOG = LoggerFactory.getLogger(OneNoteParser.class);
+ private static final byte[] IFNDF =
+ new byte[]{60, 0, 105, 0, 102, 0, 110, 0, 100, 0, 102, 0, 62, 0};
+ private static final String PACKAGE_STORAGE_FILE_FORMAT_GUID =
+ "{638DE92F-A6D4-4BC1-9A36-B3FC2511A5B7}";
+
int indentLevel = 0;
long offset;
@@ -98,7 +63,8 @@ class OneNotePtr {
OneNoteDocument document;
OneNoteDirectFileResource dif;
- public OneNotePtr(OneNoteDocument document, OneNoteDirectFileResource oneNoteDirectFileResource) throws IOException {
+ public OneNotePtr(OneNoteDocument document, OneNoteDirectFileResource oneNoteDirectFileResource)
+ throws IOException {
this.document = document;
this.dif = oneNoteDirectFileResource;
offset = oneNoteDirectFileResource.position();
@@ -115,44 +81,45 @@ class OneNotePtr {
public OneNoteHeader deserializeHeader() throws IOException, TikaException {
OneNoteHeader data = new OneNoteHeader();
- data.setGuidFileType(deserializeGUID())
- .setGuidFile(deserializeGUID())
- .setGuidLegacyFileVersion(deserializeGUID())
- .setGuidFileFormat(deserializeGUID())
- .setFfvLastCodeThatWroteToThisFile(deserializeLittleEndianInt())
- .setFfvOldestCodeThatHasWrittenToThisFile(deserializeLittleEndianInt())
- .setFfvNewestCodeThatHasWrittenToThisFile(deserializeLittleEndianInt())
- .setFfvOldestCodeThatMayReadThisFile(deserializeLittleEndianInt())
- .setFcrLegacyFreeChunkList(deserializeFileChunkReference64())
- .setFcrLegacyTransactionLog(deserializeFileChunkReference64())
- .setcTransactionsInLog(deserializeLittleEndianInt())
- .setCbExpectedFileLength(deserializeLittleEndianInt())
- .setRgbPlaceholder(deserializeLittleEndianLong())
- .setFcrLegacyFileNodeListRoot(deserializeFileChunkReference64())
- .setCbLegacyFreeSpaceInFreeChunkList(deserializeLittleEndianInt())
- .setIgnoredZeroA(deserializeLittleEndianChar())
- .setIgnoredZeroB(deserializeLittleEndianChar())
- .setIgnoredZeroC(deserializeLittleEndianChar())
- .setIgnoredZeroD(deserializeLittleEndianChar())
- .setGuidAncestor(deserializeGUID())
- .setCrcName(deserializeLittleEndianInt())
- .setFcrHashedChunkList(deserializeFileChunkReference64x32())
- .setFcrTransactionLog(deserializeFileChunkReference64x32())
- .setFcrFileNodeListRoot(deserializeFileChunkReference64x32())
- .setFcrFreeChunkList(deserializeFileChunkReference64x32())
- .setCbExpectedFileLength(deserializeLittleEndianLong())
- .setCbFreeSpaceInFreeChunkList(deserializeLittleEndianLong())
- .setGuidFileVersion(deserializeGUID())
- .setnFileVersionGeneration(deserializeLittleEndianLong())
- .setGuidDenyReadFileVersion(deserializeGUID())
- .setGrfDebugLogFlags(deserializeLittleEndianInt())
- .setFcrDebugLogA(deserializeFileChunkReference64x32())
- .setFcrDebugLogB(deserializeFileChunkReference64x32())
- .setBuildNumberCreated(deserializeLittleEndianInt())
- .setBuildNumberLastWroteToFile(deserializeLittleEndianInt())
- .setBuildNumberOldestWritten(deserializeLittleEndianInt())
- .setBuildNumberNewestWritten(deserializeLittleEndianInt());
- ByteBuffer reservedBytesAtEndOfHeader = ByteBuffer.allocate(NUM_RESERVED_BYTES_AT_END_OF_HEADER);
+ data.setGuidFileType(deserializeGUID()).setGuidFile(deserializeGUID())
+ .setGuidLegacyFileVersion(deserializeGUID()).setGuidFileFormat(deserializeGUID())
+ .setFfvLastCodeThatWroteToThisFile(deserializeLittleEndianInt())
+ .setFfvOldestCodeThatHasWrittenToThisFile(deserializeLittleEndianInt())
+ .setFfvNewestCodeThatHasWrittenToThisFile(deserializeLittleEndianInt())
+ .setFfvOldestCodeThatMayReadThisFile(deserializeLittleEndianInt())
+ .setFcrLegacyFreeChunkList(deserializeFileChunkReference64())
+ .setFcrLegacyTransactionLog(deserializeFileChunkReference64())
+ .setcTransactionsInLog(deserializeLittleEndianInt())
+ .setCbExpectedFileLength(deserializeLittleEndianInt())
+ .setRgbPlaceholder(deserializeLittleEndianLong())
+ .setFcrLegacyFileNodeListRoot(deserializeFileChunkReference64())
+ .setCbLegacyFreeSpaceInFreeChunkList(deserializeLittleEndianInt())
+ .setIgnoredZeroA(deserializeLittleEndianChar())
+ .setIgnoredZeroB(deserializeLittleEndianChar())
+ .setIgnoredZeroC(deserializeLittleEndianChar())
+ .setIgnoredZeroD(deserializeLittleEndianChar()).setGuidAncestor(deserializeGUID())
+ .setCrcName(deserializeLittleEndianInt())
+ .setFcrHashedChunkList(deserializeFileChunkReference64x32())
+ .setFcrTransactionLog(deserializeFileChunkReference64x32())
+ .setFcrFileNodeListRoot(deserializeFileChunkReference64x32())
+ .setFcrFreeChunkList(deserializeFileChunkReference64x32())
+ .setCbExpectedFileLength(deserializeLittleEndianLong())
+ .setCbFreeSpaceInFreeChunkList(deserializeLittleEndianLong())
+ .setGuidFileVersion(deserializeGUID())
+ .setnFileVersionGeneration(deserializeLittleEndianLong())
+ .setGuidDenyReadFileVersion(deserializeGUID())
+ .setGrfDebugLogFlags(deserializeLittleEndianInt())
+ .setFcrDebugLogA(deserializeFileChunkReference64x32())
+ .setFcrDebugLogB(deserializeFileChunkReference64x32())
+ .setBuildNumberCreated(deserializeLittleEndianInt())
+ .setBuildNumberLastWroteToFile(deserializeLittleEndianInt())
+ .setBuildNumberOldestWritten(deserializeLittleEndianInt())
+ .setBuildNumberNewestWritten(deserializeLittleEndianInt());
+ if (data.getGuidFileFormat().toString().equals(PACKAGE_STORAGE_FILE_FORMAT_GUID)) {
+ return data.setLegacyOrAlternativePackaging(true);
+ }
+ ByteBuffer reservedBytesAtEndOfHeader =
+ ByteBuffer.allocate(NUM_RESERVED_BYTES_AT_END_OF_HEADER);
deserializeBytes(reservedBytesAtEndOfHeader);
return data;
}
@@ -250,18 +217,17 @@ class OneNotePtr {
}
int c1 = dif.read();
int c2 = dif.read();
- long res = (((c1 & 0xff) << 0) +
- ((c2 & 0xff) << 8));
+ long res = (((c1 & 0xff)) + ((c2 & 0xff) << 8));
offset = dif.position();
return res;
}
private String getIndent() {
- String retval = "";
+ StringBuilder retval = new StringBuilder();
for (int i = 0; i < indentLevel; ++i) {
- retval += " ";
+ retval.append(" ");
}
- return retval;
+ return retval.toString();
}
public void reposition(FileChunkReference loc) throws IOException {
@@ -279,20 +245,21 @@ class OneNotePtr {
* <p>
* A file node list can be divided into one or more FileNodeListFragment
* structures. Each fragment can specify whether there are more fragments in the list and
- * the location of the next fragment. Each fragment specifies a sub-sequence of FileNode structures
- * from the file node list.
+ * the location of the next fragment. Each fragment specifies a sub-sequence
+ * of FileNode structures from the file node list.
* <p>
- * When specifying the structure of a specific file node list in this document, the division of the list into
- * fragments is ignored and FileNode structures with FileNode.FileNodeID field values equal to 0x0FF
- * ("ChunkTerminatorFND") are not specified.
+ * When specifying the structure of a specific file node list in this document, the division
+ * of the list into fragments is ignored and FileNode structures with FileNode.FileNodeID
+ * field values equal to 0x0FF ("ChunkTerminatorFND") are not specified.
*
* @param ptr The current OneNotePtr we are at currently.
* @param fileNodeList The file node list to populate as we parse.
* @param curPath The current FileNodePtr.
* @return The resulting one note pointer after node lists are all parsed.
*/
- public OneNotePtr internalDeserializeFileNodeList(OneNotePtr ptr, FileNodeList fileNodeList, FileNodePtr curPath) throws IOException,
- TikaException {
+ public OneNotePtr internalDeserializeFileNodeList(OneNotePtr ptr, FileNodeList fileNodeList,
+ FileNodePtr curPath)
+ throws IOException, TikaException {
OneNotePtr localPtr = new OneNotePtr(document, dif);
FileNodePtrBackPush bp = new FileNodePtrBackPush(curPath);
try {
@@ -312,24 +279,27 @@ class OneNotePtr {
}
- public OneNotePtr deserializeFileNodeList(FileNodeList fileNodeList, FileNodePtr curPath) throws IOException, TikaException {
+ public OneNotePtr deserializeFileNodeList(FileNodeList fileNodeList, FileNodePtr curPath)
+ throws IOException, TikaException {
return internalDeserializeFileNodeList(this, fileNodeList, curPath);
}
/**
* Deserializes a FileNodeListFragment.
* <p>
- * The FileNodeListFragment structure specifies a sequence of file nodes from a file node list. The size of the
- * FileNodeListFragment structure is specified by the structure that references it.
+ * The FileNodeListFragment structure specifies a sequence of file nodes from a file node
+ * list. The size of the FileNodeListFragment structure is specified by the structure that
+ * references it.
* <p>
- * All fragments in the same file node list MUST have the same FileNodeListFragment.header.FileNodeListID field.
+ * All fragments in the same file node list MUST have the same FileNodeListFragment.header
+ * .FileNodeListID field.
*
* @param data List of file nodes that we collect while deserializing.
* @param next The next file chunk we are referencing.
* @param curPath The current FileNodePtr.
*/
- void deserializeFileNodeListFragment(FileNodeList data, FileChunkReference next, FileNodePtr curPath) throws IOException,
- TikaException {
+ void deserializeFileNodeListFragment(FileNodeList data, FileChunkReference next,
+ FileNodePtr curPath) throws IOException, TikaException {
data.fileNodeListHeader = deserializeFileNodeListHeader();
boolean terminated = false;
while (offset + 24 <= end) { // while there are at least 24 bytes free
@@ -338,7 +308,8 @@ class OneNotePtr {
CheckedFileNodePushBack pushBack = new CheckedFileNodePushBack(data);
try {
long initialOffset = offset;
- FileNode fileNode = deserializeFileNode(data.children.get(data.children.size() - 1), curPath);
+ FileNode fileNode =
+ deserializeFileNode(data.children.get(data.children.size() - 1), curPath);
if (initialOffset == offset) {
//nothing read; avoid an infinite loop
break;
@@ -350,9 +321,12 @@ class OneNotePtr {
pushBack.commit();
FileNode dereference = curPath.dereference(document);
FileNode lastChild = data.children.get(data.children.size() - 1);
- assert dereference.equals(lastChild); // is this correct? or should we be checking the pointer?
- Integer curPathOffset = curPath.nodeListPositions.get(curPath.nodeListPositions.size() - 1);
- curPath.nodeListPositions.set(curPath.nodeListPositions.size() - 1, curPathOffset + 1);
+ assert dereference.equals(lastChild); // is this correct? or should we be
+ // checking the pointer?
+ Integer curPathOffset =
+ curPath.nodeListPositions.get(curPath.nodeListPositions.size() - 1);
+ curPath.nodeListPositions.set(curPath.nodeListPositions.size() - 1,
+ curPathOffset + 1);
} finally {
pushBack.popBackIfNotCommitted();
}
@@ -362,17 +336,20 @@ class OneNotePtr {
next.cb = nextChunkRef.cb;
next.stp = nextChunkRef.stp;
if (terminated) {
- LOG.debug("{}Chunk terminator found NextChunkRef.cb={}, NextChunkRef.stp={}, Offset={}, End={}", getIndent(), nextChunkRef.cb
- , nextChunkRef.stp, offset, end);
+ LOG.debug("{}Chunk terminator found NextChunkRef.cb={}, NextChunkRef.stp={}," +
+ " Offset={}, End={}", getIndent(), nextChunkRef.cb, nextChunkRef.stp, offset,
+ end);
// TODO check that next is OK
}
long footer = deserializeLittleEndianLong();
if (footer != FOOTER_CONST) {
- throw new TikaException("Invalid footer constant. Expected " + FOOTER_CONST + " but was " + footer);
+ throw new TikaException(
+ "Invalid footer constant. Expected " + FOOTER_CONST + " but was " + footer);
}
}
- private FileNode deserializeFileNode(FileNode data, FileNodePtr curPath) throws IOException, TikaException {
+ private FileNode deserializeFileNode(FileNode data, FileNodePtr curPath)
+ throws IOException, TikaException {
OneNotePtr backup = new OneNotePtr(this);
long reserved;
@@ -383,7 +360,8 @@ class OneNotePtr {
if (data.id == 0) {
return data;
}
- LOG.debug("{}Start Node {} ({}) - Offset={}, End={}", getIndent(), FndStructureConstants.nameOf(data.id), data.id, offset, end);
+ LOG.debug("{}Start Node {} ({}) - Offset={}, End={}", getIndent(),
+ FndStructureConstants.nameOf(data.id), data.id, offset, end);
++indentLevel;
@@ -409,15 +387,16 @@ class OneNotePtr {
data.gosid = deserializeExtendedGUID();
} else if (data.id == FndStructureConstants.ObjectGroupEndFND) {
// no data
- } else if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND
- || data.id == FndStructureConstants.ObjectSpaceManifestListStartFND) {
+ } else if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND ||
+ data.id == FndStructureConstants.ObjectSpaceManifestListStartFND) {
if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND) {
data.idDesc = "gosidRoot";
} else {
data.idDesc = "gosid";
}
- // Specifies the identity of the object space being specified by this object space manifest list.
- // MUST match the ObjectSpaceManifestListReferenceFND.gosid field of the FileNode structure that referenced
+ // Specifies the identity of the object space being specified by this object
+ // space manifest list. MUST match the ObjectSpaceManifestListReferenceFND.gosid
+ // field of the FileNode structure that referenced
// this file node list.
data.gosid = deserializeExtendedGUID();
//LOG.debug("{}gosid {}", getIndent(), data.gosid.toString().c_str());
@@ -440,20 +419,22 @@ class OneNotePtr {
data.idDesc = "rid";
//LOG.debug("{}gosid {}", getIndent(), data.gosid.toString().c_str());
data.subType.revisionManifest.ridDependent = deserializeExtendedGUID(); // the rid
- LOG.debug("{}dependent gosid {}", getIndent(), data.subType.revisionManifest.ridDependent);
+ LOG.debug("{}dependent gosid {}", getIndent(),
+ data.subType.revisionManifest.ridDependent);
data.subType.revisionManifest.timeCreation = deserializeLittleEndianLong();
data.subType.revisionManifest.revisionRole = deserializeLittleEndianInt();
data.subType.revisionManifest.odcsDefault = deserializeLittleEndianShort();
data.gctxid = ExtendedGUID.nil();
document.registerRevisionManifest(data);
- } else if (data.id == FndStructureConstants.RevisionManifestStart6FND
- || data.id == FndStructureConstants.RevisionManifestStart7FND) {
+ } else if (data.id == FndStructureConstants.RevisionManifestStart6FND ||
+ data.id == FndStructureConstants.RevisionManifestStart7FND) {
data.gosid = deserializeExtendedGUID(); // the rid
data.idDesc = "rid";
//LOG.debug("{}gosid {}", getIndent(), data.gosid.toString().c_str());
data.subType.revisionManifest.ridDependent = deserializeExtendedGUID(); // the rid
- LOG.debug("{}dependent gosid {}", getIndent(), data.subType.revisionManifest.ridDependent);
+ LOG.debug("{}dependent gosid {}", getIndent(),
+ data.subType.revisionManifest.ridDependent);
data.subType.revisionManifest.revisionRole = deserializeLittleEndianInt();
data.subType.revisionManifest.odcsDefault = deserializeLittleEndianShort();
@@ -461,7 +442,8 @@ class OneNotePtr {
if (data.id == FndStructureConstants.RevisionManifestStart7FND) {
data.gctxid = deserializeExtendedGUID(); // the rid
}
- document.registerAdditionalRevisionRole(data.gosid, data.subType.revisionManifest.revisionRole, data.gctxid);
+ document.registerAdditionalRevisionRole(data.gosid,
+ data.subType.revisionManifest.revisionRole, data.gctxid);
document.registerRevisionManifest(data);
} else if (data.id == FndStructureConstants.GlobalIdTableStartFNDX) {
data.subType.globalIdTableStartFNDX.reserved = deserializeLittleEndianChar();
@@ -471,20 +453,23 @@ class OneNotePtr {
data.subType.globalIdTableEntryFNDX.guid = deserializeGUID();
- document.revisionMap.get(document.currentRevision).globalId.put(data.subType.globalIdTableEntryFNDX.index,
- data.subType.globalIdTableEntryFNDX.guid);
+ document.revisionMap.get(document.currentRevision).globalId.put(
+ data.subType.globalIdTableEntryFNDX.index,
+ data.subType.globalIdTableEntryFNDX.guid);
} else if (data.id == FndStructureConstants.GlobalIdTableEntry2FNDX) {
data.subType.globalIdTableEntry2FNDX.indexMapFrom = deserializeLittleEndianInt();
data.subType.globalIdTableEntry2FNDX.indexMapTo = deserializeLittleEndianInt();
ExtendedGUID dependentRevision =
- document.revisionMap.get(document.currentRevision).dependent;
+ document.revisionMap.get(document.currentRevision).dependent;
// Get the compactId from the revisionMap's globalId map.
- GUID compactId = document.revisionMap.get(dependentRevision).globalId.get(data.subType.globalIdTableEntry2FNDX.indexMapFrom);
+ GUID compactId = document.revisionMap.get(dependentRevision).globalId.get(
+ data.subType.globalIdTableEntry2FNDX.indexMapFrom);
if (compactId == null) {
throw new TikaException("COMPACT_ID_MISSING");
}
- document.revisionMap.get(document.currentRevision).globalId.put(data.subType.globalIdTableEntry2FNDX.indexMapTo, compactId);
+ document.revisionMap.get(document.currentRevision).globalId.put(
+ data.subType.globalIdTableEntry2FNDX.indexMapTo, compactId);
} else if (data.id == FndStructureConstants.GlobalIdTableEntry3FNDX) {
data.subType.globalIdTableEntry3FNDX.indexCopyFromStart = deserializeLittleEndianInt();
@@ -492,18 +477,20 @@ class OneNotePtr {
data.subType.globalIdTableEntry3FNDX.indexCopyToStart = deserializeLittleEndianInt();
- ExtendedGUID dependent_revision = document.revisionMap.get(document.currentRevision).dependent;
+ ExtendedGUID dependent_revision =
+ document.revisionMap.get(document.currentRevision).dependent;
for (int i = 0; i < data.subType.globalIdTableEntry3FNDX.entriesToCopy; ++i) {
Map<Long, GUID> globalIdMap = document.revisionMap.get(dependent_revision).globalId;
- GUID compactId = globalIdMap.get(data.subType.globalIdTableEntry3FNDX.indexCopyFromStart + i);
+ GUID compactId = globalIdMap.get(
+ data.subType.globalIdTableEntry3FNDX.indexCopyFromStart + i);
if (compactId == null) {
throw new TikaException("COMPACT_ID_MISSING");
}
- document.revisionMap.get(document.currentRevision).globalId.put(data.subType.globalIdTableEntry3FNDX.indexCopyToStart + i
- , compactId);
+ document.revisionMap.get(document.currentRevision).globalId.put(
+ data.subType.globalIdTableEntry3FNDX.indexCopyToStart + i, compactId);
}
- } else if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX
- || data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCount2FNDX) {
+ } else if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX ||
+ data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCount2FNDX) {
data.subType.objectRevisionWithRefCountFNDX.oid = deserializeCompactID(); // the oid
if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX) {
@@ -527,20 +514,22 @@ class OneNotePtr {
data.idDesc = "oidRoot";
data.gosid = data.subType.rootObjectReference.oidRoot.guid;
- data.subType.rootObjectReference.rootObjectReferenceBase.rootRole = deserializeLittleEndianInt();
+ data.subType.rootObjectReference.rootObjectReferenceBase.rootRole =
+ deserializeLittleEndianInt();
LOG.debug("{}Root role {}", getIndent(),
- data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
+ data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
} else if (data.id == FndStructureConstants.RootObjectReference3FND) {
data.idDesc = "oidRoot";
data.gosid = deserializeExtendedGUID();
- data.subType.rootObjectReference.rootObjectReferenceBase.rootRole = deserializeLittleEndianInt();
+ data.subType.rootObjectReference.rootObjectReferenceBase.rootRole =
+ deserializeLittleEndianInt();
LOG.debug("{}Root role {}", getIndent(),
- data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
- } else if (data.id == FndStructureConstants.RevisionRoleDeclarationFND
- || data.id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
+ data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
+ } else if (data.id == FndStructureConstants.RevisionRoleDeclarationFND ||
+ data.id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
data.gosid = deserializeExtendedGUID();
data.subType.revisionRoleDeclaration.revisionRole = deserializeLittleEndianInt();
@@ -550,15 +539,15 @@ class OneNotePtr {
}
document.registerAdditionalRevisionRole(data.gosid,
- data.subType.revisionRoleDeclaration.revisionRole,
- data.gctxid);
+ data.subType.revisionRoleDeclaration.revisionRole, data.gctxid);
// FIXME: deal with ObjectDataEncryptionKey
} else if (data.id == FndStructureConstants.ObjectInfoDependencyOverridesFND) {
OneNotePtr content = new OneNotePtr(this);
if (!data.ref.equals(FileChunkReference.nil())) {
content.reposition(data.ref); // otherwise it's positioned right at this node
}
- data.subType.objectInfoDependencyOverrides.data = content.deserializeObjectInfoDependencyOverrideData();
+ data.subType.objectInfoDependencyOverrides.data =
+ content.deserializeObjectInfoDependencyOverrideData();
} else if (data.id == FndStructureConstants.FileDataStoreListReferenceFND) {
// already processed this
} else if (data.id == FndStructureConstants.FileDataStoreObjectReferenceFND) {
@@ -570,33 +559,37 @@ class OneNotePtr {
OneNotePtr fileDataStorePtr = new OneNotePtr(this);
fileDataStorePtr.reposition(data.ref);
- data.subType.fileDataStoreObjectReference.ref = fileDataStorePtr.deserializeFileDataStoreObject();
-
- } else if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX
- || data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX
- || data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND
- || data.id == FndStructureConstants.CanRevise.ObjectDeclaration2LargeRefCountFND
- || data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND
- || data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
- data.subType.objectDeclarationWithRefCount.body.file_data_store_reference =
- false;
- if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX
- || data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX) {
- data.subType.objectDeclarationWithRefCount.body = deserializeObjectDeclarationWithRefCountBody();
+ data.subType.fileDataStoreObjectReference.ref =
+ fileDataStorePtr.deserializeFileDataStoreObject();
+
+ } else if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
+ data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX ||
+ data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND ||
+ data.id == FndStructureConstants.CanRevise.ObjectDeclaration2LargeRefCountFND ||
+ data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND ||
+ data.id ==
+ FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
+ data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = false;
+ if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
+ data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX) {
+ data.subType.objectDeclarationWithRefCount.body =
+ deserializeObjectDeclarationWithRefCountBody();
} else { // one of the other 4 that use the ObjectDeclaration2Body
- data.subType.objectDeclarationWithRefCount.body = deserializeObjectDeclaration2Body();
+ data.subType.objectDeclarationWithRefCount.body =
+ deserializeObjectDeclaration2Body();
}
- if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX
- || data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND
- || data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND) {
- long refCnt = deserializeLittleEndianChar();
- data.subType.objectDeclarationWithRefCount.cRef = refCnt;
+ if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
+ data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND ||
+ data.id ==
+ FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND) {
+ data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianChar();
} else {
data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianInt();
}
- if (data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND
- || data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
+ if (data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND ||
+ data.id ==
+ FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
ByteBuffer md5Buffer = ByteBuffer.allocate(16);
deserializeBytes(md5Buffer);
data.subType.objectDeclarationWithRefCount.readOnly.md5 = md5Buffer.array();
@@ -605,9 +598,11 @@ class OneNotePtr {
postprocessObjectDeclarationContents(data, curPath);
LOG.debug("{}Ref Count JCID {}", getIndent(),
- data.subType.objectDeclarationWithRefCount.body.jcid);
- } else if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND
- || data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3LargeRefCountFND) {
+ data.subType.objectDeclarationWithRefCount.body.jcid);
+ } else if (
+ data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND ||
+ data.id ==
+ FndStructureConstants.CanRevise.ObjectDeclarationFileData3LargeRefCountFND) {
data.subType.objectDeclarationWithRefCount.body.oid = deserializeCompactID();
long jcid = deserializeLittleEndianInt();
@@ -624,22 +619,26 @@ class OneNotePtr {
long roomLeftLong = roomLeft();
if (cch > roomLeftLong) { // not a valid guid
- throw new TikaException("Data out of bounds - cch " + cch + " is > room left = " + roomLeftLong);
+ throw new TikaException(
+ "Data out of bounds - cch " + cch + " is > room left = " + roomLeftLong);
}
if (cch > dif.size()) {
- throw new TikaMemoryLimitException("CCH=" + cch + " was found that was greater" +
- " than file size " + dif.size());
+ throw new TikaMemoryLimitException(
+ "CCH=" + cch + " was found that was greater" + " than file size " +
+ dif.size());
}
ByteBuffer dataSpaceBuffer = ByteBuffer.allocate((int) cch * 2);
dif.read(dataSpaceBuffer);
byte[] dataSpaceBufferBytes = dataSpaceBuffer.array();
offset += dataSpaceBufferBytes.length;
if (dataSpaceBufferBytes.length == (IFNDF_GUID_LENGTH * 2 + IFNDF.length) &&
- Arrays.equals(IFNDF, Arrays.copyOfRange(dataSpaceBufferBytes, 0, IFNDF.length))) {
+ Arrays.equals(IFNDF,
+ Arrays.copyOfRange(dataSpaceBufferBytes, 0, IFNDF.length))) {
data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = true;
- GUID guid = GUID.fromCurlyBraceUTF16Bytes(Arrays.copyOfRange(dataSpaceBufferBytes, IFNDF.length,
- dataSpaceBufferBytes.length));
+ GUID guid = GUID.fromCurlyBraceUTF16Bytes(
+ Arrays.copyOfRange(dataSpaceBufferBytes, IFNDF.length,
+ dataSpaceBufferBytes.length));
ExtendedGUID extendedGUID = new ExtendedGUID(guid, 0);
FileChunkReference fileChunk = document.getAssocGuidToRef(extendedGUID);
if (fileChunk == null) {
@@ -648,7 +647,8 @@ class OneNotePtr {
// TODO - call postprocessObjectDeclarationContents on this object?
}
} else {
- LOG.debug("{}Ignoring an external reference {}", getIndent(), new String(dataSpaceBufferBytes, StandardCharsets.UTF_16LE));
+ LOG.debug("{}Ignoring an external reference {}", getIndent(),
+ new String(dataSpaceBufferBytes, StandardCharsets.UTF_16LE));
}
} else if (data.id == FndStructureConstants.ObjectGroupListReferenceFND) {
data.idDesc = "object_group_id";
@@ -670,7 +670,9 @@ class OneNotePtr {
Revision currentRevision = document.revisionMap.get(document.currentRevision);
currentRevision.manifestList.add(curPath);
} else {
- LOG.debug("No fnd needed to be parsed for data.id=0x" + Long.toHexString(data.id) + " (" + FndStructureConstants.nameOf(data.id) + ")");
+ LOG.debug(
+ "No fnd needed to be parsed for data.id=0x" + Long.toHexString(data.id) + " (" +
+ FndStructureConstants.nameOf(data.id) + ")");
}
if (data.baseType == 2) {
// Generic baseType == 2 parser - means we have children to parse.
@@ -699,12 +701,12 @@ class OneNotePtr {
}
--indentLevel;
if (data.gosid.equals(ExtendedGUID.nil())) {
- LOG.debug("{}End Node {} ({}) - Offset={}, End={}", getIndent(), FndStructureConstants.nameOf(data.id), (int) data.id, offset
- , end);
+ LOG.debug("{}End Node {} ({}) - Offset={}, End={}", getIndent(),
+ FndStructureConstants.nameOf(data.id), (int) data.id, offset, end);
} else {
- LOG.debug("{}End Node {} ({}) {}:[{}] - Offset={}, End={}", getIndent(), FndStructureConstants.nameOf(data.id), (int) data.id
- , data.idDesc,
- data.gosid, offset, end);
+ LOG.debug("{}End Node {} ({}) {}:[{}] - Offset={}, End={}", getIndent(),
+ FndStructureConstants.nameOf(data.id), (int) data.id, data.idDesc, data.gosid,
+ offset, end);
}
return data;
}
@@ -717,7 +719,8 @@ class OneNotePtr {
offset = dif.position();
}
- private ObjectDeclarationWithRefCountBody deserializeObjectDeclarationWithRefCountBody() throws IOException, TikaException {
+ private ObjectDeclarationWithRefCountBody deserializeObjectDeclarationWithRefCountBody()
+ throws IOException, TikaException {
ObjectDeclarationWithRefCountBody data = new ObjectDeclarationWithRefCountBody();
data.oid = deserializeCompactID();
long jci_odcs_etc = deserializeLittleEndianInt();
@@ -738,7 +741,8 @@ class OneNotePtr {
return data;
}
- private ObjectDeclarationWithRefCountBody deserializeObjectDeclaration2Body() throws IOException, TikaException {
+ private ObjectDeclarationWithRefCountBody deserializeObjectDeclaration2Body()
+ throws IOException, TikaException {
ObjectDeclarationWithRefCountBody data = new ObjectDeclarationWithRefCountBody();
data.oid = deserializeCompactID();
long jcid = deserializeLittleEndianInt();
@@ -775,7 +779,8 @@ class OneNotePtr {
data.fileData.cb = len;
offset += len;
while ((offset & 0x7) > 0) {
- // Padding is added to the end of the FileData stream to ensure that it ends on an 8-byte boundary.
+ // Padding is added to the end of the FileData stream to ensure that it
+ // ends on an 8-byte boundary.
++offset;
}
GUID footer = deserializeGUID();
@@ -786,8 +791,10 @@ class OneNotePtr {
return data;
}
- private ObjectInfoDependencyOverrideData deserializeObjectInfoDependencyOverrideData() throws IOException {
- ObjectInfoDependencyOverrideData objectInfoDependencyOverrideData = new ObjectInfoDependencyOverrideData();
+ private ObjectInfoDependencyOverrideData deserializeObjectInfoDependencyOverrideData()
+ throws IOException {
+ ObjectInfoDependencyOverrideData objectInfoDependencyOverrideData =
+ new ObjectInfoDependencyOverrideData();
long num_8bit_overrides = deserializeLittleEndianInt();
long num_32bit_overrides = deserializeLittleEndianInt();
long crc = deserializeLittleEndianInt();
@@ -837,33 +844,42 @@ class OneNotePtr {
* Depending on stpFormat and cbFormat, will deserialize a FileChunkReference.
*
* @param stpFormat An unsigned integer that specifies the size and format of the
- * FileNodeChunkReference.stp field specified by the fnd field if this FileNode structure has a
- * value of the BaseType field equal to 1 or 2. MUST be ignored if the value of the BaseType field
- * of this FileNode structure is equal to 0. The meaning of the StpFormat field is given by the
+ * FileNodeChunkReference.stp field specified by the fnd field if this
+ * FileNode structure has a
+ * value of the BaseType field equal to 1 or 2. MUST be ignored if the
+ * value of the BaseType field
+ * of this FileNode structure is equal to 0. The meaning of the StpFormat
+ * field is given by the
* following table.
* Value Meaning
* 0 8 bytes, uncompressed.
* 1 4 bytes, uncompressed.
* 2 2 bytes, compressed.
* 3 4 bytes, compressed.
- * The value of an uncompressed file pointer specifies a location in the file. To uncompress a
+ * The value of an uncompressed file pointer specifies a location in the
+ * file. To uncompress a
* compressed file pointer, multiply the value by 8.
* @param cbFormat An unsigned integer that specifies the size and format of the
- * FileNodeChunkReference.cb field specified by the fnd field if this FileNode structure has a
- * BaseType field value equal to 1 or 2. MUST be 0 and MUST be ignored if BaseType of this
- * FileNode structure is equal to 0. The meaning of CbFormat is given by the following table.
+ * FileNodeChunkReference.cb field specified by the fnd field if this
+ * FileNode structure has a
+ * BaseType field value equal to 1 or 2. MUST be 0 and MUST be ignored if
+ * BaseType of this
+ * FileNode structure is equal to 0. The meaning of CbFormat is given by
+ * the following table.
* Value Meaning
* 0 4 bytes, uncompressed.
* 1 8 bytes, uncompressed.
* 2 1 byte, compressed.
* 3 2 bytes, compressed.
- * The value of an uncompressed byte count specifies the size, in bytes, of the data referenced by a
+ * The value of an uncompressed byte count specifies the size, in bytes, of
+ * the data referenced by a
* FileNodeChunkReference structure. To uncompress a compressed byte count,
* multiply the value by 8.
* @return
* @throws IOException
*/
- FileChunkReference deserializeVarFileChunkReference(long stpFormat, long cbFormat) throws IOException, TikaException {
+ FileChunkReference deserializeVarFileChunkReference(long stpFormat, long cbFormat)
+ throws IOException, TikaException {
FileChunkReference data = new FileChunkReference(0, 0);
long local8;
long local16;
@@ -914,13 +930,14 @@ class OneNotePtr {
return data;
}
- FileNodeListHeader deserializeFileNodeListHeader() throws IOException {
+ FileNodeListHeader deserializeFileNodeListHeader() throws TikaException, IOException {
long positionOfThisHeader = offset;
long uintMagic = deserializeLittleEndianLong();
long fileNodeListId = deserializeLittleEndianInt();
long nFragmentSequence = deserializeLittleEndianInt();
- return new FileNodeListHeader(positionOfThisHeader, uintMagic, fileNodeListId, nFragmentSequence);
+ return new FileNodeListHeader(positionOfThisHeader, uintMagic, fileNodeListId,
+ nFragmentSequence);
}
/**
@@ -931,16 +948,18 @@ class OneNotePtr {
* @param curPtr The current pointer.
* @throws IOException
*/
- private void postprocessObjectDeclarationContents(FileNode data, FileNodePtr curPtr) throws IOException, TikaException {
+ private void postprocessObjectDeclarationContents(FileNode data, FileNodePtr curPtr)
+ throws IOException, TikaException {
data.gosid = data.subType.objectDeclarationWithRefCount.body.oid.guid;
document.guidToObject.put(data.gosid, new FileNodePtr(curPtr));
if (data.subType.objectDeclarationWithRefCount.body.jcid.isObjectSpaceObjectPropSet()) {
OneNotePtr objectSpacePropSetPtr = new OneNotePtr(this);
objectSpacePropSetPtr.reposition(data.ref);
- data.subType.objectDeclarationWithRefCount.objectRef = objectSpacePropSetPtr.deserializeObjectSpaceObjectPropSet();
+ data.subType.objectDeclarationWithRefCount.objectRef =
+ objectSpacePropSetPtr.deserializeObjectSpaceObjectPropSet();
ObjectStreamCounters streamCounters = new ObjectStreamCounters();
data.propertySet = objectSpacePropSetPtr.deserializePropertySet(streamCounters,
- data.subType.objectDeclarationWithRefCount.objectRef);
+ data.subType.objectDeclarationWithRefCount.objectRef);
} else {
if (!data.subType.objectDeclarationWithRefCount.body.jcid.isFileData) {
throw new TikaException("JCID must be file data when !isObjectSpaceObjectPropSet.");
@@ -957,29 +976,32 @@ class OneNotePtr {
}
}
- private PropertySet deserializePropertySet(ObjectStreamCounters counters, ObjectSpaceObjectPropSet streams) throws IOException,
- TikaException {
+ private PropertySet deserializePropertySet(ObjectStreamCounters counters,
+ ObjectSpaceObjectPropSet streams)
+ throws IOException, TikaException {
PropertySet data = new PropertySet();
long count = deserializeLittleEndianShort();
- data.rgPridsData = Stream.generate(PropertyValue::new)
- .limit((int) count)
- .collect(Collectors.toList());
+ data.rgPridsData =
+ Stream.generate(PropertyValue::new).limit((int) count).collect(Collectors.toList());
for (int i = 0; i < count; ++i) {
data.rgPridsData.get(i).propertyId = deserializePropertyID();
LOG.debug("{}Property {}", getIndent(), data.rgPridsData.get(i).propertyId);
}
LOG.debug("{}{} elements in property set:", getIndent(), count);
for (int i = 0; i < count; ++i) {
- data.rgPridsData.set(i, deserializePropertyValueFromPropertyID(
- data.rgPridsData.get(i).propertyId, streams, counters));
+ data.rgPridsData.set(i,
+ deserializePropertyValueFromPropertyID(data.rgPridsData.get(i).propertyId,
+ streams, counters));
}
LOG.debug("");
return data;
}
- private PropertyValue deserializePropertyValueFromPropertyID(OneNotePropertyId propertyID, ObjectSpaceObjectPropSet streams,
- ObjectStreamCounters counters) throws IOException, TikaException {
+ private PropertyValue deserializePropertyValueFromPropertyID(OneNotePropertyId propertyID,
+ ObjectSpaceObjectPropSet streams,
+ ObjectStreamCounters counters)
+ throws IOException, TikaException {
PropertyValue data = new PropertyValue();
data.propertyId = propertyID;
char val8;
@@ -1022,20 +1044,25 @@ class OneNotePtr {
LOG.debug(" PropertyID long({})", data.scalar);
break;
case 0x7:
- // If the value of the PropertyID.type element is "0x7" and the property specifies an array of elements, the value of
+ // If the value of the PropertyID.type element is "0x7" and the property
+ // specifies an array of elements, the value of
// the
- // prtFourBytesOfLengthFollowedByData.cb element MUST be the sum of the sizes, in bytes, of each element in the array.
+ // prtFourBytesOfLengthFollowedByData.cb element MUST be the sum of the
+ // sizes, in bytes, of each element in the array.
// Exceptions include:
- // * The RgOutlineIndentDistance element, where the value of the prtFourBytesOfLengthFollowedByData.cb element
+ // * The RgOutlineIndentDistance element, where the value of the
+ // prtFourBytesOfLengthFollowedByData.cb element
// MUST be: 4 + (4 × RgOutlineIndentDistance.count).
- // * The TableColumnsLocked element, where the value of the prtFourBytesOfLengthFollowedByData.cb
+ // * The TableColumnsLocked element, where the value of the
+ // prtFourBytesOfLengthFollowedByData.cb
// element MUST be: 1 + (TableColumnsLocked.cColumns + 7) / 8.
- // * The TableColumnWidths element, where the value of the prtFourBytesOfLengthFollowedByData.cb
+ // * The TableColumnWidths element, where the value of the
+ // prtFourBytesOfLengthFollowedByData.cb
// element MUST be: 1 + (4 × TableColumnWidths.cColumns).
val32 = deserializeLittleEndianInt();
LOG.debug(" raw data: ({})[", val32);
- {
+
data.rawData.stp = offset;
data.rawData.cb = 0;
if (offset + val32 > end) {
@@ -1050,9 +1077,9 @@ class OneNotePtr {
content.reposition(data.rawData);
content.dumpHex();
}
- }
- LOG.debug("]");
- break;
+
+ LOG.debug("]");
+ break;
case 0x9:
case 0xb:
case 0xd:
@@ -1061,11 +1088,9 @@ class OneNotePtr {
case 0x8:
case 0xa:
case 0xc:
- if (type == 0x8 || type == 0xa
- || type == 0xc) {
+ if (type == 0x8 || type == 0xa || type == 0xc) {
val32 = 1;
}
- {
List<CompactID> stream = streams.contextIDs.data;
String xtype = "contextID";
long s_count = counters.context_ids_count;
@@ -1084,30 +1109,29 @@ class OneNotePtr {
if (index < stream.size()) {
data.compactIDs.add(stream.get(index));
LOG.debug(" {}[{}]", xtype,
- data.compactIDs.get(data.compactIDs.size() - 1));
+ data.compactIDs.get(data.compactIDs.size() - 1));
} else {
throw new TikaException("SEGV");
}
}
- }
- break;
+ break;
case 0x10:
val32 = deserializeLittleEndianInt();
- {
OneNotePropertyId propId = deserializePropertyID();
LOG.debug(" UnifiedSubPropertySet {} {}", val32, propId);
- data.propertySet.rgPridsData = Stream.generate(PropertyValue::new)
- .limit((int) val32)
- .collect(Collectors.toList());
+ data.propertySet.rgPridsData =
+ Stream.generate(PropertyValue::new).limit((int) val32)
+ .collect(Collectors.toList());
for (int i = 0; i < val32; ++i) {
try {
- data.propertySet.rgPridsData.set(i, deserializePropertyValueFromPropertyID(propId, streams, counters));
+ data.propertySet.rgPridsData.set(i,
+ deserializePropertyValueFromPropertyID(propId, streams,
+ counters));
} catch (IOException e) {
return data;
}
}
- }
- break;
+ break;
case 0x11:
LOG.debug(" SubPropertySet");
data.propertySet = deserializePropertySet(counters, streams);
@@ -1122,12 +1146,13 @@ class OneNotePtr {
}
}
- private OneNotePropertyId deserializePropertyID() throws IOException {
+ private OneNotePropertyId deserializePropertyID() throws TikaException, IOException {
long pid = deserializeLittleEndianInt();
return new OneNotePropertyId(pid);
}
- private ObjectSpaceObjectPropSet deserializeObjectSpaceObjectPropSet() throws IOException, TikaException {
+ private ObjectSpaceObjectPropSet deserializeObjectSpaceObjectPropSet()
+ throws IOException, TikaException {
ObjectSpaceObjectPropSet data = new ObjectSpaceObjectPropSet();
data.osids.extendedStreamsPresent = 0;
data.osids.osidsStreamNotPresent = 1;
@@ -1147,19 +1172,18 @@ class OneNotePtr {
return data;
}
- private ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs deserializeObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs() throws IOException
- , TikaException {
- ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs data = new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
+ private ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs deserializeObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs()
+ throws IOException, TikaException {
+ ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs data =
+ new ObjectSpaceObjectStreamOfOIDsOSIDsOrContextIDs();
long header = deserializeLittleEndianInt();
data.count = header & 0xffffff;
data.osidsStreamNotPresent = ((header >> 31) & 0x1);
data.extendedStreamsPresent = ((header >> 30) & 0x1);
if (LOG.isDebugEnabled()) {
- LOG.debug(
- "{}Deserialized Stream Header count: {} OsidsNotPresent {} Extended {}",
- getIndent(), data.count,
- data.osidsStreamNotPresent,
- data.extendedStreamsPresent);
+ LOG.debug("{}Deserialized Stream Header count: {} OsidsNotPresent {} Extended {}",
+ getIndent(), data.count, data.osidsStreamNotPresent,
+ data.extendedStreamsPresent);
}
for (int i = 0; i < data.count; ++i) {
CompactID cid;
@@ -1175,8 +1199,9 @@ class OneNotePtr {
public void dumpHex() throws TikaMemoryLimitException, IOException {
if (end - offset > dif.size()) {
- throw new TikaMemoryLimitException("Exceeded memory limit when trying to dumpHex - " +
- "" + (end - offset) + " > " + dif.size());
+ throw new TikaMemoryLimitException(
+ "Exceeded memory limit when trying to dumpHex - " + "" + (end - offset) +
+ " > " + dif.size());
}
ByteBuffer byteBuffer = ByteBuffer.allocate((int) (end - offset));
LOG.debug(Hex.encodeHexString(byteBuffer.array()));
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
index 5553bf0..2190b1f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
@@ -14,21 +14,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.tuple.Pair;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.exception.TikaMemoryLimitException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.EmbeddedDocumentUtil;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.EmbeddedContentHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.AttributesImpl;
+package org.apache.tika.parser.microsoft.onenote;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -47,6 +34,20 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.EmbeddedContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
/**
* Walk the one note tree and create a Map while it goes.
* Also writes user input text to a print writer as it parses.
@@ -54,39 +55,43 @@ import java.util.regex.Pattern;
class OneNoteTreeWalker {
private static final String P = "p";
- private static Pattern HYPERLINK_PATTERN = Pattern.compile("\uFDDFHYPERLINK\\s+\"([^\"]+)\"([^\"]+)$");
-
/**
* See spec MS-ONE - 2.3.1 - TIME32 - epoch of jan 1 1980 UTC.
- * So we create this offset used to calculate number of seconds between this and the Instant.EPOCH.
+ * So we create this offset used to calculate the number of seconds between this and
+ * Instant.EPOCH.
*/
private static final long TIME32_EPOCH_DIFF_1980;
+ /**
+ * See spec MS-DTYP - 2.3.3 - DATETIME dates are based on epoch of jan 1 1601 UTC.
+ * So we create this offset used to calculate the number of seconds between this and
+ * Instant.EPOCH.
+ */
+ private static final long DATETIME_EPOCH_DIFF_1601;
+ private static final Pattern HYPERLINK_PATTERN =
+ Pattern.compile("\uFDDFHYPERLINK\\s+\"([^\"]+)\"([^\"]+)$");
+
static {
LocalDateTime time32Epoch1980 = LocalDateTime.of(1980, Month.JANUARY, 1, 0, 0);
Instant instant = time32Epoch1980.atZone(ZoneOffset.UTC).toInstant();
TIME32_EPOCH_DIFF_1980 = (instant.toEpochMilli() - Instant.EPOCH.toEpochMilli()) / 1000;
}
- /**
- * See spec MS-DTYP - 2.3.3 - DATETIME dates are based on epoch of jan 1 1601 UTC.
- * So we create this offset used to calculate number of seconds between this and the Instant.EPOCH.
- */
- private static final long DATETIME_EPOCH_DIFF_1601;
+
static {
LocalDateTime time32Epoch1601 = LocalDateTime.of(1601, Month.JANUARY, 1, 0, 0);
Instant instant = time32Epoch1601.atZone(ZoneOffset.UTC).toInstant();
DATETIME_EPOCH_DIFF_1601 = (instant.toEpochMilli() - Instant.EPOCH.toEpochMilli()) / 1000;
}
- private OneNoteTreeWalkerOptions options;
- private OneNoteDocument oneNoteDocument;
- private OneNoteDirectFileResource dif;
- private XHTMLContentHandler xhtml;
- private Pair<Long, ExtendedGUID> roleAndContext;
private final Metadata parentMetadata;
private final EmbeddedDocumentExtractor embeddedDocumentExtractor;
private final Set<String> authors = new HashSet<>();
private final Set<String> mostRecentAuthors = new HashSet<>();
private final Set<String> originalAuthors = new HashSet<>();
+ private final OneNoteTreeWalkerOptions options;
+ private final OneNoteDocument oneNoteDocument;
+ private final OneNoteDirectFileResource dif;
+ private final XHTMLContentHandler xhtml;
+ private final Pair<Long, ExtendedGUID> roleAndContext;
private Instant lastModifiedTimestamp = Instant.MIN;
private long creationTimestamp = Long.MAX_VALUE;
private long lastModified = Long.MIN_VALUE;
@@ -98,21 +103,25 @@ class OneNoteTreeWalker {
*
* @param options The options for how to walk this tree.
* @param oneNoteDocument The one note document we want to walk.
- * @param dif The random file access structure we read and reposition while extracting the content.
+ * @param dif The random file access structure we read and reposition while
+ * extracting the content.
* @param xhtml The XHTMLContentHandler to populate as you walk the tree.
- * @param roleAndContext The role and context value we want to use when crawling. Set this to null if you are
+ * @param roleAndContext The role and context value we want to use when crawling. Set this
+ * to null if you are
* crawling all root file nodes, and don't care about revisions.
*/
public OneNoteTreeWalker(OneNoteTreeWalkerOptions options, OneNoteDocument oneNoteDocument,
OneNoteDirectFileResource dif, XHTMLContentHandler xhtml,
- Metadata parentMetadata, ParseContext parseContext, Pair<Long, ExtendedGUID> roleAndContext) {
+ Metadata parentMetadata, ParseContext parseContext,
+ Pair<Long, ExtendedGUID> roleAndContext) {
this.options = options;
this.oneNoteDocument = oneNoteDocument;
this.dif = dif;
this.roleAndContext = roleAndContext;
this.xhtml = xhtml;
this.parentMetadata = parentMetadata;
- this.embeddedDocumentExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
+ this.embeddedDocumentExtractor =
+ EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
}
/**
@@ -129,12 +138,14 @@ class OneNoteTreeWalker {
}
/**
- * Walk the root file nodes, depending on the options will crawl revisions or the entire revision tree.
+ * Walk the root file nodes, depending on the options will crawl revisions or the entire
+ * revision tree.
*
* @return List of the root file nodes.
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- public List<Map<String, Object>> walkRootFileNodes() throws IOException, TikaException, SAXException {
+ public List<Map<String, Object>> walkRootFileNodes()
+ throws IOException, TikaException, SAXException {
List<Map<String, Object>> res = new ArrayList<>();
if (options.isCrawlAllFileNodesFromRoot()) {
res.add(walkFileNodeList(oneNoteDocument.root));
@@ -143,7 +154,8 @@ class OneNoteTreeWalker {
Map<String, Object> structure = new HashMap<>();
structure.put("oneNoteType", "Revision");
structure.put("revisionListGuid", revisionListGuid.toString());
- FileNodePtr fileNodePtr = oneNoteDocument.revisionManifestLists.get(revisionListGuid);
+ FileNodePtr fileNodePtr =
+ oneNoteDocument.revisionManifestLists.get(revisionListGuid);
structure.put("fileNode", walkRevision(fileNodePtr));
res.add(structure);
}
@@ -170,7 +182,8 @@ class OneNoteTreeWalker {
* @return A map of the parsed data.
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- private Map<String, Object> walkRevision(FileNodePtr fileNodePtr) throws IOException, TikaException, SAXException {
+ private Map<String, Object> walkRevision(FileNodePtr fileNodePtr)
+ throws IOException, TikaException, SAXException {
Map<String, Object> structure = new HashMap<>();
structure.put("oneNoteType", "FileNodePointer");
structure.put("offsets", fileNodePtr.nodeListPositions);
@@ -197,22 +210,24 @@ class OneNoteTreeWalker {
boolean okGroup = false;
for (FileNode child : revisionFileNode.childFileNodeList.children) {
if (child.id == FndStructureConstants.RevisionManifestStart4FND ||
- child.id == FndStructureConstants.RevisionManifestStart6FND ||
- child.id == FndStructureConstants.RevisionManifestStart7FND) {
+ child.id == FndStructureConstants.RevisionManifestStart6FND ||
+ child.id == FndStructureConstants.RevisionManifestStart7FND) {
okGroup = validRevisions.contains(child.gosid);
}
if (okGroup) {
if ((child.id == FndStructureConstants.RootObjectReference2FNDX ||
- child.id == FndStructureConstants.RootObjectReference3FND) &&
- child.subType.rootObjectReference.rootObjectReferenceBase.rootRole == 1) {
- FileNodePtr childFileNodePointer = oneNoteDocument.guidToObject.get(child.gosid);
+ child.id == FndStructureConstants.RootObjectReference3FND) &&
+ child.subType.rootObjectReference.rootObjectReferenceBase.rootRole == 1) {
+ FileNodePtr childFileNodePointer =
+ oneNoteDocument.guidToObject.get(child.gosid);
children.add(walkFileNodePtr(childFileNodePointer));
}
}
}
if (!children.isEmpty()) {
Map<String, Object> childFileNodeListMap = new HashMap<>();
- childFileNodeListMap.put("fileNodeListHeader", revisionFileNode.childFileNodeList.fileNodeListHeader);
+ childFileNodeListMap.put("fileNodeListHeader",
+ revisionFileNode.childFileNodeList.fileNodeListHeader);
childFileNodeListMap.put("children", children);
structure.put("revisionFileNodeList", childFileNodeListMap);
}
@@ -226,7 +241,8 @@ class OneNoteTreeWalker {
* @return Returns a map of the main data.
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- public Map<String, Object> walkFileNodePtr(FileNodePtr fileNodePtr) throws IOException, TikaException, SAXException {
+ public Map<String, Object> walkFileNodePtr(FileNodePtr fileNodePtr)
+ throws IOException, TikaException, SAXException {
if (fileNodePtr != null) {
FileNode fileNode = fileNodePtr.dereference(oneNoteDocument);
return walkFileNode(fileNode);
@@ -241,7 +257,8 @@ class OneNoteTreeWalker {
* @return The result.
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- public Map<String, Object> walkFileNodeList(FileNodeList fileNodeList) throws IOException, TikaException, SAXException {
+ public Map<String, Object> walkFileNodeList(FileNodeList fileNodeList)
+ throws IOException, TikaException, SAXException {
Map<String, Object> structure = new HashMap<>();
structure.put("oneNoteType", "FileNodeList");
structure.put("fileNodeListHeader", fileNodeList.fileNodeListHeader);
@@ -262,7 +279,8 @@ class OneNoteTreeWalker {
* @return Map which is result of the parsed file node.
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- public Map<String, Object> walkFileNode(FileNode fileNode) throws IOException, TikaException, SAXException {
+ public Map<String, Object> walkFileNode(FileNode fileNode)
+ throws IOException, TikaException, SAXException {
Map<String, Object> structure = new HashMap<>();
structure.put("oneNoteType", "FileNode");
structure.put("gosid", fileNode.gosid.toString());
@@ -272,7 +290,8 @@ class OneNoteTreeWalker {
structure.put("fileNodeBaseType", "0x" + Long.toHexString(fileNode.baseType));
structure.put("isFileData", fileNode.isFileData);
structure.put("idDesc", fileNode.idDesc);
- if (fileNode.childFileNodeList != null && fileNode.childFileNodeList.fileNodeListHeader != null) {
+ if (fileNode.childFileNodeList != null &&
+ fileNode.childFileNodeList.fileNodeListHeader != null) {
structure.put("childFileNodeList", walkFileNodeList(fileNode.childFileNodeList));
}
if (fileNode.propertySet != null) {
@@ -281,10 +300,10 @@ class OneNoteTreeWalker {
structure.put("propertySet", propSet);
}
}
- if (fileNode.subType.fileDataStoreObjectReference.ref != null &&
- !FileChunkReference.nil().equals(fileNode.subType.fileDataStoreObjectReference.ref.fileData)) {
- structure.put("fileDataStoreObjectReference",
- walkFileDataStoreObjectReference(fileNode.subType.fileDataStoreObjectReference));
+ if (fileNode.subType.fileDataStoreObjectReference.ref != null && !FileChunkReference.nil()
+ .equals(fileNode.subType.fileDataStoreObjectReference.ref.fileData)) {
+ structure.put("fileDataStoreObjectReference", walkFileDataStoreObjectReference(
+ fileNode.subType.fileDataStoreObjectReference));
}
return structure;
}
@@ -297,21 +316,22 @@ class OneNoteTreeWalker {
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
private Map<String, Object> walkFileDataStoreObjectReference(
- FileDataStoreObjectReference fileDataStoreObjectReference) throws IOException, SAXException, TikaException {
+ FileDataStoreObjectReference fileDataStoreObjectReference)
+ throws IOException, SAXException, TikaException {
Map<String, Object> structure = new HashMap<>();
OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
content.reposition(fileDataStoreObjectReference.ref.fileData);
if (fileDataStoreObjectReference.ref.fileData.cb > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " +
- fileDataStoreObjectReference.ref.fileData.cb +
- " exceeds document size: " + dif.size());
+ throw new TikaMemoryLimitException(
+ "File data store cb " + fileDataStoreObjectReference.ref.fileData.cb +
+ " exceeds document size: " + dif.size());
}
- handleEmbedded((int)fileDataStoreObjectReference.ref.fileData.cb);
+ handleEmbedded((int) fileDataStoreObjectReference.ref.fileData.cb);
structure.put("fileDataStoreObjectMetadata", fileDataStoreObjectReference);
return structure;
}
- private void handleEmbedded(int length) throws TikaException, IOException, SAXException {
+ private void handleEmbedded(int length) throws TikaException, IOException, SAXException {
TikaInputStream stream = null;
ByteBuffer buf = null;
try {
@@ -325,9 +345,7 @@ class OneNoteTreeWalker {
Metadata embeddedMetadata = new Metadata();
try {
stream = TikaInputStream.get(buf.array());
- embeddedDocumentExtractor.parseEmbedded(
- stream,
- new EmbeddedContentHandler(xhtml),
+ embeddedDocumentExtractor.parseEmbedded(stream, new EmbeddedContentHandler(xhtml),
embeddedMetadata, false);
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute("", "class", "class", "CDATA", "embedded");
@@ -344,8 +362,8 @@ class OneNoteTreeWalker {
* @return
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- private List<Map<String, Object>> processPropertySet(PropertySet propertySet) throws IOException, TikaException,
- SAXException {
+ private List<Map<String, Object>> processPropertySet(PropertySet propertySet)
+ throws IOException, TikaException, SAXException {
List<Map<String, Object>> propValues = new ArrayList<>();
for (PropertyValue propertyValue : propertySet.rgPridsData) {
propValues.add(processPropertyValue(propertyValue));
@@ -361,21 +379,22 @@ class OneNoteTreeWalker {
*/
private boolean propertyIsBinary(OneNotePropertyEnum property) {
return property == OneNotePropertyEnum.RgOutlineIndentDistance ||
- property == OneNotePropertyEnum.NotebookManagementEntityGuid ||
- property == OneNotePropertyEnum.RichEditTextUnicode;
+ property == OneNotePropertyEnum.NotebookManagementEntityGuid ||
+ property == OneNotePropertyEnum.RichEditTextUnicode;
}
/**
* Process a property value and populate a map containing all the property value data.
* <p>
- * Parse out any relevant text and write it to the print writer as well for easy search engine parsing.
+ * Parse out any relevant text and write it to the print writer as well for easy search
+ * engine parsing.
*
* @param propertyValue The property value we are parsing.
* @return The map parsed by this property value.
* @throws IOException Can throw these when manipulating the seekable byte channel.
*/
- private Map<String, Object> processPropertyValue(PropertyValue propertyValue) throws IOException, TikaException,
- SAXException {
+ private Map<String, Object> processPropertyValue(PropertyValue propertyValue)
+ throws IOException, TikaException, SAXException {
Map<String, Object> propMap = new HashMap<>();
propMap.put("oneNoteType", "PropertyValue");
propMap.put("propertyId", propertyValue.propertyId.toString());
@@ -387,13 +406,15 @@ class OneNoteTreeWalker {
lastModifiedTimestamp = instant;
}
} else if (propertyValue.propertyId.propertyEnum == OneNotePropertyEnum.CreationTimeStamp) {
- // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not 1970
+ // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not
+ // 1970
long creationTs = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
if (creationTs < creationTimestamp) {
creationTimestamp = creationTs;
}
} else if (propertyValue.propertyId.propertyEnum == OneNotePropertyEnum.LastModifiedTime) {
- // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not 1970
+ // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not
+ // 1970
long lastMod = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
if (lastMod > lastModified) {
lastModified = lastMod;
@@ -423,12 +444,12 @@ class OneNoteTreeWalker {
content.reposition(propertyValue.rawData);
boolean isBinary = propertyIsBinary(propertyValue.propertyId.propertyEnum);
propMap.put("isBinary", isBinary);
- if ((content.size() & 1) == 0
- && propertyValue.propertyId.propertyEnum != OneNotePropertyEnum.TextExtendedAscii
- && isBinary == false) {
+ if ((content.size() & 1) == 0 && propertyValue.propertyId.propertyEnum !=
+ OneNotePropertyEnum.TextExtendedAscii && !isBinary) {
if (content.size() > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " + content.size() +
- " exceeds document size: " + dif.size());
+ throw new TikaMemoryLimitException(
+ "File data store cb " + content.size() + " exceeds document size: " +
+ dif.size());
}
ByteBuffer buf = ByteBuffer.allocate(content.size());
dif.read(buf);
@@ -438,21 +459,24 @@ class OneNoteTreeWalker {
xhtml.characters((String) propMap.get("dataUnicode16LE"));
xhtml.endElement(P);
}
- } else if (propertyValue.propertyId.propertyEnum == OneNotePropertyEnum.TextExtendedAscii) {
+ } else if (propertyValue.propertyId.propertyEnum ==
+ OneNotePropertyEnum.TextExtendedAscii) {
if (content.size() > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " + content.size() +
- " exceeds document size: " + dif.size());
+ throw new TikaMemoryLimitException(
+ "File data store cb " + content.size() + " exceeds document size: " +
+ dif.size());
}
ByteBuffer buf = ByteBuffer.allocate(content.size());
dif.read(buf);
- propMap.put("dataAscii", new String(buf.array(), StandardCharsets.ISO_8859_1));
+ propMap.put("dataAscii", new String(buf.array(), StandardCharsets.US_ASCII));
xhtml.startElement(P);
xhtml.characters((String) propMap.get("dataAscii"));
xhtml.endElement(P);
- } else if (isBinary == false) {
+ } else if (!isBinary) {
if (content.size() > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " + content.size() +
- " exceeds document size: " + dif.size());
+ throw new TikaMemoryLimitException(
+ "File data store cb " + content.size() + " exceeds document size: " +
+ dif.size());
}
ByteBuffer buf = ByteBuffer.allocate(content.size());
dif.read(buf);
@@ -464,10 +488,12 @@ class OneNoteTreeWalker {
}
} else {
if (content.size() > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " + content.size() +
- " exceeds document size: " + dif.size());
+ throw new TikaMemoryLimitException(
+ "File data store cb " + content.size() + " exceeds document size: " +
+ dif.size());
}
- if (propertyValue.propertyId.propertyEnum == OneNotePropertyEnum.RichEditTextUnicode) {
+ if (propertyValue.propertyId.propertyEnum ==
+ OneNotePropertyEnum.RichEditTextUnicode) {
handleRichEditTextUnicode(content.size());
} else {
//TODO -- these seem to be somewhat broken font files and other
@@ -497,31 +523,35 @@ class OneNoteTreeWalker {
/**
* returns a UTF-16LE author string.
+ *
* @param propertyValue The property value of an author.
* @return Resulting author string in UTF-16LE format.
*/
- private String getAuthor(PropertyValue propertyValue) throws IOException, TikaMemoryLimitException {
+ private String getAuthor(PropertyValue propertyValue)
+ throws IOException, TikaMemoryLimitException {
OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
content.reposition(propertyValue.rawData);
if (content.size() > dif.size()) {
- throw new TikaMemoryLimitException("File data store cb " + content.size() +
- " exceeds document size: " + dif.size());
+ throw new TikaMemoryLimitException(
+ "File data store cb " + content.size() + " exceeds document size: " +
+ dif.size());
}
ByteBuffer buf = ByteBuffer.allocate(content.size());
dif.read(buf);
return new String(buf.array(), StandardCharsets.UTF_16LE);
}
- private void handleRichEditTextUnicode(int length) throws SAXException, IOException, TikaException {
+ private void handleRichEditTextUnicode(int length)
+ throws SAXException, IOException, TikaException {
//this is a null-ended UTF-16LE string
ByteBuffer buf = ByteBuffer.allocate(length);
dif.read(buf);
byte[] arr = buf.array();
//look for the first null
int firstNull = 0;
- for (int i = 0; i < arr.length-1; i += 2) {
- if (arr[i] == 0 && arr[i+1] == 0) {
- firstNull = (i > 0) ? i : 0;
+ for (int i = 0; i < arr.length - 1; i += 2) {
+ if (arr[i] == 0 && arr[i + 1] == 0) {
+ firstNull = (i > 0) ? i : 0;
break;
}
}
@@ -577,4 +607,4 @@ class OneNoteTreeWalker {
public void setCreationTimestamp(long creationTimestamp) {
this.creationTimestamp = creationTimestamp;
}
-}
\ No newline at end of file
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalkerOptions.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalkerOptions.java
index b25fd05..8226654 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalkerOptions.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalkerOptions.java
@@ -17,6 +17,7 @@
package org.apache.tika.parser.microsoft.onenote;
+import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
@@ -24,13 +25,12 @@ import java.util.Set;
/**
* Options when walking the one note tree.
*/
-class OneNoteTreeWalkerOptions {
+public class OneNoteTreeWalkerOptions implements Serializable {
private boolean crawlAllFileNodesFromRoot = true;
private boolean onlyLatestRevision = true;
private Set<OneNotePropertyEnum> utf16PropertiesToPrint = new HashSet<>(
- Arrays.asList(OneNotePropertyEnum.ImageFilename,
- OneNotePropertyEnum.Author,
- OneNotePropertyEnum.CachedTitleString));
+ Arrays.asList(OneNotePropertyEnum.ImageFilename, OneNotePropertyEnum.Author,
+ OneNotePropertyEnum.CachedTitleString));
/**
* Do this to ignore revisions and just parse all file nodes from the root recursively.
@@ -45,7 +45,8 @@ class OneNoteTreeWalkerOptions {
* @param crawlAllFileNodesFromRoot
* @return
*/
- public OneNoteTreeWalkerOptions setCrawlAllFileNodesFromRoot(boolean crawlAllFileNodesFromRoot) {
+ public OneNoteTreeWalkerOptions setCrawlAllFileNodesFromRoot(
+ boolean crawlAllFileNodesFromRoot) {
this.crawlAllFileNodesFromRoot = crawlAllFileNodesFromRoot;
return this;
}
@@ -78,10 +79,12 @@ class OneNoteTreeWalkerOptions {
/**
* Print file node data in UTF-16 format when they match these props.
*
- * @param utf16PropertiesToPrint The set of UTF properties you want to print UTF-16 for. Defaults are usually ok here.
+ * @param utf16PropertiesToPrint The set of properties to print as UTF-16 text.
+ * Defaults are usually ok here.
* @return Returns this, as per builder pattern.
*/
- public OneNoteTreeWalkerOptions setUtf16PropertiesToPrint(Set<OneNotePropertyEnum> utf16PropertiesToPrint) {
+ public OneNoteTreeWalkerOptions setUtf16PropertiesToPrint(
+ Set<OneNotePropertyEnum> utf16PropertiesToPrint) {
this.utf16PropertiesToPrint = utf16PropertiesToPrint;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyIDType.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyIDType.java
index 87782e6..d5f79fb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyIDType.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyIDType.java
@@ -14,13 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
enum PropertyIDType {
- ObjectID,
- ArrayOfObjectIDs,
- ObjectSpaceID,
- ArrayOfObjectSpaceIDs,
- ContextID,
- ArrayofContextIDs;
+ ObjectID, ArrayOfObjectIDs, ObjectSpaceID, ArrayOfObjectSpaceIDs, ContextID, ArrayofContextIDs
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertySet.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertySet.java
index a23d671..d6acb32 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertySet.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertySet.java
@@ -14,38 +14,49 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import org.apache.tika.exception.TikaMemoryLimitException;
+package org.apache.tika.parser.microsoft.onenote;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
+import org.apache.tika.exception.TikaException;
+
/**
- * A property set is a collection of properties that specify the attributes of an object (section 2.1.5).
+ * A property set is a collection of properties that specify the attributes of an object (section
+ * 2.1.5).
* <p>
- * The PropertySet structure specifies the format of a property set and is contained by an ObjectSpaceObjectPropSet structure
- * (section 2.6.1). The meaning of each property in the set is specified in [MS-ONE] section 2.1.12.
+ * The PropertySet structure specifies the format of a property set and is contained by an
+ * ObjectSpaceObjectPropSet structure
+ * (section 2.6.1). The meaning of each property in the set is specified
+ * in [MS-ONE] section 2.1.12.
* <p>
* A PropertySet structure can contain references to other objects.
* <p>
- * The data for a property that is not an object reference is contained in the PropertySet.rgData stream field. The rgData stream is read
- * sequentially beginning with the first property in a PropertySet.rgPrids array until every property has been read.
+ * The data for a property that is not an object reference is contained in the
+ * PropertySet.rgData stream field. The rgData stream is read sequentially
+ * beginning with the first property in a PropertySet.rgPrids array until every
+ * property has been read.
* <p>
* The number of bytes read for each property is specified by the PropertyID.type field.
* <p>
- * The data for a property that is a reference to one or more objects (section 2.1.5) is contained in the streams within an
+ * The data for a property that is a reference to one or more objects (section 2.1.5) is
+ * contained in the streams within an
* ObjectSpaceObjectPropSet structure (OIDs.body, OSIDs.body, ContextIDs.body).
* <p>
- * The streams are read sequentially beginning with the first property in a PropertySet.rgPrids array.
+ * The streams are read sequentially beginning with the first property in a PropertySet.rgPrids
+ * array.
* <p>
- * If the PropertyID.type field specifies a single object (0x8, 0xA, 0xC), a single CompactID (4 bytes) is read from the corresponding
+ * If the PropertyID.type field specifies a single object (0x8, 0xA, 0xC), a single CompactID (4
+ * bytes) is read from the corresponding
* stream in the ObjectSpaceObjectPropSet structure.
* <p>
- * If the PropertyID.type field specifies an array of objects (0x9, 0xB, 0xD), an unsigned integer (4 bytes) is read from the
- * PropertySet.rgDatastream and specifies the number of CompactID structures (section 2.2.2) to read from the corresponding stream in the
+ * If the PropertyID.type field specifies an array of objects (0x9, 0xB, 0xD), an unsigned
+ * integer (4 bytes) is read from the PropertySet.rgData stream and specifies
+ * the number of CompactID structures (section 2.2.2) to read from the
+ * corresponding stream in the
* ObjectSpaceObjectPropSet structure.
* <p>
* The streams for each PropertyID.type field are given by the following table.
@@ -61,7 +72,8 @@ import java.util.Objects;
class PropertySet {
List<PropertyValue> rgPridsData = new ArrayList<>();
- public void print(OneNoteDocument document, OneNotePtr pointer, int indentLevel) throws IOException, TikaMemoryLimitException {
+ public void print(OneNoteDocument document, OneNotePtr pointer, int indentLevel)
+ throws IOException, TikaException {
for (PropertyValue child : rgPridsData) {
child.print(document, pointer, indentLevel);
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyValue.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyValue.java
index 454a3ea..e6b236a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyValue.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/PropertyValue.java
@@ -14,16 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import org.apache.tika.exception.TikaMemoryLimitException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+package org.apache.tika.parser.microsoft.onenote;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.tika.exception.TikaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
class PropertyValue {
private static final Logger LOG = LoggerFactory.getLogger(PropertyValue.class);
@@ -37,8 +38,10 @@ class PropertyValue {
PropertySet propertySet = new PropertySet(); // or used to house a single value
FileChunkReference rawData = new FileChunkReference(); // FourBytesOfLengthFollowedByData
- public void print(OneNoteDocument document, OneNotePtr pointer, int indentLevel) throws IOException, TikaMemoryLimitException {
- boolean isRawText = true; //std::string(get_property_id_name(propertyId.id)).find("TextE")!=-1;
+ public void print(OneNoteDocument document, OneNotePtr pointer, int indentLevel)
+ throws IOException, TikaException {
+ boolean isRawText =
+ true; //std::string(get_property_id_name(propertyId.id)).find("TextE")!=-1;
long type = propertyId.type;
@@ -57,9 +60,8 @@ class PropertyValue {
content.dumpHex();
LOG.debug("]");
}
- } else if (type == 0x9 || type == 0x8
- || type == 0xb || type == 0xc
- || type == 0xa || type == 0xd) {
+ } else if (type == 0x9 || type == 0x8 || type == 0xb || type == 0xc || type == 0xa ||
+ type == 0xd) {
String xtype = "contextID";
if (type == 0x8 || type == 0x9) {
xtype = "OIDs";
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Revision.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Revision.java
index e6ca0fc..6bc4a48 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Revision.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Revision.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
import java.util.ArrayList;
@@ -27,6 +28,18 @@ class Revision {
ExtendedGUID gosid = ExtendedGUID.nil();
ExtendedGUID dependent = ExtendedGUID.nil();
+ public Revision() {
+
+ }
+
+ public Revision(Map<Long, GUID> globalId, List<FileNodePtr> manifestList, ExtendedGUID gosid,
+ ExtendedGUID dependent) {
+ this.globalId = globalId;
+ this.manifestList = manifestList;
+ this.gosid = gosid;
+ this.dependent = dependent;
+ }
+
public Map<Long, GUID> getGlobalId() {
return globalId;
}
@@ -58,15 +71,4 @@ class Revision {
public void setDependent(ExtendedGUID dependent) {
this.dependent = dependent;
}
-
- public Revision() {
-
- }
-
- public Revision(Map<Long, GUID> globalId, List<FileNodePtr> manifestList, ExtendedGUID gosid, ExtendedGUID dependent) {
- this.globalId = globalId;
- this.manifestList = manifestList;
- this.gosid = gosid;
- this.dependent = dependent;
- }
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionManifest.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionManifest.java
index 4bd18b5..eb27e54 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionManifest.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RevisionManifest.java
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.parser.microsoft.onenote;
class RevisionManifest {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RootObjectReference.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RootObjectReference.java
index 94017b9..abe0c72 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RootObjectReference.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/RootObjectReference.java
@@ -33,7 +33,8 @@ class RootObjectReference {
return rootObjectReferenceBase;
}
- public RootObjectReference setRootObjectReferenceBase(RootObjectReferenceBase rootObjectReferenceBase) {
+ public RootObjectReference setRootObjectReferenceBase(
+ RootObjectReferenceBase rootObjectReferenceBase) {
this.rootObjectReferenceBase = rootObjectReferenceBase;
return this;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/IFSSHTTPBSerializable.java
similarity index 66%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/IFSSHTTPBSerializable.java
index b79ef8a..d141aa1 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/IFSSHTTPBSerializable.java
@@ -14,17 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-class FileNodePtrBackPush {
- FileNodePtr parent;
+package org.apache.tika.parser.microsoft.onenote.fsshttpb;
- public FileNodePtrBackPush(FileNodePtr parent) {
- this.parent = parent;
- this.parent.nodeListPositions.add(0);
- }
+import java.io.IOException;
+import java.util.List;
- public void dec() {
- parent.nodeListPositions.remove(parent.nodeListPositions.size() - 1);
- }
+import org.apache.tika.exception.TikaException;
+
+/**
+ * FSSHTTPB Serialize interface.
+ */
+public interface IFSSHTTPBSerializable {
+
+ /**
+ * Serialize to byte list.
+ *
+ * @return The byte list.
+ */
+ List<Byte> serializeToByteList() throws IOException, TikaException;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStorePackage.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStorePackage.java
new file mode 100644
index 0000000..cb6bc23
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStorePackage.java
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb;
+
+import static org.apache.tika.parser.microsoft.onenote.OneNoteParser.ONE_NOTE_PREFIX;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.Month;
+import java.time.ZoneOffset;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.microsoft.onenote.OneNotePropertyEnum;
+import org.apache.tika.parser.microsoft.onenote.OneNoteTreeWalkerOptions;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.EightBytesOfData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.FourBytesOfData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.IProperty;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.PrtFourBytesOfLengthFollowedByData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.CellManifestDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.PropertySet;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionStoreObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionStoreObjectGroup;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexCellMapping;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexRevisionMapping;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageManifestDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.HeaderCell;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.PropertyID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.PropertyType;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.unsigned.Unsigned;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+
+public class MSOneStorePackage {
+ /**
+ * See spec MS-ONE - 2.3.1 - TIME32 - epoch of Jan 1 1980 UTC.
+ * This offset is the number of seconds between that epoch and
+ * Instant.EPOCH.
+ */
+ private static final long TIME32_EPOCH_DIFF_1980;
+ /**
+ * See spec MS-DTYP - 2.3.3 - DATETIME dates are based on epoch of Jan 1 1601 UTC.
+ * This offset is the number of seconds between that epoch and
+ * Instant.EPOCH.
+ */
+ private static final long DATETIME_EPOCH_DIFF_1601;
+ private static final Pattern HYPERLINK_PATTERN =
+ Pattern.compile("\uFDDFHYPERLINK\\s+\"([^\"]+)\"([^\"]+)$");
+ private static final String P = "p";
+
+ static {
+ LocalDateTime time32Epoch1980 = LocalDateTime.of(1980, Month.JANUARY, 1, 0, 0);
+ Instant instant = time32Epoch1980.atZone(ZoneOffset.UTC).toInstant();
+ TIME32_EPOCH_DIFF_1980 = (instant.toEpochMilli() - Instant.EPOCH.toEpochMilli()) / 1000;
+ }
+
+ static {
+ LocalDateTime time32Epoch1601 = LocalDateTime.of(1601, Month.JANUARY, 1, 0, 0);
+ Instant instant = time32Epoch1601.atZone(ZoneOffset.UTC).toInstant();
+ DATETIME_EPOCH_DIFF_1601 = (instant.toEpochMilli() - Instant.EPOCH.toEpochMilli()) / 1000;
+ }
+
+ private final Set<String> authors = new HashSet<>();
+ private final Set<String> mostRecentAuthors = new HashSet<>();
+ private final Set<String> originalAuthors = new HashSet<>();
+ public StorageIndexDataElementData storageIndex;
+ public StorageManifestDataElementData storageManifest;
+ public CellManifestDataElementData headerCellCellManifest;
+ public RevisionManifestDataElementData headerCellRevisionManifest;
+ public List<RevisionManifestDataElementData> revisionManifests;
+ public List<CellManifestDataElementData> cellManifests;
+ public HeaderCell headerCell;
+ public List<RevisionStoreObjectGroup> dataRoot;
+ public List<RevisionStoreObjectGroup> OtherFileNodeList;
+ private boolean mostRecentAuthorProp = false;
+ private boolean originalAuthorProp = false;
+ private Instant lastModifiedTimestamp = Instant.MIN;
+ private long creationTimestamp = Long.MAX_VALUE;
+ private long lastModified = Long.MIN_VALUE;
+
+ public MSOneStorePackage() {
+ this.revisionManifests = new ArrayList<>();
+ this.cellManifests = new ArrayList<>();
+ this.OtherFileNodeList = new ArrayList<>();
+ }
+
+ /**
+ * Finds the Storage Index Cell Mapping that matches the given Cell ID.
+ *
+ * @param cellID The Cell ID to look up.
+ * @return The matching mapping; a default-constructed one if none matches; null if storageIndex is null.
+ */
+ public StorageIndexCellMapping findStorageIndexCellMapping(CellID cellID) {
+ StorageIndexCellMapping storageIndexCellMapping = null;
+ if (this.storageIndex != null) {
+ storageIndexCellMapping = this.storageIndex.storageIndexCellMappingList.stream()
+ .filter(s -> s.cellID.equals(cellID)).findFirst()
+ .orElse(new StorageIndexCellMapping());
+ }
+ return storageIndexCellMapping;
+ }
+
+ /**
+ * Finds the Storage Index Revision Mapping that matches the Revision Mapping Extended GUID.
+ *
+ * @param revisionExtendedGUID The Revision Mapping Extended GUID to look up.
+ * @return The matching mapping; a default-constructed one if none matches; null if storageIndex is null.
+ */
+ public StorageIndexRevisionMapping findStorageIndexRevisionMapping(
+ ExGuid revisionExtendedGUID) {
+ StorageIndexRevisionMapping instance = null;
+ if (this.storageIndex != null) {
+ instance = this.storageIndex.storageIndexRevisionMappingList.stream()
+ .filter(r -> r.revisionExGuid.equals(revisionExtendedGUID)).findFirst()
+ .orElse(new StorageIndexRevisionMapping());
+ }
+
+ return instance;
+ }
+
+ /**
+ * Is this property a binary property?
+ *
+ * @param property The property.
+ * @return Is it binary?
+ */
+ private boolean propertyIsBinary(OneNotePropertyEnum property) {
+ return property == OneNotePropertyEnum.RgOutlineIndentDistance ||
+ property == OneNotePropertyEnum.NotebookManagementEntityGuid ||
+ property == OneNotePropertyEnum.RichEditTextUnicode;
+ }
+
+ public void walkTree(OneNoteTreeWalkerOptions options, Metadata metadata,
+ XHTMLContentHandler xhtml)
+ throws SAXException, TikaException, IOException {
+ for (RevisionStoreObjectGroup revisionStoreObjectGroup : OtherFileNodeList) {
+ for (RevisionStoreObject revisionStoreObject : revisionStoreObjectGroup.objects) {
+ PropertySet propertySet =
+ revisionStoreObject.propertySet.objectSpaceObjectPropSet.body;
+ for (int i = 0; i < propertySet.rgData.size(); ++i) {
+ IProperty property = propertySet.rgData.get(i);
+ PropertyID propertyID = propertySet.rgPrids[i];
+ PropertyType propertyType = PropertyType.fromIntVal(propertyID.type);
+ OneNotePropertyEnum oneNotePropertyEnum =
+ OneNotePropertyEnum.of(Unsigned.uint(propertyID.value).longValue());
+ if (oneNotePropertyEnum == OneNotePropertyEnum.LastModifiedTimeStamp) {
+ long fullval = getScalar(property);
+ Instant instant = Instant.ofEpochSecond(
+ fullval / 10000000 + DATETIME_EPOCH_DIFF_1601);
+ if (instant.isAfter(lastModifiedTimestamp)) {
+ lastModifiedTimestamp = instant;
+ }
+ metadata.set(ONE_NOTE_PREFIX + "lastModifiedTimestamp",
+ String.valueOf(lastModifiedTimestamp.toEpochMilli()));
+ } else if (oneNotePropertyEnum == OneNotePropertyEnum.CreationTimeStamp) {
+ // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not
+ // 1970
+ long scalar = getScalar(property);
+ long creationTs = scalar + TIME32_EPOCH_DIFF_1980;
+ if (creationTs < creationTimestamp) {
+ creationTimestamp = creationTs;
+ }
+ metadata.set(ONE_NOTE_PREFIX + "creationTimestamp", String.valueOf(creationTimestamp));
+ } else if (oneNotePropertyEnum == OneNotePropertyEnum.LastModifiedTime) {
+ // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not
+ // 1970
+ long scalar = getScalar(property);
+ long lastMod = scalar + TIME32_EPOCH_DIFF_1980;
+ if (lastMod > lastModified) {
+ lastModified = lastMod;
+ }
+ metadata.set(TikaCoreProperties.MODIFIED, String.valueOf(lastModified));
+ } else if (oneNotePropertyEnum == OneNotePropertyEnum.Author) {
+ String author =
+ new String(((PrtFourBytesOfLengthFollowedByData) property).data,
+ StandardCharsets.UTF_8);
+ if (mostRecentAuthorProp) {
+ mostRecentAuthors.add(author);
+ } else if (originalAuthorProp) {
+ originalAuthors.add(author);
+ } else {
+ authors.add(author);
+ }
+ } else if (oneNotePropertyEnum == OneNotePropertyEnum.AuthorMostRecent) {
+ mostRecentAuthorProp = true;
+ } else if (oneNotePropertyEnum == OneNotePropertyEnum.AuthorOriginal) {
+ originalAuthorProp = true;
+ } else if (propertyType == PropertyType.FourBytesOfLengthFollowedByData) {
+ boolean isBinary = propertyIsBinary(oneNotePropertyEnum);
+ PrtFourBytesOfLengthFollowedByData dataProperty =
+ (PrtFourBytesOfLengthFollowedByData) property;
+ if ((dataProperty.data.length & 1) == 0 &&
+ oneNotePropertyEnum != OneNotePropertyEnum.TextExtendedAscii &&
+ !isBinary) {
+ if (options.getUtf16PropertiesToPrint().contains(oneNotePropertyEnum)) {
+ xhtml.startElement(P);
+ xhtml.characters(
+ new String(dataProperty.data, StandardCharsets.UTF_16LE));
+ xhtml.endElement(P);
+ }
+ } else if (oneNotePropertyEnum == OneNotePropertyEnum.TextExtendedAscii) {
+ xhtml.startElement(P);
+ xhtml.characters(
+ new String(dataProperty.data, StandardCharsets.US_ASCII));
+ xhtml.endElement(P);
+ } else if (!isBinary) {
+ if (options.getUtf16PropertiesToPrint().contains(oneNotePropertyEnum)) {
+ xhtml.startElement(P);
+ xhtml.characters(
+ new String(dataProperty.data, StandardCharsets.UTF_16LE));
+ xhtml.endElement(P);
+ }
+ } else {
+ if (oneNotePropertyEnum == OneNotePropertyEnum.RichEditTextUnicode) {
+ handleRichEditTextUnicode(dataProperty.data, xhtml);
+ } else {
+ //TODO -- these seem to be somewhat broken font files and other
+ //odds and ends...what are they and how should we process them?
+ //handleEmbedded(content.size());
+ }
+ }
+ }
+ }
+ }
+ }
+ if (!authors.isEmpty()) {
+ metadata.set(TikaCoreProperties.CREATOR, authors.toArray(new String[]{}));
+ }
+ if (!mostRecentAuthors.isEmpty()) {
+ metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "mostRecentAuthors"),
+ mostRecentAuthors.toArray(new String[]{}));
+ }
+ if (!originalAuthors.isEmpty()) {
+ metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "originalAuthors"),
+ originalAuthors.toArray(new String[]{}));
+ }
+ }
+
+
+ private void handleRichEditTextUnicode(byte[] arr, XHTMLContentHandler xhtml)
+ throws SAXException, IOException, TikaException {
+ // look for the first null
+ int firstNull = 0;
+ for (int i = 0; i < arr.length - 1; i += 2) {
+ if (arr[i] == 0 && arr[i + 1] == 0) {
+ firstNull = Math.max(i, 0);
+ break;
+ }
+ }
+
+ if (firstNull == 0) {
+ return;
+ }
+ String txt = new String(arr, 0, firstNull, StandardCharsets.UTF_16LE);
+ Matcher m = HYPERLINK_PATTERN.matcher(txt);
+ if (m.find()) {
+ xhtml.startElement("a", "href", m.group(1));
+ xhtml.characters(m.group(2));
+ xhtml.endElement("a");
+ } else {
+ xhtml.startElement(P);
+ xhtml.characters(txt);
+ xhtml.endElement(P);
+ }
+ }
+
+ private long getScalar(IProperty property) throws TikaException, IOException {
+ if (property instanceof FourBytesOfData) {
+ FourBytesOfData fourBytesOfDataProp = (FourBytesOfData) property;
+ return BitConverter.toUInt32(fourBytesOfDataProp.data, 0);
+ } else if (property instanceof EightBytesOfData) {
+ EightBytesOfData fourBytesOfDataProp = (EightBytesOfData) property;
+ return BitConverter.toInt64(fourBytesOfDataProp.data, 0);
+ }
+ throw new TikaException("Could not parse scalar of type " + property.getClass());
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStoreParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStoreParser.java
new file mode 100644
index 0000000..ab1f008
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStoreParser.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.CellManifestDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.DataElement;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.DataElementPackage;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.ObjectGroupDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestObjectGroupReferences;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestRootDeclare;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionStoreObjectGroup;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexCellMapping;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexRevisionMapping;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageManifestDataElementData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.DataElementType;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.HeaderCell;
+
+ /**
+  * Assembles an {@link MSOneStorePackage} from the data elements of a
+  * {@link DataElementPackage}: the storage index and storage manifest, the header cell,
+  * the object groups of the data root cell and the object groups of every other cell
+  * listed in the storage index.
+  */
+ public class MSOneStoreParser {
+     // Cells that have already been handled during the current parse() call.
+     private final Set<CellID> storageIndexHashTab = new HashSet<>();
+     // The DataElements of Storage Index
+     private List<DataElement> storageIndexDataElements;
+     // The DataElements of Storage Manifest
+     private List<DataElement> storageManifestDataElements;
+     // The DataElements of Cell Manifest
+     private List<DataElement> cellManifestDataElements;
+     // The DataElements of Revision Manifest
+     private List<DataElement> revisionManifestDataElements;
+     // The DataElements of Object Group Data
+     private List<DataElement> objectGroupDataElements;
+     // The DataElements of Object BLOB
+     private List<DataElement> objectBlOBElements;
+
+     /**
+      * Returns the element at the given index or fails with a descriptive exception.
+      *
+      * @param list        the list to read from.
+      * @param index       the index of the wanted element.
+      * @param description what the element represents; used in the error message.
+      * @return the element at {@code index}.
+      * @throws IOException if the list is null or has no element at {@code index}.
+      */
+     private static <T> T elementAt(List<T> list, int index, String description)
+             throws IOException {
+         if (list == null || index >= list.size()) {
+             throw new IOException("Missing " + description + " at index " + index);
+         }
+         return list.get(index);
+     }
+
+     /**
+      * Returns the given value or fails with a descriptive exception when it is null.
+      *
+      * @param value       the lookup result to check.
+      * @param description what was looked up; used in the error message.
+      * @return {@code value}, never null.
+      * @throws IOException if {@code value} is null.
+      */
+     private static <T> T requireFound(T value, String description) throws IOException {
+         if (value == null) {
+             throw new IOException("Could not find " + description);
+         }
+         return value;
+     }
+
+     /**
+      * Parses the given package of data elements.
+      * <p>
+      * The header cell, the data root and the remaining cells are only parsed when the
+      * storage index contains a mapping for the header cell; otherwise the returned
+      * package carries just the storage index and the storage manifest.
+      *
+      * @param dataElementPackage the package whose data elements are parsed.
+      * @return the populated package.
+      * @throws IOException if a required data element, mapping or manifest is missing.
+      */
+     public MSOneStorePackage parse(DataElementPackage dataElementPackage) throws IOException {
+         MSOneStorePackage msOneStorePackage = new MSOneStorePackage();
+         // Drop cells remembered from an earlier call; otherwise reusing this instance
+         // would silently skip every cell ID it has already seen.
+         storageIndexHashTab.clear();
+
+         storageIndexDataElements = dataElementPackage.dataElements.stream()
+                 .filter(d -> d.dataElementType == DataElementType.StorageIndexDataElementData)
+                 .collect(Collectors.toList());
+         storageManifestDataElements = dataElementPackage.dataElements.stream()
+                 .filter(d -> d.dataElementType == DataElementType.StorageManifestDataElementData)
+                 .collect(Collectors.toList());
+         cellManifestDataElements = dataElementPackage.dataElements.stream()
+                 .filter(d -> d.dataElementType == DataElementType.CellManifestDataElementData)
+                 .collect(Collectors.toList());
+         revisionManifestDataElements = dataElementPackage.dataElements.stream()
+                 .filter(d -> d.dataElementType == DataElementType.RevisionManifestDataElementData)
+                 .collect(Collectors.toList());
+         objectGroupDataElements = dataElementPackage.dataElements.stream()
+                 .filter(d -> d.dataElementType == DataElementType.ObjectGroupDataElementData)
+                 .collect(Collectors.toList());
+         objectBlOBElements = dataElementPackage.dataElements.stream()
+                 .filter(d -> d.dataElementType == DataElementType.ObjectDataBLOBDataElementData)
+                 .collect(Collectors.toList());
+
+         msOneStorePackage.storageIndex = (StorageIndexDataElementData) elementAt(
+                 storageIndexDataElements, 0, "storage index data element").data;
+         msOneStorePackage.storageManifest = (StorageManifestDataElementData) elementAt(
+                 storageManifestDataElements, 0, "storage manifest data element").data;
+
+         // Parse Header Cell
+         CellID headerCellID =
+                 elementAt(msOneStorePackage.storageManifest.storageManifestRootDeclareList, 0,
+                         "storage manifest root declare (header cell)").cellID;
+         StorageIndexCellMapping headerCellStorageIndexCellMapping =
+                 msOneStorePackage.findStorageIndexCellMapping(headerCellID);
+         storageIndexHashTab.add(headerCellID);
+
+         if (headerCellStorageIndexCellMapping != null) {
+             msOneStorePackage.headerCellCellManifest = requireFound(
+                     this.findCellManifest(headerCellStorageIndexCellMapping.cellMappingExGuid),
+                     "cell manifest of the header cell");
+             StorageIndexRevisionMapping headerCellRevisionManifestMapping = requireFound(
+                     msOneStorePackage.findStorageIndexRevisionMapping(
+                             msOneStorePackage.headerCellCellManifest.cellManifestCurrentRevision
+                                     .cellManifestCurrentRevisionExGuid),
+                     "revision mapping of the header cell");
+             msOneStorePackage.headerCellRevisionManifest = requireFound(
+                     this.findRevisionManifestDataElement(
+                             headerCellRevisionManifestMapping.revisionMappingExGuid),
+                     "revision manifest of the header cell");
+             msOneStorePackage.headerCell =
+                     this.parseHeaderCell(msOneStorePackage.headerCellRevisionManifest);
+
+             // Parse Data root
+             CellID dataRootCellID =
+                     elementAt(msOneStorePackage.storageManifest.storageManifestRootDeclareList,
+                             1, "storage manifest root declare (data root)").cellID;
+             storageIndexHashTab.add(dataRootCellID);
+             msOneStorePackage.dataRoot = this.parseObjectGroup(dataRootCellID, msOneStorePackage);
+             // Parse other data
+             for (StorageIndexCellMapping storageIndexCellMapping : msOneStorePackage.storageIndex
+                     .storageIndexCellMappingList) {
+                 if (!storageIndexHashTab.contains(storageIndexCellMapping.cellID)) {
+                     msOneStorePackage.OtherFileNodeList.addAll(
+                             this.parseObjectGroup(storageIndexCellMapping.cellID,
+                                     msOneStorePackage));
+                     storageIndexHashTab.add(storageIndexCellMapping.cellID);
+                 }
+             }
+         }
+         return msOneStorePackage;
+     }
+
+     /**
+      * Find the CellManifestDataElementData
+      *
+      * @param cellMappingExtendedGUID The ExGuid of Cell Mapping Extended GUID.
+      * @return The data of the first cell manifest element with that ExGuid. When none
+      * matches, the data of a freshly constructed DataElement is returned
+      * (presumably null; confirm against DataElement).
+      */
+     private CellManifestDataElementData findCellManifest(ExGuid cellMappingExtendedGUID) {
+         return (CellManifestDataElementData) this.cellManifestDataElements.stream()
+                 .filter(d -> d.dataElementExGuid.equals(cellMappingExtendedGUID)).findFirst()
+                 .orElse(new DataElement()).data;
+     }
+
+     /**
+      * Find the Revision Manifest from Data Elements.
+      *
+      * @param revisionMappingExtendedGUID The Revision Mapping Extended GUID.
+      * @return The data of the first revision manifest element with that ExGuid. When
+      * none matches, the data of a freshly constructed DataElement is returned
+      * (presumably null; confirm against DataElement).
+      */
+     private RevisionManifestDataElementData findRevisionManifestDataElement(
+             ExGuid revisionMappingExtendedGUID) {
+         return (RevisionManifestDataElementData) this.revisionManifestDataElements.stream()
+                 .filter(d -> d.dataElementExGuid.equals(revisionMappingExtendedGUID)).findFirst()
+                 .orElse(new DataElement()).data;
+     }
+
+     /**
+      * Builds the header cell from the first object group referenced by the given
+      * revision manifest.
+      *
+      * @param headerCellRevisionManifest the revision manifest of the header cell.
+      * @return the header cell created by {@code HeaderCell.createInstance}.
+      * @throws IOException if the revision manifest references no object group.
+      */
+     private HeaderCell parseHeaderCell(RevisionManifestDataElementData headerCellRevisionManifest)
+             throws IOException {
+         ExGuid rootObjectId =
+                 elementAt(headerCellRevisionManifest.revisionManifestObjectGroupReferences, 0,
+                         "object group reference of the header cell").objectGroupExtendedGUID;
+
+         // NOTE(review): an unmatched ID still falls through to createInstance with the
+         // data of an empty DataElement, exactly as before; confirm that is intended.
+         DataElement element = this.objectGroupDataElements.stream()
+                 .filter(d -> d.dataElementExGuid.equals(rootObjectId)).findFirst()
+                 .orElse(new DataElement());
+
+         return HeaderCell.createInstance((ObjectGroupDataElementData) element.data);
+     }
+
+     /**
+      * Resolves a cell to its current revision manifest and creates one
+      * {@link RevisionStoreObjectGroup} per object group that manifest references. The
+      * cell manifest and the revision manifest are also recorded on the package.
+      *
+      * @param objectGroupCellID the ID of the cell to parse.
+      * @param msOneStorePackage the package used for lookups and updated with the manifests.
+      * @return the object groups of the cell, in reference order.
+      * @throws IOException if a mapping, manifest or referenced object group is missing.
+      */
+     private List<RevisionStoreObjectGroup> parseObjectGroup(CellID objectGroupCellID,
+                                                             MSOneStorePackage msOneStorePackage)
+             throws IOException {
+         StorageIndexCellMapping storageIndexCellMapping = requireFound(
+                 msOneStorePackage.findStorageIndexCellMapping(objectGroupCellID),
+                 "storage index cell mapping of an object group cell");
+         CellManifestDataElementData cellManifest = requireFound(
+                 this.findCellManifest(storageIndexCellMapping.cellMappingExGuid),
+                 "cell manifest of an object group cell");
+         List<RevisionStoreObjectGroup> objectGroups = new ArrayList<>();
+         msOneStorePackage.cellManifests.add(cellManifest);
+         StorageIndexRevisionMapping revisionMapping = requireFound(
+                 msOneStorePackage.findStorageIndexRevisionMapping(
+                         cellManifest.cellManifestCurrentRevision.cellManifestCurrentRevisionExGuid),
+                 "revision mapping of an object group cell");
+         RevisionManifestDataElementData revisionManifest = requireFound(
+                 findRevisionManifestDataElement(revisionMapping.revisionMappingExGuid),
+                 "revision manifest of an object group cell");
+         msOneStorePackage.revisionManifests.add(revisionManifest);
+
+         // Built once here instead of once per element inside the filter lambda.
+         ExGuid encryptionKeyRootId =
+                 new ExGuid(3, UUID.fromString("4A3717F8-1C14-49E7-9526-81D942DE1741"));
+         RevisionManifestRootDeclare encryptionKeyRoot =
+                 revisionManifest.revisionManifestRootDeclareList.stream()
+                         .filter(r -> r.rootExGuid.equals(encryptionKeyRootId))
+                         .findFirst().orElse(null);
+         boolean isEncryption = encryptionKeyRoot != null;
+         for (RevisionManifestObjectGroupReferences objRef :
+                 revisionManifest.revisionManifestObjectGroupReferences) {
+             ObjectGroupDataElementData dataObject =
+                     (ObjectGroupDataElementData) objectGroupDataElements.stream()
+                             .filter(d -> d.dataElementExGuid.equals(objRef.objectGroupExtendedGUID))
+                             .findFirst()
+                             .orElseThrow(() -> new IOException(
+                                     "Could not find object group data element referenced by "
+                                             + "revision manifest"))
+                             .data;
+
+             RevisionStoreObjectGroup objectGroup =
+                     RevisionStoreObjectGroup.createInstance(objRef.objectGroupExtendedGUID,
+                             dataObject, isEncryption);
+             objectGroups.add(objectGroup);
+         }
+
+         return objectGroups;
+     }
+ }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/exception/DataElementParseErrorException.java
similarity index 63%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/exception/DataElementParseErrorException.java
index b79ef8a..41cf7b5 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/exception/DataElementParseErrorException.java
@@ -14,17 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-class FileNodePtrBackPush {
- FileNodePtr parent;
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.exception;
- public FileNodePtrBackPush(FileNodePtr parent) {
- this.parent = parent;
- this.parent.nodeListPositions.add(0);
+public class DataElementParseErrorException extends RuntimeException {
+
+ private final int index;
+
+ public DataElementParseErrorException(int index, Exception innerException) {
+ super(innerException);
+ this.index = index;
}
- public void dec() {
- parent.nodeListPositions.remove(parent.nodeListPositions.size() - 1);
+ public DataElementParseErrorException(int index, String msg, Exception innerException) {
+ super(msg, innerException);
+ this.index = index;
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/ArrayNumber.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/ArrayNumber.java
new file mode 100644
index 0000000..aac97b1
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/ArrayNumber.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+/**
+ * The class is used to represent the number of the array.
+ */
+public class ArrayNumber implements IProperty {
+ public int number;
+
+ /**
+ * This method is used to deserialize the number of array from the specified byte array and start index.
+ *
+ * @param byteArray Specify the byte array.
+ * @param startIndex Specify the start index from the byte array.
+ * @return Return the length in byte of the number of array.
+ */
+ public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) throws IOException {
+ this.number = BitConverter.toInt32(byteArray, startIndex);
+ return 4;
+ }
+
+ /**
+ * This method is used to convert the element of the number of array into a byte List.
+ *
+ * @return Return the byte list which store the byte information of the number of array.
+ */
+ public List<Byte> serializeToByteList() {
+ return ByteUtil.toListOfByte(BitConverter.getBytes(this.number));
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/EightBytesOfData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/EightBytesOfData.java
new file mode 100644
index 0000000..26e88d7
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/EightBytesOfData.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+
+/**
+ * This class is used to represent the property contains 8 bytes of data in the PropertySet.rgData stream field.
+ */
+public class EightBytesOfData implements IProperty {
+ public byte[] data;
+
+ /**
+ * This method is used to deserialize the EightBytesOfData from the specified byte array and start index.
+ *
+ * @param byteArray Specify the byte array.
+ * @param startIndex Specify the start index from the byte array.
+ * @return Return the length in byte of the EightBytesOfData.
+ */
+ public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) {
+ this.data = Arrays.copyOfRange(byteArray, startIndex, startIndex + 8);
+ return 8;
+ }
+
+ /**
+ * This method is used to convert the element of EightBytesOfData into a byte List.
+ *
+ * @return Return the byte list which store the byte information of EightBytesOfData.
+ */
+ public List<Byte> serializeToByteList() {
+ return ByteUtil.toListOfByte(this.data);
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/FourBytesOfData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/FourBytesOfData.java
new file mode 100644
index 0000000..5d6f366
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/FourBytesOfData.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+/**
+ * This class is used to represent the property contains 4 bytes of data in the PropertySet.rgData stream field.
+ */
+public class FourBytesOfData implements IProperty {
+ public byte[] data;
+
+ /**
+ * This method is used to deserialize the FourBytesOfData from the specified byte array and start index.
+ *
+ * @param byteArray Specify the byte array.
+ * @param startIndex Specify the start index from the byte array.
+ * @return Return the length in byte of the FourBytesOfData.
+ */
+ public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) {
+ this.data = Arrays.copyOfRange(byteArray, startIndex, startIndex + 4);
+ return 4;
+ }
+
+ /**
+ * This method is used to convert the element of FourBytesOfData into a byte List.
+ *
+ * @return Return the byte list which store the byte information of FourBytesOfData.
+ */
+ public List<Byte> serializeToByteList() {
+ return ByteUtil.toListOfByte(this.data);
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/IProperty.java
similarity index 50%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/IProperty.java
index aa01c18..bf402a0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/IProperty.java
@@ -14,30 +14,30 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import java.util.ArrayList;
-import java.util.List;
-
-class FileNodeList {
- FileNodeListHeader fileNodeListHeader;
- List<FileNode> children = new ArrayList<>();
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
- public FileNodeListHeader getFileNodeListHeader() {
- return fileNodeListHeader;
- }
+import java.io.IOException;
+import java.util.List;
- public FileNodeList setFileNodeListHeader(FileNodeListHeader fileNodeListHeader) {
- this.fileNodeListHeader = fileNodeListHeader;
- return this;
- }
- public List<FileNode> getChildren() {
- return children;
- }
+/**
+ * The interface of the property in OneNote file.
+ */
+public interface IProperty {
+ /**
+ * This method is used to convert the element of property into a byte List.
+ *
+ * @return Return the byte list which store the byte information of property.
+ */
+ List<Byte> serializeToByteList() throws IOException;
- public FileNodeList setChildren(List<FileNode> children) {
- this.children = children;
- return this;
- }
+ /**
+ * This method is used to deserialize the property from the specified byte array and start index.
+ *
+ * @param byteArray Specify the byte array.
+ * @param startIndex Specify the start index from the byte array.
+ * @return Return the length in byte of the property.
+ */
+ int doDeserializeFromByteArray(byte[] byteArray, int startIndex) throws IOException;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/NoData.java
similarity index 52%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/NoData.java
index aa01c18..4a2c7c2 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/NoData.java
@@ -14,30 +14,33 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
import java.util.ArrayList;
import java.util.List;
-class FileNodeList {
- FileNodeListHeader fileNodeListHeader;
- List<FileNode> children = new ArrayList<>();
-
- public FileNodeListHeader getFileNodeListHeader() {
- return fileNodeListHeader;
- }
-
- public FileNodeList setFileNodeListHeader(FileNodeListHeader fileNodeListHeader) {
- this.fileNodeListHeader = fileNodeListHeader;
- return this;
- }
-
- public List<FileNode> getChildren() {
- return children;
+/**
+ * This class is used to represent the property contains no data.
+ */
+public class NoData implements IProperty {
+ /**
+ * This method is used to deserialize the NoData from the specified byte array and start index.
+ *
+ * @param byteArray Specify the byte array.
+ * @param startIndex Specify the start index from the byte array.
+ * @return Always 0, because NoData consumes no bytes.
+ */
+ public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) {
+ return 0;
}
- public FileNodeList setChildren(List<FileNode> children) {
- this.children = children;
- return this;
+ /**
+ * This method is used to convert the element of NoData into a byte List.
+ *
+ * @return Return the byte list which store the byte information of NoData.
+ */
+ public List<Byte> serializeToByteList() {
+ return new ArrayList<>();
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/OneByteOfData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/OneByteOfData.java
new file mode 100644
index 0000000..8c2d64c
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/OneByteOfData.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.util.ArrayList;
+import java.util.List;
+
+ /**
+  * This class is used to represent the property that contains 1 byte of data in the
+  * PropertySet.rgData stream field.
+  */
+ public class OneByteOfData implements IProperty {
+     public byte data;
+
+     /**
+      * This method is used to deserialize the OneByteOfData from the specified byte array and start index.
+      *
+      * @param byteArray  Specify the byte array.
+      * @param startIndex Specify the start index from the byte array.
+      * @return Return the length in byte of the OneByteOfData, which is always 1.
+      */
+     @Override
+     public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) {
+         this.data = byteArray[startIndex];
+         return 1;
+     }
+
+     /**
+      * This method is used to convert the element of OneByteOfData into a byte List.
+      *
+      * @return Return a new list that contains exactly the one stored byte.
+      */
+     @Override
+     public List<Byte> serializeToByteList() {
+         // new ArrayList<>(this.data) would pick the ArrayList(int initialCapacity)
+         // constructor: an empty list, or an IllegalArgumentException for negative bytes.
+         List<Byte> byteList = new ArrayList<>(1);
+         byteList.add(this.data);
+         return byteList;
+     }
+ }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/PrtArrayOfPropertyValues.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/PrtArrayOfPropertyValues.java
new file mode 100644
index 0000000..73a01cb
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/PrtArrayOfPropertyValues.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.PropertySet;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.PropertyID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+
+
+ /**
+  * The class is used to represent the prtArrayOfPropertyValues: a property count, a
+  * property ID and that many property sets.
+  */
+ public class PrtArrayOfPropertyValues implements IProperty {
+     public int cProperties;
+     public PropertyID propertyID;
+     public PropertySet[] data;
+
+     /**
+      * This method is used to deserialize the prtArrayOfPropertyValues from the specified byte array and start index.
+      *
+      * @param byteArray  Specify the byte array.
+      * @param startIndex Specify the start index from the byte array.
+      * @return Return the length in byte of the prtArrayOfPropertyValues.
+      * @throws IOException if the declared number of property sets is negative or larger
+      *                     than the number of bytes left in the array.
+      */
+     @Override
+     public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) throws IOException {
+         int index = startIndex;
+         this.cProperties = BitConverter.toInt32(byteArray, index);
+         index += 4;
+         // NOTE(review): MS-ONESTORE appears to describe the property ID as optional when
+         // cProperties is 0; it is always read here, as before. Confirm against the spec.
+         this.propertyID = new PropertyID();
+         int len = this.propertyID.doDeserializeFromByteArray(byteArray, index);
+         index += len;
+
+         // The count comes from the file, so check it before sizing an array with it.
+         // Assumes every PropertySet occupies at least one byte, so a count above the
+         // remaining byte count cannot be valid.
+         int remaining = byteArray.length - index;
+         if (this.cProperties < 0 || this.cProperties > remaining) {
+             throw new IOException(
+                     "Invalid number of property sets " + this.cProperties + " with " + remaining
+                             + " bytes remaining");
+         }
+
+         this.data = new PropertySet[this.cProperties];
+         for (int i = 0; i < this.cProperties; i++) {
+             this.data[i] = new PropertySet();
+             int length = this.data[i].doDeserializeFromByteArray(byteArray, index);
+             index += length;
+         }
+
+         return index - startIndex;
+     }
+
+     /**
+      * This method is used to convert the element of the prtArrayOfPropertyValues into a byte List.
+      *
+      * @return Return the byte list which store the byte information of the prtArrayOfPropertyValues:
+      * the count, then the property ID, then each property set in order.
+      */
+     @Override
+     public List<Byte> serializeToByteList() throws IOException {
+         List<Byte> byteList = new ArrayList<>();
+         for (byte b : BitConverter.getBytes(this.cProperties)) {
+             byteList.add(b);
+         }
+         byteList.addAll(this.propertyID.serializeToByteList());
+         for (PropertySet ps : this.data) {
+             byteList.addAll(ps.serializeToByteList());
+         }
+         return byteList;
+     }
+ }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/PrtFourBytesOfLengthFollowedByData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/PrtFourBytesOfLengthFollowedByData.java
new file mode 100644
index 0000000..6a8655c
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/PrtFourBytesOfLengthFollowedByData.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+
+ /**
+  * This class is used to represent the prtFourBytesOfLengthFollowedByData: a 4-byte
+  * length followed by that many bytes of data.
+  */
+ public class PrtFourBytesOfLengthFollowedByData implements IProperty {
+     public int cb;
+
+     public byte[] data;
+
+     /**
+      * This method is used to deserialize the prtFourBytesOfLengthFollowedByData from
+      * the specified byte array and start index.
+      *
+      * @param byteArray  Specify the byte array.
+      * @param startIndex Specify the start index from the byte array.
+      * @return Return the length in byte of the prtFourBytesOfLengthFollowedByData.
+      * @throws IOException if the declared length is negative or exceeds the number of
+      *                     bytes that follow the length field.
+      */
+     @Override
+     public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) throws IOException {
+         long declaredLength = BitConverter.toUInt32(byteArray, startIndex);
+         int dataStart = startIndex + 4;
+         long available = (long) byteArray.length - dataStart;
+         // The length comes from the file. Without this check Arrays.copyOfRange would
+         // allocate the full declared size (zero padded), or throw for a negative range.
+         if (declaredLength < 0 || declaredLength > available) {
+             throw new IOException(
+                     "Invalid data length " + declaredLength + " with " + available
+                             + " bytes remaining");
+         }
+         this.cb = (int) declaredLength;
+         this.data = Arrays.copyOfRange(byteArray, dataStart, dataStart + this.cb);
+
+         return 4 + this.cb;
+     }
+
+     /**
+      * This method is used to convert the element of prtFourBytesOfLengthFollowedByData into a byte List.
+      *
+      * @return Return the byte list which store the byte information of prtFourBytesOfLengthFollowedByData:
+      * the bytes of the length followed by the data bytes.
+      */
+     @Override
+     public List<Byte> serializeToByteList() {
+         List<Byte> byteList = new ArrayList<>();
+         for (byte b : BitConverter.getBytes(this.cb)) {
+             byteList.add(b);
+         }
+         for (byte b : this.data) {
+             byteList.add(b);
+         }
+         return byteList;
+     }
+ }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/TwoBytesOfData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/TwoBytesOfData.java
new file mode 100644
index 0000000..296e1c4
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/property/TwoBytesOfData.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.property;
+
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+/**
+ * A property that contains 2 bytes of data in the PropertySet.rgData stream field.
+ */
+public class TwoBytesOfData implements IProperty {
+    /** The two bytes read by {@link #doDeserializeFromByteArray(byte[], int)}. */
+    public byte[] data;
+
+    /**
+     * Reads the TwoBytesOfData from the given byte array, starting at the given index.
+     *
+     * @param byteArray  the byte array to read from.
+     * @param startIndex the index of the first of the two bytes.
+     * @return the number of bytes read, which is always 2.
+     */
+    public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) {
+        byte first = byteArray[startIndex];
+        byte second = byteArray[startIndex + 1];
+        this.data = new byte[]{first, second};
+
+        return 2;
+    }
+
+    /**
+     * Converts the TwoBytesOfData into a byte list.
+     *
+     * @return the byte list holding the bytes of {@link #data}.
+     */
+    public List<Byte> serializeToByteList() {
+        List<Byte> serialized = ByteUtil.toListOfByte(this.data);
+        return serialized;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/CellManifestCurrentRevision.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/CellManifestCurrentRevision.java
new file mode 100644
index 0000000..1cc4661
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/CellManifestCurrentRevision.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Stream object whose only item is the cell manifest current revision extended GUID.
+ */
+public class CellManifestCurrentRevision extends StreamObject {
+    public ExGuid cellManifestCurrentRevisionExGuid;
+
+    /**
+     * Creates a CellManifestCurrentRevision with its stream object header type set.
+     */
+    public CellManifestCurrentRevision() {
+        super(StreamObjectTypeHeaderStart.CellManifestCurrentRevision);
+    }
+
+    /**
+     * De-serializes the items of this element.
+     *
+     * @param byteArray     the byte array to read from.
+     * @param currentIndex  the start position; advanced past the items on success.
+     * @param lengthOfItems the expected length of the items; parsing fails when the number of
+     *                      bytes actually read differs from it.
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        // Parse through a private cursor so the caller's index only moves on success.
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+        this.cellManifestCurrentRevisionExGuid =
+                BasicObject.parse(byteArray, cursor, ExGuid.class);
+
+        int consumed = cursor.get() - currentIndex.get();
+        if (consumed != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(),
+                    "CellManifestCurrentRevision", "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized items of this element to the given byte list.
+     *
+     * @param byteList the byte list to append to.
+     * @return the number of bytes appended.
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        int sizeBefore = byteList.size();
+        byteList.addAll(this.cellManifestCurrentRevisionExGuid.serializeToByteList());
+        return byteList.size() - sizeBefore;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/CellManifestDataElementData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/CellManifestDataElementData.java
new file mode 100644
index 0000000..60428d1
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/CellManifestDataElementData.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Cell manifest data element: wraps a single {@link CellManifestCurrentRevision}.
+ */
+public class CellManifestDataElementData extends DataElementData {
+    public CellManifestCurrentRevision cellManifestCurrentRevision;
+
+    /**
+     * Creates a CellManifestDataElementData holding a new, empty CellManifestCurrentRevision.
+     */
+    public CellManifestDataElementData() {
+        this.cellManifestCurrentRevision = new CellManifestCurrentRevision();
+    }
+
+    /**
+     * De-serializes the cell manifest current revision and reports how many bytes it occupied.
+     *
+     * @param byteArray  the byte array to read from.
+     * @param startIndex the start position.
+     * @return the element length, i.e. the number of bytes read from the start position.
+     */
+    @Override
+    public int deserializeDataElementDataFromByteArray(byte[] byteArray, int startIndex)
+            throws TikaException, IOException {
+        AtomicInteger cursor = new AtomicInteger(startIndex);
+        CellManifestCurrentRevision parsed =
+                StreamObject.getCurrent(byteArray, cursor, CellManifestCurrentRevision.class);
+        this.cellManifestCurrentRevision = parsed;
+
+        int consumed = cursor.get() - startIndex;
+        return consumed;
+    }
+
+    /**
+     * Converts the element into a byte list by serializing the cell manifest current revision.
+     *
+     * @return the byte list.
+     */
+    @Override
+    public List<Byte> serializeToByteList() throws TikaException, IOException {
+        CellManifestCurrentRevision revision = this.cellManifestCurrentRevision;
+        return revision.serializeToByteList();
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElement.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElement.java
new file mode 100644
index 0000000..058c924
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElement.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.exception.DataElementParseErrorException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.DataElementType;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.SerialNumber;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.SequenceNumberGenerator;
+
+/**
+ * Stream object for a data element: a header made of the data element extended GUID, the
+ * serial number and the data element type, followed by the type-specific
+ * {@link DataElementData}.
+ */
+public class DataElement extends StreamObject {
+
+    /**
+     * Maps each data element type to the class that holds its data. Types without a matching
+     * class in this package have no entry.
+     */
+    private static final Map<DataElementType, Class<?>> DATA_ELEMENT_DATA_TYPE_MAPPING;
+
+    /*
+     * Looks up, for every DataElementType, a class in this package with the same name as the
+     * enum constant.
+     */
+    static {
+        DATA_ELEMENT_DATA_TYPE_MAPPING = new HashMap<>();
+        String packagePrefix = DataElement.class.getPackage().getName() + ".";
+        for (DataElementType value : DataElementType.values()) {
+            try {
+                DATA_ELEMENT_DATA_TYPE_MAPPING.put(value,
+                        Class.forName(packagePrefix + value.name()));
+            } catch (ClassNotFoundException ignored) {
+                // This is OK, we are not pulling over every single class
+            }
+        }
+    }
+
+    public ExGuid dataElementExGuid;
+    public SerialNumber serialNumber;
+    public DataElementType dataElementType;
+    public DataElementData data;
+
+    /**
+     * Initializes a new instance of the DataElement class with a freshly generated extended
+     * GUID and serial number.
+     *
+     * @param type the data element type; must be a type that has a data class mapping.
+     * @param data the data of the element.
+     * @throws IllegalArgumentException if the type is null or has no data class mapping.
+     */
+    public DataElement(DataElementType type, DataElementData data) {
+        super(StreamObjectTypeHeaderStart.DataElement);
+        if (!DATA_ELEMENT_DATA_TYPE_MAPPING.containsKey(type)) {
+            // A null type also ends up here; report it instead of failing on type.getIntVal().
+            throw new IllegalArgumentException("Invalid argument type value " +
+                    (type == null ? "null" : String.valueOf(type.getIntVal())));
+        }
+
+        this.dataElementType = type;
+        this.data = data;
+        this.dataElementExGuid =
+                new ExGuid(SequenceNumberGenerator.GetCurrentSerialNumber(), UUID.randomUUID());
+        this.serialNumber = new SerialNumber(UUID.randomUUID(),
+                SequenceNumberGenerator.GetCurrentSerialNumber());
+    }
+
+    /**
+     * Initializes a new, empty instance of the DataElement class.
+     */
+    public DataElement() {
+        super(StreamObjectTypeHeaderStart.DataElement);
+    }
+
+    /**
+     * Returns the data of the element as the requested type.
+     *
+     * @param clazz the exact class the data is expected to have.
+     * @return the data of the element.
+     * @throws TikaException if no data is set, or its class is not exactly {@code clazz}.
+     */
+    public <T extends DataElementData> T getData(Class<T> clazz) throws TikaException {
+        if (this.data == null) {
+            throw new TikaException(String.format(Locale.US,
+                    "Unable to cast DataElementData to the type %s, no data is set",
+                    clazz.getName()));
+        }
+        if (!this.data.getClass().equals(clazz)) {
+            throw new TikaException(String.format(Locale.US,
+                    "Unable to cast DataElementData to the type %s, its actual type is %s",
+                    clazz.getName(), this.data.getClass().getName()));
+        }
+        return clazz.cast(this.data);
+    }
+
+    /**
+     * De-serializes the element: first the header items (extended GUID, serial number and
+     * type), then the type-specific data that follows them.
+     *
+     * @param byteArray     the byte array to read from.
+     * @param currentIndex  the start position; advanced past the header and data on success.
+     * @param lengthOfItems the expected length of the header items.
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems) throws TikaException {
+        AtomicInteger index = new AtomicInteger(currentIndex.get());
+
+        try {
+            this.dataElementExGuid = BasicObject.parse(byteArray, index, ExGuid.class);
+            this.serialNumber = BasicObject.parse(byteArray, index, SerialNumber.class);
+            this.dataElementType = DataElementType.fromIntVal(
+                    (int) BasicObject.parse(byteArray, index, Compact64bitInt.class)
+                            .getDecodedValue());
+        } catch (Exception e) {
+            throw new DataElementParseErrorException(index.get(), e);
+        }
+
+        if (index.get() - currentIndex.get() != lengthOfItems) {
+            throw new DataElementParseErrorException(currentIndex.get(),
+                    "Failed to check the data element header length, whose value does not cover the " +
+                            "dataElementExGUID, SerialNumber and DataElementType", null);
+        }
+
+        Class<?> dataClass = DATA_ELEMENT_DATA_TYPE_MAPPING.get(this.dataElementType);
+        if (dataClass == null) {
+            throw new DataElementParseErrorException(index.get(),
+                    "Failed to create specific data element instance with the type " +
+                            this.dataElementType, null);
+        }
+
+        try {
+            this.data = (DataElementData) dataClass.getDeclaredConstructor().newInstance();
+        } catch (ReflectiveOperationException e) {
+            throw new TikaException("Could not instantiate a " + dataElementType, e);
+        }
+
+        try {
+            index.addAndGet(
+                    this.data.deserializeDataElementDataFromByteArray(byteArray, index.get()));
+        } catch (Exception e) {
+            throw new DataElementParseErrorException(index.get(), e);
+        }
+
+        currentIndex.set(index.get());
+    }
+
+    /**
+     * Appends the serialized header items and then the serialized data to the given byte list.
+     *
+     * @param byteList the byte list to append to.
+     * @return the length of the header items only; the data bytes appended after them are not
+     * counted.
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException, TikaException {
+        int startIndex = byteList.size();
+        byteList.addAll(this.dataElementExGuid.serializeToByteList());
+        byteList.addAll(this.serialNumber.serializeToByteList());
+        byteList.addAll(
+                new Compact64bitInt(this.dataElementType.getIntVal()).serializeToByteList());
+
+        int headerLength = byteList.size() - startIndex;
+        byteList.addAll(this.data.serializeToByteList());
+
+        return headerLength;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementData.java
new file mode 100644
index 0000000..9e8f426
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementData.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.IFSSHTTPBSerializable;
+
+/**
+ * Base class for the type-specific data carried by a data element.
+ */
+public abstract class DataElementData implements IFSSHTTPBSerializable {
+ /**
+ * De-serializes the data element data from the byte array, starting at the given position.
+ *
+ * @param byteArray The byte array to read from.
+ * @param startIndex The position where to start.
+ * @return The length of the item, i.e. the number of bytes read from the start position.
+ */
+ public abstract int deserializeDataElementDataFromByteArray(byte[] byteArray, int startIndex)
+ throws TikaException, IOException;
+
+ /**
+ * Serializes the data element data to a byte list.
+ *
+ * @return The byte list.
+ */
+ public abstract List<Byte> serializeToByteList() throws TikaException, IOException;
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementHash.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementHash.java
new file mode 100644
index 0000000..2657dd0
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementHash.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BinaryItem;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+
+/**
+ * Specifies a data element hash stream object: a hash scheme followed by the hash data.
+ */
+public class DataElementHash extends StreamObject {
+    public Compact64bitInt dataElementHashScheme;
+    public BinaryItem dataElementHashData;
+
+    /**
+     * Creates a DataElementHash with its stream object header type set.
+     */
+    public DataElementHash() {
+        super(StreamObjectTypeHeaderStart.DataElementHash);
+    }
+
+    /**
+     * De-serializes the hash scheme and the hash data, in that order.
+     *
+     * @param byteArray     the byte array to read from.
+     * @param currentIndex  the start position; advanced past the items on success.
+     * @param lengthOfItems the expected length of the items; parsing fails when the number of
+     *                      bytes actually read differs from it.
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        // Parse through a private cursor so the caller's index only moves on success.
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+        this.dataElementHashScheme = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+        this.dataElementHashData = BasicObject.parse(byteArray, cursor, BinaryItem.class);
+
+        int consumed = cursor.get() - currentIndex.get();
+        if (consumed != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "DataElementHash",
+                    "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized hash scheme and hash data to the given byte list.
+     *
+     * @param byteList the byte list to append to.
+     * @return the number of bytes appended.
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        List<Byte> schemeBytes = this.dataElementHashScheme.serializeToByteList();
+        byteList.addAll(schemeBytes);
+
+        List<Byte> hashBytes = this.dataElementHashData.serializeToByteList();
+        byteList.addAll(hashBytes);
+
+        return schemeBytes.size() + hashBytes.size();
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementPackage.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementPackage.java
new file mode 100644
index 0000000..784b9ad
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataElementPackage.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Stream object made of one reserved byte followed by a sequence of {@link DataElement}
+ * stream objects.
+ */
+public class DataElementPackage extends StreamObject {
+
+    /** Length of the items of this stream object: the single reserved byte. */
+    private static final int RESERVED_LENGTH = 1;
+
+    public List<DataElement> dataElements = new ArrayList<>();
+    public byte reserved;
+
+    /**
+     * Initializes a new instance of the DataElementPackage class.
+     */
+    public DataElementPackage() {
+        super(StreamObjectTypeHeaderStart.DataElementPackage);
+    }
+
+    /**
+     * De-serializes the reserved byte and then every data element that follows it.
+     *
+     * @param byteArray     the byte array to read from.
+     * @param currentIndex  the start position; advanced past everything that was read.
+     * @param lengthOfItems the length of the items, which must be 1.
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        if (lengthOfItems != RESERVED_LENGTH) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "DataElementPackage",
+                    "Stream object over-parse error", null);
+        }
+
+        this.reserved = byteArray[currentIndex.getAndIncrement()];
+        this.dataElements = new ArrayList<>();
+
+        // Keep collecting data elements until the next stream object is not a DataElement.
+        AtomicReference<DataElement> holder = new AtomicReference<>();
+        while (true) {
+            boolean found =
+                    StreamObject.tryGetCurrent(byteArray, currentIndex, holder, DataElement.class);
+            if (!found) {
+                break;
+            }
+            this.dataElements.add(holder.get());
+        }
+    }
+
+    /**
+     * Appends a zero reserved byte and then every serialized data element to the byte list.
+     *
+     * @param byteList the byte list to append to.
+     * @return the length of the items, which is always 1 (the reserved byte); the data element
+     * bytes appended after it are not counted.
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws TikaException, IOException {
+        // The reserved byte is always written as 0, whatever value was read into the field.
+        byteList.add((byte) 0);
+        for (DataElement element : this.dataElements) {
+            List<Byte> elementBytes = element.serializeToByteList();
+            byteList.addAll(elementBytes);
+        }
+        return RESERVED_LENGTH;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataHashObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataHashObject.java
new file mode 100644
index 0000000..a318250
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataHashObject.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BinaryItem;
+
+/**
+ * Stream object that carries a single binary item of hash data.
+ */
+public class DataHashObject extends StreamObject {
+    /**
+     * Gets or sets a binary item as specified in [MS-FSSHTTPB] section 2.2.1.3 that specifies a
+     * value that is unique to the file data represented by this root node object.
+     * The value of this item depends on the file chunking algorithm used, as specified in
+     * section 2.4.
+     */
+    public BinaryItem data;
+
+    /**
+     * Initializes a new instance of the DataHashObject class with an empty binary item.
+     */
+    public DataHashObject() {
+        super(StreamObjectTypeHeaderStart.DataHashObject);
+        this.data = new BinaryItem();
+    }
+
+    /**
+     * Two DataHashObjects are equal when their data items are equal.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        DataHashObject that = (DataHashObject) o;
+        return Objects.equals(data, that.data);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(data);
+    }
+
+    @Override
+    public String toString() {
+        return "DataHashObject{" + "Data=" + data + ", streamObjectHeaderEnd=" +
+                streamObjectHeaderEnd + '}';
+    }
+
+    /**
+     * De-serializes the binary item of this element.
+     *
+     * @param byteArray     the byte array to read from.
+     * @param currentIndex  the start position; advanced past the item on success.
+     * @param lengthOfItems the expected length of the items; parsing fails when the number of
+     *                      bytes actually read differs from it.
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        AtomicInteger index = new AtomicInteger(currentIndex.get());
+
+        this.data = BasicObject.parse(byteArray, index, BinaryItem.class);
+
+        if (index.get() - currentIndex.get() != lengthOfItems) {
+            // Report this stream object's own name so the failure can be traced to it.
+            throw new StreamObjectParseErrorException(currentIndex.get(), "DataHashObject",
+                    "Stream Object over-parse error", null);
+        }
+
+        currentIndex.set(index.get());
+    }
+
+    /**
+     * Appends the serialized binary item to the given byte list.
+     *
+     * @param byteList the byte list to append to.
+     * @return the number of bytes appended.
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        int length = byteList.size();
+        byteList.addAll(this.data.serializeToByteList());
+        return byteList.size() - length;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataSizeObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataSizeObject.java
new file mode 100644
index 0000000..9cad9f8
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/DataSizeObject.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.LittleEndianBitConverter;
+
+/**
+ * Data size stream object: a single 8-byte data size value.
+ */
+public class DataSizeObject extends StreamObject {
+    /** Number of bytes occupied by the data size value. */
+    private static final int DATA_SIZE_LENGTH = 8;
+
+    public long dataSize;
+
+    /**
+     * Creates a DataSizeObject with its stream object header type set.
+     */
+    public DataSizeObject() {
+        super(StreamObjectTypeHeaderStart.DataSizeObject);
+    }
+
+    /**
+     * De-serializes the data size value.
+     *
+     * @param byteArray     the byte array to read from.
+     * @param currentIndex  the start position; advanced by 8 on success.
+     * @param lengthOfItems the length of the items, which must be 8.
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems) throws IOException {
+        if (lengthOfItems == DATA_SIZE_LENGTH) {
+            this.dataSize = LittleEndianBitConverter.toUInt64(byteArray, currentIndex.get());
+            currentIndex.addAndGet(DATA_SIZE_LENGTH);
+            return;
+        }
+
+        throw new StreamObjectParseErrorException(currentIndex.get(), "DataSize",
+                "Stream Object over-parse error", null);
+    }
+
+    /**
+     * Appends the bytes of the data size value to the given byte list.
+     *
+     * @param byteList the byte list to append to.
+     * @return a constant value 8.
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) {
+        ByteUtil.appendByteArrayToListOfByte(byteList,
+                LittleEndianBitConverter.getBytes(this.dataSize));
+        return DATA_SIZE_LENGTH;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/EncryptionObject.java
similarity index 75%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/EncryptionObject.java
index 1239231..cc52e3c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/Error.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/EncryptionObject.java
@@ -14,16 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-public enum Error {
- OK,
- SEGV,
- RESERVED_NONZERO,
- UNKNOWN_ENUM,
- INVALID_CONSTANT,
- STRING_TOO_SHORT,
- HEX_OUT_OF_RANGE,
- COMPACT_ID_MISSING,
- UNKNOWN_GUID,
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+
+/**
+ * Represents the encryption revision store object: an object declaration plus its raw data bytes.
+ */
+class EncryptionObject {
+ public ObjectGroupObjectDeclare objectDeclaration;
+ public byte[] objectData;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/FileDataObject.java
similarity index 69%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/FileDataObject.java
index b79ef8a..be305f8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/FileDataObject.java
@@ -14,17 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-class FileNodePtrBackPush {
- FileNodePtr parent;
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
- public FileNodePtrBackPush(FileNodePtr parent) {
- this.parent = parent;
- this.parent.nodeListPositions.add(0);
- }
-
- public void dec() {
- parent.nodeListPositions.remove(parent.nodeListPositions.size() - 1);
- }
+/**
+ * Represents the file data: a BLOB declaration, a BLOB reference and the BLOB data element.
+ */
+class FileDataObject {
+ public ObjectGroupObjectBLOBDataDeclaration objectDataBLOBDeclaration;
+ public ObjectGroupObjectDataBLOBReference objectDataBLOBReference;
+ public DataElement objectDataBLOBDataElement;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/IntermediateNodeObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/IntermediateNodeObject.java
new file mode 100644
index 0000000..b693905
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/IntermediateNodeObject.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.chunking.ChunkingFactory;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.SequenceNumberGenerator;
+
+/**
+ * Node object whose content is the concatenation of the content of its child
+ * {@link LeafNodeObject}s, held in {@code intermediateNodeObjectList}.
+ */
+public class IntermediateNodeObject extends NodeObject {
+    /**
+     * Creates an IntermediateNodeObject with an empty child list.
+     */
+    public IntermediateNodeObject() {
+        super(StreamObjectTypeHeaderStart.IntermediateNodeObject);
+        this.intermediateNodeObjectList = new ArrayList<>();
+    }
+
+    /**
+     * Concatenates the content of every child node, in list order.
+     *
+     * @return the bytes of all children appended one after another
+     * @throws TikaException if a child cannot produce its content
+     */
+    @Override
+    public List<Byte> getContent() throws TikaException {
+        List<Byte> allBytes = new ArrayList<>();
+        List<LeafNodeObject> children = this.intermediateNodeObjectList;
+
+        for (int i = 0; i < children.size(); i++) {
+            allBytes.addAll(children.get(i).getContent());
+        }
+
+        return allBytes;
+    }
+
+    /**
+     * Reads the signature and the data size objects from the byte array.
+     *
+     * @param byteArray     the bytes to read from
+     * @param currentIndex  the start position; advanced past the parsed objects on success
+     * @param lengthOfItems the length of the items; anything other than 0 is rejected
+     * @throws StreamObjectParseErrorException if {@code lengthOfItems} is not 0
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        if (lengthOfItems != 0) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "IntermediateNodeObject",
+                    "Stream Object over-parse error", null);
+        }
+
+        // Work on a private cursor so the caller's index only moves once both reads succeed.
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+        signature = StreamObject.getCurrent(byteArray, cursor, SignatureObject.class);
+        dataSize = StreamObject.getCurrent(byteArray, cursor, DataSizeObject.class);
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized signature followed by the serialized data size.
+     *
+     * @param byteList the list the bytes are appended to
+     * @return always 0
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws TikaException, IOException {
+        byteList.addAll(signature.serializeToByteList());
+        byteList.addAll(dataSize.serializeToByteList());
+        return 0;
+    }
+
+    /**
+     * Builds an {@link IntermediateNodeObject} that acts as the root node for a file's content.
+     */
+    public static class RootNodeObjectBuilder {
+        /**
+         * Builds a root node from raw file bytes: a fresh signature, a data size equal to the
+         * array length, a newly generated extended GUID, and the children produced by
+         * chunking the content.
+         *
+         * @param fileContent the file bytes
+         * @return the root node built from the byte array
+         */
+        public IntermediateNodeObject Build(byte[] fileContent) throws TikaException, IOException {
+            IntermediateNodeObject root = new IntermediateNodeObject();
+
+            root.signature = new SignatureObject();
+            DataSizeObject size = new DataSizeObject();
+            size.dataSize = fileContent.length;
+            root.dataSize = size;
+
+            root.exGuid =
+                    new ExGuid(SequenceNumberGenerator.GetCurrentSerialNumber(), UUID.randomUUID());
+            root.intermediateNodeObjectList =
+                    ChunkingFactory.createChunkingInstance(fileContent).chunking();
+            return root;
+        }
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/JCIDObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/JCIDObject.java
new file mode 100644
index 0000000..cac0f9b
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/JCIDObject.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.JCID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+
+/**
+ * Pairs an object declaration with the {@link JCID} decoded from that object's data.
+ */
+public class JCIDObject {
+    public ObjectGroupObjectDeclare objectDeclaration;
+    public JCID jcid;
+
+    /**
+     * Stores the declaration and deserializes a new {@link JCID} from offset 0 of the
+     * object data's binary content.
+     *
+     * @param objectDeclaration The Object Declaration structure.
+     * @param objectData        The Object Data structure whose {@code data.content} holds the JCID.
+     * @throws IOException propagated from the byte conversion or the JCID deserialization
+     */
+    public JCIDObject(ObjectGroupObjectDeclare objectDeclaration,
+                      ObjectGroupObjectData objectData) throws IOException {
+        JCID parsed = new JCID();
+        byte[] rawJcid = ByteUtil.toByteArray(objectData.data.content);
+        parsed.doDeserializeFromByteArray(rawJcid, 0);
+
+        this.objectDeclaration = objectDeclaration;
+        this.jcid = parsed;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/LeafNodeObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/LeafNodeObject.java
new file mode 100644
index 0000000..c9dd7d2
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/LeafNodeObject.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BinaryItem;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.DataNodeObjectData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.SequenceNumberGenerator;
+
+/**
+ * Node object that carries either a single data node ({@link #dataNodeObjectData}) or a list
+ * of child {@code LeafNodeObject}s ({@code intermediateNodeObjectList}).
+ */
+public class LeafNodeObject extends NodeObject {
+    public DataNodeObjectData dataNodeObjectData;
+    public DataHashObject dataHash;
+
+    /**
+     * Initializes a new instance of the LeafNodeObject class.
+     */
+    public LeafNodeObject() {
+        super(StreamObjectTypeHeaderStart.LeafNodeObject);
+    }
+
+    /**
+     * Returns the bytes represented by this node: the data node's bytes when one is present,
+     * otherwise the concatenated content of the child nodes.
+     *
+     * @return the byte list of this node's content
+     * @throws TikaException if neither a data node nor a child list is set
+     */
+    @Override
+    public List<Byte> getContent() throws TikaException {
+        List<Byte> content = new ArrayList<>();
+
+        if (this.dataNodeObjectData != null) {
+            ByteUtil.appendByteArrayToListOfByte(content, this.dataNodeObjectData.objectData);
+        } else if (this.intermediateNodeObjectList != null) {
+            for (LeafNodeObject child : this.intermediateNodeObjectList) {
+                content.addAll(child.getContent());
+            }
+        } else {
+            throw new TikaException(
+                    "The DataNodeObjectData and IntermediateNodeObjectList properties in " +
+                            "LeafNodeObjectData cannot be null at the same time.");
+        }
+
+        return content;
+    }
+
+    /**
+     * Reads the signature, the data size and, when present, the data hash object.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the parsed objects on success
+     * @param lengthOfItems The length of the items; anything other than 0 is rejected
+     * @throws StreamObjectParseErrorException if {@code lengthOfItems} is not 0
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        AtomicInteger index = new AtomicInteger(currentIndex.get());
+        if (lengthOfItems != 0) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "LeafNodeObjectData",
+                    "Stream Object over-parse error", null);
+        }
+
+        this.signature = StreamObject.getCurrent(byteArray, index, SignatureObject.class);
+        this.dataSize = StreamObject.getCurrent(byteArray, index, DataSizeObject.class);
+
+        // Peek at the next header: the data hash object is optional and is only consumed
+        // when the header that follows is of that type.
+        AtomicReference<StreamObjectHeaderStart> streamObjectHeader = new AtomicReference<>();
+        if ((StreamObjectHeaderStart.tryParse(byteArray, index.get(), streamObjectHeader)) != 0) {
+            if (streamObjectHeader.get().type == StreamObjectTypeHeaderStart.DataHashObject) {
+                this.dataHash = StreamObject.getCurrent(byteArray, index, DataHashObject.class);
+            }
+        }
+
+        currentIndex.set(index.get());
+    }
+
+    /**
+     * Appends the serialized signature followed by the serialized data size.
+     *
+     * @param byteList A Byte list
+     * @return always 0
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws TikaException, IOException {
+        byteList.addAll(this.signature.serializeToByteList());
+        byteList.addAll(this.dataSize.serializeToByteList());
+        return 0;
+    }
+
+    /**
+     * Builds {@link LeafNodeObject} instances, either from object group data elements or from
+     * a raw byte array.
+     */
+    public static class IntermediateNodeObjectBuilder {
+        /**
+         * Builds a node from the object data of an object group, resolving the referenced
+         * extended GUIDs against the given object group list. Referenced nodes are built
+         * recursively.
+         *
+         * @param objectGroupList  Specify the list of object group data elements.
+         * @param dataObj          Specify the object group object.
+         * @param intermediateGuid Specify the extended GUID assigned to a parsed LeafNodeObject.
+         * @return Return the node object.
+         * @throws TikaException if the object data holds neither a LeafNodeObject nor an
+         *                       IntermediateNodeObject, if its extended GUID array is missing,
+         *                       or if a referenced object cannot be found
+         */
+        public LeafNodeObject Build(List<ObjectGroupDataElementData> objectGroupList,
+                                    ObjectGroupObjectData dataObj,
+                                    ExGuid intermediateGuid) throws TikaException, IOException {
+            byte[] rawNode = ByteUtil.toByteArray(dataObj.data.content);
+            AtomicInteger index = new AtomicInteger(0);
+
+            AtomicReference<LeafNodeObject> leafRef = new AtomicReference<>();
+            if (StreamObject.tryGetCurrent(rawNode, index, leafRef, LeafNodeObject.class)) {
+                requireExGuidArray(dataObj);
+
+                LeafNodeObject node = leafRef.get();
+                node.exGuid = intermediateGuid;
+
+                if (dataObj.objectExGUIDArray.count.getDecodedValue() == 1) {
+                    // Contain a single Data Node Object.
+                    ExGuid dataNodeGuid = dataObj.objectExGUIDArray.content.get(0);
+                    AtomicReference<ObjectGroupObjectDeclare> dataNodeDeclare =
+                            new AtomicReference<>();
+                    ObjectGroupObjectData dataNodeData =
+                            this.FindByExGuid(objectGroupList, dataNodeGuid, dataNodeDeclare);
+                    BinaryItem data = dataNodeData.data;
+
+                    node.dataNodeObjectData =
+                            new DataNodeObjectData(ByteUtil.toByteArray(data.content), 0,
+                                    (int) data.length.getDecodedValue());
+                    node.dataNodeObjectData.exGuid = dataNodeGuid;
+                    node.intermediateNodeObjectList = null;
+                } else {
+                    // Contain a list of LeafNodeObjectData
+                    node.dataNodeObjectData = null;
+                    node.intermediateNodeObjectList = buildChildren(objectGroupList, dataObj);
+                }
+                return node;
+            }
+
+            AtomicReference<IntermediateNodeObject> rootRef = new AtomicReference<>();
+            if (StreamObject.tryGetCurrent(rawNode, index, rootRef,
+                    IntermediateNodeObject.class)) {
+                // In Sub chunking for larger than 1MB zip file, MOSS2010 could return
+                // IntermediateNodeObject. For easy further process, the rootNode will be
+                // replaced by intermediate node instead.
+                requireExGuidArray(dataObj);
+
+                IntermediateNodeObject rootNode = rootRef.get();
+                LeafNodeObject node = new LeafNodeObject();
+                node.dataSize = rootNode.dataSize;
+                node.exGuid = rootNode.exGuid;
+                node.signature = rootNode.signature;
+                node.dataNodeObjectData = null;
+                node.intermediateNodeObjectList = buildChildren(objectGroupList, dataObj);
+                return node;
+            }
+
+            throw new TikaException(
+                    "The object group object data must contain either a " +
+                            "LeafNodeObject or an IntermediateNodeObject.");
+        }
+
+        /**
+         * This method is used to build a node object from a byte array with a signature.
+         * The node gets a newly generated extended GUID and wraps the whole array as its
+         * single data node.
+         *
+         * @param array     Specify the byte array.
+         * @param signature Specify the signature.
+         * @return Return the node object.
+         */
+        public LeafNodeObject Build(byte[] array, SignatureObject signature) {
+            LeafNodeObject nodeObject = new LeafNodeObject();
+            nodeObject.dataSize = new DataSizeObject();
+            nodeObject.dataSize.dataSize = array.length;
+
+            nodeObject.signature = signature;
+            nodeObject.exGuid =
+                    new ExGuid(SequenceNumberGenerator.GetCurrentSerialNumber(), UUID.randomUUID());
+
+            nodeObject.dataNodeObjectData = new DataNodeObjectData(array, 0, array.length);
+            nodeObject.intermediateNodeObjectList = null;
+
+            // Now in the current implementation, one intermediate node only contain one single
+            // data object node.
+            return nodeObject;
+        }
+
+        /**
+         * Fails with a TikaException when the object data has no extended GUID array.
+         */
+        private static void requireExGuidArray(ObjectGroupObjectData dataObj)
+                throws TikaException {
+            if (dataObj.objectExGUIDArray == null) {
+                throw new TikaException(
+                        "Failed to build intermediate node because the object extend GUID " +
+                                "array does not exist.");
+            }
+        }
+
+        /**
+         * Builds one child node for every extended GUID referenced by the object data.
+         */
+        private List<LeafNodeObject> buildChildren(List<ObjectGroupDataElementData> objectGroupList,
+                                                   ObjectGroupObjectData dataObj)
+                throws TikaException, IOException {
+            List<LeafNodeObject> children = new ArrayList<>();
+            for (ExGuid childGuid : dataObj.objectExGUIDArray.content) {
+                AtomicReference<ObjectGroupObjectDeclare> childDeclare = new AtomicReference<>();
+                ObjectGroupObjectData childData =
+                        this.FindByExGuid(objectGroupList, childGuid, childDeclare);
+                children.add(this.Build(objectGroupList, childData, childGuid));
+            }
+            return children;
+        }
+
+        /**
+         * This method is used to find the object group object data using the specified
+         * extended GUID. The first declaration with a matching GUID wins; the object data at
+         * the same position of that group's data list is returned.
+         *
+         * @param objectGroupList Specify the object group data element list.
+         * @param extendedGuid    Specify the extended GUID.
+         * @param declare         Receives the matching ObjectGroupObjectDeclare.
+         * @return Return the object group object data if found.
+         * @throws TikaException if no declaration matches, or the matching declaration has no
+         *                       object data at the same position
+         */
+        private ObjectGroupObjectData FindByExGuid(List<ObjectGroupDataElementData> objectGroupList,
+                                                   ExGuid extendedGuid,
+                                                   AtomicReference<ObjectGroupObjectDeclare> declare)
+                throws TikaException {
+            for (ObjectGroupDataElementData objectGroup : objectGroupList) {
+                List<ObjectGroupObjectDeclare> declarations =
+                        objectGroup.objectGroupDeclarations.objectDeclarationList;
+
+                for (int i = 0; i < declarations.size(); ++i) {
+                    ObjectGroupObjectDeclare objDeclare = declarations.get(i);
+                    if (!objDeclare.objectExtendedGUID.equals(extendedGuid)) {
+                        continue;
+                    }
+
+                    // Declarations and data are matched by position; a malformed file may
+                    // declare more objects than it carries data for.
+                    List<ObjectGroupObjectData> dataList =
+                            objectGroup.objectGroupData.objectGroupObjectDataList;
+                    if (i >= dataList.size()) {
+                        throw new TikaException("Cannot find the object data for " +
+                                extendedGuid.guid.toString());
+                    }
+
+                    declare.set(objDeclare);
+                    return dataList.get(i);
+                }
+            }
+
+            throw new TikaException("Cannot find the " + extendedGuid.guid.toString());
+        }
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/NodeObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/NodeObject.java
new file mode 100644
index 0000000..e26cfea
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/NodeObject.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Base class for the node stream objects ({@link LeafNodeObject} and
+ * IntermediateNodeObject). It holds the state the node types share and requires each
+ * subclass to expose the bytes it represents through {@link #getContent()}.
+ */
+public abstract class NodeObject extends StreamObject {
+ /** Extended GUID of this node; assigned by the builders that create nodes. */
+ public org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid exGuid;
+ /** Child nodes; LeafNodeObject builders set this to null when the node wraps a data node. */
+ public List<LeafNodeObject> intermediateNodeObjectList;
+ /** Signature object; read and written by the subclasses' (de)serialization. */
+ public SignatureObject signature;
+ /** Data size object; read and written by the subclasses' (de)serialization. */
+ public DataSizeObject dataSize;
+
+ /**
+ * Initializes a new instance of the NodeObject class.
+ *
+ * @param headerType Specify the node object header type.
+ */
+ protected NodeObject(StreamObjectTypeHeaderStart headerType) {
+ super(headerType);
+ }
+
+ /**
+ * Get all the content which is represented by the node object.
+ *
+ * @return Return the byte list of node object content.
+ * @throws TikaException if the content cannot be assembled
+ */
+ public abstract List<Byte> getContent() throws TikaException;
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupData.java
new file mode 100644
index 0000000..47518e8
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupData.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Stream object that holds the object data entries and the object data BLOB references of
+ * an object group.
+ */
+public class ObjectGroupData extends StreamObject {
+    public List<ObjectGroupObjectData> objectGroupObjectDataList;
+    public List<ObjectGroupObjectDataBLOBReference> objectGroupObjectDataBLOBReferenceList;
+
+    /**
+     * Initializes a new instance of the ObjectGroupData class with two empty lists.
+     */
+    public ObjectGroupData() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupData);
+        this.objectGroupObjectDataList = new ArrayList<>();
+        this.objectGroupObjectDataBLOBReferenceList = new ArrayList<>();
+    }
+
+    /**
+     * Appends every object data entry, then every BLOB reference, to the byte list.
+     * A list that is null is skipped.
+     *
+     * @param byteList A Byte list
+     * @return A constant value 0
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws TikaException, IOException {
+        if (this.objectGroupObjectDataList != null) {
+            for (ObjectGroupObjectData objectGroupObjectData : this.objectGroupObjectDataList) {
+                byteList.addAll(objectGroupObjectData.serializeToByteList());
+            }
+        }
+
+        if (this.objectGroupObjectDataBLOBReferenceList != null) {
+            for (ObjectGroupObjectDataBLOBReference objectGroupObjectDataBLOBReference :
+                    this.objectGroupObjectDataBLOBReferenceList) {
+                byteList.addAll(objectGroupObjectDataBLOBReference.serializeToByteList());
+            }
+        }
+
+        return 0;
+    }
+
+    /**
+     * Reads inner stream objects until no further header can be parsed, sorting each into the
+     * object data list or the BLOB reference list. Both lists are reset before parsing.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the parsed objects on success
+     * @param lengthOfItems The length of the items; anything other than 0 is rejected
+     * @throws StreamObjectParseErrorException if {@code lengthOfItems} is not 0 or an inner
+     *                                         object has an unexpected type
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        if (lengthOfItems != 0) {
+            // Report this class's own name (previously "ObjectGroupDeclarations").
+            throw new StreamObjectParseErrorException(currentIndex.get(), "ObjectGroupData",
+                    "Stream object over-parse error", null);
+        }
+
+        AtomicInteger index = new AtomicInteger(currentIndex.get());
+        int headerLength = 0;
+        AtomicReference<StreamObjectHeaderStart> header = new AtomicReference<>();
+
+        this.objectGroupObjectDataList = new ArrayList<>();
+        this.objectGroupObjectDataBLOBReferenceList = new ArrayList<>();
+
+        // tryParse returns the header length, or 0 when no header starts at the index.
+        while ((headerLength = StreamObjectHeaderStart.tryParse(byteArray, index.get(), header)) !=
+                0) {
+            StreamObjectTypeHeaderStart type = header.get().type;
+            if (type == StreamObjectTypeHeaderStart.ObjectGroupObjectData) {
+                index.addAndGet(headerLength);
+                this.objectGroupObjectDataList.add(
+                        (ObjectGroupObjectData) StreamObject.parseStreamObject(header.get(),
+                                byteArray, index));
+            } else if (type == StreamObjectTypeHeaderStart.ObjectGroupObjectDataBLOBReference) {
+                index.addAndGet(headerLength);
+                this.objectGroupObjectDataBLOBReferenceList.add(
+                        (ObjectGroupObjectDataBLOBReference) StreamObject.parseStreamObject(
+                                header.get(), byteArray, index));
+            } else {
+                throw new StreamObjectParseErrorException(index.get(), "ObjectGroupData",
+                        "Failed to parse ObjectGroupData, expect the inner object type either " +
+                                "ObjectGroupObjectData or ObjectGroupObjectDataBLOBReference, " +
+                                "but actual type value is " + type, null);
+            }
+        }
+
+        currentIndex.set(index.get());
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupDataElementData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupDataElementData.java
new file mode 100644
index 0000000..892ba63
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupDataElementData.java
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BinaryItem;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.DataElementType;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.DataNodeObjectData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGUIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+/**
+ * Data element data made of an optional data element hash, the object group declarations,
+ * an optional object metadata declaration, and the object group data.
+ */
+public class ObjectGroupDataElementData extends DataElementData {
+    public DataElementHash dataElementHash;
+    public ObjectGroupDeclarations objectGroupDeclarations;
+    public ObjectGroupMetadataDeclarations objectMetadataDeclaration;
+    public ObjectGroupData objectGroupData;
+
+    /**
+     * Creates an instance with empty declarations and data, and no hash or metadata.
+     */
+    public ObjectGroupDataElementData() {
+        this.objectGroupDeclarations = new ObjectGroupDeclarations();
+
+        // The ObjectMetadataDeclaration is only present for MOSS2013, so leave null for default value.
+        this.objectMetadataDeclaration = null;
+
+        // The DataElementHash is only present for MOSS2013, so leave null for default value.
+        this.dataElementHash = null;
+        this.objectGroupData = new ObjectGroupData();
+    }
+
+    /**
+     * Serializes the parts in order: hash (if set), declarations, metadata declaration
+     * (if set), then the object group data.
+     *
+     * @return the serialized bytes
+     */
+    @Override
+    public List<Byte> serializeToByteList() throws TikaException, IOException {
+        List<Byte> bytes = new ArrayList<>();
+
+        if (dataElementHash != null) {
+            bytes.addAll(dataElementHash.serializeToByteList());
+        }
+
+        bytes.addAll(objectGroupDeclarations.serializeToByteList());
+
+        if (objectMetadataDeclaration != null) {
+            bytes.addAll(objectMetadataDeclaration.serializeToByteList());
+        }
+
+        bytes.addAll(objectGroupData.serializeToByteList());
+        return bytes;
+    }
+
+    /**
+     * Parses the element parts from the byte array, starting at {@code startIndex}.
+     * The hash and the metadata declaration are optional and only stored when present.
+     *
+     * @param byteArray  A Byte array
+     * @param startIndex Start position
+     * @return the number of bytes consumed
+     */
+    @Override
+    public int deserializeDataElementDataFromByteArray(byte[] byteArray, int startIndex)
+            throws TikaException, IOException {
+        AtomicInteger cursor = new AtomicInteger(startIndex);
+
+        AtomicReference<DataElementHash> hashRef = new AtomicReference<>();
+        boolean hasHash =
+                StreamObject.tryGetCurrent(byteArray, cursor, hashRef, DataElementHash.class);
+        if (hasHash) {
+            this.dataElementHash = hashRef.get();
+        }
+
+        this.objectGroupDeclarations =
+                StreamObject.getCurrent(byteArray, cursor, ObjectGroupDeclarations.class);
+
+        AtomicReference<ObjectGroupMetadataDeclarations> metadataRef =
+                new AtomicReference<>(new ObjectGroupMetadataDeclarations());
+        boolean hasMetadata = StreamObject.tryGetCurrent(byteArray, cursor, metadataRef,
+                ObjectGroupMetadataDeclarations.class);
+        if (hasMetadata) {
+            this.objectMetadataDeclaration = metadataRef.get();
+        }
+
+        this.objectGroupData = StreamObject.getCurrent(byteArray, cursor, ObjectGroupData.class);
+
+        return cursor.get() - startIndex;
+    }
+
+    /**
+     * Builds a list of DataElement from a node object tree.
+     */
+    public static class Builder {
+        /**
+         * Walks the node tree and returns one data element per visited node.
+         *
+         * @param node the node the traversal starts from
+         * @return the data elements built from the node and its descendants
+         */
+        public List<DataElement> build(NodeObject node) throws TikaException, IOException {
+            List<DataElement> collected = new ArrayList<>();
+            traverseNodeObject(node, collected);
+            return collected;
+        }
+
+        /**
+         * Appends a data element for the node, then recurses into its children.
+         * A LeafNodeObject that holds a data node also gets that data node's declaration and
+         * data added to its own element and is not descended further. Nodes of any other
+         * type are ignored.
+         *
+         * @param node         the node to visit
+         * @param dataElements the list the data elements are appended to
+         */
+        private void traverseNodeObject(NodeObject node, List<DataElement> dataElements)
+                throws TikaException, IOException {
+            if (node instanceof IntermediateNodeObject) {
+                IntermediateNodeObject root = (IntermediateNodeObject) node;
+                ObjectGroupDataElementData rootData = new ObjectGroupDataElementData();
+                rootData.objectGroupDeclarations.objectDeclarationList.add(
+                        createObjectDeclare(node));
+                rootData.objectGroupData.objectGroupObjectDataList.add(createObjectData(root));
+
+                dataElements.add(
+                        new DataElement(DataElementType.ObjectGroupDataElementData, rootData));
+
+                for (LeafNodeObject child : root.intermediateNodeObjectList) {
+                    traverseNodeObject(child, dataElements);
+                }
+                return;
+            }
+
+            if (!(node instanceof LeafNodeObject)) {
+                return;
+            }
+
+            LeafNodeObject leaf = (LeafNodeObject) node;
+            ObjectGroupDataElementData leafData = new ObjectGroupDataElementData();
+            leafData.objectGroupDeclarations.objectDeclarationList.add(createObjectDeclare(node));
+            leafData.objectGroupData.objectGroupObjectDataList.add(createObjectData(leaf));
+
+            DataNodeObjectData dataNode = leaf.dataNodeObjectData;
+            if (dataNode != null) {
+                leafData.objectGroupDeclarations.objectDeclarationList.add(
+                        createObjectDeclare(dataNode));
+                leafData.objectGroupData.objectGroupObjectDataList.add(
+                        createObjectData(dataNode));
+                dataElements.add(
+                        new DataElement(DataElementType.ObjectGroupDataElementData, leafData));
+                return;
+            }
+
+            // No data node from here on, so a child list is required.
+            if (leaf.intermediateNodeObjectList == null) {
+                throw new TikaException(
+                        "The DataNodeObjectData and IntermediateNodeObjectList properties in " +
+                                "LeafNodeObjectData type cannot be null in the same time.");
+            }
+
+            dataElements.add(
+                    new DataElement(DataElementType.ObjectGroupDataElementData, leafData));
+            for (LeafNodeObject child : leaf.intermediateNodeObjectList) {
+                traverseNodeObject(child, dataElements);
+            }
+        }
+
+        /**
+         * Creates the declaration for a node object: partition id 1, no cell or object
+         * references, and a data size equal to the size of the node's content.
+         *
+         * @param node the node object
+         * @return the declaration
+         */
+        private ObjectGroupObjectDeclare createObjectDeclare(NodeObject node) throws TikaException {
+            ObjectGroupObjectDeclare declaration = new ObjectGroupObjectDeclare();
+
+            declaration.objectExtendedGUID = node.exGuid;
+            declaration.objectPartitionID = new Compact64bitInt(1);
+            declaration.cellReferencesCount = new Compact64bitInt(0);
+            declaration.objectReferencesCount = new Compact64bitInt(0);
+            declaration.objectDataSize = new Compact64bitInt(node.getContent().size());
+
+            return declaration;
+        }
+
+        /**
+         * Creates the declaration for a data node: partition id 1, no cell references, one
+         * object reference, and a data size equal to the length of the node's byte array.
+         *
+         * @param node the data node
+         * @return the declaration
+         */
+        private ObjectGroupObjectDeclare createObjectDeclare(DataNodeObjectData node) {
+            ObjectGroupObjectDeclare declaration = new ObjectGroupObjectDeclare();
+
+            declaration.objectExtendedGUID = node.exGuid;
+            declaration.objectPartitionID = new Compact64bitInt(1);
+            declaration.cellReferencesCount = new Compact64bitInt(0);
+            declaration.objectReferencesCount = new Compact64bitInt(1);
+            declaration.objectDataSize = new Compact64bitInt(node.objectData.length);
+
+            return declaration;
+        }
+
+        /**
+         * Creates the object data for a root node: the extended GUIDs of all children plus
+         * the serialized node itself.
+         *
+         * @param node the root node
+         * @return the object data
+         */
+        private ObjectGroupObjectData createObjectData(IntermediateNodeObject node)
+                throws TikaException, IOException {
+            ObjectGroupObjectData result = new ObjectGroupObjectData();
+
+            result.cellIDArray = new CellIDArray(0, null);
+
+            List<ExGuid> childGuids = new ArrayList<>();
+            for (LeafNodeObject child : node.intermediateNodeObjectList) {
+                childGuids.add(child.exGuid);
+            }
+
+            result.objectExGUIDArray = new ExGUIDArray(childGuids);
+            result.data = new BinaryItem(node.serializeToByteList());
+
+            return result;
+        }
+
+        /**
+         * Creates the object data for a LeafNodeObject: the extended GUID of its data node,
+         * or else of its children, plus the serialized node itself.
+         *
+         * @param node the node
+         * @return the object data
+         */
+        private ObjectGroupObjectData createObjectData(LeafNodeObject node)
+                throws TikaException, IOException {
+            ObjectGroupObjectData result = new ObjectGroupObjectData();
+
+            result.cellIDArray = new CellIDArray(0, null);
+            List<ExGuid> referencedGuids = new ArrayList<>();
+
+            if (node.dataNodeObjectData != null) {
+                referencedGuids.add(node.dataNodeObjectData.exGuid);
+            } else if (node.intermediateNodeObjectList != null) {
+                for (LeafNodeObject child : node.intermediateNodeObjectList) {
+                    referencedGuids.add(child.exGuid);
+                }
+            }
+
+            result.objectExGUIDArray = new ExGUIDArray(referencedGuids);
+            result.data = new BinaryItem(node.serializeToByteList());
+
+            return result;
+        }
+
+        /**
+         * Creates the object data for a data node: no referenced GUIDs, and the node's bytes
+         * as the binary item.
+         *
+         * @param node the data node
+         * @return the object data
+         */
+        private ObjectGroupObjectData createObjectData(DataNodeObjectData node) {
+            ObjectGroupObjectData result = new ObjectGroupObjectData();
+            result.cellIDArray = new CellIDArray(0, null);
+            result.objectExGUIDArray = new ExGUIDArray(new ArrayList<>());
+            result.data = new BinaryItem(ByteUtil.toListOfByte(node.objectData));
+            return result;
+        }
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupDeclarations.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupDeclarations.java
new file mode 100644
index 0000000..b4a521b
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupDeclarations.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Object Group Declarations: a stream object that collects the object declarations and the
+ * object data BLOB declarations nested inside it.
+ */
+public class ObjectGroupDeclarations extends StreamObject {
+    public List<ObjectGroupObjectDeclare> objectDeclarationList;
+    public List<ObjectGroupObjectBLOBDataDeclaration> objectGroupObjectBLOBDataDeclarationList;
+
+    /**
+     * Creates an instance whose two declaration lists are empty.
+     */
+    public ObjectGroupDeclarations() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupDeclarations);
+        this.objectDeclarationList = new ArrayList<>();
+        this.objectGroupObjectBLOBDataDeclarationList = new ArrayList<>();
+    }
+
+    /**
+     * De-serializes the nested declarations.
+     * <p>
+     * Headers are parsed one after another starting at the current index. Each
+     * ObjectGroupObjectDeclare or ObjectGroupObjectBLOBDataDeclaration is appended to its
+     * list; parsing stops at the first position where no header can be parsed. Both lists
+     * are replaced by fresh ones before parsing starts.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past everything that was consumed
+     * @param lengthOfItems The length of the items; must be 0 for this object
+     * @throws StreamObjectParseErrorException if lengthOfItems is not 0 or a nested object
+     *                                         has any other type
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        if (lengthOfItems != 0) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "ObjectGroupDeclarations",
+                    "Stream object over-parse error", null);
+        }
+
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+        AtomicReference<StreamObjectHeaderStart> headerRef = new AtomicReference<>();
+        this.objectDeclarationList = new ArrayList<>();
+        this.objectGroupObjectBLOBDataDeclarationList = new ArrayList<>();
+
+        while (true) {
+            int headerLength = StreamObjectHeaderStart.tryParse(byteArray, cursor.get(), headerRef);
+            if (headerLength == 0) {
+                // No further header could be parsed at this position.
+                break;
+            }
+
+            StreamObjectHeaderStart nestedHeader = headerRef.get();
+            if (nestedHeader.type == StreamObjectTypeHeaderStart.ObjectGroupObjectDeclare) {
+                cursor.addAndGet(headerLength);
+                StreamObject parsed =
+                        StreamObject.parseStreamObject(nestedHeader, byteArray, cursor);
+                this.objectDeclarationList.add((ObjectGroupObjectDeclare) parsed);
+            } else if (nestedHeader.type ==
+                    StreamObjectTypeHeaderStart.ObjectGroupObjectBLOBDataDeclaration) {
+                cursor.addAndGet(headerLength);
+                StreamObject parsed =
+                        StreamObject.parseStreamObject(nestedHeader, byteArray, cursor);
+                this.objectGroupObjectBLOBDataDeclarationList.add(
+                        (ObjectGroupObjectBLOBDataDeclaration) parsed);
+            } else {
+                // The cursor still points at the start of the unexpected header here.
+                throw new StreamObjectParseErrorException(cursor.get(), "ObjectGroupDeclarations",
+                        "Failed to parse ObjectGroupDeclarations, expect the inner object type either " +
+                                "ObjectGroupObjectDeclare or ObjectGroupObjectBLOBDataDeclaration, " +
+                                "but actual type value is " + nestedHeader.type, null);
+            }
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized form of every declaration to the byte list: first the object
+     * declarations, then the object data BLOB declarations. A list that is null is skipped.
+     *
+     * @param byteList The Byte list
+     * @return A constant value 0
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws TikaException, IOException {
+        List<ObjectGroupObjectDeclare> declarations = this.objectDeclarationList;
+        if (declarations != null) {
+            for (ObjectGroupObjectDeclare declaration : declarations) {
+                byteList.addAll(declaration.serializeToByteList());
+            }
+        }
+
+        List<ObjectGroupObjectBLOBDataDeclaration> blobDeclarations =
+                this.objectGroupObjectBLOBDataDeclarationList;
+        if (blobDeclarations != null) {
+            for (ObjectGroupObjectBLOBDataDeclaration blobDeclaration : blobDeclarations) {
+                byteList.addAll(blobDeclaration.serializeToByteList());
+            }
+        }
+
+        return 0;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupMetadata.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupMetadata.java
new file mode 100644
index 0000000..61981b2
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupMetadata.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+
+/**
+ * Specifies an object group metadata
+ */
+public class ObjectGroupMetadata extends StreamObject {
+    /**
+     * Gets or sets a compact unsigned 64-bit integer that specifies the expected change
+     * frequency of the object.
+     * This value MUST be:
+     * 0, if the change frequency is not known.
+     * 1, if the object is known to change frequently.
+     * 2, if the object is known to change infrequently.
+     * 3, if the object is known to change independently of any other objects.
+     */
+    public Compact64bitInt ObjectChangeFrequency;
+
+    /**
+     * Initializes a new instance of the ObjectGroupMetadata class.
+     * <p>
+     * The change frequency starts out as a default-constructed Compact64bitInt, so an
+     * instance that was never de-serialized can still be serialized without a
+     * NullPointerException.
+     */
+    public ObjectGroupMetadata() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupMetadata);
+        this.ObjectChangeFrequency = new Compact64bitInt();
+    }
+
+    /**
+     * Used to de-serialize the element.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the change frequency on success
+     * @param lengthOfItems The length of the items
+     * @throws StreamObjectParseErrorException if the number of bytes consumed differs from
+     *                                         lengthOfItems
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        // Parse on a copy of the index so the caller's position only moves on success.
+        AtomicInteger index = new AtomicInteger(currentIndex.get());
+        this.ObjectChangeFrequency = BasicObject.parse(byteArray, index, Compact64bitInt.class);
+
+        if (index.get() - currentIndex.get() != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "ObjectGroupMetadata",
+                    "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(index.get());
+    }
+
+    /**
+     * Used to convert the element into a byte List
+     *
+     * @param byteList A Byte list
+     * @return The number of bytes appended for the change frequency
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        List<Byte> tmpList = this.ObjectChangeFrequency.serializeToByteList();
+        byteList.addAll(tmpList);
+        return tmpList.size();
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupMetadataDeclarations.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupMetadataDeclarations.java
new file mode 100644
index 0000000..b268779
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupMetadataDeclarations.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Object Metadata Declaration: a stream object that collects the ObjectGroupMetadata
+ * objects nested inside it.
+ */
+public class ObjectGroupMetadataDeclarations extends StreamObject {
+    public List<ObjectGroupMetadata> objectGroupMetadataList;
+
+    /**
+     * Initializes a new instance of the ObjectGroupMetadataDeclarations class.
+     */
+    public ObjectGroupMetadataDeclarations() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupMetadataDeclarations);
+        this.objectGroupMetadataList = new ArrayList<>();
+    }
+
+    /**
+     * Used to convert the element into a byte List. Every metadata object in the list is
+     * serialized in order; a null list is skipped.
+     *
+     * @param byteList A Byte list
+     * @return A constant value 0
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws TikaException, IOException {
+        if (this.objectGroupMetadataList != null) {
+            for (ObjectGroupMetadata objectGroupMetadata : this.objectGroupMetadataList) {
+                byteList.addAll(objectGroupMetadata.serializeToByteList());
+            }
+        }
+
+        return 0;
+    }
+
+    /**
+     * Used to de-serialize the element. Nested headers are parsed until no further header
+     * can be parsed; each one must be an ObjectGroupMetadata.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past everything that was consumed
+     * @param lengthOfItems The length of the items; must be 0 for this object
+     * @throws StreamObjectParseErrorException if lengthOfItems is not 0 or a nested object
+     *                                         is not an ObjectGroupMetadata
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        if (lengthOfItems != 0) {
+            throw new StreamObjectParseErrorException(currentIndex.get(),
+                    "ObjectGroupMetadataDeclarations", "Stream object over-parse error", null);
+        }
+
+        AtomicInteger index = new AtomicInteger(currentIndex.get());
+        int headerLength;
+        AtomicReference<StreamObjectHeaderStart> header = new AtomicReference<>();
+        this.objectGroupMetadataList = new ArrayList<>();
+
+        while ((headerLength = StreamObjectHeaderStart.tryParse(byteArray, index.get(), header)) !=
+                0) {
+            index.addAndGet(headerLength);
+            if (header.get().type == StreamObjectTypeHeaderStart.ObjectGroupMetadata) {
+                this.objectGroupMetadataList.add(
+                        (ObjectGroupMetadata) StreamObject.parseStreamObject(header.get(),
+                                byteArray, index));
+            } else {
+                // Report the failure under this class's own name so it is not mistaken for
+                // a failure in ObjectGroupDeclarations.
+                throw new StreamObjectParseErrorException(index.get(),
+                        "ObjectGroupMetadataDeclarations",
+                        "Failed to parse ObjectGroupMetadataDeclarations, expect the inner object type " +
+                                "ObjectGroupMetadata, but actual type value is " +
+                                header.get().type, null);
+            }
+        }
+
+        currentIndex.set(index.get());
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectBLOBDataDeclaration.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectBLOBDataDeclaration.java
new file mode 100644
index 0000000..a86d256
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectBLOBDataDeclaration.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Object data BLOB declaration: declares an object whose data is kept in a separate
+ * object data BLOB.
+ */
+public class ObjectGroupObjectBLOBDataDeclaration extends StreamObject {
+    public ExGuid objectExGUID;
+    public ExGuid objectDataBLOBExGUID;
+    public Compact64bitInt objectPartitionID;
+    /**
+     * Gets or sets a compact unsigned 64-bit integer that specifies the size in bytes of the
+     * opaque binary data for the declared object.
+     * This MUST match the size of the binary item in the corresponding object data BLOB
+     * referenced by the Object Data BLOB reference for this object.
+     */
+    public Compact64bitInt objectDataSize;
+    public Compact64bitInt objectReferencesCount;
+    public Compact64bitInt cellReferencesCount;
+
+    /**
+     * Creates an instance with every field set to a default-constructed value.
+     */
+    public ObjectGroupObjectBLOBDataDeclaration() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupObjectBLOBDataDeclaration);
+        this.objectExGUID = new ExGuid();
+        this.objectDataBLOBExGUID = new ExGuid();
+        this.objectPartitionID = new Compact64bitInt();
+        this.objectDataSize = new Compact64bitInt();
+        this.objectReferencesCount = new Compact64bitInt();
+        this.cellReferencesCount = new Compact64bitInt();
+    }
+
+    /**
+     * De-serializes the declaration fields in this order: object extended GUID, object data
+     * BLOB extended GUID, partition ID, object references count, cell references count.
+     * <p>
+     * NOTE(review): objectDataSize is not read here, although serializeItemsToByteList writes
+     * it between the partition ID and the object references count. Confirm against the
+     * protocol specification which of the two layouts is intended.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the fields on success
+     * @param lengthOfItems The length of the items
+     * @throws StreamObjectParseErrorException if the number of bytes consumed differs from
+     *                                         lengthOfItems
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        final int start = currentIndex.get();
+        AtomicInteger cursor = new AtomicInteger(start);
+
+        this.objectExGUID = BasicObject.parse(byteArray, cursor, ExGuid.class);
+        this.objectDataBLOBExGUID = BasicObject.parse(byteArray, cursor, ExGuid.class);
+        this.objectPartitionID = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+        this.objectReferencesCount = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+        this.cellReferencesCount = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+
+        int consumed = cursor.get() - currentIndex.get();
+        if (consumed != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(),
+                    "ObjectGroupObjectBLOBDataDeclaration", "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized fields to the byte list in this order: object extended GUID,
+     * object data BLOB extended GUID, partition ID, data size, object references count,
+     * cell references count.
+     *
+     * @param byteList A Byte list
+     * @return The number of bytes appended to the list
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        final int sizeBefore = byteList.size();
+
+        byteList.addAll(this.objectExGUID.serializeToByteList());
+        byteList.addAll(this.objectDataBLOBExGUID.serializeToByteList());
+        byteList.addAll(this.objectPartitionID.serializeToByteList());
+        byteList.addAll(this.objectDataSize.serializeToByteList());
+        byteList.addAll(this.objectReferencesCount.serializeToByteList());
+        byteList.addAll(this.cellReferencesCount.serializeToByteList());
+
+        int sizeAfter = byteList.size();
+        return sizeAfter - sizeBefore;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectData.java
new file mode 100644
index 0000000..6d2f3f7
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectData.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BinaryItem;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGUIDArray;
+
+/**
+ * Object data of an object group: an extended GUID array, a cell ID array and the binary
+ * item holding the object's data.
+ */
+public class ObjectGroupObjectData extends StreamObject {
+    public ExGUIDArray objectExGUIDArray;
+    public CellIDArray cellIDArray;
+    public BinaryItem data;
+
+    /**
+     * Creates an instance with every field set to a default-constructed value.
+     */
+    public ObjectGroupObjectData() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupObjectData);
+        this.objectExGUIDArray = new ExGUIDArray();
+        this.cellIDArray = new CellIDArray();
+        this.data = new BinaryItem();
+    }
+
+    /**
+     * De-serializes the extended GUID array, the cell ID array and the binary item, in that
+     * order.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the fields on success
+     * @param lengthOfItems The length of the items
+     * @throws StreamObjectParseErrorException if the number of bytes consumed differs from
+     *                                         lengthOfItems
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+
+        this.objectExGUIDArray = BasicObject.parse(byteArray, cursor, ExGUIDArray.class);
+        this.cellIDArray = BasicObject.parse(byteArray, cursor, CellIDArray.class);
+        this.data = BasicObject.parse(byteArray, cursor, BinaryItem.class);
+
+        int consumed = cursor.get() - currentIndex.get();
+        if (consumed != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(), "ObjectGroupObjectData",
+                    "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized extended GUID array, cell ID array and binary item to the byte
+     * list, in that order.
+     *
+     * @param byteList A Byte list
+     * @return The number of bytes appended to the list
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        final int sizeBefore = byteList.size();
+
+        byteList.addAll(this.objectExGUIDArray.serializeToByteList());
+        byteList.addAll(this.cellIDArray.serializeToByteList());
+        byteList.addAll(this.data.serializeToByteList());
+
+        int sizeAfter = byteList.size();
+        return sizeAfter - sizeBefore;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectDataBLOBReference.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectDataBLOBReference.java
new file mode 100644
index 0000000..45e577a
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectDataBLOBReference.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGUIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Object data BLOB reference: an extended GUID array, a cell ID array and the extended GUID
+ * of the referenced BLOB.
+ */
+public class ObjectGroupObjectDataBLOBReference extends StreamObject {
+    public ExGUIDArray objectExtendedGUIDArray;
+    public CellIDArray cellIDArray;
+    public ExGuid blobExtendedGUID;
+
+    /**
+     * Creates an instance with every field set to a default-constructed value.
+     */
+    public ObjectGroupObjectDataBLOBReference() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupObjectDataBLOBReference);
+        this.objectExtendedGUIDArray = new ExGUIDArray();
+        this.cellIDArray = new CellIDArray();
+        this.blobExtendedGUID = new ExGuid();
+    }
+
+    /**
+     * De-serializes the extended GUID array, the cell ID array and the BLOB extended GUID,
+     * in that order.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the fields on success
+     * @param lengthOfItems The length of the items
+     * @throws StreamObjectParseErrorException if the number of bytes consumed differs from
+     *                                         lengthOfItems
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+
+        this.objectExtendedGUIDArray = BasicObject.parse(byteArray, cursor, ExGUIDArray.class);
+        this.cellIDArray = BasicObject.parse(byteArray, cursor, CellIDArray.class);
+        this.blobExtendedGUID = BasicObject.parse(byteArray, cursor, ExGuid.class);
+
+        int consumed = cursor.get() - currentIndex.get();
+        if (consumed != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(),
+                    "ObjectGroupObjectDataBLOBReference", "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized extended GUID array, cell ID array and BLOB extended GUID to
+     * the byte list, in that order.
+     *
+     * @param byteList A Byte list
+     * @return The number of bytes appended to the list
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        final int sizeBefore = byteList.size();
+
+        byteList.addAll(this.objectExtendedGUIDArray.serializeToByteList());
+        byteList.addAll(this.cellIDArray.serializeToByteList());
+        byteList.addAll(this.blobExtendedGUID.serializeToByteList());
+
+        int sizeAfter = byteList.size();
+        return sizeAfter - sizeBefore;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectDeclare.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectDeclare.java
new file mode 100644
index 0000000..3eae87b
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/ObjectGroupObjectDeclare.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+/**
+ * object declaration
+ */
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Object declaration: the extended GUID, partition ID, data size and reference counts of
+ * one declared object.
+ */
+public class ObjectGroupObjectDeclare extends StreamObject {
+    public ExGuid objectExtendedGUID;
+    public Compact64bitInt objectPartitionID;
+    /**
+     * Gets or sets a compact unsigned 64-bit integer that specifies the size in bytes of the
+     * binary data, opaque to this protocol, for the declared object.
+     * This MUST match the size of the binary item in the corresponding object data for this
+     * object.
+     */
+    public Compact64bitInt objectDataSize;
+    public Compact64bitInt objectReferencesCount;
+    public Compact64bitInt cellReferencesCount;
+
+    /**
+     * Creates an instance with partition ID 1, object references count 1 and cell
+     * references count 0; the GUID and the data size are default-constructed.
+     */
+    public ObjectGroupObjectDeclare() {
+        super(StreamObjectTypeHeaderStart.ObjectGroupObjectDeclare);
+        this.objectExtendedGUID = new ExGuid();
+        this.objectDataSize = new Compact64bitInt();
+
+        Compact64bitInt partitionId = new Compact64bitInt();
+        partitionId.setDecodedValue(1);
+        this.objectPartitionID = partitionId;
+
+        Compact64bitInt objectReferences = new Compact64bitInt();
+        objectReferences.setDecodedValue(1);
+        this.objectReferencesCount = objectReferences;
+
+        Compact64bitInt cellReferences = new Compact64bitInt();
+        cellReferences.setDecodedValue(0);
+        this.cellReferencesCount = cellReferences;
+    }
+
+    /**
+     * De-serializes the declaration fields in this order: extended GUID, partition ID, data
+     * size, object references count, cell references count.
+     *
+     * @param byteArray     A Byte array
+     * @param currentIndex  Start position; advanced past the fields on success
+     * @param lengthOfItems The length of the items
+     * @throws StreamObjectParseErrorException if the number of bytes consumed differs from
+     *                                         lengthOfItems
+     */
+    @Override
+    protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                 int lengthOfItems)
+            throws TikaException, IOException {
+        AtomicInteger cursor = new AtomicInteger(currentIndex.get());
+
+        this.objectExtendedGUID = BasicObject.parse(byteArray, cursor, ExGuid.class);
+        this.objectPartitionID = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+        this.objectDataSize = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+        this.objectReferencesCount = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+        this.cellReferencesCount = BasicObject.parse(byteArray, cursor, Compact64bitInt.class);
+
+        int consumed = cursor.get() - currentIndex.get();
+        if (consumed != lengthOfItems) {
+            throw new StreamObjectParseErrorException(currentIndex.get(),
+                    "ObjectGroupObjectDeclare", "Stream object over-parse error", null);
+        }
+
+        currentIndex.set(cursor.get());
+    }
+
+    /**
+     * Appends the serialized fields to the byte list in the same order in which they are
+     * de-serialized.
+     *
+     * @param byteList A Byte list
+     * @return The number of bytes appended to the list
+     */
+    @Override
+    protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+        final int sizeBefore = byteList.size();
+
+        byteList.addAll(this.objectExtendedGUID.serializeToByteList());
+        byteList.addAll(this.objectPartitionID.serializeToByteList());
+        byteList.addAll(this.objectDataSize.serializeToByteList());
+        byteList.addAll(this.objectReferencesCount.serializeToByteList());
+        byteList.addAll(this.cellReferencesCount.serializeToByteList());
+
+        int sizeAfter = byteList.size();
+        return sizeAfter - sizeBefore;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/PropertySet.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/PropertySet.java
new file mode 100644
index 0000000..7236354
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/PropertySet.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.ArrayNumber;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.EightBytesOfData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.FourBytesOfData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.IProperty;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.NoData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.OneByteOfData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.PrtArrayOfPropertyValues;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.PrtFourBytesOfLengthFollowedByData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.property.TwoBytesOfData;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.PropertyID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.PropertyType;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitConverter;
+
+/**
+ * This class is used to represent a PropertySet: a property count, that many property IDs
+ * and the data of the properties.
+ */
+public class PropertySet implements IProperty {
+    /** Number of bytes the reader advances for the property count. */
+    private static final int COUNT_LENGTH = 2;
+    /** Number of bytes the reader advances for each property ID. */
+    private static final int PROPERTY_ID_LENGTH = 4;
+
+    public int cProperties;
+
+    public PropertyID[] rgPrids;
+    public List<IProperty> rgData;
+
+    /**
+     * This method is used to convert the element of PropertySet into a byte List: the
+     * property count, then every property ID, then every property's data.
+     * <p>
+     * NOTE(review): cProperties is passed to BitConverter.getBytes as an int, while
+     * doDeserializeFromByteArray consumes only two bytes for the count. Confirm that the
+     * count is emitted with the same width that the reader expects.
+     *
+     * @return Return the byte list which store the byte information of PropertySet.
+     */
+    @Override
+    public List<Byte> serializeToByteList() throws IOException {
+        List<Byte> byteList = new ArrayList<>();
+        for (byte b : BitConverter.getBytes(this.cProperties)) {
+            byteList.add(b);
+        }
+
+        for (PropertyID propertyId : this.rgPrids) {
+            byteList.addAll(propertyId.serializeToByteList());
+        }
+
+        for (IProperty property : this.rgData) {
+            byteList.addAll(property.serializeToByteList());
+        }
+
+        return byteList;
+    }
+
+    /**
+     * This method is used to deserialize the PropertySet from the specified byte array and
+     * start index.
+     * <p>
+     * The 16-bit property count is treated as unsigned and is checked against the bytes that
+     * remain in the array before anything is allocated, so a corrupt count is reported as
+     * an IOException instead of a NegativeArraySizeException or an out-of-bounds read.
+     * Property IDs whose type maps to no known property class contribute no entry to
+     * rgData, so rgData may be shorter than rgPrids.
+     *
+     * @param byteArray  Specify the byte array.
+     * @param startIndex Specify the start index from the byte array.
+     * @return Return the length in byte of the PropertySet.
+     * @throws IOException if the array is too short for the count or for the declared
+     *                     number of property IDs, or if a nested property fails to parse
+     */
+    @Override
+    public int doDeserializeFromByteArray(byte[] byteArray, int startIndex) throws IOException {
+        if (startIndex < 0 || byteArray.length - startIndex < COUNT_LENGTH) {
+            throw new IOException(
+                    "PropertySet is truncated: no room for the property count at index " +
+                            startIndex);
+        }
+
+        int index = startIndex;
+
+        // Mask to 16 bits so counts above 32767 are not interpreted as negative.
+        this.cProperties = BitConverter.toInt16(byteArray, startIndex) & 0xFFFF;
+        index += COUNT_LENGTH;
+
+        // cProperties is at most 65535, so the multiplication cannot overflow an int.
+        if (this.cProperties * PROPERTY_ID_LENGTH > byteArray.length - index) {
+            throw new IOException("PropertySet declares " + this.cProperties +
+                    " properties but only " + (byteArray.length - index) +
+                    " bytes remain at index " + index);
+        }
+
+        this.rgPrids = new PropertyID[this.cProperties];
+        for (int i = 0; i < this.cProperties; i++) {
+            PropertyID propertyID = new PropertyID();
+            propertyID.doDeserializeFromByteArray(byteArray, index);
+            this.rgPrids[i] = propertyID;
+            index += PROPERTY_ID_LENGTH;
+        }
+
+        this.rgData = new ArrayList<>();
+        for (PropertyID propertyID : this.rgPrids) {
+            IProperty property = createProperty(propertyID);
+            if (property != null) {
+                int len = property.doDeserializeFromByteArray(byteArray, index);
+                this.rgData.add(property);
+                index += len;
+            }
+        }
+
+        return index - startIndex;
+    }
+
+    /**
+     * Creates the empty property object that matches the type of the given property ID, or
+     * returns null when the type has no matching property class.
+     */
+    private static IProperty createProperty(PropertyID propertyID) {
+        switch (PropertyType.fromIntVal(propertyID.type)) {
+            case NoData:
+            case Bool:
+            case ObjectID:
+            case ContextID:
+            case ObjectSpaceID:
+                return new NoData();
+            case ArrayOfObjectIDs:
+            case ArrayOfObjectSpaceIDs:
+            case ArrayOfContextIDs:
+                return new ArrayNumber();
+            case OneByteOfData:
+                return new OneByteOfData();
+            case TwoBytesOfData:
+                return new TwoBytesOfData();
+            case FourBytesOfData:
+                return new FourBytesOfData();
+            case EightBytesOfData:
+                return new EightBytesOfData();
+            case FourBytesOfLengthFollowedByData:
+                return new PrtFourBytesOfLengthFollowedByData();
+            case ArrayOfPropertyValues:
+                return new PrtArrayOfPropertyValues();
+            case PropertySet:
+                return new PropertySet();
+            default:
+                return null;
+        }
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/PropertySetObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/PropertySetObject.java
new file mode 100644
index 0000000..46dda50
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/PropertySetObject.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.space.ObjectSpaceObjectPropSet;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+
+/**
+ * This class is used to represent the property set.
+ */
+public class PropertySetObject {
+ public ObjectGroupObjectDeclare objectDeclaration;
+ public ObjectSpaceObjectPropSet objectSpaceObjectPropSet;
+
+ /**
+ * Construct the PropertySetObject instance.
+ *
+ * @param objectDeclaration The Object Declaration structure.
+ * @param objectData The Object Data structure.
+ */
+ public PropertySetObject(ObjectGroupObjectDeclare objectDeclaration,
+ ObjectGroupObjectData objectData) throws IOException {
+ this.objectDeclaration = objectDeclaration;
+ this.objectSpaceObjectPropSet = new ObjectSpaceObjectPropSet();
+ this.objectSpaceObjectPropSet.doDeserializeFromByteArray(
+ ByteUtil.toByteArray(objectData.data.content), 0);
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifest.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifest.java
new file mode 100644
index 0000000..001a75a
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+ /**
+  * Stream object carrying a revision ID and a base revision ID, both as extended GUIDs.
+  */
+ public class RevisionManifest extends StreamObject {
+     public ExGuid revisionID;
+     public ExGuid baseRevisionID;
+
+     /**
+      * Creates an empty RevisionManifest stream object.
+      */
+     public RevisionManifest() {
+         super(StreamObjectTypeHeaderStart.RevisionManifest);
+     }
+
+     /**
+      * Reads the two extended GUIDs that make up this element.
+      *
+      * @param byteArray     the bytes to read from
+      * @param currentIndex  read position; advanced past the items only on success
+      * @param lengthOfItems number of bytes the items are expected to occupy
+      * @throws StreamObjectParseErrorException if the bytes consumed differ from
+      *                                         {@code lengthOfItems}
+      */
+     @Override
+     protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+                                                  int lengthOfItems)
+             throws TikaException, IOException {
+         final int begin = currentIndex.get();
+         AtomicInteger cursor = new AtomicInteger(begin);
+
+         this.revisionID = BasicObject.parse(byteArray, cursor, ExGuid.class);
+         this.baseRevisionID = BasicObject.parse(byteArray, cursor, ExGuid.class);
+
+         int consumed = cursor.get() - begin;
+         if (consumed != lengthOfItems) {
+             throw new StreamObjectParseErrorException(begin, "RevisionManifest",
+                     "Stream object over-parse error", null);
+         }
+
+         // Publish the new position only after the length check has passed.
+         currentIndex.set(cursor.get());
+     }
+
+     /**
+      * Appends the serialized revision ID followed by the base revision ID.
+      *
+      * @param byteList the list to append to
+      * @return the number of bytes appended
+      */
+     @Override
+     protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+         List<Byte> revisionBytes = this.revisionID.serializeToByteList();
+         byteList.addAll(revisionBytes);
+         List<Byte> baseRevisionBytes = this.baseRevisionID.serializeToByteList();
+         byteList.addAll(baseRevisionBytes);
+         return revisionBytes.size() + baseRevisionBytes.size();
+     }
+ }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestDataElementData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestDataElementData.java
new file mode 100644
index 0000000..74695d1
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestDataElementData.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.exception.DataElementParseErrorException;
+
+ /**
+  * Data element data made of one {@link RevisionManifest} followed by any number of
+  * {@link RevisionManifestRootDeclare} and {@link RevisionManifestObjectGroupReferences}
+  * stream objects.
+  */
+ public class RevisionManifestDataElementData extends DataElementData {
+     public RevisionManifest revisionManifest;
+     public List<RevisionManifestRootDeclare> revisionManifestRootDeclareList;
+     public List<RevisionManifestObjectGroupReferences> revisionManifestObjectGroupReferences;
+
+     /**
+      * Creates an instance with an empty manifest and empty child lists.
+      */
+     public RevisionManifestDataElementData() {
+         this.revisionManifest = new RevisionManifest();
+         this.revisionManifestRootDeclareList = new ArrayList<>();
+         this.revisionManifestObjectGroupReferences = new ArrayList<>();
+     }
+
+     /**
+      * Deserializes the manifest and every child stream object that follows it.
+      *
+      * @param byteArray  the bytes to read from
+      * @param startIndex offset at which this element starts
+      * @return the number of bytes consumed
+      * @throws DataElementParseErrorException if a following stream object header is neither
+      *                                        a root declare nor an object group reference
+      */
+     @Override
+     public int deserializeDataElementDataFromByteArray(byte[] byteArray, int startIndex)
+             throws TikaException, IOException {
+         AtomicInteger cursor = new AtomicInteger(startIndex);
+         this.revisionManifest = StreamObject.getCurrent(byteArray, cursor, RevisionManifest.class);
+
+         this.revisionManifestRootDeclareList = new ArrayList<>();
+         this.revisionManifestObjectGroupReferences = new ArrayList<>();
+
+         AtomicReference<StreamObjectHeaderStart> header = new AtomicReference<>();
+         while (true) {
+             int headerLength = StreamObjectHeaderStart.tryParse(byteArray, cursor.get(), header);
+             if (headerLength == 0) {
+                 // No further stream object header could be parsed at this position.
+                 break;
+             }
+
+             boolean isRootDeclare =
+                     header.get().type == StreamObjectTypeHeaderStart.RevisionManifestRootDeclare;
+             boolean isGroupReference = !isRootDeclare && header.get().type ==
+                     StreamObjectTypeHeaderStart.RevisionManifestObjectGroupReferences;
+
+             if (!isRootDeclare && !isGroupReference) {
+                 throw new DataElementParseErrorException(cursor.get(),
+                         "Failed to parse RevisionManifestDataElement, expect the inner object type " +
+                                 "RevisionManifestRootDeclare or RevisionManifestObjectGroupReferences, " +
+                                 "but actual type value is " + header.get().type, null);
+             }
+
+             // Step over the header, then let the stream object consume its own body.
+             cursor.addAndGet(headerLength);
+             StreamObject child = StreamObject.parseStreamObject(header.get(), byteArray, cursor);
+             if (isRootDeclare) {
+                 this.revisionManifestRootDeclareList.add((RevisionManifestRootDeclare) child);
+             } else {
+                 this.revisionManifestObjectGroupReferences.add(
+                         (RevisionManifestObjectGroupReferences) child);
+             }
+         }
+
+         return cursor.get() - startIndex;
+     }
+
+     /**
+      * Serializes the manifest, then the root declares, then the object group references.
+      *
+      * @return the serialized bytes
+      */
+     @Override
+     public List<Byte> serializeToByteList() throws TikaException, IOException {
+         List<Byte> out = new ArrayList<>();
+         out.addAll(this.revisionManifest.serializeToByteList());
+
+         if (this.revisionManifestRootDeclareList != null) {
+             for (RevisionManifestRootDeclare rootDeclare : this.revisionManifestRootDeclareList) {
+                 out.addAll(rootDeclare.serializeToByteList());
+             }
+         }
+
+         if (this.revisionManifestObjectGroupReferences != null) {
+             for (RevisionManifestObjectGroupReferences groupReference :
+                     this.revisionManifestObjectGroupReferences) {
+                 out.addAll(groupReference.serializeToByteList());
+             }
+         }
+
+         return out;
+     }
+ }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestObjectGroupReferences.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestObjectGroupReferences.java
new file mode 100644
index 0000000..ceb92bc
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestObjectGroupReferences.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Specifies a revision manifest object group references, each followed by object group extended GUIDs
+ */
+public class RevisionManifestObjectGroupReferences extends StreamObject {
+ public ExGuid objectGroupExtendedGUID;
+
+ /**
+ * Initializes a new instance of the RevisionManifestObjectGroupReferences class.
+ */
+ public RevisionManifestObjectGroupReferences() {
+ super(StreamObjectTypeHeaderStart.RevisionManifestObjectGroupReferences);
+ }
+
+ /**
+ * Initializes a new instance of the RevisionManifestObjectGroupReferences class.
+ *
+ * @param objectGroupExtendedGUID Extended GUID
+ */
+ public RevisionManifestObjectGroupReferences(ExGuid objectGroupExtendedGUID) {
+ super(StreamObjectTypeHeaderStart.RevisionManifestObjectGroupReferences);
+ this.objectGroupExtendedGUID = objectGroupExtendedGUID;
+ }
+
+ /**
+ * Used to de-serialize the element.
+ *
+ * @param byteArray A Byte array
+ * @param currentIndex Start position
+ * @param lengthOfItems The length of the items
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ AtomicInteger index = new AtomicInteger(currentIndex.get());
+ this.objectGroupExtendedGUID = BasicObject.parse(byteArray, index, ExGuid.class);
+ if (index.get() - currentIndex.get() != lengthOfItems) {
+ throw new StreamObjectParseErrorException(currentIndex.get(),
+ "RevisionManifestObjectGroupReferences", "Stream object over-parse error",
+ null);
+ }
+
+ currentIndex.set(index.get());
+ }
+
+ /**
+ * Used to convert the element into a byte List.
+ *
+ * @param byteList A Byte list
+ * @return The number of elements actually contained in the list.
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ List<Byte> tmpList = this.objectGroupExtendedGUID.serializeToByteList();
+ byteList.addAll(tmpList);
+ return tmpList.size();
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestRootDeclare.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestRootDeclare.java
new file mode 100644
index 0000000..6861940
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionManifestRootDeclare.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Specifies a revision manifest root declare, each followed by root and object extended GUIDs
+ */
+public class RevisionManifestRootDeclare extends StreamObject {
+ public ExGuid rootExGuid;
+ public ExGuid objectExGuid;
+
+ /**
+ * Initializes a new instance of the RevisionManifestRootDeclare class.
+ */
+ public RevisionManifestRootDeclare() {
+ super(StreamObjectTypeHeaderStart.RevisionManifestRootDeclare);
+ }
+
+ /**
+ * Used to de-serialize the element.
+ *
+ * @param byteArray A Byte list
+ * @param currentIndex Start position
+ * @param lengthOfItems The length of the items
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ AtomicInteger index = new AtomicInteger(currentIndex.get());
+ this.rootExGuid = BasicObject.parse(byteArray, index, ExGuid.class);
+ this.objectExGuid = BasicObject.parse(byteArray, index, ExGuid.class);
+ if (index.get() - currentIndex.get() != lengthOfItems) {
+ throw new StreamObjectParseErrorException(currentIndex.get(),
+ "RevisionManifestRootDeclare", "Stream object over-parse error", null);
+ }
+
+ currentIndex.set(index.get());
+ }
+
+ /**
+ * Used to convert the element into a byte List.
+ *
+ * @param byteList A Byte list
+ * @return The length of list
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ int itemsIndex = byteList.size();
+ byteList.addAll(this.rootExGuid.serializeToByteList());
+ byteList.addAll(this.objectExGuid.serializeToByteList());
+ return byteList.size() - itemsIndex;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionStoreObject.java
similarity index 51%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionStoreObject.java
index aa01c18..5885262 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodeList.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionStoreObject.java
@@ -14,30 +14,32 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-import java.util.ArrayList;
-import java.util.List;
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
-class FileNodeList {
- FileNodeListHeader fileNodeListHeader;
- List<FileNode> children = new ArrayList<>();
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGUIDArray;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
- public FileNodeListHeader getFileNodeListHeader() {
- return fileNodeListHeader;
- }
- public FileNodeList setFileNodeListHeader(FileNodeListHeader fileNodeListHeader) {
- this.fileNodeListHeader = fileNodeListHeader;
- return this;
- }
+/**
+ * The class is used to represent the revision store object.
+ */
+public class RevisionStoreObject {
- public List<FileNode> getChildren() {
- return children;
- }
+ public ExGuid objectID;
+ public ExGuid objectGroupID;
+ public JCIDObject jcid;
+ public PropertySetObject propertySet;
+ public FileDataObject
+ fileDataObject;
+ public ExGUIDArray referencedObjectID;
+ public CellIDArray referencedObjectSpacesID;
+
+ /**
+ * Initialize the class.
+ */
+ public RevisionStoreObject() {
- public FileNodeList setChildren(List<FileNode> children) {
- this.children = children;
- return this;
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionStoreObjectGroup.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionStoreObjectGroup.java
new file mode 100644
index 0000000..48b7daa
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/RevisionStoreObjectGroup.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+public class RevisionStoreObjectGroup {
+ public ExGuid objectGroupID;
+ public List<RevisionStoreObject> objects;
+ public List<EncryptionObject> encryptionObjects;
+
+ public RevisionStoreObjectGroup(ExGuid objectGroupId) {
+ this.objects = new ArrayList<>();
+ this.encryptionObjects = new ArrayList<>();
+ this.objectGroupID = objectGroupId;
+ }
+
+ public static RevisionStoreObjectGroup createInstance(ExGuid objectGroupId,
+ ObjectGroupDataElementData dataObject,
+ boolean isEncryption) throws IOException {
+ RevisionStoreObjectGroup objectGroup = new RevisionStoreObjectGroup(objectGroupId);
+ Map<ExGuid, RevisionStoreObject> objectDict = new HashMap<>();
+ if (!isEncryption) {
+ RevisionStoreObject revisionObject = null;
+ for (int i = 0; i < dataObject.objectGroupDeclarations.objectDeclarationList.size();
+ i++) {
+ ObjectGroupObjectDeclare objectDeclaration =
+ dataObject.objectGroupDeclarations.objectDeclarationList.get(i);
+ ObjectGroupObjectData objectData =
+ dataObject.objectGroupData.objectGroupObjectDataList.get(i);
+
+ if (!objectDict.containsKey(objectDeclaration.objectExtendedGUID)) {
+ revisionObject = new RevisionStoreObject();
+ revisionObject.objectGroupID = objectGroupId;
+ revisionObject.objectID = objectDeclaration.objectExtendedGUID;
+ objectDict.put(objectDeclaration.objectExtendedGUID, revisionObject);
+ } else {
+ revisionObject = objectDict.get(objectDeclaration.objectExtendedGUID);
+ }
+ if (objectDeclaration.objectPartitionID.getDecodedValue() == 4) {
+ revisionObject.jcid = new JCIDObject(objectDeclaration, objectData);
+ } else if (objectDeclaration.objectPartitionID.getDecodedValue() == 1) {
+ revisionObject.propertySet =
+ new PropertySetObject(objectDeclaration, objectData);
+ if (revisionObject.jcid.jcid.isFileData != 0) {
+ revisionObject.referencedObjectID = objectData.objectExGUIDArray;
+ revisionObject.referencedObjectSpacesID = objectData.cellIDArray;
+ }
+ }
+ }
+
+ for (int i = 0; i <
+ dataObject.objectGroupDeclarations.objectGroupObjectBLOBDataDeclarationList.size();
+ i++) {
+ ObjectGroupObjectBLOBDataDeclaration objectGroupObjectBLOBDataDeclaration =
+ dataObject.objectGroupDeclarations.objectGroupObjectBLOBDataDeclarationList.get(
+ i);
+ ObjectGroupObjectDataBLOBReference objectGroupObjectDataBLOBReference =
+ dataObject.objectGroupData.objectGroupObjectDataBLOBReferenceList.get(i);
+ if (!objectDict.containsKey(objectGroupObjectBLOBDataDeclaration.objectExGUID)) {
+ revisionObject = new RevisionStoreObject();
+ objectDict.put(objectGroupObjectBLOBDataDeclaration.objectExGUID,
+ revisionObject);
+ } else {
+ revisionObject =
+ objectDict.get(objectGroupObjectBLOBDataDeclaration.objectExGUID);
+ }
+ if (objectGroupObjectBLOBDataDeclaration.objectPartitionID.getDecodedValue() == 2) {
+ revisionObject.fileDataObject = new FileDataObject();
+ revisionObject.fileDataObject.objectDataBLOBDeclaration =
+ objectGroupObjectBLOBDataDeclaration;
+ revisionObject.fileDataObject.objectDataBLOBReference =
+ objectGroupObjectDataBLOBReference;
+ }
+ }
+ objectGroup.objects.addAll(objectDict.values());
+ } else {
+ for (int i = 0; i < dataObject.objectGroupDeclarations.objectDeclarationList.size();
+ i++) {
+ ObjectGroupObjectDeclare objectDeclaration =
+ dataObject.objectGroupDeclarations.objectDeclarationList.get(i);
+ ObjectGroupObjectData objectData =
+ dataObject.objectGroupData.objectGroupObjectDataList.get(i);
+
+ if (objectDeclaration.objectPartitionID.getDecodedValue() == 1) {
+ EncryptionObject encrypObject = new EncryptionObject();
+ encrypObject.objectDeclaration = objectDeclaration;
+ encrypObject.objectData = ByteUtil.toByteArray(objectData.data.content);
+ objectGroup.encryptionObjects.add(encrypObject);
+ }
+ }
+ }
+
+ return objectGroup;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/SignatureObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/SignatureObject.java
new file mode 100644
index 0000000..535ba30
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/SignatureObject.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BinaryItem;
+
+/**
+ * Signature Object
+ */
+public class SignatureObject extends StreamObject {
+ /**
+ * Gets or sets a binary item as specified in [MS-FSSHTTPB] section 2.2.1.3 that specifies a
+ * value that is unique to the file data represented by this root node object.
+ * The value of this item depends on the file chunking algorithm used, as specified in section 2.4.
+ */
+ public BinaryItem signatureData;
+
+ /**
+ * Initializes a new instance of the SignatureObject class.
+ */
+ public SignatureObject() {
+ super(StreamObjectTypeHeaderStart.SignatureObject);
+ this.signatureData = new BinaryItem();
+ }
+
+ /**
+ * Used to de-serialize the element.
+ *
+ * @param byteArray A Byte array
+ * @param currentIndex Start position
+ * @param lengthOfItems The length of the items
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ AtomicInteger index = new AtomicInteger(currentIndex.get());
+
+ this.signatureData = BasicObject.parse(byteArray, index, BinaryItem.class);
+
+ if (index.get() - currentIndex.get() != lengthOfItems) {
+ throw new StreamObjectParseErrorException(currentIndex.get(), "Signature",
+ "Stream Object over-parse error", null);
+ }
+
+ currentIndex.set(index.get());
+ }
+
+ /**
+ * Used to convert the element into a byte List.
+ *
+ * @param byteList A Byte list
+ * @return The number of elements
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ int length = byteList.size();
+ byteList.addAll(this.signatureData.serializeToByteList());
+ return byteList.size() - length;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexCellMapping.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexCellMapping.java
new file mode 100644
index 0000000..101e4f7
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexCellMapping.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.SerialNumber;
+
+/**
+ * Specifies the storage index cell mappings (with cell identifier, cell mapping extended GUID,
+ * and cell mapping serial number)
+ */
+public class StorageIndexCellMapping extends StreamObject {
+ public CellID cellID;
+ public ExGuid cellMappingExGuid;
+ public SerialNumber cellMappingSerialNumber;
+
+ /**
+ * Initializes a new instance of the StorageIndexCellMapping class.
+ */
+ public StorageIndexCellMapping() {
+ super(StreamObjectTypeHeaderStart.StorageIndexCellMapping);
+ }
+
+ /**
+ * Used to de-serialize the items.
+ *
+ * @param byteArray A Byte array
+ * @param currentIndex Start position
+ * @param lengthOfItems The length of the items
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ AtomicInteger index = new AtomicInteger(currentIndex.get());
+ this.cellID = BasicObject.parse(byteArray, index, CellID.class);
+ this.cellMappingExGuid = BasicObject.parse(byteArray, index, ExGuid.class);
+ this.cellMappingSerialNumber = BasicObject.parse(byteArray, index, SerialNumber.class);
+
+ if (index.get() - currentIndex.get() != lengthOfItems) {
+ throw new StreamObjectParseErrorException(currentIndex.get(), "StorageIndexCellMapping",
+ "Stream object over-parse error", null);
+ }
+
+ currentIndex.set(index.get());
+ }
+
+ /**
+ * Used to convert the element into a byte List.
+ *
+ * @param byteList A Byte list
+ * @return The length of list
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ int itemsIndex = byteList.size();
+ byteList.addAll(this.cellID.serializeToByteList());
+ byteList.addAll(this.cellMappingExGuid.serializeToByteList());
+ byteList.addAll(this.cellMappingSerialNumber.serializeToByteList());
+ return byteList.size() - itemsIndex;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexDataElementData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexDataElementData.java
new file mode 100644
index 0000000..2e84ab9
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexDataElementData.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.exception.DataElementParseErrorException;
+
+/**
+ * Data element data made of an optional manifest mapping plus cell and revision mappings.
+ */
+public class StorageIndexDataElementData extends DataElementData {
+ public StorageIndexManifestMapping storageIndexManifestMapping;
+ public List<StorageIndexCellMapping> storageIndexCellMappingList;
+ public List<StorageIndexRevisionMapping> storageIndexRevisionMappingList;
+
+ /** Creates an instance with a fresh manifest mapping and two empty mapping lists. */
+ public StorageIndexDataElementData() {
+ this.storageIndexManifestMapping = new StorageIndexManifestMapping();
+ this.storageIndexCellMappingList = new ArrayList<>();
+ this.storageIndexRevisionMappingList = new ArrayList<>();
+ }
+
+ /**
+ * Serializes the manifest mapping, then every cell mapping, then every revision mapping.
+ *
+ * @return the concatenated bytes; members that are {@code null} are skipped
+ */
+ @Override
+ public List<Byte> serializeToByteList() throws TikaException, IOException {
+ List<Byte> serialized = new ArrayList<>();
+
+ if (this.storageIndexManifestMapping != null) {
+ serialized.addAll(this.storageIndexManifestMapping.serializeToByteList());
+ }
+
+ if (this.storageIndexCellMappingList != null) {
+ for (StorageIndexCellMapping mapping : this.storageIndexCellMappingList) {
+ serialized.addAll(mapping.serializeToByteList());
+ }
+ }
+
+ if (this.storageIndexRevisionMappingList != null) {
+ for (StorageIndexRevisionMapping mapping : this.storageIndexRevisionMappingList) {
+ serialized.addAll(mapping.serializeToByteList());
+ }
+ }
+
+ return serialized;
+ }
+
+ /**
+ * Parses consecutive stream objects until StreamObjectHeaderStart.tryParse returns 0.
+ *
+ * @param byteArray buffer holding the serialized data element
+ * @param startIndex offset at which parsing starts
+ * @return the number of bytes consumed
+ */
+ @Override
+ public int deserializeDataElementDataFromByteArray(byte[] byteArray, int startIndex)
+ throws TikaException, IOException {
+ AtomicInteger pos = new AtomicInteger(startIndex);
+ AtomicReference<StreamObjectHeaderStart> header = new AtomicReference<>();
+ boolean manifestMappingSeen = false;
+ int headerLength;
+ while ((headerLength = StreamObjectHeaderStart.tryParse(byteArray, pos.get(), header)) !=
+ 0) {
+ // Remember where this header began; the error offsets below point at the header.
+ final int headerStart = pos.get();
+ pos.addAndGet(headerLength);
+ final StreamObjectHeaderStart current = header.get();
+ if (current.type == StreamObjectTypeHeaderStart.StorageIndexManifestMapping) {
+ if (manifestMappingSeen) {
+ throw new DataElementParseErrorException(headerStart,
+ "Failed to parse StorageIndexDataElement, only can contain zero or one " +
+ "StorageIndexManifestMapping", null);
+ }
+ this.storageIndexManifestMapping =
+ (StorageIndexManifestMapping) StreamObject.parseStreamObject(current,
+ byteArray, pos);
+ manifestMappingSeen = true;
+ } else if (current.type == StreamObjectTypeHeaderStart.StorageIndexCellMapping) {
+ this.storageIndexCellMappingList.add(
+ (StorageIndexCellMapping) StreamObject.parseStreamObject(current,
+ byteArray, pos));
+ } else if (current.type == StreamObjectTypeHeaderStart.StorageIndexRevisionMapping) {
+ this.storageIndexRevisionMappingList.add(
+ (StorageIndexRevisionMapping) StreamObject.parseStreamObject(current,
+ byteArray, pos));
+ } else {
+ throw new DataElementParseErrorException(headerStart,
+ "Failed to parse StorageIndexDataElement, expect the inner object type " +
+ "StorageIndexCellMapping or StorageIndexRevisionMapping, but actual type value is " +
+ current.type, null);
+ }
+ }
+ return pos.get() - startIndex;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexManifestMapping.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexManifestMapping.java
new file mode 100644
index 0000000..50e7b0c
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexManifestMapping.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.SerialNumber;
+
+/** Stream object carrying a manifest mapping extended GUID and its serial number. */
+public class StorageIndexManifestMapping extends StreamObject {
+ public ExGuid manifestMappingExGuid;
+ public SerialNumber manifestMappingSerialNumber;
+
+ /** Creates an empty mapping typed as StorageIndexManifestMapping. */
+ public StorageIndexManifestMapping() {
+ super(StreamObjectTypeHeaderStart.StorageIndexManifestMapping);
+ }
+
+ /**
+ * Reads the manifest mapping extended GUID and then its serial number.
+ *
+ * @param byteArray buffer holding the serialized stream object
+ * @param currentIndex offset of the first item byte; moved past the items on success
+ * @param lengthOfItems number of bytes the items must occupy exactly
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ final int start = currentIndex.get();
+ AtomicInteger pos = new AtomicInteger(start);
+ this.manifestMappingExGuid = BasicObject.parse(byteArray, pos, ExGuid.class);
+ this.manifestMappingSerialNumber = BasicObject.parse(byteArray, pos, SerialNumber.class);
+
+ // Work on a scratch cursor so currentIndex only moves when the length check passes.
+ if (pos.get() - start != lengthOfItems) {
+ throw new StreamObjectParseErrorException(start,
+ "StorageIndexManifestMapping", "Stream object over-parse error", null);
+ }
+ currentIndex.set(pos.get());
+ }
+
+ /**
+ * Appends the serialized extended GUID followed by the serial number to {@code byteList}.
+ *
+ * @param byteList list that receives the serialized items
+ * @return the number of bytes appended by this call
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ final int sizeBefore = byteList.size();
+ byteList.addAll(this.manifestMappingExGuid.serializeToByteList());
+ byteList.addAll(this.manifestMappingSerialNumber.serializeToByteList());
+ return byteList.size() - sizeBefore;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexRevisionMapping.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexRevisionMapping.java
new file mode 100644
index 0000000..2805750
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageIndexRevisionMapping.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.SerialNumber;
+
+/**
+ * Stream object for a storage index revision mapping: the revision extended GUID, the
+ * revision mapping extended GUID and the revision mapping serial number.
+ */
+public class StorageIndexRevisionMapping extends StreamObject {
+ public ExGuid revisionExGuid;
+ public ExGuid revisionMappingExGuid;
+ public SerialNumber revisionMappingSerialNumber;
+
+ /**
+ * Creates an empty mapping typed as StorageIndexRevisionMapping.
+ */
+ public StorageIndexRevisionMapping() {
+ super(StreamObjectTypeHeaderStart.StorageIndexRevisionMapping);
+ }
+
+ /**
+ * Reads the revision GUID, the revision mapping GUID and the serial number, in that order.
+ *
+ * @param byteArray buffer holding the serialized stream object
+ * @param currentIndex offset of the first item byte; moved past the items on success
+ * @param lengthOfItems number of bytes the items must occupy exactly
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ final int start = currentIndex.get();
+ AtomicInteger pos = new AtomicInteger(start);
+ this.revisionExGuid = BasicObject.parse(byteArray, pos, ExGuid.class);
+ this.revisionMappingExGuid = BasicObject.parse(byteArray, pos, ExGuid.class);
+ this.revisionMappingSerialNumber = BasicObject.parse(byteArray, pos, SerialNumber.class);
+
+ if (pos.get() - start != lengthOfItems) {
+ throw new StreamObjectParseErrorException(start,
+ "StorageIndexRevisionMapping", "Stream object over-parse error", null);
+ }
+ currentIndex.set(pos.get());
+ }
+
+ /**
+ * Appends both extended GUIDs and then the serial number to {@code byteList}.
+ *
+ * @param byteList list that receives the serialized items
+ * @return the number of bytes appended by this call
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ final int sizeBefore = byteList.size();
+ byteList.addAll(this.revisionExGuid.serializeToByteList());
+ byteList.addAll(this.revisionMappingExGuid.serializeToByteList());
+ byteList.addAll(this.revisionMappingSerialNumber.serializeToByteList());
+ return byteList.size() - sizeBefore;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestDataElementData.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestDataElementData.java
new file mode 100644
index 0000000..d3162e1
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestDataElementData.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.exception.DataElementParseErrorException;
+
+/**
+ * Data element data made of one storage manifest schema GUID and a list of root declares.
+ */
+public class StorageManifestDataElementData extends DataElementData {
+ public StorageManifestSchemaGUID storageManifestSchemaGUID;
+ public List<StorageManifestRootDeclare> storageManifestRootDeclareList;
+
+ /** Creates an instance with a fresh schema GUID object and an empty root declare list. */
+ public StorageManifestDataElementData() {
+ this.storageManifestSchemaGUID = new StorageManifestSchemaGUID();
+ this.storageManifestRootDeclareList = new ArrayList<>();
+ }
+
+ /**
+ * Serializes the schema GUID object followed by every root declare.
+ *
+ * @return the concatenated bytes; a {@code null} root declare list is skipped
+ */
+ @Override
+ public List<Byte> serializeToByteList() throws TikaException, IOException {
+ List<Byte> bytes = new ArrayList<>(this.storageManifestSchemaGUID.serializeToByteList());
+
+ List<StorageManifestRootDeclare> rootDeclares = this.storageManifestRootDeclareList;
+ if (rootDeclares != null) {
+ for (StorageManifestRootDeclare rootDeclare : rootDeclares) {
+ bytes.addAll(rootDeclare.serializeToByteList());
+ }
+ }
+
+ return bytes;
+ }
+
+ /**
+ * Parses the schema GUID object and then root declares until no header can be parsed.
+ *
+ * @param byteArray buffer holding the serialized data element
+ * @param startIndex offset at which parsing starts
+ * @return the number of bytes consumed
+ */
+ @Override
+ public int deserializeDataElementDataFromByteArray(byte[] byteArray, int startIndex)
+ throws TikaException, IOException {
+ AtomicInteger pos = new AtomicInteger(startIndex);
+
+ // The schema GUID object comes first; getCurrent throws when it cannot be read here.
+ this.storageManifestSchemaGUID =
+ StreamObject.getCurrent(byteArray, pos, StorageManifestSchemaGUID.class);
+ this.storageManifestRootDeclareList = new ArrayList<>();
+
+ AtomicReference<StreamObjectHeaderStart> header = new AtomicReference<>();
+ int headerLength;
+ while ((headerLength = StreamObjectHeaderStart.tryParse(byteArray, pos.get(), header)) !=
+ 0) {
+ final StreamObjectHeaderStart current = header.get();
+ if (current.type != StreamObjectTypeHeaderStart.StorageManifestRootDeclare) {
+ throw new DataElementParseErrorException(pos.get(),
+ "Failed to parse StorageManifestDataElement, expect the inner object type " +
+ "StorageManifestRootDeclare, but actual type value is " +
+ current.type, null);
+ }
+ pos.addAndGet(headerLength);
+ this.storageManifestRootDeclareList.add(
+ (StorageManifestRootDeclare) StreamObject.parseStreamObject(current,
+ byteArray, pos));
+ }
+ return pos.get() - startIndex;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestRootDeclare.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestRootDeclare.java
new file mode 100644
index 0000000..1a5c54a
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestRootDeclare.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellID;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
+
+/**
+ * Stream object for one storage manifest root declare: a root extended GUID and a cell ID.
+ */
+public class StorageManifestRootDeclare extends StreamObject {
+ public ExGuid rootExGUID;
+ public CellID cellID;
+
+ /**
+ * Creates an empty declaration typed as StorageManifestRootDeclare.
+ */
+ public StorageManifestRootDeclare() {
+ super(StreamObjectTypeHeaderStart.StorageManifestRootDeclare);
+ }
+
+ /**
+ * Reads the root extended GUID followed by the cell ID.
+ *
+ * @param byteArray buffer holding the serialized stream object
+ * @param currentIndex offset of the first item byte; moved past the items on success
+ * @param lengthOfItems number of bytes the items must occupy exactly
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException {
+ final int start = currentIndex.get();
+ AtomicInteger pos = new AtomicInteger(start);
+ this.rootExGUID = BasicObject.parse(byteArray, pos, ExGuid.class);
+ this.cellID = BasicObject.parse(byteArray, pos, CellID.class);
+
+ if (pos.get() - start != lengthOfItems) {
+ throw new StreamObjectParseErrorException(start,
+ "StorageManifestRootDeclare", "Stream object over-parse error", null);
+ }
+ currentIndex.set(pos.get());
+ }
+
+ /**
+ * Appends the serialized root extended GUID followed by the cell ID to {@code byteList}.
+ *
+ * @param byteList list that receives the serialized items
+ * @return the number of bytes appended by this call
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) throws IOException {
+ final int sizeBefore = byteList.size();
+ byteList.addAll(this.rootExGUID.serializeToByteList());
+ byteList.addAll(this.cellID.serializeToByteList());
+ return byteList.size() - sizeBefore;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestSchemaGUID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestSchemaGUID.java
new file mode 100644
index 0000000..70f7990
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StorageManifestSchemaGUID.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+
+/**
+ * Stream object that carries the storage manifest schema GUID (16 item bytes).
+ */
+public class StorageManifestSchemaGUID extends StreamObject {
+ public UUID guid;
+
+ /** Creates an instance typed as StorageManifestSchemaGUID; {@code guid} is left unset. */
+ public StorageManifestSchemaGUID() {
+ super(StreamObjectTypeHeaderStart.StorageManifestSchemaGUID);
+ // this.GUID = DataElementExGuids.StorageManifestGUID;
+ }
+
+ /**
+ * Reads the 16 item bytes at {@code currentIndex} and derives {@code guid} from them.
+ *
+ * @param byteArray buffer holding the serialized stream object
+ * @param currentIndex offset of the first item byte; advanced by 16 on success
+ * @param lengthOfItems declared item length; anything other than 16 is a parse error
+ */
+ @Override
+ protected void deserializeItemsFromByteArray(byte[] byteArray, AtomicInteger currentIndex,
+ int lengthOfItems) {
+ final int start = currentIndex.get();
+ // A schema GUID is exactly 16 bytes; reject other lengths and truncated input up front.
+ if (lengthOfItems != 16 || start < 0 || byteArray.length - start < 16) {
+ throw new StreamObjectParseErrorException(start,
+ "StorageManifestSchemaGUID", "Stream object over-parse error", null);
+ }
+
+ // Copy the 16 bytes that begin at the current offset, not at the start of the buffer.
+ byte[] temp = Arrays.copyOfRange(byteArray, start, start + 16);
+ // NOTE(review): nameUUIDFromBytes hashes these bytes into a name-based UUID rather than
+ // reinterpreting them as a GUID; left unchanged here - confirm the intended decoding.
+ this.guid = UUID.nameUUIDFromBytes(temp);
+ currentIndex.set(start + 16);
+ }
+
+ /**
+ * Appends the UTF-8 bytes of {@code guid.toString()} to {@code byteList}.
+ * NOTE(review): that text form is 36 bytes long while 16 is reported - confirm intent.
+ * @param byteList list that receives the bytes
+ * @return A constant value 16
+ */
+ @Override
+ protected int serializeItemsToByteList(List<Byte> byteList) {
+ byteList.addAll(
+ ByteUtil.toListOfByte(this.guid.toString().getBytes(StandardCharsets.UTF_8)));
+ return 16;
+ }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObject.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObject.java
new file mode 100644
index 0000000..81192b2
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObject.java
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.IFSSHTTPBSerializable;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitReader;
+
+/** Base class of stream objects: a start header, items and, for compound types, an end header. */
+public abstract class StreamObject implements IFSSHTTPBSerializable {
+ /**
+ * Stream object types that are additionally closed with a stream object end header.
+ */
+ private static final Set<StreamObjectTypeHeaderStart> compoundTypes = new HashSet<>(
+ Arrays.asList(StreamObjectTypeHeaderStart.DataElement,
+ StreamObjectTypeHeaderStart.Knowledge,
+ StreamObjectTypeHeaderStart.CellKnowledge,
+ StreamObjectTypeHeaderStart.DataElementPackage,
+ StreamObjectTypeHeaderStart.ObjectGroupDeclarations,
+ StreamObjectTypeHeaderStart.ObjectGroupData,
+ StreamObjectTypeHeaderStart.WaterlineKnowledge,
+ StreamObjectTypeHeaderStart.ContentTagKnowledge,
+ StreamObjectTypeHeaderStart.Request,
+ StreamObjectTypeHeaderStart.FsshttpbSubResponse,
+ StreamObjectTypeHeaderStart.SubRequest,
+ StreamObjectTypeHeaderStart.ReadAccessResponse,
+ StreamObjectTypeHeaderStart.SpecializedKnowledge,
+ StreamObjectTypeHeaderStart.WriteAccessResponse,
+ StreamObjectTypeHeaderStart.QueryChangesFilter,
+ StreamObjectTypeHeaderStart.ResponseError,
+ StreamObjectTypeHeaderStart.UserAgent,
+ StreamObjectTypeHeaderStart.FragmentKnowledge,
+ StreamObjectTypeHeaderStart.ObjectGroupMetadataDeclarations,
+ StreamObjectTypeHeaderStart.LeafNodeObject,
+ StreamObjectTypeHeaderStart.IntermediateNodeObject,
+ StreamObjectTypeHeaderStart.TargetPartitionId));
+
+ /**
+ * Maps each header type to the same-named class in this package, when such a class exists.
+ */
+ private static final Map<StreamObjectTypeHeaderStart, Class> streamObjectTypeMapping;
+
+ static {
+ streamObjectTypeMapping = new HashMap<>();
+ for (StreamObjectTypeHeaderStart value : StreamObjectTypeHeaderStart.values()) {
+ String className = StreamObject.class.getPackage().getName() + "." + value.name();
+ try {
+ streamObjectTypeMapping.put(value, Class.forName(className));
+ } catch (ClassNotFoundException e) {
+ // This is OK, we are not pulling over every single class
+ }
+ }
+ }
+
+ StreamObjectHeaderEnd streamObjectHeaderEnd;
+ /**
+ * The type of this stream object.
+ */
+ private StreamObjectTypeHeaderStart streamObjectType;
+ /**
+ * The length of the items, taken from the start header during deserialization.
+ */
+ private int lengthOfItems;
+ private StreamObjectHeaderStart streamObjectHeaderStart;
+
+ /**
+ * Initializes a new instance of the StreamObject class.
+ *
+ * @param streamObjectType The instance of StreamObjectTypeHeaderStart.
+ */
+ protected StreamObject(StreamObjectTypeHeaderStart streamObjectType) {
+ this.streamObjectType = streamObjectType;
+ }
+
+ /**
+ * Gets the set of compound stream object types (the internal set, not a copy).
+ */
+ public static Set<StreamObjectTypeHeaderStart> getCompoundTypes() {
+ return compoundTypes;
+ }
+
+ /**
+ * Gets the mapping from header type to implementing class (the internal map, not a copy).
+ */
+ public static Map<StreamObjectTypeHeaderStart, Class> getStreamObjectTypeMapping() {
+ return streamObjectTypeMapping;
+ }
+
+ /**
+ * Parses the stream object at {@code index} and requires it to be of class {@code clazz}.
+ * @param byteArray The byte array which contains message.
+ * @param index The position where to start; advanced past the object on success.
+ * @param clazz The expected stream object class.
+ * @return The current object instance.
+ */
+ public static <T extends StreamObject> T getCurrent(byte[] byteArray, AtomicInteger index,
+ Class<T> clazz)
+ throws TikaException, IOException {
+ AtomicInteger tmpIndex = new AtomicInteger(index.get());
+ int length;
+ AtomicReference<StreamObjectHeaderStart> streamObjectHeader = new AtomicReference<>();
+ if ((length =
+ StreamObjectHeaderStart.tryParse(byteArray, tmpIndex.get(), streamObjectHeader)) ==
+ 0) {
+ throw new StreamObjectParseErrorException(tmpIndex.get(), clazz.getName(),
+ "Failed to extract either 16bit or 32bit stream object header in the current index.",
+ null);
+ }
+
+ tmpIndex.addAndGet(length);
+
+ StreamObject streamObject =
+ parseStreamObject(streamObjectHeader.get(), byteArray, tmpIndex);
+
+ if (!streamObject.getClass().equals(clazz)) {
+ String destClassName = "(null)";
+ if (streamObjectTypeMapping.containsKey(streamObjectHeader.get().type)) {
+ destClassName =
+ streamObjectTypeMapping.get(streamObjectHeader.get().type).getName();
+ }
+ throw new StreamObjectParseErrorException(tmpIndex.get(), clazz.getName(),
+ String.format(Locale.US,
+ "Failed to get stream object as expect type %s, actual type is %s",
+ clazz.getName(), destClassName), null);
+ }
+
+ // Store the current index to the ref parameter index.
+ index.set(tmpIndex.get());
+ return (T) streamObject;
+ }
+
+ /**
+ * Instantiates the class mapped to the header type and deserializes it from the byte array.
+ *
+ * @param header The instance of StreamObjectHeaderStart.
+ * @param byteArray The byte array.
+ * @param index The position just after the header; advanced by the bytes consumed.
+ * @return The instance of StreamObject.
+ */
+ public static StreamObject parseStreamObject(StreamObjectHeaderStart header, byte[] byteArray,
+ AtomicInteger index) throws IOException, TikaException {
+ if (streamObjectTypeMapping.containsKey(header.type)) {
+ Class headerTypeClass = streamObjectTypeMapping.get(header.type);
+ StreamObject streamObject;
+ try {
+ streamObject = (StreamObject) headerTypeClass.newInstance();
+ } catch (InstantiationException | IllegalAccessException e) {
+ throw new TikaException("Could not instantiate class " + headerTypeClass, e);
+ }
+
+ int res = streamObject.deserializeFromByteArray(header, byteArray, index.get());
+ index.addAndGet(res);
+
+ return streamObject;
+ }
+
+ int tmpIndex = index.get();
+ tmpIndex -=
+ header.headerType == StreamObjectHeaderStart.STREAM_OBJECT_HEADER_START_16_BIT ? 2 : 4;
+ throw new StreamObjectParseErrorException(tmpIndex, "Unknown", String.format(Locale.US,
+ "Failed to create the specified stream object instance, the type %s of stream object " +
+ "header in the current index is not defined", header.type.getIntVal()),
+ null);
+ }
+
+ /**
+ * Tries to parse the object at {@code index} when its header type maps to {@code clazz}.
+ * @param byteArray The byte array.
+ * @param index The position where to start; only advanced when true is returned.
+ * @param streamObject Receives the parsed instance on success.
+ * @param clazz The stream object class that is wanted.
+ * @return true if an object of that class was parsed, false otherwise.
+ */
+ public static <T extends StreamObject> boolean tryGetCurrent(byte[] byteArray,
+ AtomicInteger index,
+ AtomicReference<T> streamObject,
+ Class<T> clazz)
+ throws TikaException, IOException {
+ AtomicInteger tmpIndex = new AtomicInteger(index.get());
+
+ int length = 0;
+ AtomicReference<StreamObjectHeaderStart> streamObjectHeader = new AtomicReference<>();
+ if ((length =
+ StreamObjectHeaderStart.tryParse(byteArray, tmpIndex.get(), streamObjectHeader)) ==
+ 0) {
+ return false;
+ }
+
+ tmpIndex.addAndGet(length);
+ if (streamObjectTypeMapping.containsKey(streamObjectHeader.get().type) &&
+ streamObjectTypeMapping.get(streamObjectHeader.get().type).equals(clazz)) {
+ streamObject.set((T) parseStreamObject(streamObjectHeader.get(), byteArray, tmpIndex));
+ } else {
+ return false;
+ }
+
+ index.set(tmpIndex.get());
+ return true;
+ }
+
+ /**
+ * Serializes the start header, the items and, for compound types, the end header.
+ *
+ * @return The byte list.
+ */
+ public List<Byte> serializeToByteList() throws IOException, TikaException {
+ List<Byte> byteList = new ArrayList<>();
+
+ int lengthOfItems = this.serializeItemsToByteList(byteList);
+
+ AtomicReference<StreamObjectHeaderStart> header = new AtomicReference<>();
+ if (this.streamObjectType.getIntVal() <= 0x3F && lengthOfItems <= 127) {
+ header.set(new StreamObjectHeaderStart16bit(this.streamObjectType, lengthOfItems));
+ } else {
+ header.set(new StreamObjectHeaderStart32bit(this.streamObjectType, lengthOfItems));
+ }
+
+ byteList.addAll(0, header.get().serializeToByteList());
+
+ if (compoundTypes.contains(this.streamObjectType)) {
+ if (this.streamObjectType.getIntVal() <= 0x3F) {
+ byteList.addAll(new StreamObjectHeaderEnd8bit(
+ this.streamObjectType.getIntVal()).serializeToByteList());
+ } else {
+ byteList.addAll(new StreamObjectHeaderEnd16bit(
+ this.streamObjectType.getIntVal()).serializeToByteList());
+ }
+ }
+
+ return byteList;
+ }
+
+ /**
+ * Deserializes the items and, for compound types, the end header that must follow them.
+ * An end header that is missing or unrecognized is reported as a parse error.
+ * @param header Then instance of StreamObjectHeaderStart.
+ * @param byteArray The byte list
+ * @param startIndex The position where to start.
+ * @return The element length
+ */
+ public int deserializeFromByteArray(StreamObjectHeaderStart header, byte[] byteArray,
+ int startIndex) throws IOException, TikaException {
+ this.streamObjectType = header.type;
+ this.lengthOfItems = header.length;
+
+ if (header instanceof StreamObjectHeaderStart32bit) {
+ if (header.length == 32767) {
+ this.lengthOfItems =
+ (int) ((StreamObjectHeaderStart32bit) header).largeLength.getDecodedValue();
+ }
+ }
+
+ AtomicInteger index = new AtomicInteger(startIndex);
+ this.streamObjectHeaderStart = header;
+ this.deserializeItemsFromByteArray(byteArray, index, this.lengthOfItems);
+
+ if (compoundTypes.contains(this.streamObjectType)) {
+ StreamObjectHeaderEnd end;
+ BitReader bitReader = new BitReader(byteArray, index.get());
+ int aField = bitReader.readInt32(2);
+ if (aField == 0x1) {
+ end = BasicObject.parse(byteArray, index, StreamObjectHeaderEnd8bit.class);
+ } else if (aField == 0x3) {
+ end = BasicObject.parse(byteArray, index, StreamObjectHeaderEnd16bit.class);
+ } else {
+ // No end header here: report a parse error instead of failing with an NPE below.
+ throw new StreamObjectParseErrorException(index.get(), null,
+ "Unrecognized stream object header end marker " + aField, null);
+ }
+
+ if (end.type.getIntVal() != this.streamObjectType.getIntVal()) {
+ throw new StreamObjectParseErrorException(index.get(), null,
+ "Unexpected the stream header end value " +
+ this.streamObjectType.getIntVal(), null);
+ }
+
+ this.streamObjectHeaderEnd = end;
+ }
+
+ return index.get() - startIndex;
+ }
+
+ /**
+ * Serialize items to byte list.
+ *
+ * @param byteList The byte list need to serialized.
+ * @return The length in bytes for additional data if the current stream object has, otherwise return 0.
+ */
+ protected abstract int serializeItemsToByteList(List<Byte> byteList)
+ throws IOException, TikaException;
+
+ /**
+ * De-serialize items from byte array.
+ *
+ * @param byteArray The byte array which contains response message.
+ * @param currentIndex The index special where to start.
+ * @param lengthOfItems The length of items.
+ */
+ protected abstract void deserializeItemsFromByteArray(byte[] byteArray,
+ AtomicInteger currentIndex,
+ int lengthOfItems)
+ throws TikaException, IOException;
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd.java
similarity index 65%
copy from tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
copy to tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd.java
index b79ef8a..11e5bb5 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/FileNodePtrBackPush.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd.java
@@ -14,17 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.microsoft.onenote;
-class FileNodePtrBackPush {
- FileNodePtr parent;
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
- public FileNodePtrBackPush(FileNodePtr parent) {
- this.parent = parent;
- this.parent.nodeListPositions.add(0);
- }
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
- public void dec() {
- parent.nodeListPositions.remove(parent.nodeListPositions.size() - 1);
- }
+public abstract class StreamObjectHeaderEnd extends BasicObject {
+ /**
+ * The stream object type carried by this end header.
+ * <p>
+ * NOTE(review): the 2-bit header markers (1 for the 8-bit header end, 3 for the 16-bit
+ * header end) are written and checked by the concrete subclasses; they are not stored
+ * in this field.
+ */
+ StreamObjectTypeHeaderEnd type;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd16bit.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd16bit.java
new file mode 100644
index 0000000..4822fce
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd16bit.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitReader;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitWriter;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.LittleEndianBitConverter;
+
+/**
+ * A 16-bit header that marks the end of a stream object.
+ * <p>
+ * The serialized form is written as a 2-bit marker with the value {@code 0x3} followed by the
+ * 14-bit value of the stream object type.
+ */
+public class StreamObjectHeaderEnd16bit extends StreamObjectHeaderEnd {
+    /**
+     * Initializes a new instance of the StreamObjectHeaderEnd16bit class with the specified type value.
+     *
+     * @param type Specify the integer value of the type.
+     * @throws TikaException if {@link StreamObjectTypeHeaderEnd#fromIntVal(int)} yields no type
+     *                       for the given value.
+     */
+    public StreamObjectHeaderEnd16bit(int type) throws TikaException {
+        this.type = StreamObjectTypeHeaderEnd.fromIntVal(type);
+        if (this.type == null) {
+            // The message needs a placeholder, otherwise the offending value is silently dropped.
+            throw new TikaException(String.format(Locale.US,
+                    "The type value %s is not defined for the stream object end 16-bit header",
+                    type));
+        }
+    }
+
+    /**
+     * Initializes a new instance of the StreamObjectHeaderEnd16bit class with the specified type value.
+     *
+     * @param headerType Specify the value of the type.
+     */
+    public StreamObjectHeaderEnd16bit(StreamObjectTypeHeaderEnd headerType) {
+        this.type = headerType;
+    }
+
+    /**
+     * Initializes a new instance of the StreamObjectHeaderEnd16bit class, this is the default constructor.
+     * The type stays unset until the object is deserialized.
+     */
+    public StreamObjectHeaderEnd16bit() {
+    }
+
+    /**
+     * This method is used to convert the element of StreamObjectHeaderEnd16bit basic object into a byte List.
+     *
+     * @return Return the byte list which store the byte information of StreamObjectHeaderEnd16bit.
+     */
+    @Override
+    public List<Byte> serializeToByteList() throws IOException {
+        BitWriter bitFieldWriter = new BitWriter(2);
+        // 2-bit marker identifying a 16-bit header end, then the 14-bit type value.
+        bitFieldWriter.appendInit32(0x3, 2);
+        bitFieldWriter.appendUInit32(this.type.getIntVal(), 14);
+        return bitFieldWriter.getByteList();
+    }
+
+    /**
+     * This method is used to get the value of the 16-bit stream object header End.
+     *
+     * @return Return StreamObjectHeaderEnd16bit value represented by unsigned short integer.
+     */
+    public short toUint16() throws IOException {
+        List<Byte> bytes = this.serializeToByteList();
+        return LittleEndianBitConverter.ToUInt16(ByteUtil.toByteArray(bytes), 0);
+    }
+
+    /**
+     * This method is used to deserialize the StreamObjectHeaderEnd16bit basic object from the
+     * specified byte array and start index.
+     *
+     * @param byteArray  Specify the byte array.
+     * @param startIndex Specify the start index from the byte array.
+     * @return Return the length in byte of the StreamObjectHeaderEnd16bit basic object.
+     * @throws TikaException if the 2-bit marker is not {@code 0x3} or the type value is not defined.
+     */
+    @Override
+    protected int doDeserializeFromByteArray(byte[] byteArray, int startIndex)
+            throws IOException, TikaException {
+        BitReader reader = new BitReader(byteArray, startIndex);
+        int headerType = reader.readInt32(2);
+
+        if (headerType != 0x3) {
+            throw new TikaException(String.format(Locale.US,
+                    "Failed to get the StreamObjectHeaderEnd16bit header type value, expect value %d, " +
+                            "but actual value is %s", 0x3, headerType));
+        }
+
+        int typeValue = reader.readUInt32(14);
+        this.type = StreamObjectTypeHeaderEnd.fromIntVal(typeValue);
+        if (this.type == null) {
+            throw new TikaException(String.format(Locale.US,
+                    "Failed to get the StreamObjectHeaderEnd16bit type value, the value %d is not defined",
+                    typeValue));
+        }
+
+        return 2;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd8bit.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd8bit.java
new file mode 100644
index 0000000..157e0e1
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderEnd8bit.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitReader;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitWriter;
+
+/**
+ * An 8-bit header that marks the end of a stream object.
+ * <p>
+ * The single serialized byte is written as a 2-bit marker with the value {@code 0x1} followed by
+ * the 6-bit value of the stream object type.
+ */
+public class StreamObjectHeaderEnd8bit extends StreamObjectHeaderEnd {
+    /**
+     * Value of the 2-bit marker that identifies an 8-bit header end.
+     */
+    private static final int HEADER_MARKER = 0x1;
+
+    /**
+     * Creates an 8-bit header end from the integer value of a stream object type.
+     *
+     * @param type the integer value of the type.
+     * @throws TikaException if no {@link StreamObjectTypeHeaderEnd} is defined for the value.
+     */
+    public StreamObjectHeaderEnd8bit(int type) throws TikaException {
+        StreamObjectTypeHeaderEnd resolved = StreamObjectTypeHeaderEnd.fromIntVal(type);
+        if (resolved == null) {
+            String message = String.format(Locale.US,
+                    "The type value %s is not defined for the stream object end 8 bit header",
+                    type);
+            throw new TikaException(message);
+        }
+        this.type = resolved;
+    }
+
+    /**
+     * Creates an empty 8-bit header end; the type stays unset until the object is deserialized.
+     */
+    public StreamObjectHeaderEnd8bit() {
+    }
+
+    /**
+     * Creates an 8-bit header end for the given stream object type.
+     *
+     * @param type the stream object type; it is resolved again through its integer value.
+     * @throws TikaException if the integer value of the type is not defined.
+     */
+    public StreamObjectHeaderEnd8bit(StreamObjectTypeHeaderEnd type) throws TikaException {
+        this(type.getIntVal());
+    }
+
+    /**
+     * Writes this header as a list of bytes: the 2-bit marker first, then the 6-bit type value.
+     *
+     * @return the byte list holding the serialized header.
+     */
+    @Override
+    public List<Byte> serializeToByteList() throws IOException {
+        BitWriter writer = new BitWriter(1);
+        writer.appendInit32(HEADER_MARKER, 2);
+        writer.appendUInit32(this.type.getIntVal(), 6);
+        return writer.getByteList();
+    }
+
+    /**
+     * Returns the serialized header as a single byte.
+     *
+     * @return the one byte produced by {@link #serializeToByteList()}.
+     * @throws IOException if the serialized form is not exactly one byte long.
+     */
+    public byte toByte() throws IOException {
+        List<Byte> serialized = this.serializeToByteList();
+        if (serialized.size() == 1) {
+            return serialized.get(0);
+        }
+        throw new IOException("The unexpected StreamObjectHeaderEnd8bit length");
+    }
+
+    /**
+     * Reads this header from the given byte array, starting at the given index.
+     *
+     * @param byteArray  the byte array to read from.
+     * @param startIndex the index of the header in the byte array.
+     * @return the length of the header in bytes, which is always 1.
+     * @throws TikaException if the 2-bit marker is not {@code 0x1} or the type value is not defined.
+     */
+    @Override
+    protected int doDeserializeFromByteArray(byte[] byteArray, int startIndex)
+            throws IOException, TikaException {
+        BitReader bitReader = new BitReader(byteArray, startIndex);
+
+        int marker = bitReader.readInt32(2);
+        if (marker != HEADER_MARKER) {
+            String message = String.format(Locale.US,
+                    "Failed to get the StreamObjectHeaderEnd8bit header type value, " +
+                            "expect value %s, but actual value is %s", HEADER_MARKER, marker);
+            throw new TikaException(message);
+        }
+
+        int rawType = bitReader.readUInt32(6);
+        // Assigned before the check on purpose: the field is overwritten even for an unknown value.
+        this.type = StreamObjectTypeHeaderEnd.fromIntVal(rawType);
+        if (this.type == null) {
+            String message = String.format(Locale.US,
+                    "Failed to get the StreamObjectHeaderEnd8bit type value, the value %s is not defined",
+                    rawType);
+            throw new TikaException(message);
+        }
+
+        return 1;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart.java
new file mode 100644
index 0000000..a16255c
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+
+/**
+ * Base class for the 16-bit and the 32-bit stream object header start.
+ */
+public abstract class StreamObjectHeaderStart extends BasicObject {
+    /**
+     * Specify for 16-bit stream object header start.
+     */
+    public static final int STREAM_OBJECT_HEADER_START_16_BIT = 0x0;
+
+    /**
+     * Specify for 32-bit stream object header start.
+     */
+    public static final int STREAM_OBJECT_HEADER_START_32_BIT = 0x02;
+
+    /**
+     * The stream object type carried by this header.
+     */
+    public StreamObjectTypeHeaderStart type;
+
+    /**
+     * Gets or sets the type of the stream object header.
+     * value 0 for 16-bit stream object header start,
+     * value 2 for 32-bit stream object header start.
+     */
+    protected int headerType;
+
+    /**
+     * Gets or sets a value that specifies if set a compound parse type is needed and
+     * MUST be ended with either an 8-bit stream object header end or a 16-bit stream object header end.
+     * If the bit is zero, it specifies a single object. Otherwise it specifies a compound object.
+     */
+    protected int compound;
+
+    /**
+     * The length in bytes of the additional data (if any).
+     */
+    protected int length;
+
+    /**
+     * Initializes a new instance of the StreamObjectHeaderStart class.
+     */
+    protected StreamObjectHeaderStart() {
+    }
+
+    /**
+     * Initializes a new instance of the StreamObjectHeaderStart class with specified header type.
+     *
+     * @param streamObjectTypeHeaderStart Specify the value of the StreamObjectHeaderStart Type.
+     */
+    protected StreamObjectHeaderStart(StreamObjectTypeHeaderStart streamObjectTypeHeaderStart) {
+        this.type = streamObjectTypeHeaderStart;
+    }
+
+    /**
+     * This method is used to parse the actual 16bit or 32bit stream header.
+     * <p>
+     * The two lowest bits of the byte at {@code startIndex} select the header flavour. The
+     * matching header object is stored in {@code streamObjectHeader} before it is deserialized,
+     * so after a failed deserialization the reference may hold a partially populated header;
+     * callers should only use it when the return value is not 0.
+     *
+     * @param byteArray          Specify the Byte array.
+     * @param startIndex         Specify the start position.
+     * @param streamObjectHeader Specify the out value for the parse result.
+     * @return The value returned by deserializing the header (NOTE(review): presumably the number
+     * of bytes consumed - confirm against BasicObject), or 0 if there is no byte at
+     * {@code startIndex}, the header type is neither 16-bit nor 32-bit, or deserialization fails.
+     */
+    public static int tryParse(byte[] byteArray, int startIndex,
+                               AtomicReference<StreamObjectHeaderStart> streamObjectHeader) {
+        // A "try" method reports failure through its return value, so a missing or exhausted
+        // buffer must not surface as a NullPointerException / ArrayIndexOutOfBoundsException.
+        if (byteArray == null || startIndex < 0 || startIndex >= byteArray.length) {
+            return 0;
+        }
+
+        int headerType = byteArray[startIndex] & 0x03;
+        if (headerType == StreamObjectHeaderStart.STREAM_OBJECT_HEADER_START_16_BIT) {
+            streamObjectHeader.set(new StreamObjectHeaderStart16bit());
+        } else if (headerType == StreamObjectHeaderStart.STREAM_OBJECT_HEADER_START_32_BIT) {
+            streamObjectHeader.set(new StreamObjectHeaderStart32bit());
+        } else {
+            return 0;
+        }
+
+        try {
+            return streamObjectHeader.get().deserializeFromByteArray(byteArray, startIndex);
+        } catch (Exception ignored) {
+            // Deliberate best effort: any parse failure is reported as "nothing parsed".
+            return 0;
+        }
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart16bit.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart16bit.java
new file mode 100644
index 0000000..4000bc2
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart16bit.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitReader;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitWriter;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.LittleEndianBitConverter;
+
+/**
+ * A 16-bit header that indicates the start of a stream object.
+ * <p>
+ * The serialized form is written as a 2-bit header type, a 1-bit compound flag, a 6-bit
+ * stream object type and a 7-bit length.
+ */
+public class StreamObjectHeaderStart16bit extends StreamObjectHeaderStart {
+    /**
+     * Initializes a new instance of the StreamObjectHeaderStart16bit class with specified type and length.
+     *
+     * @param type   Specify the type of the StreamObjectHeaderStart16bit.
+     * @param length Specify the length of the StreamObjectHeaderStart16bit.
+     * @throws TikaException if {@code length} is greater than 127 and therefore does not fit
+     *                       into the 7-bit length field.
+     */
+    public StreamObjectHeaderStart16bit(StreamObjectTypeHeaderStart type, int length)
+            throws TikaException {
+        // Validate the argument, not the still-unset field: checking this.length here would
+        // always see 0 and never reject an oversized length.
+        if (length > 127) {
+            throw new TikaException(
+                    "Field Length - 16-bit Stream Object Header Start, Length (7-bits): A 7-bit " +
+                            "unsigned integer that specifies the length in bytes for additional data " +
+                            "(if any). If the length is more than 127 bytes, a 32-bit stream object header " +
+                            "start MUST be used.");
+        }
+
+        this.headerType = 0x0;
+        this.type = type;
+        this.compound = StreamObject.getCompoundTypes().contains(this.type) ? 1 : 0;
+        this.length = length;
+    }
+
+    /**
+     * Initializes a new instance of the StreamObjectHeaderStart16bit class with specified type.
+     * The length is set to 0.
+     *
+     * @param type Specify the type of the StreamObjectHeaderStart16bit.
+     */
+    public StreamObjectHeaderStart16bit(StreamObjectTypeHeaderStart type) throws TikaException {
+        this(type, 0);
+    }
+
+    /**
+     * Initializes a new instance of the StreamObjectHeaderStart16bit class, this is the default constructor.
+     */
+    public StreamObjectHeaderStart16bit() {
+    }
+
+    /**
+     * This method is used to convert the element of StreamObjectHeaderStart16bit basic object into a byte List.
+     *
+     * @return Return the byte list which store the byte information of StreamObjectHeaderStart16bit.
+     */
+    @Override
+    public List<Byte> serializeToByteList() throws IOException {
+        BitWriter bitField = new BitWriter(2);
+        bitField.appendInit32(this.headerType, 2);
+        bitField.appendInit32(this.compound, 1);
+        bitField.appendUInit32(this.type.getIntVal(), 6);
+        bitField.appendInit32(this.length, 7);
+        List<Byte> result = new ArrayList<>();
+        ByteUtil.appendByteArrayToListOfByte(result, bitField.getBytes());
+        return result;
+    }
+
+    /**
+     * This method is used to get the Uint16 value of the 16bit stream object header.
+     *
+     * @return Return the ushort value.
+     */
+    public short ToUint16() throws IOException {
+        return LittleEndianBitConverter.ToUInt16(ByteUtil.toByteArray(this.serializeToByteList()),
+                0);
+    }
+
+    /**
+     * This method is used to deserialize the StreamObjectHeaderStart16bit basic object from the
+     * specified byte array and start index.
+     *
+     * @param byteArray  Specify the byte array.
+     * @param startIndex Specify the start index from the byte array.
+     * @return Return the length in byte of the StreamObjectHeaderStart16bit basic object.
+     * @throws TikaException if the header type is not the 16-bit marker, the type value is not
+     *                       defined, a compound type is not flagged as compound, or the length
+     *                       exceeds 127.
+     */
+    @Override
+    protected int doDeserializeFromByteArray(byte[] byteArray, int startIndex)
+            throws IOException, TikaException {
+        BitReader bitReader = new BitReader(byteArray, startIndex);
+        this.headerType = bitReader.readInt32(2);
+        if (this.headerType != StreamObjectHeaderStart.STREAM_OBJECT_HEADER_START_16_BIT) {
+            throw new TikaException(String.format(Locale.US,
+                    "Failed to get the StreamObjectHeaderStart16bit header type value, expect value %s, " +
+                            "but actual value is %s", STREAM_OBJECT_HEADER_START_16_BIT,
+                    this.headerType));
+        }
+
+        this.compound = bitReader.readInt32(1);
+        int typeValue = bitReader.readInt32(6);
+        this.type = StreamObjectTypeHeaderStart.fromIntVal(typeValue);
+        if (type == null) {
+            throw new TikaException(String.format(Locale.US,
+                    "Failed to get the StreamObjectHeaderStart16bit type value, the value %s is not defined",
+                    typeValue));
+        }
+
+        if (StreamObject.getCompoundTypes().contains(type) && this.compound != 1) {
+            throw new TikaException(String.format(Locale.US,
+                    "Failed to parse the StreamObjectHeaderStart16bit header. If the type value is %s then " +
+                            "the compound value should 1, but actual value is 0", typeValue));
+        }
+
+        this.length = bitReader.readInt32(7);
+        if (this.length > 127) {
+            throw new TikaException(
+                    "16-bit Stream Object Header Start, Length (7-bits): A 7-bit unsigned integer that " +
+                            "specifies the length in bytes for additional data (if any). If the length is more than " +
+                            "127 bytes, a 32-bit stream object header start MUST be used.");
+        }
+
+        return 2;
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart32bit.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart32bit.java
new file mode 100644
index 0000000..07a2bce
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/StreamObjectHeaderStart32bit.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.BasicObject;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.Compact64bitInt;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitReader;
+import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.BitWriter;
+
+/**
+ * A 32-bit header for a compound object that indicates the start of a stream object
+ */
+public class StreamObjectHeaderStart32bit extends StreamObjectHeaderStart {
+ /**
+ * Gets or sets an optional compact uint64 that specifies the length in bytes for additional data (if any).
+ * This field MUST be specified if the Length field contains 32767 and MUST NOT be specified if the Length field
+ * contains any other value than 32767.
+ */
+ public Compact64bitInt largeLength;
+
+ /**
+  * Creates a 32-bit stream object header start for the given type and length.
+  * <p>
+  * A length of 32767 or more is stored as 32767 in {@code length}, with the real value kept
+  * in {@code largeLength}; smaller lengths are stored directly and {@code largeLength} is null.
+  *
+  * @param type   the type of the StreamObjectHeaderStart32bit.
+  * @param length the length of the StreamObjectHeaderStart32bit.
+  */
+ public StreamObjectHeaderStart32bit(StreamObjectTypeHeaderStart type, int length) {
+     this.type = type;
+     this.headerType = StreamObjectHeaderStart.STREAM_OBJECT_HEADER_START_32_BIT;
+
+     if (StreamObject.getCompoundTypes().contains(type)) {
+         this.compound = 1;
+     } else {
+         this.compound = 0;
+     }
+
+     boolean needsLargeLength = length >= 32767;
+     this.length = needsLargeLength ? 32767 : length;
+     this.largeLength = needsLargeLength ? new Compact64bitInt(length) : null;
+ }
+
+ /**
+ * Initializes a new instance of the StreamObjectHeaderStart32bit class, this is the default constructor.
+ * No field is assigned here.
+ */
+ public StreamObjectHeaderStart32bit() {
+ }
+
+ /**
+ * Initializes a new instance of the StreamObjectHeaderStart32bit class with specified type.
+ * <p>
+ * Only {@code type} is assigned here.
+ * NOTE(review): unlike the (type, length) constructor, headerType, compound, length and
+ * largeLength keep their default values - confirm this is intended before serializing an
+ * instance that was built this way.
+ *
+ * @param streamObjectTypeHeaderStart Specify the type of the StreamObjectHeaderStart32bit.
+ */
+ public StreamObjectHeaderStart32bit(StreamObjectTypeHeaderStart streamObjectTypeHeaderStart) {
+ this.type = streamObjectTypeHeaderStart;
+ }
+
+ /**
+ * This method is used to convert the element of StreamObjectHeaderStart32bit basic object into a byte List.
+ *
+ * @return Return the byte list which store the byte information of StreamObjectHeaderStart32bit.
+ */
+ @Override
+ public List<Byte> serializeToByteList() throws IOException {
... 7296 lines suppressed ...