You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by nd...@apache.org on 2024/03/28 09:06:04 UTC
(tika) 01/01: TIKA-4229
This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch TIKA-4229-add-microsoft-graph-fetcher
in repository https://gitbox.apache.org/repos/asf/tika.git
commit eb4e0c12fbec2772a0ba406d2ab8ac6200b0148a
Author: Nicholas DiPiazza <nd...@apache.org>
AuthorDate: Thu Mar 28 04:04:33 2024 -0500
TIKA-4229
initial attempt to add microsoft graph fetcher
---
.../tika/pipes/fetcher/config/AbstractConfig.java | 4 +
tika-pipes/tika-fetchers/pom.xml | 1 +
.../tika-fetcher-microsoft-graph/pom.xml | 151 +++++++++++++++++++++
.../microsoftgraph/MicrosoftGraphFetcher.java | 140 +++++++++++++++++++
.../config/AadCredentialConfigBase.java | 40 ++++++
.../Client2CertificateCredentialsConfig.java | 50 +++++++
.../config/ClientCertificateCredentialsConfig.java | 40 ++++++
.../config/ClientSecretCredentialsConfig.java | 30 ++++
.../config/MsGraphFetcherConfig.java | 65 +++++++++
.../microsoftgraph/MicrosoftGraphFetcherTest.java | 100 ++++++++++++++
.../src/test/resources/log4j2.xml | 32 +++++
11 files changed, 653 insertions(+)
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java
new file mode 100644
index 000000000..536fc44b1
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java
@@ -0,0 +1,4 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.config;
+
+/**
+ * Common base type for fetcher configuration objects.
+ * <p>
+ * Declares no state or behaviour of its own; it only gives configuration
+ * classes a shared supertype.
+ */
+public abstract class AbstractConfig {
+}
diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml
index 7830a74d6..8b957e8cf 100644
--- a/tika-pipes/tika-fetchers/pom.xml
+++ b/tika-pipes/tika-fetchers/pom.xml
@@ -36,6 +36,7 @@
<module>tika-fetcher-s3</module>
<module>tika-fetcher-gcs</module>
<module>tika-fetcher-az-blob</module>
+ <module>tika-fetcher-microsoft-graph</module>
</modules>
<dependencies>
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml
new file mode 100644
index 000000000..e40c8354f
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-fetchers</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>3.0.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-fetcher-microsoft-graph</artifactId>
+ <name>Microsoft Graph Tika Pipes Fetcher</name>
+
+ <!-- Build settings and dependency versions used by this module. -->
+ <properties>
+ <maven.compiler.source>11</maven.compiler.source>
+ <maven.compiler.target>11</maven.compiler.target>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <azure-identity.version>1.11.0</azure-identity.version>
+ <microsoft-graph.version>6.4.0</microsoft-graph.version>
+ <!-- NOTE(review): not referenced by any dependency declared in this pom; confirm it is needed. -->
+ <microsoft-kiota-serialization-json.version>1.1.1</microsoft-kiota-serialization-json.version>
+ <junit-jupiter-engine.version>5.9.2</junit-jupiter-engine.version>
+ <!-- NOTE(review): not referenced by any dependency declared in this pom; confirm it is needed. -->
+ <wiremock.version>3.3.1</wiremock.version>
+ <mockito-junit-jupiter.version>5.3.1</mockito-junit-jupiter.version>
+ </properties>
+
+ <dependencies>
+ <!-- Tika core, at the same version as this module. -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <!-- Microsoft Graph SDK: provides the GraphServiceClient used by the fetcher. -->
+ <dependency>
+ <groupId>com.microsoft.graph</groupId>
+ <artifactId>microsoft-graph</artifactId>
+ <version>${microsoft-graph.version}</version>
+ </dependency>
+ <!-- Azure Identity: provides the client-secret and client-certificate credential builders. -->
+ <dependency>
+ <groupId>com.azure</groupId>
+ <artifactId>azure-identity</artifactId>
+ <version>${azure-identity.version}</version>
+ </dependency>
+ <!-- Test-only dependencies. -->
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
+ <version>${junit-jupiter-engine.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <!-- NOTE(review): no version given here; presumably managed by a parent pom - verify. -->
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-junit-jupiter</artifactId>
+ <version>${mockito-junit-jupiter.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+      <!-- Sets the JPMS automatic module name and additionally publishes a test-jar. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifestEntries>
+              <!-- Must be unique per module: the previous value was the S3 fetcher's name.
+                   This one matches the Java package of this module's sources. -->
+              <Automatic-Module-Name>org.apache.tika.pipes.fetchers.microsoftgraph</Automatic-Module-Name>
+            </manifestEntries>
+          </archive>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+ <!-- Builds a shaded jar during the package phase. -->
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.version}</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>
+ false
+ </createDependencyReducedPom>
+ <!-- Drop entries matching META-INF/*, LICENSE.txt and NOTICE.txt from every artifact;
+      the transformers below add this module's own LICENSE, NOTICE and DEPENDENCIES back. -->
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*</exclude>
+ <exclude>LICENSE.txt</exclude>
+ <exclude>NOTICE.txt</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <!-- Each transformer copies one file from target/classes/META-INF into the shaded jar. -->
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/LICENSE</resource>
+ <file>target/classes/META-INF/LICENSE</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/NOTICE</resource>
+ <file>target/classes/META-INF/NOTICE</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/DEPENDENCIES</resource>
+ <file>target/classes/META-INF/DEPENDENCIES</file>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+ </build>
+
+ <scm>
+ <tag>3.0.0-BETA-rc1</tag>
+ </scm>
+</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java
new file mode 100644
index 000000000..771790692
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph;
+
+import com.azure.identity.ClientCertificateCredentialBuilder;
+import com.azure.identity.ClientSecretCredentialBuilder;
+import com.microsoft.graph.serviceclient.GraphServiceClient;
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetcher.AbstractFetcher;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.ClientCertificateCredentialsConfig;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.ClientSecretCredentialsConfig;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.MsGraphFetcherConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+/**
+ * Fetches files from the Microsoft Graph API.
+ * <p>
+ * Fetch keys have the form {@code siteDriveId,driveItemId}.
+ * <p>
+ * A failed fetch is retried once for every entry in {@code throttleSeconds}, waiting
+ * that many seconds before the retry. When no throttle is configured exactly one
+ * attempt is made.
+ */
+public class MicrosoftGraphFetcher extends AbstractFetcher implements Initializable {
+    private static final Logger LOGGER = LoggerFactory.getLogger(MicrosoftGraphFetcher.class);
+    private GraphServiceClient graphClient;
+    private MsGraphFetcherConfig msGraphFetcherConfig;
+    private long[] throttleSeconds;
+
+    /**
+     * Creates a fetcher without configuration. {@link #checkInitialization} rejects
+     * such an instance unless a Graph client has been supplied by other means.
+     */
+    public MicrosoftGraphFetcher() {
+
+    }
+
+    /**
+     * Creates a fetcher that builds its Graph client from the given configuration
+     * when {@link #initialize(Map)} is called.
+     *
+     * @param msGraphFetcherConfig credentials, scopes and throttle settings
+     */
+    public MicrosoftGraphFetcher(MsGraphFetcherConfig msGraphFetcherConfig) {
+        this.msGraphFetcherConfig = msGraphFetcherConfig;
+    }
+
+    /**
+     * Set seconds to throttle retries as a comma-delimited list, e.g.: 30,60,120,600
+     * Whitespace around the individual values is ignored.
+     *
+     * @param commaDelimitedLongs comma-delimited, non-negative delays in seconds
+     * @throws TikaConfigException if the value is null or an entry is not a
+     *                             non-negative long
+     */
+    @Field
+    public void setThrottleSeconds(String commaDelimitedLongs) throws TikaConfigException {
+        if (commaDelimitedLongs == null) {
+            throw new TikaConfigException(
+                    "throttleSeconds must be a comma-delimited list of longs, but was null");
+        }
+        String[] longStrings = commaDelimitedLongs.split(",");
+        long[] seconds = new long[longStrings.length];
+        for (int i = 0; i < longStrings.length; i++) {
+            // Accept "30, 60" as well as "30,60".
+            String token = longStrings[i].trim();
+            try {
+                seconds[i] = Long.parseLong(token);
+            } catch (NumberFormatException e) {
+                // Keep the cause so the original parse failure stays visible.
+                throw new TikaConfigException("Invalid throttleSeconds entry '" + token +
+                        "' in '" + commaDelimitedLongs + "'", e);
+            }
+            if (seconds[i] < 0) {
+                throw new TikaConfigException("throttleSeconds entries must not be negative: '" +
+                        commaDelimitedLongs + "'");
+            }
+        }
+        setThrottleSeconds(seconds);
+    }
+
+    /**
+     * Sets the delays, in seconds, to wait before each retry of a failed fetch.
+     *
+     * @param throttleSeconds one delay per retry; {@code null} or empty disables retries
+     */
+    public void setThrottleSeconds(long[] throttleSeconds) {
+        this.throttleSeconds = throttleSeconds;
+    }
+
+    /**
+     * Builds the Graph client from the configured credentials and scopes.
+     * <p>
+     * Supports {@link ClientCertificateCredentialsConfig} and
+     * {@link ClientSecretCredentialsConfig}. If the configuration is missing or the
+     * credential type is unsupported no client is created; that condition is reported
+     * by {@link #checkInitialization} and by {@link #fetch}.
+     *
+     * @param map initialization parameters; not read by this implementation
+     */
+    @Override
+    public void initialize(Map<String, Param> map) {
+        if (msGraphFetcherConfig == null) {
+            return;
+        }
+        // An explicitly set throttle wins; otherwise fall back to the one carried by the config.
+        if (throttleSeconds == null && msGraphFetcherConfig.getThrottleSeconds() != null) {
+            throttleSeconds = msGraphFetcherConfig.getThrottleSeconds();
+        }
+        String[] scopes = msGraphFetcherConfig.getScopes().toArray(new String[0]);
+        Object configured = msGraphFetcherConfig.getCredentials();
+        if (configured instanceof ClientCertificateCredentialsConfig) {
+            ClientCertificateCredentialsConfig credentials =
+                    (ClientCertificateCredentialsConfig) configured;
+            graphClient = new GraphServiceClient(new ClientCertificateCredentialBuilder()
+                    .clientId(credentials.getClientId())
+                    .tenantId(credentials.getTenantId())
+                    .pfxCertificate(new ByteArrayInputStream(credentials.getCertificateBytes()))
+                    .clientCertificatePassword(credentials.getCertificatePassword())
+                    .build(), scopes);
+        } else if (configured instanceof ClientSecretCredentialsConfig) {
+            ClientSecretCredentialsConfig credentials =
+                    (ClientSecretCredentialsConfig) configured;
+            graphClient = new GraphServiceClient(
+                    new ClientSecretCredentialBuilder()
+                            .tenantId(credentials.getTenantId())
+                            .clientId(credentials.getClientId())
+                            .clientSecret(credentials.getClientSecret()).build(), scopes);
+        }
+    }
+
+    /**
+     * Verifies that a Graph client exists or can be built from the configuration.
+     *
+     * @param initializableProblemHandler not used by this implementation
+     * @throws TikaConfigException if no client exists and the configuration is missing
+     *                             or carries an unsupported credential type
+     */
+    @Override
+    public void checkInitialization(InitializableProblemHandler initializableProblemHandler)
+            throws TikaConfigException {
+        if (graphClient != null) {
+            return;
+        }
+        if (msGraphFetcherConfig == null) {
+            throw new TikaConfigException("MicrosoftGraphFetcher requires a MsGraphFetcherConfig");
+        }
+        Object configured = msGraphFetcherConfig.getCredentials();
+        if (!(configured instanceof ClientCertificateCredentialsConfig)
+                && !(configured instanceof ClientSecretCredentialsConfig)) {
+            throw new TikaConfigException("Unsupported or missing credentials: " +
+                    (configured == null ? "null" : configured.getClass().getName()));
+        }
+    }
+
+    /**
+     * Fetches the content of a drive item.
+     *
+     * @param fetchKey {@code siteDriveId,driveItemId}
+     * @param metadata not read or modified by this implementation
+     * @return the stream returned by the Graph client for the item's content
+     * @throws TikaException if the fetcher has no client, the fetch key is malformed,
+     *                       the thread is interrupted while waiting to retry, or every
+     *                       attempt failed (the last failure is the cause)
+     * @throws IOException   declared by the fetcher contract; not thrown directly here
+     */
+    @Override
+    public InputStream fetch(String fetchKey, Metadata metadata) throws TikaException, IOException {
+        // Fail fast on conditions that no amount of retrying can fix.
+        if (graphClient == null) {
+            throw new TikaException(
+                    "MicrosoftGraphFetcher is not initialized; cannot fetch " + fetchKey);
+        }
+        String[] fetchKeySplit = fetchKey == null ? new String[0] : fetchKey.split(",");
+        if (fetchKeySplit.length < 2) {
+            throw new TikaException(
+                    "Fetch key must have the form siteDriveId,driveItemId but was: " + fetchKey);
+        }
+        String siteDriveId = fetchKeySplit[0];
+        String driveItemId = fetchKeySplit[1];
+
+        long[] delays = throttleSeconds == null ? new long[0] : throttleSeconds;
+        Exception lastFailure = null;
+        // One initial attempt plus one retry per configured delay.
+        for (int attempt = 0; attempt <= delays.length; attempt++) {
+            try {
+                long start = System.currentTimeMillis();
+                InputStream is = graphClient.drives().byDriveId(siteDriveId)
+                        .items()
+                        .byDriveItemId(driveItemId)
+                        .content()
+                        .get();
+
+                long elapsed = System.currentTimeMillis() - start;
+                LOGGER.debug("Total to fetch {}", elapsed);
+                return is;
+            } catch (Exception e) {
+                LOGGER.warn("Exception fetching {} on attempt {}", fetchKey, attempt + 1, e);
+                lastFailure = e;
+            }
+            if (attempt == delays.length) {
+                // No retry left: do not sleep just to give up afterwards.
+                break;
+            }
+            LOGGER.warn("Sleeping for {} seconds before retry", delays[attempt]);
+            try {
+                // The delays are seconds, so do not hand them to Thread.sleep as millis.
+                java.util.concurrent.TimeUnit.SECONDS.sleep(delays[attempt]);
+            } catch (InterruptedException e) {
+                // Restore the flag and stop retrying; the caller asked us to stop.
+                Thread.currentThread().interrupt();
+                TikaException interrupted = new TikaException(
+                        "Interrupted while waiting to retry fetch of " + fetchKey, e);
+                interrupted.addSuppressed(lastFailure);
+                throw interrupted;
+            }
+        }
+        throw new TikaException("Could not fetch " + fetchKey, lastFailure);
+    }
+}
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/AadCredentialConfigBase.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/AadCredentialConfigBase.java
new file mode 100644
index 000000000..e4204739c
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/AadCredentialConfigBase.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+
+/**
+ * Shared state of the Azure AD credential configurations: the tenant id and the
+ * client (application) id. Setters return {@code this} so calls can be chained.
+ */
+public abstract class AadCredentialConfigBase {
+    private String clientId;
+    private String tenantId;
+
+    /**
+     * @return the configured tenant id, or {@code null} if none was set
+     */
+    public String getTenantId() {
+        return this.tenantId;
+    }
+
+    /**
+     * @return the configured client id, or {@code null} if none was set
+     */
+    public String getClientId() {
+        return this.clientId;
+    }
+
+    /**
+     * @param tenantId the tenant id to store
+     * @return this instance
+     */
+    public AadCredentialConfigBase setTenantId(String tenantId) {
+        this.tenantId = tenantId;
+        return this;
+    }
+
+    /**
+     * @param clientId the client id to store
+     * @return this instance
+     */
+    public AadCredentialConfigBase setClientId(String clientId) {
+        this.clientId = clientId;
+        return this;
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/Client2CertificateCredentialsConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/Client2CertificateCredentialsConfig.java
new file mode 100644
index 000000000..d9128373e
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/Client2CertificateCredentialsConfig.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+
+/**
+ * Holder for a tenant id, a client id and a client secret. Setters return
+ * {@code this} so calls can be chained.
+ * <p>
+ * NOTE(review): despite its name this class carries a client secret rather than a
+ * certificate, does not extend {@code AadCredentialConfigBase}, and has the same
+ * properties as {@code ClientSecretCredentialsConfig} - confirm whether it is needed.
+ */
+public class Client2CertificateCredentialsConfig {
+    private String clientSecret;
+    private String clientId;
+    private String tenantId;
+
+    /**
+     * @return the configured tenant id, or {@code null} if none was set
+     */
+    public String getTenantId() {
+        return this.tenantId;
+    }
+
+    /**
+     * @return the configured client id, or {@code null} if none was set
+     */
+    public String getClientId() {
+        return this.clientId;
+    }
+
+    /**
+     * @return the configured client secret, or {@code null} if none was set
+     */
+    public String getClientSecret() {
+        return this.clientSecret;
+    }
+
+    /**
+     * @param tenantId the tenant id to store
+     * @return this instance
+     */
+    public Client2CertificateCredentialsConfig setTenantId(String tenantId) {
+        this.tenantId = tenantId;
+        return this;
+    }
+
+    /**
+     * @param clientId the client id to store
+     * @return this instance
+     */
+    public Client2CertificateCredentialsConfig setClientId(String clientId) {
+        this.clientId = clientId;
+        return this;
+    }
+
+    /**
+     * @param clientSecret the client secret to store
+     * @return this instance
+     */
+    public Client2CertificateCredentialsConfig setClientSecret(String clientSecret) {
+        this.clientSecret = clientSecret;
+        return this;
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientCertificateCredentialsConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientCertificateCredentialsConfig.java
new file mode 100644
index 000000000..2927519f1
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientCertificateCredentialsConfig.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+
+/**
+ * Azure AD credentials based on a client certificate: the certificate bytes and the
+ * certificate password, in addition to the tenant and client ids of the base class.
+ * <p>
+ * All setters, including the inherited ones, return this concrete type so fluent
+ * chains can be written in any order.
+ */
+public class ClientCertificateCredentialsConfig extends AadCredentialConfigBase {
+    private byte[] certificateBytes;
+    private String certificatePassword;
+
+    /**
+     * @return the certificate bytes as stored (not copied), or {@code null} if unset
+     */
+    public byte[] getCertificateBytes() {
+        return certificateBytes;
+    }
+
+    /**
+     * @param certificateBytes the certificate bytes to store (not copied)
+     * @return this instance
+     */
+    public ClientCertificateCredentialsConfig setCertificateBytes(byte[] certificateBytes) {
+        this.certificateBytes = certificateBytes;
+        return this;
+    }
+
+    /**
+     * @return the certificate password, or {@code null} if unset
+     */
+    public String getCertificatePassword() {
+        return certificatePassword;
+    }
+
+    /**
+     * @param certificatePassword the certificate password to store
+     * @return this instance
+     */
+    public ClientCertificateCredentialsConfig setCertificatePassword(String certificatePassword) {
+        this.certificatePassword = certificatePassword;
+        return this;
+    }
+
+    /**
+     * Covariant override so a chain can continue with certificate-specific setters.
+     *
+     * @param tenantId the tenant id to store
+     * @return this instance
+     */
+    @Override
+    public ClientCertificateCredentialsConfig setTenantId(String tenantId) {
+        super.setTenantId(tenantId);
+        return this;
+    }
+
+    /**
+     * Covariant override so a chain can continue with certificate-specific setters.
+     *
+     * @param clientId the client id to store
+     * @return this instance
+     */
+    @Override
+    public ClientCertificateCredentialsConfig setClientId(String clientId) {
+        super.setClientId(clientId);
+        return this;
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientSecretCredentialsConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientSecretCredentialsConfig.java
new file mode 100644
index 000000000..2989af941
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientSecretCredentialsConfig.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+
+/**
+ * Azure AD credentials based on a client secret, in addition to the tenant and
+ * client ids of the base class.
+ * <p>
+ * All setters, including the inherited ones, return this concrete type so fluent
+ * chains can be written in any order.
+ */
+public class ClientSecretCredentialsConfig extends AadCredentialConfigBase {
+    private String clientSecret;
+
+    /**
+     * @return the client secret, or {@code null} if unset
+     */
+    public String getClientSecret() {
+        return clientSecret;
+    }
+
+    /**
+     * @param clientSecret the client secret to store
+     * @return this instance
+     */
+    public ClientSecretCredentialsConfig setClientSecret(String clientSecret) {
+        this.clientSecret = clientSecret;
+        return this;
+    }
+
+    /**
+     * Covariant override so a chain can continue with {@link #setClientSecret}.
+     *
+     * @param tenantId the tenant id to store
+     * @return this instance
+     */
+    @Override
+    public ClientSecretCredentialsConfig setTenantId(String tenantId) {
+        super.setTenantId(tenantId);
+        return this;
+    }
+
+    /**
+     * Covariant override so a chain can continue with {@link #setClientSecret}.
+     *
+     * @param clientId the client id to store
+     * @return this instance
+     */
+    @Override
+    public ClientSecretCredentialsConfig setClientId(String clientId) {
+        super.setClientId(clientId);
+        return this;
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MsGraphFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MsGraphFetcherConfig.java
new file mode 100644
index 000000000..46e365893
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MsGraphFetcherConfig.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+
+import org.apache.tika.pipes.fetcher.config.AbstractConfig;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Configuration for the Microsoft Graph fetcher: credentials, OAuth scopes, retry
+ * throttle and a spool-to-temp flag. Setters return {@code this} so calls can be chained.
+ */
+public class MsGraphFetcherConfig extends AbstractConfig {
+    // Retry throttle values; seconds, going by the field name.
+    private long[] throttleSeconds;
+    // NOTE(review): nothing in this module appears to read this flag yet - confirm intent.
+    private boolean spoolToTemp;
+    private AadCredentialConfigBase credentials;
+
+    // Never null: setScopes(null) resets this to an empty list.
+    private List<String> scopes = new ArrayList<>();
+
+    /**
+     * @return the spool-to-temp flag
+     */
+    public boolean isSpoolToTemp() {
+        return spoolToTemp;
+    }
+
+    /**
+     * @param spoolToTemp the spool-to-temp flag to store
+     * @return this instance
+     */
+    public MsGraphFetcherConfig setSpoolToTemp(boolean spoolToTemp) {
+        this.spoolToTemp = spoolToTemp;
+        return this;
+    }
+
+    /**
+     * @return the throttle values as stored (not copied), or {@code null} if unset
+     */
+    public long[] getThrottleSeconds() {
+        return throttleSeconds;
+    }
+
+    /**
+     * @param throttleSeconds the throttle values to store (not copied)
+     * @return this instance
+     */
+    public MsGraphFetcherConfig setThrottleSeconds(long[] throttleSeconds) {
+        this.throttleSeconds = throttleSeconds;
+        return this;
+    }
+
+    /**
+     * @return the credentials, or {@code null} if unset
+     */
+    public AadCredentialConfigBase getCredentials() {
+        return credentials;
+    }
+
+    /**
+     * @param credentials the credentials to store
+     * @return this instance
+     */
+    public MsGraphFetcherConfig setCredentials(AadCredentialConfigBase credentials) {
+        this.credentials = credentials;
+        return this;
+    }
+
+    /**
+     * @return the scopes; never {@code null}
+     */
+    public List<String> getScopes() {
+        return scopes;
+    }
+
+    /**
+     * Stores the given scopes. A {@code null} argument is treated as "no scopes" so
+     * that callers of {@link #getScopes()} never have to null-check the result.
+     *
+     * @param scopes the scopes to store (not copied), or {@code null} for none
+     * @return this instance
+     */
+    public MsGraphFetcherConfig setScopes(List<String> scopes) {
+        this.scopes = scopes == null ? new ArrayList<>() : scopes;
+        return this;
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcherTest.java
new file mode 100644
index 000000000..059a93265
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcherTest.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph;
+
+import com.microsoft.graph.drives.DrivesRequestBuilder;
+import com.microsoft.graph.drives.item.DriveItemRequestBuilder;
+import com.microsoft.graph.drives.item.items.ItemsRequestBuilder;
+import com.microsoft.graph.drives.item.items.item.DriveItemItemRequestBuilder;
+import com.microsoft.graph.drives.item.items.item.content.ContentRequestBuilder;
+import com.microsoft.graph.serviceclient.GraphServiceClient;
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.ClientCertificateCredentialsConfig;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.MsGraphFetcherConfig;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+import org.mockito.Spy;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+
+/**
+ * Unit test for {@link MicrosoftGraphFetcher#fetch} against a mocked
+ * {@link GraphServiceClient} and mocked request builders.
+ * <p>
+ * NOTE(review): mocks are initialised by {@link MockitoExtension} and again by
+ * {@code MockitoAnnotations.openMocks(this)} inside the test - confirm that both
+ * are required before removing either one.
+ */
+@ExtendWith(MockitoExtension.class)
+class MicrosoftGraphFetcherTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(MicrosoftGraphFetcherTest.class);
+ // Placeholder credential values; the certificate bytes are plain text, not a real certificate.
+ static byte[] certificateBytes = "test cert file here".getBytes(StandardCharsets.UTF_8);
+ static String certificatePassword = "somepasswordhere";
+ static String clientId = "12312312-1234-1234-1234-112312312313";
+ static String tenantId = "32132132-4332-5432-4321-121231231232";
+ // The two halves of the fetch key used by the test.
+ static String siteDriveId = "99999999-1234-1111-1111-12312312312";
+ static String driveItemid = "asfsadfsadfsafdusahdfiuhfdsusadfjuafiagfaigf";
+
+ @Mock
+ GraphServiceClient graphClient;
+ // Fetcher configuration with client-certificate credentials and a single scope.
+ @Spy
+ @SuppressWarnings("unused")
+ MsGraphFetcherConfig msGraphFetcherConfig = new MsGraphFetcherConfig()
+ .setCredentials(new ClientCertificateCredentialsConfig()
+ .setCertificateBytes(certificateBytes)
+ .setCertificatePassword(certificatePassword)
+ .setClientId(clientId)
+ .setTenantId(tenantId))
+ .setScopes(Collections.singletonList(".default"));
+
+ // One mock per link of the drives -> drive -> items -> item -> content call chain.
+ @Mock
+ DrivesRequestBuilder drivesRequestBuilder;
+
+ @Mock
+ DriveItemRequestBuilder driveItemRequestBuilder;
+
+ @Mock
+ ItemsRequestBuilder itemsRequestBuilder;
+
+ @Mock
+ DriveItemItemRequestBuilder driveItemItemRequestBuilder;
+
+ @Mock
+ ContentRequestBuilder contentRequestBuilder;
+
+ // Object under test; Mockito supplies its collaborators from the fields above.
+ @InjectMocks
+ MicrosoftGraphFetcher microsoftGraphFetcher;
+
+ /**
+ * Stubs the request-builder chain to return a fixed payload and checks that
+ * fetch returns a stream with exactly that payload. Only the success path is
+ * covered; no throttle is set on the fetcher.
+ */
+ @Test
+ void fetch() throws Exception {
+ try (AutoCloseable ignored = MockitoAnnotations.openMocks(this)) {
+ Mockito.when(graphClient.drives()).thenReturn(drivesRequestBuilder);
+ Mockito.when(drivesRequestBuilder.byDriveId(siteDriveId)).thenReturn(driveItemRequestBuilder);
+ Mockito.when(driveItemRequestBuilder.items()).thenReturn(itemsRequestBuilder);
+ Mockito.when(itemsRequestBuilder.byDriveItemId(driveItemid)).thenReturn(driveItemItemRequestBuilder);
+ Mockito.when(driveItemItemRequestBuilder.content()).thenReturn(contentRequestBuilder);
+ String content = "content";
+ Mockito.when(contentRequestBuilder.get()).thenReturn(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)));
+ InputStream resultingInputStream = microsoftGraphFetcher.fetch(siteDriveId + "," + driveItemid, new Metadata());
+ Assertions.assertEquals(content, IOUtils.toString(resultingInputStream, StandardCharsets.UTF_8));
+ }
+ }
+}
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/resources/log4j2.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/resources/log4j2.xml
new file mode 100644
index 000000000..c88e66e99
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/resources/log4j2.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<!-- Test logging: everything at info level and above goes to standard error. -->
+<Configuration status="WARN">
+ <Appenders>
+ <!-- Pattern: level, thread, time of day, logger name, message. -->
+ <Console name="Console" target="SYSTEM_ERR">
+ <PatternLayout pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n"/>
+ </Console>
+ </Appenders>
+ <Loggers>
+ <Root level="info">
+ <AppenderRef ref="Console"/>
+ </Root>
+ </Loggers>
+</Configuration>
\ No newline at end of file