You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by nd...@apache.org on 2024/03/28 09:06:03 UTC

(tika) branch TIKA-4229-add-microsoft-graph-fetcher created (now eb4e0c12f)

This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a change to branch TIKA-4229-add-microsoft-graph-fetcher
in repository https://gitbox.apache.org/repos/asf/tika.git


      at eb4e0c12f TIKA-4229

This branch includes the following new commits:

     new eb4e0c12f TIKA-4229

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



(tika) 01/01: TIKA-4229

Posted by nd...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4229-add-microsoft-graph-fetcher
in repository https://gitbox.apache.org/repos/asf/tika.git

commit eb4e0c12fbec2772a0ba406d2ab8ac6200b0148a
Author: Nicholas DiPiazza <nd...@apache.org>
AuthorDate: Thu Mar 28 04:04:33 2024 -0500

    TIKA-4229
    
    initial attempt to add microsoft graph fetcher
---
 .../tika/pipes/fetcher/config/AbstractConfig.java  |   4 +
 tika-pipes/tika-fetchers/pom.xml                   |   1 +
 .../tika-fetcher-microsoft-graph/pom.xml           | 151 +++++++++++++++++++++
 .../microsoftgraph/MicrosoftGraphFetcher.java      | 140 +++++++++++++++++++
 .../config/AadCredentialConfigBase.java            |  40 ++++++
 .../Client2CertificateCredentialsConfig.java       |  50 +++++++
 .../config/ClientCertificateCredentialsConfig.java |  40 ++++++
 .../config/ClientSecretCredentialsConfig.java      |  30 ++++
 .../config/MsGraphFetcherConfig.java               |  65 +++++++++
 .../microsoftgraph/MicrosoftGraphFetcherTest.java  | 100 ++++++++++++++
 .../src/test/resources/log4j2.xml                  |  32 +++++
 11 files changed, 653 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java
new file mode 100644
index 000000000..536fc44b1
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java
@@ -0,0 +1,4 @@
+package org.apache.tika.pipes.fetcher.config;
+/** Abstract base type for fetcher configuration objects; declares no members of its own. */
+public abstract class AbstractConfig {
+}
diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml
index 7830a74d6..8b957e8cf 100644
--- a/tika-pipes/tika-fetchers/pom.xml
+++ b/tika-pipes/tika-fetchers/pom.xml
@@ -36,6 +36,7 @@
     <module>tika-fetcher-s3</module>
     <module>tika-fetcher-gcs</module>
     <module>tika-fetcher-az-blob</module>
+    <module>tika-fetcher-microsoft-graph</module>
   </modules>
 
   <dependencies>
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml
new file mode 100644
index 000000000..e40c8354f
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>tika-fetchers</artifactId>
+        <groupId>org.apache.tika</groupId>
+        <version>3.0.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>tika-fetcher-microsoft-graph</artifactId>
+    <name>Microsoft Graph Tika Pipes Fetcher</name>
+
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <azure-identity.version>1.11.0</azure-identity.version>
+        <microsoft-graph.version>6.4.0</microsoft-graph.version>
+        <microsoft-kiota-serialization-json.version>1.1.1</microsoft-kiota-serialization-json.version>
+        <junit-jupiter-engine.version>5.9.2</junit-jupiter-engine.version>
+        <wiremock.version>3.3.1</wiremock.version>
+        <mockito-junit-jupiter.version>5.3.1</mockito-junit-jupiter.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.microsoft.graph</groupId>
+            <artifactId>microsoft-graph</artifactId>
+            <version>${microsoft-graph.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.azure</groupId>
+            <artifactId>azure-identity</artifactId>
+            <version>${azure-identity.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+            <version>${junit-jupiter-engine.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-junit-jupiter</artifactId>
+            <version>${mockito-junit-jupiter.version}</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifestEntries>
+                            <!-- Module name matches this module's root Java package (previously copied from the S3 fetcher). -->
+                            <Automatic-Module-Name>org.apache.tika.pipes.fetchers.microsoftgraph</Automatic-Module-Name>
+                        </manifestEntries>
+                    </archive>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>${maven.shade.version}</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>
+                                false
+                            </createDependencyReducedPom>
+                            <!-- <filters> -->
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*</exclude>
+                                        <exclude>LICENSE.txt</exclude>
+                                        <exclude>NOTICE.txt</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <transformers>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                                    <resource>META-INF/LICENSE</resource>
+                                    <file>target/classes/META-INF/LICENSE</file>
+                                </transformer>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                                    <resource>META-INF/NOTICE</resource>
+                                    <file>target/classes/META-INF/NOTICE</file>
+                                </transformer>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                                    <resource>META-INF/DEPENDENCIES</resource>
+                                    <file>target/classes/META-INF/DEPENDENCIES</file>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+        </plugins>
+    </build>
+
+    <scm>
+        <tag>3.0.0-BETA-rc1</tag>
+    </scm>
+</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java
new file mode 100644
index 000000000..771790692
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph;
+
+import com.azure.identity.ClientCertificateCredentialBuilder;
+import com.azure.identity.ClientSecretCredentialBuilder;
+import com.microsoft.graph.serviceclient.GraphServiceClient;
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetcher.AbstractFetcher;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.ClientCertificateCredentialsConfig;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.ClientSecretCredentialsConfig;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.MsGraphFetcherConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+/**
+ * Fetches files from the Microsoft Graph API.
+ * <p>
+ * Fetch keys have the form {@code <siteDriveId>,<driveItemId>}.
+ * <p>
+ * A failed fetch is retried once per configured throttle delay; the fetcher sleeps for the
+ * corresponding number of seconds before each retry. With no delays configured, a single
+ * attempt is made.
+ */
+public class MicrosoftGraphFetcher extends AbstractFetcher implements Initializable {
+    private static final Logger LOGGER = LoggerFactory.getLogger(MicrosoftGraphFetcher.class);
+    private GraphServiceClient graphClient;
+    private MsGraphFetcherConfig msGraphFetcherConfig;
+    private long[] throttleSeconds;
+
+    public MicrosoftGraphFetcher() {
+
+    }
+
+    /**
+     * Creates a fetcher that builds its Graph client from the given configuration when
+     * {@link #initialize(Map)} is called.
+     *
+     * @param msGraphFetcherConfig credentials, scopes and optional throttle settings
+     */
+    public MicrosoftGraphFetcher(MsGraphFetcherConfig msGraphFetcherConfig) {
+        this.msGraphFetcherConfig = msGraphFetcherConfig;
+    }
+
+    /**
+     * Set seconds to throttle retries as a comma-delimited list, e.g.: 30,60,120,600
+     * Whitespace around individual values is ignored.
+     *
+     * @param commaDelimitedLongs comma-delimited list of delays in seconds
+     * @throws TikaConfigException if the value is null or any entry is not a valid long
+     */
+    @Field
+    public void setThrottleSeconds(String commaDelimitedLongs) throws TikaConfigException {
+        if (commaDelimitedLongs == null) {
+            throw new TikaConfigException("throttleSeconds must not be null");
+        }
+        String[] longStrings = commaDelimitedLongs.split(",");
+        long[] seconds = new long[longStrings.length];
+        for (int i = 0; i < longStrings.length; i++) {
+            try {
+                seconds[i] = Long.parseLong(longStrings[i].trim());
+            } catch (NumberFormatException e) {
+                // Keep the cause so the offending token is visible in stack traces.
+                throw new TikaConfigException(
+                        "Invalid throttleSeconds entry '" + longStrings[i] + "' in '" +
+                                commaDelimitedLongs + "'", e);
+            }
+        }
+        setThrottleSeconds(seconds);
+    }
+
+    /**
+     * Sets the delays, in seconds, to wait before each retry of a failed fetch.
+     *
+     * @param throttleSeconds one delay per retry; may be null or empty for no retries
+     */
+    public void setThrottleSeconds(long[] throttleSeconds) {
+        this.throttleSeconds = throttleSeconds;
+    }
+
+    /**
+     * Builds the Graph client from the configured credentials and scopes.
+     * <p>
+     * If no configuration is present, or the credentials type is not supported, the client is
+     * left unset and the problem is reported by
+     * {@link #checkInitialization(InitializableProblemHandler)}.
+     *
+     * @param map initialization parameters (not used by this fetcher)
+     */
+    @Override
+    public void initialize(Map<String, Param> map) {
+        if (msGraphFetcherConfig == null) {
+            // Nothing to build the client from; checkInitialization reports this.
+            return;
+        }
+        String[] scopes = msGraphFetcherConfig.getScopes() == null ? new String[0] :
+                msGraphFetcherConfig.getScopes().toArray(new String[0]);
+        Object credentials = msGraphFetcherConfig.getCredentials();
+        if (credentials instanceof ClientCertificateCredentialsConfig) {
+            ClientCertificateCredentialsConfig certificate =
+                    (ClientCertificateCredentialsConfig) credentials;
+            graphClient = new GraphServiceClient(new ClientCertificateCredentialBuilder()
+                    .clientId(certificate.getClientId())
+                    .tenantId(certificate.getTenantId())
+                    .pfxCertificate(new ByteArrayInputStream(certificate.getCertificateBytes()))
+                    .clientCertificatePassword(certificate.getCertificatePassword())
+                    .build(), scopes);
+        } else if (credentials instanceof ClientSecretCredentialsConfig) {
+            ClientSecretCredentialsConfig secret = (ClientSecretCredentialsConfig) credentials;
+            graphClient = new GraphServiceClient(
+                    new ClientSecretCredentialBuilder()
+                            .tenantId(secret.getTenantId())
+                            .clientId(secret.getClientId())
+                            .clientSecret(secret.getClientSecret()).build(), scopes);
+        } else {
+            LOGGER.warn("Unsupported or missing credentials configuration: {}",
+                    credentials == null ? null : credentials.getClass().getName());
+        }
+    }
+
+    /**
+     * Verifies that a Graph client is available.
+     *
+     * @param initializableProblemHandler problem handler (not used; problems are thrown)
+     * @throws TikaConfigException if no Graph client has been set up
+     */
+    @Override
+    public void checkInitialization(InitializableProblemHandler initializableProblemHandler)
+            throws TikaConfigException {
+        if (graphClient != null) {
+            return;
+        }
+        if (msGraphFetcherConfig == null) {
+            throw new TikaConfigException("msGraphFetcherConfig must be set");
+        }
+        throw new TikaConfigException(
+                "Graph client was not created; credentials must be a " +
+                        "ClientCertificateCredentialsConfig or a ClientSecretCredentialsConfig");
+    }
+
+    /**
+     * Fetches the content of a drive item.
+     *
+     * @param fetchKey key of the form {@code <siteDriveId>,<driveItemId>}
+     * @param metadata metadata object (not modified by this fetcher)
+     * @return the content stream returned by the Graph client
+     * @throws TikaException if the client is not initialized, the fetch key is malformed,
+     *                       the wait before a retry is interrupted, or every attempt fails
+     * @throws IOException   declared by the fetcher contract
+     */
+    @Override
+    public InputStream fetch(String fetchKey, Metadata metadata) throws TikaException, IOException {
+        if (graphClient == null) {
+            throw new TikaException("Microsoft Graph client has not been initialized");
+        }
+        // Validate the key up front: a malformed key can never succeed, so do not retry it.
+        String[] fetchKeySplit = fetchKey == null ? new String[0] : fetchKey.split(",");
+        if (fetchKeySplit.length < 2) {
+            throw new TikaException(
+                    "Fetch key must be of the form <siteDriveId>,<driveItemId> but was: " +
+                            fetchKey);
+        }
+        String siteDriveId = fetchKeySplit[0];
+        String driveItemId = fetchKeySplit[1];
+
+        long[] retryDelays = resolveThrottleSeconds();
+        Exception lastFailure = null;
+        int tries = 0;
+        while (true) {
+            try {
+                long start = System.currentTimeMillis();
+                InputStream is = graphClient.drives().byDriveId(siteDriveId)
+                        .items()
+                        .byDriveItemId(driveItemId)
+                        .content()
+                        .get();
+
+                long elapsed = System.currentTimeMillis() - start;
+                LOGGER.debug("Total to fetch {}", elapsed);
+                return is;
+            } catch (Exception e) {
+                LOGGER.warn("Exception fetching {} on retry={}", fetchKey, tries, e);
+                lastFailure = e;
+            }
+            if (tries >= retryDelays.length) {
+                // No delays left: give up without a pointless final sleep.
+                break;
+            }
+            long delaySeconds = retryDelays[tries++];
+            LOGGER.warn("Sleeping for {} seconds before retry", delaySeconds);
+            try {
+                // Delays are configured in seconds; Thread.sleep would treat them as millis.
+                java.util.concurrent.TimeUnit.SECONDS.sleep(delaySeconds);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                throw new TikaException(
+                        "Interrupted while waiting to retry fetch of " + fetchKey, lastFailure);
+            }
+        }
+        throw new TikaException("Could not fetch " + fetchKey, lastFailure);
+    }
+
+    /**
+     * Returns the retry delays to use: the value set on this fetcher, else the value from the
+     * configuration object, else an empty array (no retries).
+     */
+    private long[] resolveThrottleSeconds() {
+        if (throttleSeconds != null) {
+            return throttleSeconds;
+        }
+        if (msGraphFetcherConfig != null && msGraphFetcherConfig.getThrottleSeconds() != null) {
+            return msGraphFetcherConfig.getThrottleSeconds();
+        }
+        return new long[0];
+    }
+}
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/AadCredentialConfigBase.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/AadCredentialConfigBase.java
new file mode 100644
index 000000000..e4204739c
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/AadCredentialConfigBase.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+/** Base class for credential configurations; holds the tenant id and client id shared by subclasses. */
+public abstract class AadCredentialConfigBase {
+    private String tenantId; // handed to the credential builder's tenantId(...) by MicrosoftGraphFetcher
+    private String clientId; // handed to the credential builder's clientId(...) by MicrosoftGraphFetcher
+
+    public String getTenantId() {
+        return tenantId;
+    }
+
+    public AadCredentialConfigBase setTenantId(String tenantId) {
+        this.tenantId = tenantId;
+        return this; // returns this instance, typed as the base class, so calls can be chained
+    }
+
+    public String getClientId() {
+        return clientId;
+    }
+
+    public AadCredentialConfigBase setClientId(String clientId) {
+        this.clientId = clientId;
+        return this; // returns this instance, typed as the base class, so calls can be chained
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/Client2CertificateCredentialsConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/Client2CertificateCredentialsConfig.java
new file mode 100644
index 000000000..d9128373e
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/Client2CertificateCredentialsConfig.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+/** Standalone holder for a tenant id, client id and client secret, with chainable setters. */
+public class Client2CertificateCredentialsConfig { // NOTE(review): does not extend AadCredentialConfigBase; fields mirror ClientSecretCredentialsConfig - confirm intent
+    private String tenantId;
+    private String clientId;
+    private String clientSecret;
+
+    public String getTenantId() {
+        return tenantId;
+    }
+
+    public Client2CertificateCredentialsConfig setTenantId(String tenantId) {
+        this.tenantId = tenantId;
+        return this; // returns this instance so calls can be chained
+    }
+
+    public String getClientId() {
+        return clientId;
+    }
+
+    public Client2CertificateCredentialsConfig setClientId(String clientId) {
+        this.clientId = clientId;
+        return this; // returns this instance so calls can be chained
+    }
+
+    public String getClientSecret() {
+        return clientSecret;
+    }
+
+    public Client2CertificateCredentialsConfig setClientSecret(String clientSecret) {
+        this.clientSecret = clientSecret;
+        return this; // returns this instance so calls can be chained
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientCertificateCredentialsConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientCertificateCredentialsConfig.java
new file mode 100644
index 000000000..2927519f1
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientCertificateCredentialsConfig.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+/** Credentials configuration carrying a client certificate (as raw bytes) and its password. */
+public class ClientCertificateCredentialsConfig extends AadCredentialConfigBase {
+    private byte[] certificateBytes; // MicrosoftGraphFetcher wraps these bytes in a stream for pfxCertificate(...)
+    private String certificatePassword; // MicrosoftGraphFetcher passes this to clientCertificatePassword(...)
+
+    public byte[] getCertificateBytes() {
+        return certificateBytes; // returns the stored array itself, not a copy
+    }
+
+    public ClientCertificateCredentialsConfig setCertificateBytes(byte[] certificateBytes) {
+        this.certificateBytes = certificateBytes; // stores the caller's array without copying
+        return this;
+    }
+
+    public String getCertificatePassword() {
+        return certificatePassword;
+    }
+
+    public ClientCertificateCredentialsConfig setCertificatePassword(String certificatePassword) {
+        this.certificatePassword = certificatePassword;
+        return this; // returns this instance so calls can be chained
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientSecretCredentialsConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientSecretCredentialsConfig.java
new file mode 100644
index 000000000..2989af941
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/ClientSecretCredentialsConfig.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+/** Credentials configuration carrying a client secret in addition to the inherited tenant and client ids. */
+public class ClientSecretCredentialsConfig extends AadCredentialConfigBase {
+    private String clientSecret; // MicrosoftGraphFetcher passes this to the credential builder's clientSecret(...)
+
+    public String getClientSecret() {
+        return clientSecret;
+    }
+
+    public ClientSecretCredentialsConfig setClientSecret(String clientSecret) {
+        this.clientSecret = clientSecret;
+        return this; // returns this instance so calls can be chained
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MsGraphFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MsGraphFetcherConfig.java
new file mode 100644
index 000000000..46e365893
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MsGraphFetcherConfig.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph.config;
+
+import org.apache.tika.pipes.fetcher.config.AbstractConfig;
+
+import java.util.ArrayList;
+import java.util.List;
+/** Configuration for the Microsoft Graph fetcher: credentials, scopes, throttle seconds and a spool-to-temp flag. */
+public class MsGraphFetcherConfig extends AbstractConfig {
+    private long[] throttleSeconds;
+    private boolean spoolToTemp;
+    private AadCredentialConfigBase credentials; // MicrosoftGraphFetcher.initialize() switches on the concrete subtype
+
+    private List<String> scopes = new ArrayList<>(); // starts as an empty, mutable list
+    public boolean isSpoolToTemp() {
+        return spoolToTemp;
+    }
+
+    public MsGraphFetcherConfig setSpoolToTemp(boolean spoolToTemp) {
+        this.spoolToTemp = spoolToTemp;
+        return this; // every setter returns this instance so calls can be chained
+    }
+
+    public long[] getThrottleSeconds() {
+        return throttleSeconds; // returns the stored array itself, not a copy
+    }
+
+    public MsGraphFetcherConfig setThrottleSeconds(long[] throttleSeconds) {
+        this.throttleSeconds = throttleSeconds;
+        return this;
+    }
+
+    public AadCredentialConfigBase getCredentials() {
+        return credentials;
+    }
+
+    public MsGraphFetcherConfig setCredentials(AadCredentialConfigBase credentials) {
+        this.credentials = credentials;
+        return this;
+    }
+
+    public List<String> getScopes() {
+        return scopes; // returns the stored list itself, not a copy
+    }
+
+    public MsGraphFetcherConfig setScopes(List<String> scopes) {
+        this.scopes = scopes; // replaces the default list with the caller's list
+        return this;
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcherTest.java
new file mode 100644
index 000000000..059a93265
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcherTest.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetchers.microsoftgraph;
+
+import com.microsoft.graph.drives.DrivesRequestBuilder;
+import com.microsoft.graph.drives.item.DriveItemRequestBuilder;
+import com.microsoft.graph.drives.item.items.ItemsRequestBuilder;
+import com.microsoft.graph.drives.item.items.item.DriveItemItemRequestBuilder;
+import com.microsoft.graph.drives.item.items.item.content.ContentRequestBuilder;
+import com.microsoft.graph.serviceclient.GraphServiceClient;
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.ClientCertificateCredentialsConfig;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.MsGraphFetcherConfig;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+import org.mockito.Spy;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+/** Unit test for MicrosoftGraphFetcher.fetch that replaces the Graph request-builder chain with Mockito mocks. */
+@ExtendWith(MockitoExtension.class)
+class MicrosoftGraphFetcherTest {
+    private static final Logger LOGGER = LoggerFactory.getLogger(MicrosoftGraphFetcherTest.class);
+    static byte[] certificateBytes = "test cert file here".getBytes(StandardCharsets.UTF_8); // placeholder bytes, not a real certificate
+    static String certificatePassword = "somepasswordhere";
+    static String clientId = "12312312-1234-1234-1234-112312312313";
+    static String tenantId = "32132132-4332-5432-4321-121231231232";
+    static String siteDriveId = "99999999-1234-1111-1111-12312312312"; // first half of the fetch key
+    static String driveItemid = "asfsadfsadfsafdusahdfiuhfdsusadfjuafiagfaigf"; // second half of the fetch key
+
+    @Mock
+    GraphServiceClient graphClient; // mocked client whose drives() call is stubbed in fetch()
+    @Spy
+    @SuppressWarnings("unused")
+    MsGraphFetcherConfig msGraphFetcherConfig = new MsGraphFetcherConfig()
+            .setCredentials(new ClientCertificateCredentialsConfig()
+                    .setCertificateBytes(certificateBytes)
+                    .setCertificatePassword(certificatePassword)
+                    .setClientId(clientId)
+                    .setTenantId(tenantId))
+            .setScopes(Collections.singletonList(".default"));
+
+    @Mock
+    DrivesRequestBuilder drivesRequestBuilder;
+
+    @Mock
+    DriveItemRequestBuilder driveItemRequestBuilder;
+
+    @Mock
+    ItemsRequestBuilder itemsRequestBuilder;
+
+    @Mock
+    DriveItemItemRequestBuilder driveItemItemRequestBuilder;
+
+    @Mock
+    ContentRequestBuilder contentRequestBuilder;
+
+    @InjectMocks
+    MicrosoftGraphFetcher microsoftGraphFetcher; // object under test; initialize() is never called in this test
+
+    @Test
+    void fetch() throws Exception {
+        try (AutoCloseable ignored = MockitoAnnotations.openMocks(this)) { // NOTE(review): mocks are opened here in addition to MockitoExtension - confirm both are required
+            Mockito.when(graphClient.drives()).thenReturn(drivesRequestBuilder); // stub each step of the builder chain that fetch() walks
+            Mockito.when(drivesRequestBuilder.byDriveId(siteDriveId)).thenReturn(driveItemRequestBuilder);
+            Mockito.when(driveItemRequestBuilder.items()).thenReturn(itemsRequestBuilder);
+            Mockito.when(itemsRequestBuilder.byDriveItemId(driveItemid)).thenReturn(driveItemItemRequestBuilder);
+            Mockito.when(driveItemItemRequestBuilder.content()).thenReturn(contentRequestBuilder);
+            String content = "content";
+            Mockito.when(contentRequestBuilder.get()).thenReturn(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)));
+            InputStream resultingInputStream = microsoftGraphFetcher.fetch(siteDriveId + "," + driveItemid, new Metadata()); // fetch key is "<siteDriveId>,<driveItemId>"
+            Assertions.assertEquals(content, IOUtils.toString(resultingInputStream, StandardCharsets.UTF_8)); // the stubbed stream must come back unchanged
+        }
+    }
+}
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/resources/log4j2.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/resources/log4j2.xml
new file mode 100644
index 000000000..c88e66e99
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/test/resources/log4j2.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<Configuration status="WARN">
+  <Appenders>
+    <Console name="Console" target="SYSTEM_ERR">
+      <PatternLayout pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n"/>
+    </Console>
+  </Appenders>
+  <Loggers>
+    <Root level="info">
+      <AppenderRef ref="Console"/>
+    </Root>
+  </Loggers>
+</Configuration>
\ No newline at end of file