You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2023/01/29 16:39:35 UTC

svn commit: r1907078 - in /pdfbox/trunk/fontbox/src: main/java/org/apache/fontbox/ttf/ main/java/org/apache/fontbox/ttf/gsub/ main/java/org/apache/fontbox/ttf/model/ test/java/org/apache/fontbox/ttf/ test/java/org/apache/fontbox/ttf/gsub/

Author: lehmi
Date: Sun Jan 29 16:39:35 2023
New Revision: 1907078

URL: http://svn.apache.org/viewvc?rev=1907078&view=rev
Log:
PDFBOX-4189: support not-yet-supported languages by returning a DefaultGsubWorker, as proposed by Vladimir Plizga; closes #153

Added:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorker.java   (with props)
    pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableLiberationFontTest.java   (with props)
    pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorkerTest.java   (with props)
Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java?rev=1907078&r1=1907077&r2=1907078&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java Sun Jan 29 16:39:35 2023
@@ -26,6 +26,7 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -701,6 +702,39 @@ public class GlyphSubstitutionTable exte
         return gsubData;
     }
 
+    /**
+     * Builds a new {@link GsubData} instance for the given script tag. Unlike the sibling {@link #getGsubData()}
+     * method, this one does not try to find the first supported language and load GSUB data for it. Instead, it fetches
+     * the data for the given {@code scriptTag} (if the font supports it), leaving the language unspecified. This
+     * means that even after GSUB data has been read successfully, the actual glyph substitution may not work if there
+     * is no corresponding {@link GsubWorker} implementation for it.
+     *
+     * @implNote This method performs the search on every invocation (no results are cached).
+     * @param scriptTag a <a href="https://learn.microsoft.com/en-us/typography/opentype/spec/scripttags">script tag</a>
+     * for which the data is needed
+     * @return GSUB data for the given script or {@code null} if the font has no such script
+     */
+    public GsubData getGsubData(String scriptTag)
+    {
+        ScriptTable scriptTable = scriptList.get(scriptTag);
+        if (scriptTable == null)
+        {
+            return null;
+        }
+        return new GlyphSubstitutionDataExtractor().getGsubData(scriptTag, scriptTable,
+                featureListTable, lookupListTable);
+    }
+
+    /**
+     * @return an unmodifiable view of the
+     * <a href="https://learn.microsoft.com/en-us/typography/opentype/spec/scripttags">script tags</a> for which this
+     * GSUB table has records, i.e. the keys of its script list
+     */
+    public Set<String> getSupportedScriptTags()
+    {
+        return Collections.unmodifiableSet(scriptList.keySet());
+    }
+
     private RangeRecord readRangeRecord(TTFDataStream data) throws IOException
     {
         int startGlyphID = data.readUnsignedShort();

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java?rev=1907078&r1=1907077&r2=1907078&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java Sun Jan 29 16:39:35 2023
@@ -640,7 +640,9 @@ public class TrueTypeFont implements Fon
     /**
      * Returns the GSubData of the GlyphSubstitutionTable if present.
      * 
-     * @return the GSubData of the GlyphSubstitutionTable
+     * @return the GSubData of the GlyphSubstitutionTable or {@link GsubData#NO_DATA_FOUND} if either no GSUB data is
+     * available or its scripts are not supported
+     * 
      * @throws IOException if the font data could not be read
      */
     public GsubData getGsubData() throws IOException

Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorker.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorker.java?rev=1907078&view=auto
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorker.java (added)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorker.java Sun Jan 29 16:39:35 2023
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.ttf.gsub;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.fontbox.ttf.GlyphSubstitutionTable;
+
+/**
+ * A default implementation of {@link GsubWorker} that does not actually transform the glyphs, yet allows GSUB table
+ * data to be correctly {@linkplain GlyphSubstitutionTable#getGsubData(String) loaded} even from fonts for which a
+ * complete glyph substitution is not implemented.
+ *
+ * @author Vladimir Plizga
+ */
+class DefaultGsubWorker implements GsubWorker
+{
+    private static final Log LOG = LogFactory.getLog(DefaultGsubWorker.class);
+
+    @Override
+    public List<Integer> applyTransforms(List<Integer> originalGlyphIds)
+    {
+        LOG.warn(getClass().getSimpleName() + " class does not perform actual GSUB substitutions. "
+                + "Perhaps the selected language is not yet supported by the FontBox library.");
+        // Return a read-only view (not a copy) so that callers cannot modify the source list through the result
+        return Collections.unmodifiableList(originalGlyphIds);
+    }
+}

Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java?rev=1907078&r1=1907077&r2=1907078&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java Sun Jan 29 16:39:35 2023
@@ -64,7 +64,28 @@ public class GlyphSubstitutionDataExtrac
         {
             return GsubData.NO_DATA_FOUND;
         }
+        return buildMapBackedGsubData(featureListTable, lookupListTable, scriptTableDetails);
+    }
 
+    /**
+     * Unlike {@link #getGsubData(Map, FeatureListTable, LookupListTable)}, this method doesn't iterate over supported
+     * {@link Language}s searching for the first match with the scripts of the font. Instead, it unconditionally
+     * creates a {@link ScriptTableDetails} with the language left {@linkplain Language#UNSPECIFIED unspecified}.
+     *
+     * @return {@link GsubData} instance built especially for the given {@code scriptName}
+     */
+    public GsubData getGsubData(String scriptName, ScriptTable scriptTable,
+            FeatureListTable featureListTable, LookupListTable lookupListTable)
+    {
+        ScriptTableDetails scriptTableDetails = new ScriptTableDetails(Language.UNSPECIFIED,
+                scriptName, scriptTable);
+
+        return buildMapBackedGsubData(featureListTable, lookupListTable, scriptTableDetails);
+    }
+
+    private MapBackedGsubData buildMapBackedGsubData(FeatureListTable featureListTable,
+            LookupListTable lookupListTable, ScriptTableDetails scriptTableDetails)
+    {
         ScriptTable scriptTable = scriptTableDetails.getScriptTable();
 
         Map<String, Map<List<Integer>, Integer>> gsubData = new LinkedHashMap<>();

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java?rev=1907078&r1=1907077&r2=1907078&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java Sun Jan 29 16:39:35 2023
@@ -37,8 +37,7 @@ public class GsubWorkerFactory
         case BENGALI:
             return new GsubWorkerForBengali(cmapLookup, gsubData);
         default:
-            throw new UnsupportedOperationException(
-                    "The language " + gsubData.getLanguage() + " is not yet supported");
+            return new DefaultGsubWorker();
         }
 
     }

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java?rev=1907078&r1=1907077&r2=1907078&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java Sun Jan 29 16:39:35 2023
@@ -33,7 +33,15 @@ import org.apache.fontbox.ttf.table.comm
 public enum Language
 {
 
-    BENGALI(new String[] { "bng2", "beng" });
+    BENGALI(new String[] { "bng2", "beng" }),
+
+    /**
+     * An entry explicitly denoting the absence of any concrete language. May be useful when no actual glyph
+     * substitution is required but only the content of GSUB table is of interest.
+     *
+     * Must be the last one as it is not a language per se.
+     */
+    UNSPECIFIED(new String[0]);
 
     private final String[] scriptNames;
 

Added: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableLiberationFontTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableLiberationFontTest.java?rev=1907078&view=auto
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableLiberationFontTest.java (added)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableLiberationFontTest.java Sun Jan 29 16:39:35 2023
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.ttf;
+
+import static java.util.Arrays.asList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.fontbox.ttf.model.GsubData;
+import org.apache.fontbox.ttf.model.Language;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Tests of {@link GlyphSubstitutionTable} against the {@code LiberationSans-Regular} font
+ *
+ * @author Vladimir Plizga
+ */
+class GlyphSubstitutionTableLiberationFontTest
+{
+
+    private OpenTypeFont font;
+
+    @BeforeEach
+    void setUp() throws IOException
+    {
+        OTFParser otfParser = new OTFParser();
+        String fontPath = "src/test/resources/ttf/LiberationSans-Regular.ttf";
+        try (RandomAccessRead fontFile = new RandomAccessReadBufferedFile(fontPath))
+        {
+            font = otfParser.parse(fontFile);
+        }
+    }
+
+    @AfterEach
+    void tearDown() throws IOException
+    {
+        font.close();
+    }
+
+    @Test
+    @DisplayName("getGsubData() with no args yields empty result ")
+    void getGsubDataDefault() throws IOException
+    {
+        // given: the font parsed in setUp()
+
+        // when
+        GsubData gsubData = font.getGsubData();
+
+        // then
+        assertNotNull(gsubData);
+        assertSame(GsubData.NO_DATA_FOUND, gsubData);
+    }
+
+    @Test
+    @DisplayName("getGsubData() for an unsupported script yields null")
+    void getGsubDataForUnsupportedScriptTag() throws IOException
+    {
+        // given
+        GlyphSubstitutionTable gsub = font.getGsub();
+
+        // when
+        GsubData gsubData = gsub.getGsubData("<some_non_existent_script_tag>");
+
+        // then
+        assertNull(gsubData);
+    }
+
+    @Test
+    @DisplayName("getGsubData() for 'cyrl' tag yields GSUB features of Cyrillic script")
+    void testGetGsubDataForCyrillic() throws IOException
+    {
+        // given
+        GlyphSubstitutionTable gsub = font.getGsub();
+        String cyrillicScriptTag = "cyrl";
+        List<String> expectedFeatures = asList("subs", "sups");
+
+        // when
+        GsubData cyrillicGsubData = gsub.getGsubData(cyrillicScriptTag);
+
+        // then
+        assertNotNull(cyrillicGsubData);
+        assertEquals(cyrillicScriptTag, cyrillicGsubData.getActiveScriptName());
+        assertEquals(new HashSet<>(expectedFeatures), cyrillicGsubData.getSupportedFeatures());
+    }
+
+    @Test
+    @DisplayName("All the script tags are loaded from GSUB as is")
+    void getSupportedScriptTags() throws IOException
+    {
+        // given
+        GlyphSubstitutionTable gsub = font.getGsub();
+        List<String> expectedSet = asList("DFLT", "bopo", "copt", "cyrl", "grek", "hebr", "latn");
+
+        // when
+        Set<String> supportedScriptTags = gsub.getSupportedScriptTags();
+
+        // then
+        assertEquals(new HashSet<>(expectedSet), supportedScriptTags);
+    }
+
+    @DisplayName("GSUB data is loaded for all scripts supported by the font")
+    @ParameterizedTest
+    @ValueSource(strings = { "DFLT", "bopo", "copt", "cyrl", "grek", "hebr", "latn" })
+    void checkGsubDataLoadingForAllSupportedScripts(String scriptTag) throws IOException
+    {
+        // given
+        GlyphSubstitutionTable gsub = font.getGsub();
+
+        // when
+        GsubData gsubData = gsub.getGsubData(scriptTag);
+
+        // then: real data (not the NO_DATA_FOUND sentinel) with the language left unspecified
+        assertNotNull(gsubData);
+        assertNotSame(GsubData.NO_DATA_FOUND, gsubData);
+
+        assertEquals(Language.UNSPECIFIED, gsubData.getLanguage());
+        assertEquals(scriptTag, gsubData.getActiveScriptName());
+    }
+
+}
\ No newline at end of file

Propchange: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableLiberationFontTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorkerTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorkerTest.java?rev=1907078&view=auto
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorkerTest.java (added)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorkerTest.java Sun Jan 29 16:39:35 2023
@@ -0,0 +1,50 @@
+package org.apache.fontbox.ttf.gsub;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.function.Executable;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * @author Vladimir Plizga
+ */
+class DefaultGsubWorkerTest
+{
+
+    @Test
+    @DisplayName("Transformation result is actually a read-only version of the argument")
+    void applyTransforms()
+    {
+        // given: a mutable source list (Arrays.asList alone is fixed-size and would reject clear() by itself)
+        DefaultGsubWorker sut = new DefaultGsubWorker();
+        List<Integer> originalGlyphIds = new java.util.ArrayList<>(Arrays.asList(1, 2, 3, 4, 5));
+
+        // when: the worker "transforms" the list and the caller then tries to modify the result
+        List<Integer> pseudoTransformedIds = sut.applyTransforms(originalGlyphIds);
+        Executable modification = pseudoTransformedIds::clear;
+
+        // then: the content is unchanged and the modification is rejected by the read-only wrapper
+        assertEquals(originalGlyphIds, pseudoTransformedIds);
+        assertThrows(UnsupportedOperationException.class, modification);
+    }
+}
\ No newline at end of file

Propchange: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/DefaultGsubWorkerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native