You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2007/05/25 19:15:57 UTC

svn commit: r541726 - /incubator/abdera/java/trunk/extensions/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java

Author: jmsnell
Date: Fri May 25 10:15:57 2007
New Revision: 541726

URL: http://svn.apache.org/viewvc?view=rev&rev=541726
Log:
Add direction guessing algorithms.  These provide generally reasonable results when the dir attribute has not been set explicitly.  There are three methods:

 1. Defer to Java Bidi analysis.
 2. Use the in-scope language tag or default locale (implemented by IE7)
 3. Analyze the bidi properties of each character (implemented by Snarfer)
 
In each case, the default is to defer to the dir attribute if present, otherwise run the guessing algorithm.
 
 

Modified:
    incubator/abdera/java/trunk/extensions/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java

Modified: incubator/abdera/java/trunk/extensions/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java?view=diff&rev=541726&r1=541725&r2=541726
==============================================================================
--- incubator/abdera/java/trunk/extensions/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java (original)
+++ incubator/abdera/java/trunk/extensions/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java Fri May 25 10:15:57 2007
@@ -17,11 +17,16 @@
 */
 package org.apache.abdera.ext.bidi;
 
+import java.text.AttributedString;
+import java.text.Bidi;
+import java.util.Locale;
+
 import javax.xml.namespace.QName;
 
 import org.apache.abdera.model.Base;
 import org.apache.abdera.model.Element;
 import org.apache.abdera.i18n.io.CharUtils;
+import org.apache.abdera.i18n.lang.Lang;
 
 /**
  * <p>This is (hopefully) temporary.  Ideally, this would be wrapped into the 
@@ -29,7 +34,7 @@
  * still details being worked out on the Atom WG list and it's likely that
  * at least one other impl (mozilla) will do something slightly different.</p>
  * 
- * <p>Based on http://www.ietf.org/internet-drafts/draft-snell-atompub-bidi-02.txt</p>
+ * <p>Based on http://www.ietf.org/internet-drafts/draft-snell-atompub-bidi-04.txt</p>
  * 
  * <p>Example:</p>
  * <pre>
@@ -157,4 +162,147 @@
     return getBidiText(getDirection(element),element.getAttributeValue(name));
   }
   
+  
+  /**
+   * Attempt to guess the base direction using the in-scope language.  
+   * Implements the method used by Internet Explorer 7's feed view
+   * documented here: http://blogs.msdn.com/rssteam/archive/2007/05/17/reading-feeds-in-right-to-left-order.aspx.
+   * This algorithm differs slightly from the documented method in that the 
+   * primary language tag is compared case-insensitively.
+   * 
+   * If the language tag is not specified, then the default Locale is used to 
+   * determine the direction.  If the dir attribute is specified, the direction 
+   * will be determined using its value instead of the language.
+   * 
+   * @param element the element whose base direction is to be guessed
+   * @return the result of guessDirectionFromLanguage(element, false)
+   */
+  public static <T extends Element>Direction guessDirectionFromLanguage(T element) {
+    return guessDirectionFromLanguage(element, false); // false: honor an in-scope dir attribute
+  }
+  
+  /**
+   * Attempt to guess the base direction using the in-scope language.  
+   * Implements the method used by Internet Explorer 7's feed view
+   * documented here: http://blogs.msdn.com/rssteam/archive/2007/05/17/reading-feeds-in-right-to-left-order.aspx.
+   * This algorithm differs slightly from the documented method in that the 
+   * primary language tag is compared case-insensitively.
+   * If the language tag is not specified, then the default Locale is used to 
+   * determine the direction.
+   * According to the Atom Bidi spec, if the dir attribute is set explicitly, we 
+   * should not do language guessing.  This restriction can be bypassed by setting
+   * ignoredir to true.
+   * @param element the element whose base direction is to be guessed
+   * @param ignoredir true to guess from the language even when a dir attribute is in scope
+   * @return the explicit direction when one is in scope and ignoredir is false; otherwise RTL for the primary tags ar, fa, ur, ps, syr, dv, he and yi, and LTR for anything else
+   */
+  public static <T extends Element>Direction guessDirectionFromLanguage(T element, boolean ignoredir) {
+    if (!ignoredir && hasDirection(element)) return getDirection(element); // an in-scope dir attribute wins
+    Lang lang = element.getLanguageTag();
+    if (lang == null) {
+      Locale l = Locale.getDefault(); // getLanguageTag() returned null: fall back to the JVM default locale
+      lang = new Lang(l.getLanguage());
+    }
+    String primary = lang.getPrimary();
+    return (primary.equalsIgnoreCase("ar") ||   // Arabic
+            primary.equalsIgnoreCase("fa") ||   // Persian
+            primary.equalsIgnoreCase("ur") ||   // Urdu
+            primary.equalsIgnoreCase("ps") ||   // Pashto
+            primary.equalsIgnoreCase("syr") ||  // Syriac
+            primary.equalsIgnoreCase("dv") ||   // Divehi
+            primary.equalsIgnoreCase("he") ||   // Hebrew
+            primary.equalsIgnoreCase("yi")) ? Direction.RTL : Direction.LTR; 
+  }
+
+  /**
+   * Attempt to guess the base direction of an element using an analysis of
+   * the directional properties of the characters used.  This is a brute-force
+   * style approach that can achieve fairly reasonable results when the element
+   * text consists primarily of characters with the same bidi properties.  This
+   * approach is implemented by the Snarfer feed reader and is documented at
+   * http://www.xn--8ws00zhy3a.com/blog/2006/12/right-to-left-rss   
+   * 
+   * If the dir attribute is specified, the direction will be determined using its value 
+   * instead of the characteristics of the text.
+   */
+  public static <T extends Element>Direction guessDirectionFromTextProperties(T element) {
+    return guessDirectionFromTextProperties(element, false); // false: honor an in-scope dir attribute
+  }
+  
+  /**
+   * Attempt to guess the base direction of an element using an analysis of
+   * the directional properties of the characters used.  This is a brute-force
+   * style approach that can achieve fairly reasonable results when the element
+   * text consists primarily of characters with the same bidi properties.  This
+   * approach is implemented by the Snarfer feed reader and is documented at
+   * http://www.xn--8ws00zhy3a.com/blog/2006/12/right-to-left-rss   
+   * 
+   * According to the Atom Bidi spec, if the dir attribute is set explicitly, we 
+   * should not do direction guessing.  This restriction can be bypassed by setting
+   * ignoredir to true.  Returns UNSPECIFIED when getText() returns null.
+   */
+  public static <T extends Element>Direction guessDirectionFromTextProperties(T element, boolean ignoredir) {
+    Direction dir = Direction.UNSPECIFIED; // result when there is no text to analyze
+    if (!ignoredir && hasDirection(element)) return getDirection(element); // an in-scope dir attribute wins
+    String text = element.getText();
+    if (text != null) {
+      int c = 0; // running tally: +1 per char that requires bidi, -1 per char that does not
+      for (int n = 0; n < text.length(); n++) {
+        char ch = text.charAt(n);
+        if (Bidi.requiresBidi(new char[] {ch}, 0, 1)) c++; // each UTF-16 char is tested on its own
+        else c--;
+      }
+      dir = (c > 0) ? Direction.RTL : Direction.LTR; // a tie (including empty text) resolves to LTR
+    }
+    return dir;
+  }
+
+  /**
+   * Use Java's built in support for bidi text to determine the base directionality
+   * of the element's text.  The response to this only indicates the *base* directionality;
+   * it does not indicate whether or not there are any RTL characters in the text.
+   * 
+   * If the dir attribute is specified, the direction will be determined using its value 
+   * instead of the characteristics of the text.
+   */
+  public static <T extends Element>Direction guessDirectionFromJavaBidi(T element) {
+    return guessDirectionFromJavaBidi(element, false); // false: honor an in-scope dir attribute
+  }
+  
+  /**
+   * Use Java's built in support for bidi text to determine the base directionality
+   * of the element's text.  The response to this only indicates the *base* directionality;
+   * it does not indicate whether or not there are any RTL characters in the text.
+   * 
+   * According to the Atom Bidi spec, if the dir attribute is set explicitly, we 
+   * should not do direction guessing.  This restriction can be bypassed by setting
+   * ignoredir to true.  Returns UNSPECIFIED when getText() returns null.
+   */
+  public static <T extends Element>Direction guessDirectionFromJavaBidi(T element, boolean ignoredir) {
+    Direction dir = Direction.UNSPECIFIED; // result when there is no text to analyze
+    if (!ignoredir && hasDirection(element)) return getDirection(element); // an in-scope dir attribute wins
+    String text = element.getText();
+    if (text != null) {
+      AttributedString s = new AttributedString(text); // no RUN_DIRECTION attribute is set here, so Bidi picks the base level itself (see java.text.Bidi)
+      Bidi bidi = new Bidi(s.getIterator());
+      dir = (bidi.baseIsLeftToRight()) ? Direction.LTR : Direction.RTL;
+    }
+    return dir;
+  }
+  
+  private static <T extends Element>boolean hasDirection(T element) { // true if the nearest dir attribute found on element or its ancestors is non-empty
+    boolean answer = false;
+    String dir = element.getAttributeValue("dir");
+    if (dir != null && dir.length() > 0)
+      answer = true;
+    else if (dir == null) { // an empty dir="" skips this branch: answer stays false and ancestors are not consulted
+      // if the direction is unspecified on this element, 
+      // let's see if we've inherited it
+      Base parent = element.getParentElement(); 
+      if (parent != null && 
+          parent instanceof Element)
+        answer = hasDirection((Element)parent); // recurse up the tree one parent at a time
+    }
+    return answer;
+  }
 }