You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@myfaces.apache.org by de...@apache.org on 2017/09/17 12:29:26 UTC

[myfaces-trinidad] branch mstarets_webcrawler created (now f3743ca)

This is an automated email from the ASF dual-hosted git repository.

deki pushed a change to branch mstarets_webcrawler
in repository https://gitbox.apache.org/repos/asf/myfaces-trinidad.git.


      at f3743ca  Agent support for web crawlers

This branch includes the following new commits:

     new 992807e  Experimental branch to improve web crawler support
     new 7958e92  Agent support for web crawlers
     new f3743ca  Agent support for web crawlers

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


-- 
To stop receiving notification emails like this one, please contact
"commits@myfaces.apache.org" <co...@myfaces.apache.org>.

[myfaces-trinidad] 01/03: Experimental branch to improve web crawler support

Posted by de...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

deki pushed a commit to branch mstarets_webcrawler
in repository https://gitbox.apache.org/repos/asf/myfaces-trinidad.git

commit 992807ea620b1bdb47c8ac71b11385a983d7f3c7
Author: Max Starets <ms...@apache.org>
AuthorDate: Mon Sep 20 15:34:28 2010 +0000

    Experimental branch to improve web crawler support

-- 
To stop receiving notification emails like this one, please contact
"commits@myfaces.apache.org" <co...@myfaces.apache.org>.

[myfaces-trinidad] 03/03: Agent support for web crawlers

Posted by de...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

deki pushed a commit to branch mstarets_webcrawler
in repository https://gitbox.apache.org/repos/asf/myfaces-trinidad.git

commit f3743ca7e41f15fef022e11844fc7636d93cf405
Author: Max Starets <ms...@apache.org>
AuthorDate: Thu Oct 28 20:43:50 2010 +0000

    Agent support for web crawlers
---
 .../trinidadinternal/agent/AgentFactoryImpl.java   | 55 ++++++++++++++++++++++
 .../trinidadinternal/agent/AgentNameUtil.java      |  5 ++
 .../trinidadinternal/agent/TrinidadAgent.java      | 25 +++++++++-
 .../ui/laf/NameAndAgentScorer.java                 | 17 +++++--
 .../ui/laf/base/desktop/BaseDesktopUtils.java      | 11 +++--
 .../ui/laf/base/pda/PdaHtmlLafUtils.java           |  4 +-
 .../ui/laf/simple/pda/SimplePdaUtils.java          |  4 +-
 .../main/resources/META-INF/agent/capabilities.xml | 10 ++++
 8 files changed, 116 insertions(+), 15 deletions(-)

diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentFactoryImpl.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentFactoryImpl.java
index 1ef6b29..f637e1e 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentFactoryImpl.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentFactoryImpl.java
@@ -115,6 +115,28 @@ public class AgentFactoryImpl implements AgentFactory
       _populateUnknownAgentImpl(null, agent);
       return;
     }
+    
+    // Temporary for testing !!!
+    if (facesContext != null && facesContext.getExternalContext().getRequestParameterMap().
+                        get("googlebot") != null)
+    {
+      _populateGoogleCrawlerAgentImpl("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", agent, 25);
+      return;
+    }
+    
+    int googlebotIndex = userAgent.indexOf(_GOOGLEBOT_ID);
+    if (googlebotIndex >= 0)
+    {
+      _populateGoogleCrawlerAgentImpl(userAgent, agent, googlebotIndex);
+      return;
+    }
+    
+    if (userAgent.startsWith(_MSNBOT_ID))
+    {
+      _populateMsnCrawlerAgentImpl(userAgent, agent);
+      return;
+    }
+    
 
     //the useragent string for telnet and PDA design time will start with
     //OracleJDevMobile because in each of these cases we know we have an
@@ -932,6 +954,37 @@ public class AgentFactoryImpl implements AgentFactory
     agentObj.setMakeModel(Agent.MAKE_MODEL_UNKNOWN);
 
   }
+  
+  /**
+   * Returns an AgentEntry for the Google web crawler
+   */
+  private void _populateGoogleCrawlerAgentImpl(String userAgent, AgentImpl agentObj, int idIndex)
+  {
+    agentObj.setType(Agent.TYPE_WEBCRAWLER);
+
+    agentObj.setAgent(Agent.AGENT_GOOGLEBOT);
+    agentObj.setAgentVersion(_getVersion(userAgent, idIndex + _GOOGLEBOT_ID.length()));
+    agentObj.setPlatform(Agent.PLATFORM_UNKNOWN);
+    agentObj.setPlatformVersion(Agent.PLATFORM_VERSION_UNKNOWN);
+    agentObj.setMakeModel(Agent.MAKE_MODEL_UNKNOWN);
+
+  }
+  
+  
+  /**
+   * Returns an AgentEntry for the msnbot (Bing, Yahoo) web crawler
+   */
+  private void _populateMsnCrawlerAgentImpl(String userAgent, AgentImpl agentObj)
+  {
+    agentObj.setType(Agent.TYPE_WEBCRAWLER);
+
+    agentObj.setAgent(Agent.AGENT_MSNBOT);
+    agentObj.setAgentVersion(_getVersion(userAgent, userAgent.indexOf('/')));
+    agentObj.setPlatform(Agent.PLATFORM_UNKNOWN);
+    agentObj.setPlatformVersion(Agent.PLATFORM_VERSION_UNKNOWN);
+    agentObj.setMakeModel(Agent.MAKE_MODEL_UNKNOWN);
+
+  }
 
 
   /**
@@ -995,4 +1048,6 @@ public class AgentFactoryImpl implements AgentFactory
   static final private String _IASW_DEVICE_HINT_PARAM = "X-Oracle-Device.Class";
   static final private TrinidadLogger _LOG = TrinidadLogger.createTrinidadLogger(AgentFactoryImpl.class);
 
+  static final private String _GOOGLEBOT_ID = "Googlebot";
+  static final private String _MSNBOT_ID = "msnbot";
 }
diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentNameUtil.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentNameUtil.java
index ba3a162..df7c4f8 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentNameUtil.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/AgentNameUtil.java
@@ -106,6 +106,11 @@ public class AgentNameUtil
     {
       return TrinidadAgent.TYPE_PHONE;
     }
+    
+    if (otype == Agent.TYPE_WEBCRAWLER)
+    {
+      return TrinidadAgent.TYPE_WEBCRAWLER;
+    }
 
     //Default to desktop (This is UIX 2.2 logic)
     return TrinidadAgent.TYPE_DESKTOP;
diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/TrinidadAgent.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/TrinidadAgent.java
index b2ce1f7..f0840b1 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/TrinidadAgent.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/agent/TrinidadAgent.java
@@ -258,6 +258,11 @@ public abstract class TrinidadAgent implements Agent, Cloneable
    * Application constant for voice
    */
   static public final int TYPE_VOICE = 3;
+  
+  /**
+   * Application constant for web crawlers
+   */
+  static public final int TYPE_WEBCRAWLER = 4;
 
   /**
    * Enumeration representing an Application
@@ -336,7 +341,15 @@ public abstract class TrinidadAgent implements Agent, Cloneable
     /**
      * Application enum for opera.
      */
-    OPERA("opera", AGENT_OPERA);
+    OPERA("opera", AGENT_OPERA),
+    /**
+     * Application enum for Google web crawler.
+     */
+    GOOGLEBOT("googlebot", AGENT_GOOGLEBOT),
+    /**
+     * Application enum for Bing web crawler.
+     */
+    MSNBOT("msnbot", AGENT_MSNBOT);
 
     /**
      * Return the appropriate Application instance given the name of an Application
@@ -525,6 +538,16 @@ public abstract class TrinidadAgent implements Agent, Cloneable
   static public final int APPLICATION_OPERA = Application.OPERA.ordinal();
   
   /**
+   * Application constant for Google web crawler.
+   */
+  static public final int APPLICATION_GOOGLEBOT = Application.GOOGLEBOT.ordinal();
+  
+  /**
+   * Application constant for Bing web crawler.
+   */
+  static public final int APPLICATION_MSNBOT = Application.MSNBOT.ordinal();
+  
+  /**
    * OS constant for an unknown operating system.
    */
   static public final int OS_UNKNOWN = 0;
diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/NameAndAgentScorer.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/NameAndAgentScorer.java
index 852e6ca..0b58933 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/NameAndAgentScorer.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/NameAndAgentScorer.java
@@ -40,14 +40,14 @@ public class NameAndAgentScorer extends LookAndFeelScorer
    */
   public NameAndAgentScorer(
     String  requiredLafName,
-    Integer requiredAgentType,
     Integer requiredAgentApplication,
     Integer requiredAgentMajorVersion,
-    Integer requiredAgentOS
+    Integer requiredAgentOS,
+    Integer... requiredAgentTypes
     )
   {
     _lafName           = requiredLafName;
-    _agentType         = requiredAgentType;
+    _agentTypes        = requiredAgentTypes;
     _agentApplication  = requiredAgentApplication;
     _agentMajorVersion = requiredAgentMajorVersion;
     _agentOS           = requiredAgentOS;
@@ -76,7 +76,14 @@ public class NameAndAgentScorer extends LookAndFeelScorer
     int nameScore = _scoreName(lafName);
 
     TrinidadAgent agent = context. getAgent();
-    int typeScore = _score(_agentType, agent.getAgentType());
+    
+    int typeScore = NO_MATCH;
+    int agentType = agent.getAgentType();
+    for (int type: _agentTypes)
+    {
+      typeScore = Math.max(typeScore, _score(type, agentType));
+    }
+    
     int appScore = _score(_agentApplication, agent.getAgentApplication().ordinal());
     int versScore = _score(_agentMajorVersion, agent.getAgentMajorVersion());
     int osScore = _score(_agentOS, agent.getAgentOS());
@@ -124,7 +131,7 @@ public class NameAndAgentScorer extends LookAndFeelScorer
   }
   
   private String  _lafName;
-  private Integer _agentType;
+  private Integer _agentTypes[];
   private Integer _agentApplication;
   private Integer _agentMajorVersion;
   private Integer _agentOS;
diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/desktop/BaseDesktopUtils.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/desktop/BaseDesktopUtils.java
index 40de8ae..8587091 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/desktop/BaseDesktopUtils.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/desktop/BaseDesktopUtils.java
@@ -69,7 +69,7 @@ public class BaseDesktopUtils extends XhtmlLafUtils
 
   // We use a scorer which matches base/desktop - but not Netscape
   private static final NameAndAgentScorer _SCORER =
-    new NoNetscapeScorer("base");
+    new NoNetscapeScorer("base", TrinidadAgent.TYPE_DESKTOP, TrinidadAgent.TYPE_WEBCRAWLER);
 
   /**
    * @deprecated This class comes from the old Java 1.2 UIX codebase and should not be used anymore.
@@ -77,14 +77,15 @@ public class BaseDesktopUtils extends XhtmlLafUtils
   @Deprecated
   private static class NoNetscapeScorer extends NameAndAgentScorer
   {
-    public NoNetscapeScorer(String lafName)
+    public NoNetscapeScorer(String lafName, int type1, int type2)
     {
-      // Initialize the NameAndAgentScorer for base/desktop
+      // Initialize the NameAndAgentScorer for base
       super(lafName,
-            TrinidadAgent.TYPE_DESKTOP,
             null,
             null,
-            null);
+            null,
+            type1,
+            type2);
     }
 
     @Override
diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/pda/PdaHtmlLafUtils.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/pda/PdaHtmlLafUtils.java
index c37d94f..7423af0 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/pda/PdaHtmlLafUtils.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/base/pda/PdaHtmlLafUtils.java
@@ -82,8 +82,8 @@ public class PdaHtmlLafUtils extends XhtmlLafUtils
 
   private static final NameAndAgentScorer _SCORER =
     new NameAndAgentScorer(null,
-                           TrinidadAgent.TYPE_PDA,
                            null,
                            null,
-                           null);
+                           null,
+                           TrinidadAgent.TYPE_PDA);
 }
diff --git a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/simple/pda/SimplePdaUtils.java b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/simple/pda/SimplePdaUtils.java
index fa43623..a2ecf04 100644
--- a/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/simple/pda/SimplePdaUtils.java
+++ b/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/ui/laf/simple/pda/SimplePdaUtils.java
@@ -49,10 +49,10 @@ public class SimplePdaUtils extends PdaHtmlLafUtils
 
   private static final NameAndAgentScorer _PDA_SCORER =
     new NameAndAgentScorer("simple",
-                           TrinidadAgent.TYPE_PDA,
                            null,
                            null,
-                           null);
+                           null,
+                           TrinidadAgent.TYPE_PDA);
 
 
 }
diff --git a/trinidad-impl/src/main/resources/META-INF/agent/capabilities.xml b/trinidad-impl/src/main/resources/META-INF/agent/capabilities.xml
index 0d7a14b..cba735f 100644
--- a/trinidad-impl/src/main/resources/META-INF/agent/capabilities.xml
+++ b/trinidad-impl/src/main/resources/META-INF/agent/capabilities.xml
@@ -74,6 +74,16 @@
       <include refid="html" />
       <include src="email.xml" />
     </capabilities>
+    
+    <capabilities id="googlebot" agents="googlebot">
+      <include refid="html" />
+      <include src="email.xml" />
+    </capabilities>
+    
+    <capabilities id="msnbot" agents="msnbot">
+      <include refid="html" />
+      <include src="email.xml" />
+    </capabilities>
 
     <capabilities id="webkit" agents="webkit">
       <include refid="html" />

-- 
To stop receiving notification emails like this one, please contact
"commits@myfaces.apache.org" <co...@myfaces.apache.org>.

[myfaces-trinidad] 02/03: Agent support for web crawlers

Posted by de...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

deki pushed a commit to branch mstarets_webcrawler
in repository https://gitbox.apache.org/repos/asf/myfaces-trinidad.git

commit 7958e92cc58caf4cf82479baac49f11e513e6342
Author: Max Starets <ms...@apache.org>
AuthorDate: Thu Oct 28 20:42:35 2010 +0000

    Agent support for web crawlers
---
 .../java/org/apache/myfaces/trinidad/context/Agent.java   | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/trinidad-api/src/main/java/org/apache/myfaces/trinidad/context/Agent.java b/trinidad-api/src/main/java/org/apache/myfaces/trinidad/context/Agent.java
index ee4d12c..300d069 100644
--- a/trinidad-api/src/main/java/org/apache/myfaces/trinidad/context/Agent.java
+++ b/trinidad-api/src/main/java/org/apache/myfaces/trinidad/context/Agent.java
@@ -60,6 +60,11 @@ public interface Agent
    * Constant for Phone sized devices
    */
   public static final Object TYPE_PHONE = "phone";
+  
+  /**
+   * Constant for Web Crawlers
+   */
+  public static final Object TYPE_WEBCRAWLER = "webcrawler";
 
   /**
    * Constant for unknown platform
@@ -183,6 +188,16 @@ public interface Agent
    * Constant for basic HTML (without JavaScript) Browser agent.
    */
   public static final String AGENT_GENERICPDA = "genericpda";
+  
+  /**
+   * Constant for MSN web crawler (currently used by Bing and Yahoo)
+   */
+  public static final String AGENT_MSNBOT = "msnbot";
+  
+  /**
+   * Constant for Google web crawler
+   */
+  public static final String AGENT_GOOGLEBOT = "googlebot";
 
   /**
    * Constant for unknown Agent version

-- 
To stop receiving notification emails like this one, please contact
"commits@myfaces.apache.org" <co...@myfaces.apache.org>.