You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by "Timothy Allison (JIRA)" <ji...@apache.org> on 2013/04/02 19:41:15 UTC
[jira] [Updated] (LUCENE-949) AnalyzingQueryParser can't work with
leading wildcards.
[ https://issues.apache.org/jira/browse/LUCENE-949?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Timothy Allison updated LUCENE-949:
-----------------------------------
Attachment: AnalyzingQueryParser.java
This allows for leading wildcards in the AnalyzingQueryParser if setAllowLeadingWildcard is set to true.
> AnalyzingQueryParser can't work with leading wildcards.
> -------------------------------------------------------
>
> Key: LUCENE-949
> URL: https://issues.apache.org/jira/browse/LUCENE-949
> Project: Lucene - Core
> Issue Type: Bug
> Components: core/queryparser
> Affects Versions: 2.2
> Reporter: Stefan Klein
> Attachments: AnalyzingQueryParser.java
>
>
> The getWildcardQuery method in AnalyzingQueryParser.java needs the following changes to accept leading wildcards:
> /**
>  * Builds a wildcard query whose literal (non-wildcard) segments have been
>  * passed through the analyzer, with the wildcard characters put back in
>  * their original positions afterwards.
>  * <p>
>  * A single leading {@code *} or {@code ?} is split off before analysis and
>  * re-attached to the front of the rebuilt term; it is only accepted when
>  * {@code getAllowLeadingWildcard()} returns true.
>  *
>  * @param field   the field to query; when both field and term are
>  *                {@code "*"} a {@code MatchAllDocsQuery} is returned
>  * @param termStr the raw term, including its wildcard characters
>  * @return the query produced by {@code super.getWildcardQuery} for the
>  *         rebuilt term
>  * @throws ParseException if a leading wildcard is present but not allowed,
>  *         or if the analyzer yields a different number of non-empty tokens
>  *         than there are literal segments in the term
>  * @throws IllegalArgumentException if the term analyzes to a single token
>  *         and contains no wildcard at all
>  */
> protected Query getWildcardQuery(String field, String termStr) throws ParseException
> {
>   String useTermStr = termStr;
>   String leadingWildcard = null;
>   if ("*".equals(field))
>   {
>     if ("*".equals(useTermStr))
>       return new MatchAllDocsQuery();
>   }
>   boolean hasLeadingWildcard = useTermStr.startsWith("*") || useTermStr.startsWith("?");
>   if (!getAllowLeadingWildcard() && hasLeadingWildcard)
>   {
>     throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
>   }
>   if (getLowercaseExpandedTerms())
>   {
>     useTermStr = useTermStr.toLowerCase();
>   }
>   if (hasLeadingWildcard)
>   {
>     // split the leading wildcard off; it is re-attached after analysis
>     leadingWildcard = useTermStr.substring(0, 1);
>     useTermStr = useTermStr.substring(1);
>   }
>   // tlist collects the literal segments, wlist the wildcard runs between them
>   List tlist = new ArrayList();
>   List wlist = new ArrayList();
>   /*
>    * somewhat a hack: find/store wildcard chars in order to put them back
>    * after analyzing
>    */
>   // always start in "token" state so that tlist receives the first segment
>   boolean isWithinToken = true;
>   StringBuffer tmpBuffer = new StringBuffer();
>   char[] chars = useTermStr.toCharArray();
>   for (int i = 0; i < chars.length; i++)
>   {
>     if (chars[i] == '?' || chars[i] == '*')
>     {
>       if (isWithinToken)
>       {
>         tlist.add(tmpBuffer.toString());
>         tmpBuffer.setLength(0);
>       }
>       isWithinToken = false;
>     }
>     else
>     {
>       if (!isWithinToken)
>       {
>         wlist.add(tmpBuffer.toString());
>         tmpBuffer.setLength(0);
>       }
>       isWithinToken = true;
>     }
>     tmpBuffer.append(chars[i]);
>   }
>   // flush whatever segment was still being collected
>   if (isWithinToken)
>   {
>     tlist.add(tmpBuffer.toString());
>   }
>   else
>   {
>     wlist.add(tmpBuffer.toString());
>   }
>   // get Analyzer from superclass and tokenize the term
>   TokenStream source = getAnalyzer().tokenStream(field, new StringReader(useTermStr));
>   org.apache.lucene.analysis.Token t;
>   int countTokens = 0;
>   while (true)
>   {
>     try
>     {
>       t = source.next();
>     }
>     catch (IOException e)
>     {
>       // a read failure is treated like end of stream
>       t = null;
>     }
>     if (t == null)
>     {
>       break;
>     }
>     if (!"".equals(t.termText()))
>     {
>       try
>       {
>         // overwrite the raw literal segment with its analyzed form
>         tlist.set(countTokens++, t.termText());
>       }
>       catch (IndexOutOfBoundsException ioobe)
>       {
>         // more tokens than segments: mark as mismatch for the check below
>         countTokens = -1;
>       }
>     }
>   }
>   try
>   {
>     source.close();
>   }
>   catch (IOException e)
>   {
>     // ignore
>   }
>   if (countTokens != tlist.size())
>   {
>     /*
>      * this means that the analyzer used either added or consumed
>      * (common for a stemmer) tokens, and we can't build a WildcardQuery
>      */
>     throw new ParseException("Cannot build WildcardQuery with analyzer " + getAnalyzer().getClass()
>         + " - tokens added or lost");
>   }
>   if (tlist.size() == 0)
>   {
>     return null;
>   }
>   else if (tlist.size() == 1)
>   {
>     if (wlist.size() == 1)
>     {
>       /*
>        * one literal segment followed by one wildcard run: the run must
>        * be trailing, because a wildcard in the middle would have
>        * produced two or more literal segments
>        */
>       StringBuffer sb = new StringBuffer();
>       if (hasLeadingWildcard)
>       {
>         // adding leadingWildcard
>         sb.append(leadingWildcard);
>       }
>       sb.append((String) tlist.get(0));
>       sb.append(wlist.get(0).toString());
>       return super.getWildcardQuery(field, sb.toString());
>     }
>     else if (wlist.size() == 0 && hasLeadingWildcard)
>     {
>       /*
>        * the only wildcard is the leading one; wlist is empty here, so
>        * nothing may be read from it
>        */
>       StringBuffer sb = new StringBuffer();
>       sb.append(leadingWildcard);
>       sb.append((String) tlist.get(0));
>       return super.getWildcardQuery(field, sb.toString());
>     }
>     else
>     {
>       /*
>        * we should never get here! if so, this method was called with
>        * a termStr containing no wildcard ...
>        */
>       throw new IllegalArgumentException("getWildcardQuery called without wildcard");
>     }
>   }
>   else
>   {
>     /*
>      * the term was tokenized, let's rebuild to one token with wildcards
>      * put back in position
>      */
>     StringBuffer sb = new StringBuffer();
>     if (hasLeadingWildcard)
>     {
>       // adding leadingWildcard
>       sb.append(leadingWildcard);
>     }
>     for (int i = 0; i < tlist.size(); i++)
>     {
>       sb.append((String) tlist.get(i));
>       if (wlist.size() > i)
>       {
>         sb.append((String) wlist.get(i));
>       }
>     }
>     return super.getWildcardQuery(field, sb.toString());
>   }
> }
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org