Logo Search packages:      
Sourcecode: qt4-x11 version File versions

QueryParser.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "QueryParser.h"

#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/util/Reader.h"
#include "CLucene/search/SearchHeader.h"
#include "CLucene/index/Term.h"

#include "TokenList.h"
#include "QueryToken.h"
#include "QueryParserBase.h"
#include "Lexer.h"

CL_NS_USE(util)
CL_NS_USE(index)
CL_NS_USE(analysis)
CL_NS_USE(search)

CL_NS_DEF(queryParser)

    QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){
    //Func - Constructor.
      //       Instantiates a QueryParser for the named field _field
      //Pre  - _field != NULL
      //Post - An instance has been created

            if ( _field )
                  field = STRDUP_TtoT(_field);
            else
                  field = NULL;
            tokens = NULL;
            lowercaseExpandedTerms = true;
      }

      QueryParser::~QueryParser() {
      //Func - Destructor
      //Pre  - true
      //Post - The instance has been destroyed

        _CLDELETE_CARRAY(field);
      }

    //static
    Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){
    //Func - Returns a new instance of the Query class with a specified query, field and
    //       analyzer values.
    //Pre  - query != NULL and holds the query to parse
      //       field != NULL and holds the default field for query terms
      //       analyzer holds a valid reference to an Analyzer and is used to
      //       find terms in the query text
      //Post - query has been parsed and an instance of Query has been returned

            CND_PRECONDITION(query != NULL, "query is NULL");
        CND_PRECONDITION(field != NULL, "field is NULL");

            QueryParser parser(field, analyzer);
            return parser.parse(query);
      }

    Query* QueryParser::parse(const TCHAR* query){
      //Func - Returns a parsed Query instance
      //Pre  - query != NULL and contains the query value to be parsed
      //Post - Returns a parsed Query Instance

        CND_PRECONDITION(query != NULL, "query is NULL");

            //Instantie a Stringer that can read the query string
        Reader* r = _CLNEW StringReader(query);

            //Check to see if r has been created properly
            CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r");

            //Pointer for the return value
            Query* ret = NULL;

            try{
                  //Parse the query managed by the StringReader R and return a parsed Query instance
                  //into ret
                  ret = parse(r);
            }_CLFINALLY (
                  _CLDELETE(r);
            );

            return ret;
      }

      Query* QueryParser::parse(Reader* reader){
      //Func - Returns a parsed Query instance
      //Pre  - reader contains a valid reference to a Reader and manages the query string
      //Post - A parsed Query instance has been returned or

            //instantiate the TokenList tokens
            TokenList _tokens;
            this->tokens = &_tokens;

            //Instantiate a lexer
            Lexer lexer(this, reader);

            //tokens = lexer.Lex();
            //Lex the tokens
            lexer.Lex(tokens);

            //Peek to the first token and check if is an EOF
            if (tokens->peek()->Type == QueryToken::EOF_){
                  // The query string failed to yield any tokens.  We discard the
                  // TokenList tokens and raise an exceptioin.
                  QueryToken* token = this->tokens->extract();
                  _CLDELETE(token);
               _CLTHROWA(CL_ERR_Parse, "No query given.");
            }

            //Return the parsed Query instance
            Query* ret = MatchQuery(field);
            this->tokens = NULL;
            return ret;
      }

00124       int32_t QueryParser::MatchConjunction(){
      //Func - matches for CONJUNCTION
      //       CONJUNCTION ::= <AND> | <OR>
      //Pre  - tokens != NULL
      //Post - if the first token is an AND or an OR then
      //       the token is extracted and deleted and CONJ_AND or CONJ_OR is returned
      //       otherwise CONJ_NONE is returned

        CND_PRECONDITION(tokens != NULL, "tokens is NULL");

            switch(tokens->peek()->Type){
                  case QueryToken::AND_ :
                        //Delete the first token of tokenlist
                        ExtractAndDeleteToken();
                        return CONJ_AND;
                  case QueryToken::OR   :
                        //Delete the first token of tokenlist
                        ExtractAndDeleteToken();
                        return CONJ_OR;
                  default :
                        return CONJ_NONE;
            }
      }

00148       int32_t QueryParser::MatchModifier(){
      //Func - matches for MODIFIER
      //       MODIFIER ::= <PLUS> | <MINUS> | <NOT>
      //Pre  - tokens != NULL
      //Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned
      //       if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned
      //       otherwise MOD_NONE is returned
            CND_PRECONDITION(tokens != NULL, "tokens is NULL");

            switch(tokens->peek()->Type){
                  case QueryToken::PLUS :
                        //Delete the first token of tokenlist
                        ExtractAndDeleteToken();
                        return MOD_REQ;
                  case QueryToken::MINUS :
                  case QueryToken::NOT   :
                        //Delete the first token of tokenlist
                        ExtractAndDeleteToken();
                        return MOD_NOT;
                  default :
                        return MOD_NONE;
            }
      }

      Query* QueryParser::MatchQuery(const TCHAR* field){
      //Func - matches for QUERY
      //       QUERY ::= [MODIFIER] QueryParser::CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
      //Pre  - field != NULL
      //Post -

            CND_PRECONDITION(tokens != NULL, "tokens is NULL");

            vector<BooleanClause*> clauses;

            Query* q = NULL;

            int32_t mods = MOD_NONE;
            int32_t conj = CONJ_NONE;

            //match for MODIFIER
            mods = MatchModifier();

            //match for CLAUSE
            q = MatchClause(field);
            AddClause(clauses, CONJ_NONE, mods, q);

            // match for CLAUSE*
            while(true){
                  QueryToken* p = tokens->peek();
                  if(p->Type == QueryToken::EOF_){
                        QueryToken* qt = MatchQueryToken(QueryToken::EOF_);
                        _CLDELETE(qt);
                        break;
                  }

                  if(p->Type == QueryToken::RPAREN){
                        //MatchQueryToken(QueryToken::RPAREN);
                        break;
                  }

                  //match for a conjuction (AND OR NOT)
                  conj = MatchConjunction();
                  //match for a modifier
                  mods = MatchModifier();

                  q = MatchClause(field);
                  if ( q != NULL )
                        AddClause(clauses, conj, mods, q);
            }

            // finalize query
            if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL
                  BooleanClause* c = clauses[0];
                  Query* q = c->query;

                  //Condition check to be sure clauses[0] is valid
                  CND_CONDITION(c != NULL, "c is NULL");

                  //Tell the boolean clause not to delete its query
                  c->deleteQuery=false;
                  //Clear the clauses list
                  clauses.clear();
                  _CLDELETE(c);

                  return q;
            }else{
                  return GetBooleanQuery(clauses);
            }
      }

      Query* QueryParser::MatchClause(const TCHAR* field){
      //Func - matches for CLAUSE
      //       CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>))
      //Pre  - field != NULL
      //Post -

            Query* q = NULL;
            const TCHAR* sfield = field;
            bool delField = false;

            QueryToken *DelToken = NULL;

            //match for [TERM <COLON>]
            QueryToken* term = tokens->extract();
            if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){
                  DelToken = MatchQueryToken(QueryToken::COLON);

                  CND_CONDITION(DelToken != NULL,"DelToken is NULL");
                  _CLDELETE(DelToken);

                  TCHAR* tmp = STRDUP_TtoT(term->Value);
                  discardEscapeChar(tmp);
                  delField = true;
                  sfield = tmp;
                  _CLDELETE(term);
            }else{
                  tokens->push(term);
                  term = NULL;
            }

            // match for
            // TERM | (<LPAREN> QUERY <RPAREN>)
            if(tokens->peek()->Type == QueryToken::LPAREN){
                  DelToken = MatchQueryToken(QueryToken::LPAREN);

                  CND_CONDITION(DelToken != NULL,"DelToken is NULL");
                  _CLDELETE(DelToken);

                  q = MatchQuery(sfield);
                  //DSR:2004.11.01:
                  //If exception is thrown while trying to match trailing parenthesis,
                  //need to prevent q from leaking.

                  try{
                     DelToken = MatchQueryToken(QueryToken::RPAREN);

                     CND_CONDITION(DelToken != NULL,"DelToken is NULL");
                     _CLDELETE(DelToken);

                  }catch(...) {
                        _CLDELETE(q);
                        throw;
                  }
            }else{
                  q = MatchTerm(sfield);
            }

        if ( delField )
              _CLDELETE_CARRAY(sfield);
        return q;
      }


      Query* QueryParser::MatchTerm(const TCHAR* field){
      //Func - matches for TERM
      //       TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER
      //                [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]]
      //                      | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>]
      //                      | <QUOTED> [SLOP] [<CARAT> <NUMBER>]
      //Pre  - field != NULL
      //Post -

            QueryToken* term = NULL;
            QueryToken* slop = NULL;
            QueryToken* boost = NULL;

            bool prefix = false;
            bool wildcard = false;
            bool fuzzy = false;
            bool rangein = false;
            Query* q = NULL;

            term = tokens->extract();
            QueryToken* DelToken = NULL; //Token that is about to be deleted

            switch(term->Type){
                  case QueryToken::TERM:
                  case QueryToken::NUMBER:
                  case QueryToken::PREFIXTERM:
                  case QueryToken::WILDTERM:
                  { //start case
                        //Check if type of QueryToken term is a prefix term
                        if(term->Type == QueryToken::PREFIXTERM){
                              prefix = true;
                        }
                        //Check if type of QueryToken term is a wildcard term
                        if(term->Type == QueryToken::WILDTERM){
                              wildcard = true;
                        }
                        //Peek to see if the type of the next token is fuzzy term
                        if(tokens->peek()->Type == QueryToken::FUZZY){
                              DelToken = MatchQueryToken(QueryToken::FUZZY);

                              CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
                              _CLDELETE(DelToken);

                              fuzzy = true;
                        }
                        if(tokens->peek()->Type == QueryToken::CARAT){
                              DelToken = MatchQueryToken(QueryToken::CARAT);

                              CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
                              _CLDELETE(DelToken);

                              boost = MatchQueryToken(QueryToken::NUMBER);

                              if(tokens->peek()->Type == QueryToken::FUZZY){
                                 DelToken = MatchQueryToken(QueryToken::FUZZY);

                                 CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
                                 _CLDELETE(DelToken);

                                 fuzzy = true;
                           }
                        } //end if type==CARAT

                        discardEscapeChar(term->Value); //clean up
                        if(wildcard){
                              q = GetWildcardQuery(field,term->Value);
                              break;
                        }else if(prefix){
                              //Create a PrefixQuery
                              term->Value[_tcslen(term->Value)-1] = 0; //discard the *
                              q = GetPrefixQuery(field,term->Value);
                              break;
                        }else if(fuzzy){
                              //Create a FuzzyQuery

                              //Check if the last char is a ~
                              if(term->Value[_tcslen(term->Value)-1] == '~'){
                                    //remove the ~
                                    term->Value[_tcslen(term->Value)-1] = '\0';
                              }

                              q = GetFuzzyQuery(field,term->Value);
                              break;
                        }else{
                              q = GetFieldQuery(field, term->Value);
                              break;
                        }
                  }


                  case QueryToken::RANGEIN:
                  case QueryToken::RANGEEX:{
                        if(term->Type == QueryToken::RANGEIN){
                              rangein = true;
                        }

                        if(tokens->peek()->Type == QueryToken::CARAT){
                              DelToken = MatchQueryToken(QueryToken::CARAT);

                              CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
                              _CLDELETE(DelToken);

                              boost = MatchQueryToken(QueryToken::NUMBER);
                        }

                        TCHAR* noBrackets = term->Value + 1;
                        noBrackets[_tcslen(noBrackets)-1] = 0;
                        q = ParseRangeQuery(field, noBrackets, rangein);
                        break;
                  }


                  case QueryToken::QUOTED:{
                        if(tokens->peek()->Type == QueryToken::SLOP){
                              slop = MatchQueryToken(QueryToken::SLOP);
                        }

                        if(tokens->peek()->Type == QueryToken::CARAT){
                              DelToken = MatchQueryToken(QueryToken::CARAT);

                              CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
                              _CLDELETE(DelToken);

                              boost = MatchQueryToken(QueryToken::NUMBER);
                        }

                        //remove the quotes
                        TCHAR* quotedValue = term->Value+1;
                        quotedValue[_tcslen(quotedValue)-1] = '\0';

                        int32_t islop = phraseSlop;
                        if(slop != NULL ){
                           try {
                       TCHAR* end; //todo: should parse using float...
                                 islop = (int32_t)_tcstoi64(slop->Value+1, &end, 10);
                           }catch(...){
                                 //ignored
                           }
                        }

                        q = GetFieldQuery(field, quotedValue, islop);
                        _CLDELETE(slop);
                  }
            } // end of switch

            _CLDELETE(term);


            if( q!=NULL && boost != NULL ){
                  float_t f = 1.0F;
                  try {
                        TCHAR* tmp;
                        f = _tcstod(boost->Value, &tmp);
                  }catch(...){
                        //ignored
                  }
                  _CLDELETE(boost);

                  q->setBoost( f);
            }

            return q;
      }

00465       QueryToken* QueryParser::MatchQueryToken(QueryToken::Types expectedType){
      //Func - matches for QueryToken of the specified type and returns it
      //       otherwise Exception throws
      //Pre  - tokens != NULL
      //Post -

            CND_PRECONDITION(tokens != NULL,"tokens is NULL");

            if(tokens->count() == 0){
                  throwParserException(_T("Error: Unexpected end of program"),' ',0,0);
            }

        //Extract a token form the TokenList tokens
        QueryToken* t = tokens->extract();
        //Check if the type of the token t matches the expectedType
        if (expectedType != t->Type){
              TCHAR buf[200];
              _sntprintf(buf,200,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType);
              _CLDELETE(t);
              throwParserException(buf,' ',0,0);
            }

        //Return the matched token
        return t;
      }

00491       void QueryParser::ExtractAndDeleteToken(void){
      //Func - Extracts the first token from the Tokenlist tokenlist
      //       and destroys it
      //Pre  - true
      //Post - The first token has been extracted and destroyed

            CND_PRECONDITION(tokens != NULL, "tokens is NULL");

            //Extract the token from the TokenList tokens
            QueryToken* t = tokens->extract();
            //Condition Check Token may not be NULL
            CND_CONDITION(t != NULL, "Token is NULL");
            //Delete Token
            _CLDELETE(t);
      }

CL_NS_END

Generated by  Doxygen 1.6.0   Back to index