Logo Search packages:      
Sourcecode: qt4-x11 version File versions

SloppyPhraseScorer.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "SloppyPhraseScorer.h"

#include "PhraseScorer.h"
#include "CLucene/index/Terms.h"

CL_NS_USE(index)
CL_NS_DEF(search)

  SloppyPhraseScorer::SloppyPhraseScorer(
          Weight* weight,
          CL_NS(index)::TermPositions** tps,
          int32_t* positions,
          Similarity* similarity,
          int32_t slop,
          uint8_t* norms)
      : PhraseScorer(weight, tps, positions, similarity, norms)
  {
  //Func - Constructor. Forwards weight, tps, positions, similarity and norms
  //       unchanged to the PhraseScorer base and records the slop value that
  //       phraseFreq() compares match lengths against.
  //Pre  - tps != NULL
  //Post - Instance has been created and this->slop holds the given slop

      // Note: the base-class initializer above has already received tps by
      // the time this check runs.
      CND_PRECONDITION(tps != NULL, "tps is NULL");
      // A NULL check on norms is not repeated here; per the original author's
      // note it is performed in PhraseScorer (not visible from this file).

      this->slop = slop;
  }

  float_t SloppyPhraseScorer::phraseFreq() {
  //Func - Returns the freqency of the phrase
  //Pre  - first != NULL
  //       last  != NULL
  //       pq    != NULL
  //Post - The frequency of the phrase has been returned

        CND_PRECONDITION(first != NULL,"first is NULL");
        CND_PRECONDITION(last  != NULL,"last is NULL");
        CND_PRECONDITION(pq    != NULL,"pq is NULL");

        //Clear the PhraseQueue pq;
      pq->clear();

      int32_t end = 0;

        //declare iterator
      PhrasePositions* pp = NULL;

      // build pq from list

        //Sort the list of PhrasePositions using pq
      for (pp = first; pp != NULL; pp = pp->_next) {
              //Read the first TermPosition of the current PhrasePositions pp
          pp->firstPosition();
              //Check if the position of the pp is bigger than end
              if (pp->position > end){
              end = pp->position;
                  }
          //Store the current PhrasePositions pp into the PhraseQueue pp
          pq->put(pp);        
          }

     float_t freq = 0.0f;
     
       bool done = false;
     
       do {
             //Pop a PhrasePositions pp from the PhraseQueue pp
         pp = pq->pop();
         //Get start position
         int32_t start = pp->position;
             //Get next position
             int32_t next = pq->top()->position;

         for (int32_t pos = start; pos <= next; pos = pp->position) {
             //advance pp to min window
             start = pos;                         
             
             if (!pp->nextPosition()) {
                         //ran out of a term -- done
                 done = true;
                 break;
                 }
             }
         
         //Calculate matchLength
             int32_t matchLength = end - start;
         //Check if matchLength is smaller than slop
         if (matchLength <= slop){
             // penalize longer matches
             freq += 1.0 / (matchLength + 1);     
             }

             if (pp->position > end){
             end = pp->position;
             }
         
         //restore pq
             pq->put(pp);                         
       }while (!done);

     return freq;
  }
CL_NS_END

Generated by  Doxygen 1.6.0   Back to index