// Logo Search packages:
// Sourcecode: qt4-x11 version File versions
//
// IndexSearcher.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "IndexSearcher.h"

#include "SearchHeader.h"
#include "Scorer.h"
#include "FieldDocSortedHitQueue.h"
#include "CLucene/store/Directory.h"
#include "CLucene/document/Document.h"
#include "CLucene/index/IndexReader.h"
#include "CLucene/index/Term.h"
#include "CLucene/util/BitSet.h"
#include "FieldSortedHitQueue.h"

CL_NS_USE(index)
CL_NS_USE(util)
CL_NS_USE(document)

CL_NS_DEF(search)

      class SimpleTopDocsCollector:public HitCollector{ 
      private:
            float_t minScore;
            const CL_NS(util)::BitSet* bits;
            HitQueue* hq;
            size_t nDocs;
            int32_t* totalHits;
      public:
            SimpleTopDocsCollector(const CL_NS(util)::BitSet* bs, HitQueue* hitQueue, int32_t* totalhits, size_t ndocs, const float_t ms=-1.0f):
            minScore(ms),
            bits(bs),
            hq(hitQueue),
            nDocs(ndocs),
            totalHits(totalhits)
      {
      }
            ~SimpleTopDocsCollector(){}
            void collect(const int32_t doc, const float_t score){
            if (score > 0.0f &&                   // ignore zeroed buckets
                  (bits==NULL || bits->get(doc))) {     // skip docs not in bits
                  ++totalHits[0];
                  if (hq->size() < nDocs || (minScore==-1.0f || score >= minScore)) {
                        ScoreDoc sd = {doc, score};
                        hq->insert(sd);     // update hit queue
                        if ( minScore != -1.0f )
                              minScore = hq->top().score; // maintain minScore
                  }
            }
      }
      };

      class SortedTopDocsCollector:public HitCollector{ 
      private:
            const CL_NS(util)::BitSet* bits;
            FieldSortedHitQueue* hq;
            size_t nDocs;
            int32_t* totalHits;
      public:
            SortedTopDocsCollector(const CL_NS(util)::BitSet* bs, FieldSortedHitQueue* hitQueue, int32_t* totalhits, size_t _nDocs):
            bits(bs),
            hq(hitQueue),
            nDocs(_nDocs),
            totalHits(totalhits)
      {
      }
            ~SortedTopDocsCollector(){
            }
            void collect(const int32_t doc, const float_t score){
            if (score > 0.0f &&                   // ignore zeroed buckets
                  (bits==NULL || bits->get(doc))) {     // skip docs not in bits
                  ++totalHits[0];
                  FieldDoc* fd = _CLNEW FieldDoc(doc, score); //todo: see jlucene way... with fields def???
                  if ( !hq->insert(fd) )    // update hit queue
                        _CLDELETE(fd);
            }
      }
      };

      /**
      * HitCollector decorator: forwards a hit to the wrapped collector only
      * when the bit for that document is set in the supplied bit set.
      * The bit set is dereferenced unconditionally, so it must not be NULL.
      */
      class SimpleFilteredCollector: public HitCollector{
      private:
            CL_NS(util)::BitSet* bits;      // documents that may pass through
            HitCollector* results;          // collector receiving the surviving hits
      public:
            SimpleFilteredCollector(CL_NS(util)::BitSet* bs, HitCollector* collector):
                  bits(bs),
                  results(collector)
            {
            }

            ~SimpleFilteredCollector(){
            }
      protected:
            void collect(const int32_t doc, const float_t score){
                  // skip docs not in bits
                  if ( !bits->get(doc) )
                        return;

                  results->collect(doc, score);
            }
      };


00105   IndexSearcher::IndexSearcher(const char* path){
  //Func - Constructor
  //       Creates a searcher searching the index in the named directory.  */
  //Pre  - path != NULL
  //Post - The instance has been created

      CND_PRECONDITION(path != NULL, "path is NULL");

      reader = IndexReader::open(path);
      readerOwner = true;
  }
  
00117   IndexSearcher::IndexSearcher(CL_NS(store)::Directory* directory){
  //Func - Constructor
  //       Creates a searcher searching the index in the specified directory.  */
  //Pre  - path != NULL
  //Post - The instance has been created

      CND_PRECONDITION(directory != NULL, "directory is NULL");

      reader = IndexReader::open(directory);
      readerOwner = true;
  }

  IndexSearcher::IndexSearcher(IndexReader* r){
  //Func - Constructor
  //       Creates a searcher that works on a reader supplied by the caller.
  //Pre  - true (r is stored as given; no check is made here)
  //Post - The instance has been created. readerOwner is false, so the
  //       supplied reader is not treated as owned by this searcher.

      readerOwner = false;
      reader      = r;
  }

  IndexSearcher::~IndexSearcher(){
  //Func - Destructor; delegates all cleanup to close()
  //Pre  - true
  //Post - close() has been called on this instance

      this->close();
  }

00147   void IndexSearcher::close(){
  //Func - Frees resources associated with this Searcher.
  //Pre  - true
  //Post - The resources associated have been freed
      if (readerOwner && reader){
          reader->close();
          _CLDELETE(reader);
      }
  }

  // inherit javadoc
  int32_t IndexSearcher::docFreq(const Term* term) const{
  //Func - Forwards the document-frequency lookup for term to the reader
  //Pre  - reader != NULL
  //Post - The value of reader->docFreq(term) has been returned

      CND_PRECONDITION(reader != NULL, "reader is NULL");

      const int32_t frequency = reader->docFreq(term);
      return frequency;
  }

  
  // inherit javadoc
00170   bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document* d) {
  //Func - Retrieves i-th document found
  //       For use by HitCollector implementations.
  //Pre  - reader != NULL
  //Post - The i-th document has been returned

      CND_PRECONDITION(reader != NULL, "reader is NULL");

      return reader->document(i,d);
  }

  // inherit javadoc
00182   int32_t IndexSearcher::maxDoc() const {
  //Func - Return total number of documents including the ones marked deleted
  //Pre  - reader != NULL
  //Post - The total number of documents including the ones marked deleted 
  //       has been returned

      CND_PRECONDITION(reader != NULL, "reader is NULL");

      return reader->maxDoc();
  }

00193   TopDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs){
  //Func -
  //Pre  - reader != NULL
  //Post -

      CND_PRECONDITION(reader != NULL, "reader is NULL");
      CND_PRECONDITION(query != NULL, "query is NULL");

        Weight* weight = query->weight(this);
      Scorer* scorer = weight->scorer(reader);
        if (scorer == NULL){
          return _CLNEW TopDocs(0, NULL, 0);
        }

      BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
      HitQueue* hq = _CLNEW HitQueue(nDocs);

        //Check hq has been allocated properly
        CND_CONDITION(hq != NULL, "Could not allocate memory for HitQueue hq");

        int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
      totalHits[0] = 0;

      SimpleTopDocsCollector hitCol(bits,hq,totalHits,nDocs,0.0f);
      scorer->score( &hitCol );
      _CLDELETE(scorer);

      int32_t scoreDocsLength = hq->size();

            ScoreDoc* scoreDocs = _CL_NEWARRAY(ScoreDoc,scoreDocsLength);

            for (int32_t i = scoreDocsLength-1; i >= 0; --i)        // put docs in array
                  scoreDocs[i] = hq->pop();

      int32_t totalHitsInt = totalHits[0];

      _CLDELETE(hq);
        if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
            _CLDELETE(bits);
      _CLDELETE_ARRAY(totalHits);
        Query* wq = weight->getQuery();
        if ( query != wq ) //query was re-written
              _CLLDELETE(wq);
        _CLDELETE(weight);

      return _CLNEW TopDocs(totalHitsInt, scoreDocs, scoreDocsLength);
  }

  // inherit javadoc
00242   TopFieldDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs,
         const Sort* sort) {
             
      CND_PRECONDITION(reader != NULL, "reader is NULL");
      CND_PRECONDITION(query != NULL, "query is NULL");

    Weight* weight = query->weight(this);
    Scorer* scorer = weight->scorer(reader);
    if (scorer == NULL){
            return _CLNEW TopFieldDocs(0, NULL, 0, sort->getSort() );
      }

    BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
    FieldSortedHitQueue hq(reader, sort->getSort(), nDocs);
    int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
      totalHits[0]=0;
    
      SortedTopDocsCollector hitCol(bits,&hq,totalHits,nDocs);
      scorer->score(&hitCol);
    _CLDELETE(scorer);

      int32_t hqLen = hq.size();
    FieldDoc** fieldDocs = _CL_NEWARRAY(FieldDoc*,hqLen);
      for (int32_t i = hqLen-1; i >= 0; --i){     // put docs in array
        fieldDocs[i] = hq.fillFields (hq.pop());
      }

    Query* wq = weight->getQuery();
      if ( query != wq ) //query was re-written
            _CLLDELETE(wq);
      _CLDELETE(weight);

    SortField** hqFields = hq.getFields();
      hq.setFields(NULL); //move ownership of memory over to TopFieldDocs
    int32_t totalHits0 = totalHits[0];
      if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
            _CLDELETE(bits);
    _CLDELETE_ARRAY(totalHits);
    return _CLNEW TopFieldDocs(totalHits0, fieldDocs, hqLen, hqFields );
  }

00283   void IndexSearcher::_search(Query* query, Filter* filter, HitCollector* results){
  //Func - _search an index and fetch the results
  //       Applications should only use this if they need all of the
  //       matching documents.  The high-level search API (search(Query)) is usually more efficient, 
  //       as it skips non-high-scoring hits.
  //Pre  - query is a valid reference to a query
  //       filter may or may not be NULL
  //       results is a valid reference to a HitCollector and used to store the results
  //Post - filter if non-NULL, a bitset used to eliminate some documents

      CND_PRECONDITION(reader != NULL, "reader is NULL");
      CND_PRECONDITION(query != NULL, "query is NULL");

      BitSet* bits = NULL;
      SimpleFilteredCollector* fc = NULL; 

      if (filter != NULL){
          bits = filter->bits(reader);
          fc = _CLNEW SimpleFilteredCollector(bits, results);
       }

      Weight* weight = query->weight(this);
      Scorer* scorer = weight->scorer(reader);
      if (scorer != NULL) {
              if (fc == NULL){
              scorer->score(results);
              }else{
              scorer->score((HitCollector*)fc);
              }
          _CLDELETE(scorer); 
      }

    _CLDELETE(fc);
      _CLDELETE(weight);
      if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
            _CLDELETE(bits);
  }

  Query* IndexSearcher::rewrite(Query* original) {
  //Func - Repeatedly calls rewrite(reader) on the query until a call returns
  //       the very object it was invoked on.
  //Pre  - original and every intermediate result can be dereferenced
  //Post - The final query has been returned (original itself when the
  //       first rewrite returned it). Intermediate results produced on the
  //       way have been deleted; original is never deleted here.

      Query* current = original;
      for (;;) {
          Query* next = current->rewrite(reader);
          if ( next == current )
              break;                  // fixed point reached

          // 'current' is about to be superseded; drop it unless it is the
          // caller's own query.
          if ( current != original ){
              _CLDELETE(current);
          }
          current = next;
      }
      return current;
  }

00336     void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret){
        Weight* weight = query->weight(this);
        weight->explain(reader, doc, ret);

        Query* wq = weight->getQuery();
          if ( query != wq ) //query was re-written
              _CLLDELETE(wq);
        _CLDELETE(weight);
    }

CL_NS_END

// Generated by Doxygen 1.6.0 - Back to index