// Logo Search packages:
// Sourcecode: qt4-x11 version File versions
//
// SegmentReader.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
*
* Changes are Copyright(C) 2007 by Trolltech ASA, all rights reserved.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "SegmentHeader.h"

#include "FieldInfos.h"
#include "FieldsReader.h"
#include "IndexReader.h"
#include "TermInfosReader.h"
#include "Terms.h"
#include "CLucene/search/Similarity.h"

CL_NS_USE(util)
CL_NS_USE(store)
CL_NS_USE(document)
CL_NS_USE(search)
CL_NS_DEF(index)

 SegmentReader::Norm::Norm(IndexInput* instrm, int32_t n, SegmentReader* r, const char* seg):
      number(n),
      reader(r),
      segment(seg),
      in(instrm),
      bytes(NULL),
      dirty(false)
  {
  //Func - Constructor
  //Pre  - instrm is the IndexInput this Norm reads from; the destructor
  //       deletes it
  //       n is the field number, r the reader this norm belongs to and
  //       seg the segment name (the pointer is stored, the string is not copied)
  //Post - A Norm instance has been created without cached bytes
  //       (bytes == NULL) and not marked dirty

      // Everything is set up by the initializer list above.
  }

  SegmentReader::Norm::~Norm() {
  //Func - Destructor
  //Pre  - true
  //Post - The IndexInput 'in' and the cached bytes array have been deleted

      // 'in' is a pointer to the input stream; deleting it releases the stream
      // (per the original author's note, the stream is closed by its destructor)
      _CLDELETE(in);

      // Release the cached norm bytes, if any
      _CLDELETE_ARRAY(bytes);
  }

  void SegmentReader::Norm::reWrite(){
      char buf[CL_MAX_PATH];
      char fileName[CL_MAX_PATH];
      sprintf(buf,"%s.tmp",segment);

      // NOTE: norms are re-written in regular directory, not cfs
      IndexOutput* out = reader->getDirectory()->createOutput(buf);
      try {
        out->writeBytes(bytes, reader->maxDoc());
      }_CLFINALLY( out->close(); _CLDELETE(out) );

        if ( reader->cfsReader == NULL )
            sprintf(fileName,"%s.f%d",segment,number);
        else // use a different file name if we have compound format
            sprintf(fileName,"%s.s%d",segment,number);
      reader->getDirectory()->renameFile(buf, fileName);
      this->dirty = false;
    }

00074   SegmentReader::SegmentReader(SegmentInfo* si) : 
      //Init the superclass IndexReader
      IndexReader(si->getDir()),
        _norms(false,false)
  { 
      initialize(si);
  }

  SegmentReader::SegmentReader(SegmentInfos* sis, SegmentInfo* si) :
      //The superclass IndexReader gets the segment's directory, the
      //SegmentInfos and 'false' as its third argument
      IndexReader(si->getDir(),sis,false),
      _norms(false,false)
  {
  //Func - Constructor taking the SegmentInfos in addition to the SegmentInfo
  //Pre  - si is a valid SegmentInfo
  //Post - The reader has been set up by initialize(si)

      initialize(si);
  }

  void SegmentReader::initialize(SegmentInfo* si){
      //Func - Opens all per-segment readers and streams for the segment
      //       described by si
      //Pre  - si is a valid reference to a SegmentInfo instance
      //Post - The field infos, frequency and proximity streams, fields reader,
      //       term infos reader, norms and (when present) the deleted docs
      //       and the term vectors reader have been opened

      deletedDocs      = NULL;
        ones                     = NULL;
        //There are no documents yet marked as deleted
      deletedDocsDirty = false;
      
      normsDirty=false;
      undeleteAll=false;

        //Duplicate the name of the segment from SegmentInfo to segment
      segment          = STRDUP_AtoA(si->name);
        // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream       = NULL;
      proxStream       = NULL;
      
        //Buffer in which the file names of the segment are built
      char buf[CL_MAX_PATH];

      // Use compound file directory for some files, if it exists.
      // cfsDir ends up pointing at the compound file reader when a .cfs file
      // exists and at the regular directory otherwise
      Directory* cfsDir = getDirectory();
      SegmentName(buf, CL_MAX_PATH, ".cfs");
      if (cfsDir->fileExists(buf)) {
         cfsReader = _CLNEW CompoundFileReader(cfsDir, buf);
         cfsDir = cfsReader;
        }else
             cfsReader = NULL;

        //Create the name of the field info file with suffix .fnm in buf
      SegmentName(buf, CL_MAX_PATH, ".fnm");
      fieldInfos = _CLNEW FieldInfos(cfsDir, buf );

      //Condition check to see if fieldInfos points to a valid instance
      CND_CONDITION(fieldInfos != NULL,"No memory could be allocated for fieldInfos");

        //Create the name of the frequency file with suffix .frq in buf
      SegmentName(buf,CL_MAX_PATH, ".frq");

        //Open an IndexInput freqStream to the frequency file.
        //With LUCENE_FS_MMAP defined an FSDirectory opens it through
        //openMMapFile and a "CFS" directory through openInput(buf,true);
        //everything else uses the plain openInput(buf)
#ifdef LUCENE_FS_MMAP
        if ( cfsDir->getDirectoryType() == FSDirectory::DirectoryType() ){
              FSDirectory* fsdir = (FSDirectory*)cfsDir;
              freqStream = fsdir->openMMapFile( buf );
      } else if (strcmp(cfsDir->getDirectoryType(), "CFS") == 0) { //todo: we should have a CFS Directory
          freqStream = cfsDir->openInput(buf,true);
        }else
#endif
            freqStream = cfsDir->openInput( buf );

      //Condition check to see if freqStream points to a valid instance and was able to open the
        //frequency file
      CND_CONDITION(freqStream != NULL, "IndexInput freqStream could not open the frequency file");

        //Create the name of the prox file with suffix .prx in buf
      SegmentName(buf, CL_MAX_PATH,".prx");

        //Open an IndexInput proxStream to the prox file, using the same
        //selection of open calls as for the frequency file above
#ifdef LUCENE_FS_MMAP
    if (cfsDir->getDirectoryType() == FSDirectory::DirectoryType()) {
        FSDirectory* fsdir = (FSDirectory*)cfsDir;
        proxStream = fsdir->openMMapFile( buf );
    } else if (strcmp(cfsDir->getDirectoryType(), "CFS") == 0) {
        proxStream = cfsDir->openInput(buf,true);
    } else
#endif
      proxStream = cfsDir->openInput( buf );

        //Condition check to see if proxStream points to a valid instance and was able to open the
        //prox file
      CND_CONDITION(proxStream != NULL, "IndexInput proxStream could not open proximity file");

        //Instantiate a FieldsReader for reading the stored fields of the segment
      fieldsReader = _CLNEW FieldsReader(cfsDir, segment, fieldInfos);

      //Condition check to see if fieldsReader points to a valid instance 
      CND_CONDITION(fieldsReader != NULL,"No memory could be allocated for fieldsReader");

        //Instantiate a TermInfosReader for reading the Term Dictionary .tis file
      tis = _CLNEW TermInfosReader(cfsDir, segment, fieldInfos);

      //Condition check to see if tis points to a valid instance 
      CND_CONDITION(tis != NULL,"No memory could be allocated for tis");

        //Check if the segment has deletions according to the SegmentInfo instance si
      // NOTE: the bitvector is stored using the regular directory, not cfs
      if (hasDeletions(si)){
              //Create the name of the deletion file with suffix .del
          SegmentName(buf, CL_MAX_PATH,".del");
              //Instantiate a BitSet that manages which documents have been deleted
          deletedDocs = _CLNEW BitSet(getDirectory(), buf );
       }

        //Open the norm files. There's a norm file for each indexed field with a byte for each document. 
        //The .f[0-9]* file contains, for each document, a byte that encodes a value 
        //that is multiplied into the score for hits on that field
      openNorms(cfsDir);

      if (fieldInfos->hasVectors()) { // open term vector files only as needed
         termVectorsReaderOrig = _CLNEW TermVectorsReader(cfsDir, segment, fieldInfos);
      }else
             termVectorsReaderOrig = NULL;
  }

00197   SegmentReader::~SegmentReader(){
  //Func - Destructor.
  //Pre  - doClose has been invoked!
  //Post - the instance has been destroyed

      doClose(); //this means that index reader doesn't need to be closed manually

      _CLDELETE(fieldInfos);
        _CLDELETE(fieldsReader);
      _CLDELETE(tis);         
        _CLDELETE(freqStream);
        _CLDELETE(proxStream);
        _CLDELETE_CaARRAY(segment);
        _CLDELETE(deletedDocs);
        _CLDELETE_ARRAY(ones);
     _CLDELETE(termVectorsReaderOrig)
     _CLDECDELETE(cfsReader);
    //termVectorsLocal->unregister(this);
  }

00217   void SegmentReader::doCommit(){
   char bufdel[CL_MAX_PATH];
   strcpy(bufdel,segment);
   strcat(bufdel,".del");

    if (deletedDocsDirty) {               // re-write deleted 
            char buftmp[CL_MAX_PATH];
            strcpy(buftmp,segment);
            strcat(buftmp,".tmp");
            deletedDocs->write(getDirectory(), buftmp);
            getDirectory()->renameFile(buftmp,bufdel);
    }
    if(undeleteAll && getDirectory()->fileExists(bufdel)){
      getDirectory()->deleteFile(bufdel, true);
    }
    if (normsDirty) {               // re-write norms 
        NormsType::iterator itr = _norms.begin();
      while (itr != _norms.end()) {
        Norm* norm = itr->second;
        if (norm->dirty) {
          norm->reWrite();
        }
        ++itr;
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  }
  
00247   void SegmentReader::doClose() {
  //Func - Closes all streams to the files of a single segment
  //Pre  - fieldsReader != NULL
  //       tis != NULL
  //Post - All streams to files have been closed

      CND_PRECONDITION(fieldsReader != NULL, "fieldsReader is NULL");
      CND_PRECONDITION(tis != NULL, "tis is NULL");

        //Close the fieldsReader
      fieldsReader->close();
        //Close the TermInfosReader
      tis->close();

        //Close the frequency stream
        if (freqStream != NULL){
          freqStream->close();
        }
        //Close the prox stream
        if (proxStream != NULL){
         proxStream->close();
         }

        //Close the norm file
      closeNorms();
    
     if (termVectorsReaderOrig != NULL) 
        termVectorsReaderOrig->close();

     if (cfsReader != NULL)
         cfsReader->close();
  }

00280   bool SegmentReader::hasDeletions()  const{
      return deletedDocs != NULL;
  }

  //static 
  bool SegmentReader::usesCompoundFile(SegmentInfo* si) {
  //Func - Static method
  //       Checks if the segment described by si has a compound (.cfs) file
  //Pre  - si holds a valid reference to a SegmentInfo instance
  //Post - true has been returned if "<name>.cfs" exists in the directory of
  //       si, otherwise false

      char cfsName[CL_MAX_PATH];
      // Same length-aware helper as hasDeletions(si) uses, instead of
      // unbounded strcpy/strcat into the fixed-size buffer
      Misc::segmentname(cfsName, CL_MAX_PATH, si->name, ".cfs", -1);
      return si->getDir()->fileExists(cfsName);
  }
  
  //static
00293   bool SegmentReader::hasSeparateNorms(SegmentInfo* si) {
      vector<string> names;
      si->getDir()->fileList(&names);
      
    char pattern[CL_MAX_PATH];
    strcpy(pattern,si->name);
    strcat(pattern,".s");
    size_t patternLength = strlen(pattern);

    string res;
      bool ret=false;
      vector<string>::iterator itr = names.begin();
    while ( itr != names.end() ){
            if ( (*itr).length()>patternLength && strncmp((*itr).c_str(),pattern,patternLength) == 0 ){
                  if ( (*itr).at(patternLength) >= '0' && (*itr).at(patternLength) <= '9' )
                        return true;
            }
            itr++;
    }
    return false;
  }

00315   bool SegmentReader::hasDeletions(const SegmentInfo* si) {
  //Func - Static method
  //       Checks if a segment managed by SegmentInfo si-> has deletions
  //Pre  - si-> holds a valid reference to an SegmentInfo instance
  //Post - if the segement contains deleteions true is returned otherwise flas

        //Create a buffer f of length CL_MAX_PATH
      char f[CL_MAX_PATH];
      //SegmentReader::segmentname(f, si->name,_T(".del"),-1 );
      //create the name of the deletion file
        Misc::segmentname(f,CL_MAX_PATH, si->name,".del",-1 );
        //Check if the deletion file exists and return the result
      return si->getDir()->fileExists( f );
  }

      //synchronized
00331   void SegmentReader::doDelete(const int32_t docNum){
  //Func - Marks document docNum as deleted
  //Pre  - docNum >=0 and DocNum < maxDoc() 
  //       docNum contains the number of the document that must be 
  //       marked deleted
  //Post - The document identified by docNum has been marked deleted

      SCOPED_LOCK_MUTEX(THIS_LOCK)
      
     CND_PRECONDITION(docNum >= 0, "docNum is a negative number");
     CND_PRECONDITION(docNum < maxDoc(), "docNum is bigger than the total number of documents");

        //Check if deletedDocs exists
        if (deletedDocs == NULL){
          deletedDocs = _CLNEW BitSet(maxDoc());

          //Condition check to see if deletedDocs points to a valid instance
          CND_CONDITION(deletedDocs != NULL,"No memory could be allocated for deletedDocs");
        }
      //Flag that there are documents marked deleted
      deletedDocsDirty = true;
      undeleteAll = false;
      //Mark document identified by docNum as deleted
      deletedDocs->set(docNum);

  }

00358   void SegmentReader::doUndeleteAll(){
      _CLDELETE(deletedDocs);
      deletedDocsDirty = false;
      undeleteAll = true;
  }

00364   void SegmentReader::files(CL_NS(util)::AStringArrayWithDeletor& retarray) {
  //Func - Returns all file names managed by this SegmentReader
  //Pre  - segment != NULL
  //Post - All filenames managed by this SegmentRead have been returned
 
     CND_PRECONDITION(segment != NULL, "segment is NULL");

     char* temp = NULL;
     #define _ADD_SEGMENT(ext) temp = SegmentName( ext ); if ( getDirectory()->fileExists(temp) ) retarray.push_back(temp); else _CLDELETE_CaARRAY(temp);
                                                
     //Add the name of the Field Info file
     _ADD_SEGMENT(".cfs" );
     _ADD_SEGMENT(".fnm" );
     _ADD_SEGMENT(".fdx" );
     _ADD_SEGMENT(".fdt" );
     _ADD_SEGMENT(".tii" );
     _ADD_SEGMENT(".tis" );
     _ADD_SEGMENT(".frq" );
     _ADD_SEGMENT(".prx" );
     _ADD_SEGMENT(".del" );
     _ADD_SEGMENT(".tvx" );
     _ADD_SEGMENT(".tvd" );
     _ADD_SEGMENT(".tvf" );
     _ADD_SEGMENT(".tvp" );

      //iterate through the field infos
                  FieldInfo* fi;
      for (int32_t i = 0; i < fieldInfos->size(); ++i) {
          //Get the field info for the i-th field   
          fi = fieldInfos->fieldInfo(i);
          //Check if the field has been indexed
          if (fi->isIndexed && !fi->omitNorms){
                    char* name;
                    if ( cfsReader == NULL )
                          name = SegmentName(".f", i);
                    else
                          name = SegmentName(".s", i);
              //The field has been indexed so add its norm file
              if ( getDirectory()->fileExists(name) )
                          retarray.push_back( name );
                    else
                          _CLDELETE_CaARRAY(name);
          }
       }
  }

00410   TermEnum* SegmentReader::terms() const {
  //Func - Returns an enumeration of all the Terms and TermInfos in the set. 
  //Pre  - tis != NULL
  //Post - An enumeration of all the Terms and TermInfos in the set has been returned

      CND_PRECONDITION(tis != NULL, "tis is NULL");

      return tis->terms();
  }

00420   TermEnum* SegmentReader::terms(const Term* t) const {
  //Func - Returns an enumeration of terms starting at or after the named term t 
  //Pre  - t != NULL
  //       tis != NULL
  //Post - An enumeration of terms starting at or after the named term t 

      CND_PRECONDITION(t   != NULL, "t is NULL");
      CND_PRECONDITION(tis != NULL, "tis is NULL");

      return tis->terms(t);
  }

  bool SegmentReader::document(int32_t n, Document* doc) {
  //Func - Loads the stored fields of document n into doc
  //Pre  - n >= 0 and identifies the document n
  //Post - If document n is marked deleted a CL_ERR_InvalidState exception
  //       has been thrown, otherwise the result of fieldsReader->doc(n, doc)
  //       has been returned

      SCOPED_LOCK_MUTEX(THIS_LOCK)

      CND_PRECONDITION(n >= 0, "n is a negative number");

      //A document that is marked deleted may not be retrieved
      const bool deleted = isDeleted(n);
      if (deleted){
          _CLTHROWA( CL_ERR_InvalidState,"attempt to access a deleted document" );
      }

      //Hand the request over to the fields reader
      return fieldsReader->doc(n, doc);
  }


00452   bool SegmentReader::isDeleted(const int32_t n){
  //Func - Checks if the n-th document has been marked deleted
  //Pre  - n >=0 and identifies the document n
  //Post - true has been returned if document n has been deleted otherwise fralse

      SCOPED_LOCK_MUTEX(THIS_LOCK)
      
      CND_PRECONDITION(n >= 0, "n is a negative number");

        //Is document n deleted
      bool ret = (deletedDocs != NULL && deletedDocs->get(n));

      return ret;
  }

00467   TermDocs* SegmentReader::termDocs() const {
  //Func - Returns an unpositioned TermDocs enumerator. 
  //Pre  - true
  //Post - An unpositioned TermDocs enumerator has been returned

       return _CLNEW SegmentTermDocs(this);
  }

00475   TermPositions* SegmentReader::termPositions() const {
  //Func - Returns an unpositioned TermPositions enumerator. 
  //Pre  - true
  //Post - An unpositioned TermPositions enumerator has been returned

      return _CLNEW SegmentTermPositions(this);
  }

00483   int32_t SegmentReader::docFreq(const Term* t) const {
  //Func - Returns the number of documents which contain the term t
  //Pre  - t holds a valid reference to a Term
  //Post - The number of documents which contain term t has been returned

      //Get the TermInfo ti for Term  t in the set
      TermInfo* ti = tis->get(t);
      //Check if an TermInfo has been returned
      if (ti){
              //Get the frequency of the term
          int32_t ret = ti->docFreq;
              //TermInfo ti is not needed anymore so delete it
          _CLDELETE( ti );
              //return the number of documents which containt term t
          return ret;
          }
        else
              //No TermInfo returned so return 0
          return 0;
  }

00504   int32_t SegmentReader::numDocs() {
  //Func - Returns the actual number of documents in the segment
  //Pre  - true
  //Post - The actual number of documents in the segments

        //Get the number of all the documents in the segment including the ones that have 
        //been marked deleted
      int32_t n = maxDoc();

        //Check if there any deleted docs
      if (deletedDocs != NULL)
              //Substract the number of deleted docs from the number returned by maxDoc
          n -= deletedDocs->count();

        //return the actual number of documents in the segment
      return n;
  }

00522   int32_t SegmentReader::maxDoc() const {
  //Func - Returns the number of  all the documents in the segment including
  //       the ones that have been marked deleted
  //Pre  - true
  //Post - The total number of documents in the segment has been returned

      return fieldsReader->size();
  }

void SegmentReader::getFieldNames(FieldOption fldOption, StringArrayWithDeletor& retarray){
//Func - Collects the names of the fields that match fldOption
//Pre  - fldOption is a combination of IndexReader field option flags
//Post - A copy (STRDUP_TtoT) of the name of every matching field has been
//       appended to retarray

      const size_t fieldCount = fieldInfos->size();
      for (size_t idx = 0; idx < fieldCount; idx++) {
            FieldInfo* fi = fieldInfos->fieldInfo(idx);

            //IndexReader::ALL selects every field; otherwise a field is
            //selected as soon as one of the requested options applies to it
            bool selected;
            if (fldOption & IndexReader::ALL) {
                  selected = true;
            }else {
                  selected =
                        ( !fi->isIndexed && (fldOption & IndexReader::UNINDEXED) )
                     || ( fi->isIndexed && (fldOption & IndexReader::INDEXED) )
                     || ( fi->isIndexed && fi->storeTermVector == false &&
                              (fldOption & IndexReader::INDEXED_NO_TERMVECTOR) )
                     || ( (fldOption & IndexReader::TERMVECTOR) &&
                              fi->storeTermVector == true &&
                              fi->storePositionWithTermVector == false &&
                              fi->storeOffsetWithTermVector == false )
                     || ( fi->isIndexed && fi->storeTermVector &&
                              (fldOption & IndexReader::INDEXED_WITH_TERMVECTOR) )
                     || ( fi->storePositionWithTermVector &&
                              fi->storeOffsetWithTermVector == false &&
                              (fldOption & IndexReader::TERMVECTOR_WITH_POSITION) )
                     || ( fi->storeOffsetWithTermVector &&
                              fi->storePositionWithTermVector == false &&
                              (fldOption & IndexReader::TERMVECTOR_WITH_OFFSET) )
                     || ( (fi->storeOffsetWithTermVector && fi->storePositionWithTermVector) &&
                              (fldOption & IndexReader::TERMVECTOR_WITH_POSITION_OFFSET) );
            }

            if ( !selected )
                  continue;
            retarray.push_back(STRDUP_TtoT(fi->name));
      }
}

bool SegmentReader::hasNorms(const TCHAR* field) const{
//Func - Tells whether a Norm is registered for the named field
//Pre  - true
//Post - true has been returned if _norms contains field, otherwise false

      const bool registered = ( _norms.find(field) != _norms.end() );
      return registered;
}


00572   void SegmentReader::norms(const TCHAR* field, uint8_t* bytes) {
  //Func - Reads the Norms for field from disk starting at offset in the inputstream
  //Pre  - field != NULL
  //       bytes != NULL is an array of bytes which is to be used to read the norms into.
  //       it is advisable to have bytes initalized by zeroes!
  //Post - The if an inputstream to the norm file could be retrieved the bytes have been read
  //       You are never sure whether or not the norms have been read into bytes properly!!!!!!!!!!!!!!!!!

    CND_PRECONDITION(field != NULL, "field is NULL");
    CND_PRECONDITION(bytes != NULL, "field is NULL");

      SCOPED_LOCK_MUTEX(THIS_LOCK)
    
    Norm* norm = _norms.get(field);
      if ( norm == NULL ){
            memcpy(bytes, fakeNorms(), maxDoc());
            return;
      }
    if (norm->bytes != NULL) { // can copy from cache
        memcpy(bytes, norm->bytes, maxDoc());
      return;
    }

   IndexInput* _normStream = norm->in->clone();
   CND_PRECONDITION(_normStream != NULL, "normStream==NULL")

    // read from disk
    try{ 
       _normStream->seek(0);
       _normStream->readBytes(bytes, maxDoc());
    }_CLFINALLY(
        //Have the normstream closed
        _normStream->close();
        //Destroy the normstream
        _CLDELETE( _normStream );
      );    
  }

  uint8_t* SegmentReader::createFakeNorms(int32_t size) {
  //Func - Creates an array of size norm bytes that all encode the norm 1.0
  //Pre  - size is the number of bytes to allocate
  //Post - A newly allocated array has been returned in which every byte is
  //       DefaultSimilarity::encodeNorm(1.0f)

      //Local name chosen so that it does not hide the member 'ones'
      uint8_t* fake = _CL_NEWARRAY(uint8_t,size);
      memset(fake, DefaultSimilarity::encodeNorm(1.0f), size);
      return fake;
  }

  uint8_t* SegmentReader::fakeNorms() {
  //Func - Returns the shared array of fake norms, creating it on first use
  //Pre  - true
  //Post - ones has been returned; when it was NULL it has first been created
  //       with createFakeNorms(maxDoc())

      if (ones != NULL)
          return ones;

      ones = createFakeNorms(maxDoc());
      return ones;
  }
  // can return null if norms aren't stored
  uint8_t* SegmentReader::getNorms(const TCHAR* field) {
  //Func - Returns the cached norm bytes of the named field, loading them on
  //       first use
  //Pre  - true
  //Post - NULL has been returned when no Norm is registered for field,
  //       otherwise the bytes array cached in the Norm (the Norm keeps
  //       ownership of it)

      SCOPED_LOCK_MUTEX(THIS_LOCK)
      Norm* norm = _norms.get(field);
      if (norm == NULL)
          return NULL;  // not indexed, or norms not stored

      if (norm->bytes == NULL) {                     // value not yet read
          uint8_t* loaded = _CL_NEWARRAY(uint8_t, maxDoc());
          // Reading the norms can fail; release the new array in that case
          // instead of leaking it, then let the exception travel on
          try {
              norms(field, loaded);
          } catch (...) {
              _CLDELETE_ARRAY(loaded);
              throw;
          }
          norm->bytes = loaded;                      // cache it
      }
      return norm->bytes;
  }

00636   uint8_t* SegmentReader::norms(const TCHAR* field) {
  //Func - Returns the bytes array that holds the norms of a named field
  //Pre  - field != NULL and contains the name of the field for which the norms 
  //       must be retrieved
  //Post - If there was norm for the named field then a bytes array has been allocated 
  //       and returned containing the norms for that field. If the named field is unknown NULL is returned.

    CND_PRECONDITION(field != NULL, "field is NULL");
    SCOPED_LOCK_MUTEX(THIS_LOCK)
      uint8_t* bytes = getNorms(field);
    if (bytes==NULL) 
            bytes=fakeNorms();
    return bytes;
  }

00651   void SegmentReader::doSetNorm(int32_t doc, const TCHAR* field, uint8_t value){
    Norm* norm = _norms.get(field);
    if (norm == NULL)                             // not an indexed field
      return;
    norm->dirty = true;                            // mark it dirty
    normsDirty = true;

    uint8_t* bits = norms(field);
    bits[doc] = value;                    // set the value
  }


00663   char* SegmentReader::SegmentName(const char* ext, const int32_t x){
  //Func - Returns an allocated buffer in which it creates a filename by 
  //       concatenating segment with ext and x
  //Pre    ext != NULL and holds the extension
  //       x contains a number
  //Post - A buffer has been instantiated an when x = -1 buffer contains the concatenation of 
  //       segment and ext otherwise buffer contains the contentation of segment, ext and x
      
        CND_PRECONDITION(ext     != NULL, "ext is NULL");

        //Create a buffer of length CL_MAX_PATH
        char* buf = _CL_NEWARRAY(char,CL_MAX_PATH);
        //Create the filename
      SegmentName(buf,CL_MAX_PATH,ext,x);
        
      return buf ;
  }

00681   void SegmentReader::SegmentName(char* buffer,int32_t bufferLen, const char* ext, const int32_t x ){
  //Func - Creates a filename in buffer by concatenating segment with ext and x
  //Pre  - buffer != NULL
  //       ext    != NULL
  //       x contains a number
  //Post - When x = -1 buffer contains the concatenation of segment and ext otherwise
  //       buffer contains the contentation of segment, ext and x

      CND_PRECONDITION(buffer  != NULL, "buffer is NULL");
      CND_PRECONDITION(segment != NULL, "Segment is NULL");

      Misc::segmentname(buffer,bufferLen,segment,ext,x);
  }
  void SegmentReader::openNorms(Directory* cfsDir) {
  //Func - Opens the norm file of every indexed field that stores norms
  //Pre  - cfsDir is the directory that holds the ".f<number>" norm files
  //Post - For each such field a Norm with an open input stream to its norm
  //       file has been put in _norms under the name of the field

      const int32_t fieldCount = fieldInfos->size();
      for (int32_t idx = 0; idx < fieldCount; idx++) {
          FieldInfo* info = fieldInfos->fieldInfo(idx);

          //Only indexed fields that do not omit norms have a norm file
          if (!info->isIndexed || info->omitNorms)
              continue;

          char normFile[CL_MAX_PATH];

          // look first if there are separate norms in compound format:
          // a ".s<number>" file lives in the regular directory
          Directory* source = getDirectory();
          SegmentName(normFile, CL_MAX_PATH, ".s", info->number);
          if (!source->fileExists(normFile)) {
              // otherwise use the ".f<number>" file from cfsDir
              SegmentName(normFile, CL_MAX_PATH, ".f", info->number);
              source = cfsDir;
          }

          _norms.put(info->name, _CLNEW Norm( source->openInput( normFile ), info->number, this, segment ));
      }
  }

  void SegmentReader::closeNorms() {
  //Func - Close all the norms stored in norms
  //Pre  - true
  //Post - All the norms have been destroyed

    SCOPED_LOCK_MUTEX(_norms.THIS_LOCK)
      //Create an interator initialized at the beginning of norms
      NormsType::iterator itr = _norms.begin();
      //Iterate through all the norms
    while (itr != _norms.end()) {
        //Get the norm
        Norm* n = itr->second;
        //delete the norm n
        _CLDELETE(n);
        //Move the interator to the next norm in the norms collection.
          //Note ++ is an overloaded operator
        ++itr;
     }
    _norms.clear(); //bvk: they're deleted, so clear them so that they are not re-used
  }


00746       TermVectorsReader* SegmentReader::getTermVectorsReader() {
            TermVectorsReader* tvReader = termVectorsLocal.get();
            if (tvReader == NULL) {
              tvReader = termVectorsReaderOrig->clone();
              termVectorsLocal.set(tvReader);
            }
            return tvReader;
      }

00755    TermFreqVector* SegmentReader::getTermFreqVector(int32_t docNumber, const TCHAR* field){
            if ( field != NULL ){
                  // Check if this field is invalid or has no stored term vector
                  FieldInfo* fi = fieldInfos->fieldInfo(field);
                  if (fi == NULL || !fi->storeTermVector || termVectorsReaderOrig == NULL ) 
                        return NULL;
            }
            TermVectorsReader* termVectorsReader = getTermVectorsReader();
            if (termVectorsReader == NULL)
              return NULL;
            return termVectorsReader->get(docNumber, field);
  }

00768    bool SegmentReader::getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result) {
    if (termVectorsReaderOrig == NULL)
      return false;
    
    TermVectorsReader* termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == NULL)
      return false;
    
    return termVectorsReader->get(docNumber, result);
  }

CL_NS_END

// Generated by Doxygen 1.6.0 - Back to index