Logo Search packages:      
Sourcecode: qt4-x11 version File versions

TermInfosWriter.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "TermInfosWriter.h"

#include "CLucene/store/Directory.h"
#include "CLucene/util/Misc.h"
#include "FieldInfos.h"
#include "Term.h"
#include "TermInfo.h"
#include "IndexWriter.h"

CL_NS_USE(util)
CL_NS_USE(store)
CL_NS_DEF(index)

      TermInfosWriter::TermInfosWriter(Directory* directory, const char* segment, FieldInfos* fis, int32_t interval):
        fieldInfos(fis){
    //Func - Constructor
      //Pre  - directory contains a valid reference to a Directory
    //       segment != NULL
      //       fis contains a valid reference to a reference FieldInfos
      //Post - The instance has been created

        CND_PRECONDITION(segment != NULL, "segment is NULL");
            //Initialize instance
        initialise(directory,segment,interval, false);

            other = _CLNEW TermInfosWriter(directory, segment,fieldInfos, interval, true);

            CND_CONDITION(other != NULL, "other is NULL");

            other->other = this;
      }

    TermInfosWriter::TermInfosWriter(Directory* directory, const char* segment, FieldInfos* fis, int32_t interval, bool isIndex):
          fieldInfos(fis){
    //Func - Constructor
      //Pre  - directory contains a valid reference to a Directory
    //       segment != NULL
      //       fis contains a valid reference to a reference FieldInfos
      //       isIndex is true or false
      //Post - The instance has been created

        CND_PRECONDITION(segment != NULL, "segment is NULL");
        initialise(directory,segment,interval,isIndex);
    }

    void TermInfosWriter::initialise(Directory* directory, const char* segment, int32_t interval, bool IsIndex){
    //Func - Helps constructors to initialize Instance
      //Pre  - directory contains a valid reference to a Directory
    //       segment != NULL
      //       fis contains a valid reference to a reference FieldInfos
      //Post - The instance has been initialized

            lastTerm = _CLNEW Term;

            CND_CONDITION(lastTerm != NULL, "Could not allocate memory for lastTerm");
    
            lastTi  = _CLNEW TermInfo();

        CND_CONDITION(lastTi != NULL, "Could not allocate memory for lastTi");

            lastIndexPointer = 0;
            size             = 0;
            isIndex          = IsIndex;
            indexInterval = interval;
            skipInterval = LUCENE_DEFAULT_TERMDOCS_SKIP_INTERVAL;

          const char* buf = Misc::segmentname(segment, (isIndex ? ".tii" : ".tis"));
          output = directory->createOutput( buf );
          _CLDELETE_CaARRAY(buf);

            output->writeInt(FORMAT);                      // write format
            output->writeLong(0);                          // leave space for size
            output->writeInt(indexInterval);// write indexInterval
            output->writeInt(skipInterval); // write skipInterval

        //Set other to NULL by Default
        other = NULL;
    }

      TermInfosWriter::~TermInfosWriter(){
    //Func - Destructor
      //Pre  - true
      //Post - de instance has been destroyed

            close();
      }
      
      void TermInfosWriter::add(Term* term, const TermInfo* ti) {
      //Func - Writes a Term and TermInfo to the outputstream
      //Pre  - Term must be lexicographically greater than all previous Terms added.
    //       Pointers of TermInfo ti (freqPointer and proxPointer) must be positive and greater than all previous.

            CND_PRECONDITION(isIndex || (!isIndex  && term->compareTo(lastTerm) > 0),"term out of order");
            CND_PRECONDITION(ti->freqPointer >= lastTi->freqPointer,"freqPointer out of order");
            CND_PRECONDITION(ti->proxPointer >= lastTi->proxPointer,"proxPointer out of order");

            if (!isIndex && size % indexInterval == 0){
            //add an index term
                  other->add(lastTerm, lastTi);
            }

            //write term
            writeTerm(term);                          
            // write doc freq
            output->writeVInt(ti->docFreq);             
            //write pointers
            output->writeVLong(ti->freqPointer - lastTi->freqPointer);
            output->writeVLong(ti->proxPointer - lastTi->proxPointer);
            if (ti->docFreq >= skipInterval) {
                  output->writeVInt(ti->skipOffset);
            }

            if (isIndex){
                  output->writeVLong(other->output->getFilePointer() - lastIndexPointer);
                  lastIndexPointer = other->output->getFilePointer(); // write pointer
            }

            lastTi->set(ti);
            size++;
      }
      
      void TermInfosWriter::close() {
    //Func - Closes the TermInfosWriter
      //Pre  - true
      //Post - The TermInfosWriter has been closed
    
            if (output){
                  //write size at start
                output->seek(4);          // write size after format
                output->writeLong(size);
                output->close();
               _CLDELETE(output);

               if (!isIndex){
                     if(other){
                        other->close();
                        _CLDELETE( other );
                        }
                  }
              _CLDECDELETE(lastTerm);

                  _CLDELETE(lastTi);
               }
      }

      void TermInfosWriter::writeTerm(Term* term) {
          int32_t start = Misc::stringDifference(lastTerm->text(),lastTerm->textLength(), 
                  term->text(),term->textLength());
            int32_t length = term->textLength() - start;
       
            output->writeVInt(start);                   // write shared prefix length
            output->writeVInt(length);                  // write delta length
            output->writeChars(term->text(), start, length);  // write delta chars

            int32_t fieldnum = fieldInfos->fieldNumber(term->field());
            CND_PRECONDITION(fieldnum>=-1&&fieldnum<fieldInfos->size(),"Fieldnum is out of range");
            output->writeVInt(fieldnum); // write field num

            if ( lastTerm->__cl_refcount == 1 ){
                  lastTerm->set(term,term->text());
            }else{
                  _CLDECDELETE(lastTerm);
                  lastTerm = _CL_POINTER(term);
            }
      }

CL_NS_END

Generated by  Doxygen 1.6.0   Back to index