// Sourcecode: qt4-x11
//
// AnalysisHeader.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "AnalysisHeader.h"
#include "CLucene/util/StringBuffer.h"

CL_NS_USE(util)
CL_NS_DEF(analysis)

// Type label used by the default constructor for tokens that are given no explicit type.
const TCHAR* Token::defaultType = _T("word");

// Builds an empty token: both offsets are 0, the type is defaultType,
// the position increment is 1 and the term text length is 0.
Token::Token():
      _startOffset (0),
      _endOffset (0),
      _type ( defaultType ),
      positionIncrement (1)
{
#ifdef LUCENE_TOKEN_WORD_LENGTH
      // Fixed-length build: _termText is used as-is, so just make it an empty string.
      bufferTextLen = LUCENE_TOKEN_WORD_LENGTH+1;
      _termText[0] = 0;
#else
      // Dynamic build: no buffer yet; growBuffer() allocates one on demand.
      bufferTextLen = 0;
      _termText = NULL;
#endif
      _termTextLen = 0;
}

// Releases the term text buffer when it was obtained from malloc/realloc,
// i.e. only in builds without LUCENE_TOKEN_WORD_LENGTH.
Token::~Token()
{
#ifndef LUCENE_TOKEN_WORD_LENGTH
      // free(NULL) is a no-op, so a token that never stored text needs no check.
      free(_termText);
#endif
}

// Builds a token from its text, offsets and type.
//   text  - term text; copied into the token by setText()
//   start - value stored as the start offset
//   end   - value stored as the end offset
//   typ   - type label; the pointer itself is stored, the string is not copied
// The position increment starts at 1.
Token::Token(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ):
      _startOffset (start),
      _endOffset (end),
      _type ( typ ),
      positionIncrement (1)
{
#ifdef LUCENE_TOKEN_WORD_LENGTH
      // Fixed-length build: _termText is used as-is, so just make it an empty string.
      bufferTextLen = LUCENE_TOKEN_WORD_LENGTH+1;
      _termText[0] = 0;
#else
      // Dynamic build: no buffer yet; setText() below grows it through growBuffer().
      bufferTextLen = 0;
      _termText = NULL;
#endif
      _termTextLen = 0;
      setText(text);
}

// Re-initialises an existing token in one call.
//   text  - new term text; copied by setText()
//   start - new start offset
//   end   - new end offset
//   typ   - new type label; the pointer itself is stored
// The position increment is reset to 1.
void Token::set(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ){
      positionIncrement = 1;
      _type        = typ;
      _startOffset = start;
      _endOffset   = end;
      // Copy the text last, after all scalar fields have been updated.
      setText(text);
}

// Replaces the term text with a copy of `text` and records its length.
// In fixed-length builds (LUCENE_TOKEN_WORD_LENGTH defined) text longer than
// LUCENE_TOKEN_WORD_LENGTH characters is truncated to that length.
void Token::setText(const TCHAR* text){
      _termTextLen = _tcslen(text);

#ifdef LUCENE_TOKEN_WORD_LENGTH
      // When the text is cut short, the end offset is deliberately left as it is,
      // since the actual word still occupies that space.
      if ( _termTextLen > LUCENE_TOKEN_WORD_LENGTH )
            _termTextLen = LUCENE_TOKEN_WORD_LENGTH;
#else
      // Make room for the characters plus the terminator.
      growBuffer(_termTextLen+1);
#endif

      _tcsncpy(_termText,text,_termTextLen+1);
      // Terminate explicitly: a truncated copy has no terminator of its own.
      _termText[_termTextLen] = 0;
}

void Token::growBuffer(size_t size){
      if(bufferTextLen>size)
            return;
#ifndef LUCENE_TOKEN_WORD_LENGTH
      if ( _termText == NULL )
            _termText = (TCHAR*)malloc( size * sizeof(TCHAR) );
      else
            _termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) );
      bufferTextLen = size;
#else
      _CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer");
#endif
}

// Stores a new position increment.
//   posIncr - new increment; must not be negative
// Throws CL_ERR_IllegalArgument for a negative value, leaving the current
// increment unchanged.
void Token::setPositionIncrement(int32_t posIncr) {
      const bool isNegative = ( posIncr < 0 );
      if (isNegative) {
            _CLTHROWA(CL_ERR_IllegalArgument,"positionIncrement must be >= 0");
      }
      positionIncrement = posIncr;
}

// Returns the stored position increment.
int32_t Token::getPositionIncrement() const {
      return positionIncrement;
}

// Returns the Token's term text.
// The result points at the token's own buffer; no copy is made.
const TCHAR* Token::termText() const{
      const TCHAR* text = _termText;
      return text;
}
// Returns the length of the term text, recomputing it from the buffer when
// the cached value has been invalidated.
size_t Token::termTextLength() {
      // -1 is the marker stored by resetTermTextLen().
      const bool lengthIsStale = ( _termTextLen == -1 );
      if ( lengthIsStale )
            _termTextLen = _tcslen(_termText);
      return _termTextLen;
}
// Marks the cached term text length as unknown (-1); termTextLength()
// recomputes it on its next call.
void Token::resetTermTextLen()
{
      _termTextLen = -1;
}
// Orders tokens by start offset.
// Returns false only when t1 starts strictly after t2; returns true when t1
// starts before t2 and also when both start at the same offset.
// NOTE(review): returning true for equal offsets means this is not a strict
// weak ordering. Confirm whether callers depend on that before changing it.
bool Token::OrderCompare::operator()( Token* t1, Token* t2 ) const{
      const bool firstStartsLater = t1->startOffset() > t2->startOffset();
      return !firstStartsLater;
}
// Builds a textual form of the token:
//   (text,start,end[,type=TYPE][,posIncr=N])
// The type part appears only when the type differs from "word", the posIncr
// part only when the position increment is not 1.
// Returns whatever StringBuffer::toString() yields.
// NOTE(review): presumably a buffer the caller must release — confirm.
TCHAR* Token::toString() const{
      StringBuffer text;

      text.append(_T("("));
      text.append( _termText );
      text.append(_T(","));
      text.appendInt( _startOffset );
      text.append(_T(","));
      text.appendInt( _endOffset );

      const bool hasCustomType = ( _tcscmp( _type, _T("word")) != 0 );
      if ( hasCustomType ){
            text.append(_T(",type="));
            text.append(_type);
      }

      const bool hasCustomIncrement = ( positionIncrement != 1 );
      if ( hasCustomIncrement ){
            text.append(_T(",posIncr="));
            text.appendInt(positionIncrement);
      }

      text.append(_T(")"));
      return text.toString();
}


// Deprecated convenience overload: allocates a fresh Token and fills it via
// next(Token*). When that call reports failure the token is deleted again.
// NOTE(review): the value returned after a failure relies on _CLDELETE
// resetting the pointer to NULL — confirm against the macro definition.
Token* TokenStream::next(){
      Token* fresh = _CLNEW Token; //deprecated
      const bool produced = next(fresh);
      if ( !produced ) {
            _CLDELETE(fresh);
      }
      return fresh;
}


00153 TokenFilter::TokenFilter(TokenStream* in, bool deleteTS):
      input(in),
      deleteTokenStream(deleteTS)
{
}
// Closes the filter on destruction, so the input stream is closed (and
// deleted when deleteTokenStream is set) even if close() was never called.
TokenFilter::~TokenFilter()
{
      close();
}

// Close the input TokenStream.
00163 void TokenFilter::close() {
    if ( input != NULL ){
            input->close();
        if ( deleteTokenStream )
                  _CLDELETE( input );
    }
    input = NULL;
}



00174 Tokenizer::Tokenizer() {
      input = NULL;
}

00178 Tokenizer::Tokenizer(CL_NS(util)::Reader* _input):
    input(_input)
{
}

00183 void Tokenizer::close(){
      if (input != NULL) {
            // ? delete input;
            input = NULL;
      }
}

// Detaches the input reader on destruction by calling close().
Tokenizer::~Tokenizer()
{
    close();
}


00195 int32_t Analyzer::getPositionIncrementGap(const TCHAR* fieldName)
{
      return 0;
}

CL_NS_END

// Generated by Doxygen 1.6.0