// Logo Search packages:
// Sourcecode: qt4-x11 version File versions
//
// Reader.cpp

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "Reader.h"

CL_NS_DEF(util)

/**
 * Builds a reader over the given string.
 *
 * The base Reader is initialised with (NULL, true) and the actual stream,
 * a jstreams::StringReader<TCHAR> created from `value`, is installed
 * afterwards.
 * NOTE(review): the `true` flag presumably tells Reader to delete the
 * stream on destruction -- confirm against Reader.h.
 *
 * @param value string handed to jstreams::StringReader<TCHAR>
 */
StringReader::StringReader ( const TCHAR* value )
  : Reader(NULL, true)
{
      this->reader = new jstreams::StringReader<TCHAR>(value);
}
/**
 * Builds a reader over the first `length` characters of a string.
 *
 * The base Reader is initialised with (NULL, true) and the actual stream,
 * a jstreams::StringReader<TCHAR> created from `value` and `length`, is
 * installed afterwards.
 *
 * @param value  string handed to jstreams::StringReader<TCHAR>
 * @param length length forwarded to jstreams::StringReader<TCHAR>
 */
StringReader::StringReader ( const TCHAR* value, const int32_t length )
  : Reader(NULL, true)
{
      this->reader = new jstreams::StringReader<TCHAR>(value, length);
}
/**
 * Builds a reader over a string, letting the caller pick the copy policy.
 *
 * All three arguments are forwarded unchanged to the
 * jstreams::StringReader<TCHAR> constructor; the resulting stream becomes
 * this reader's underlying stream.
 * NOTE(review): `copyData` presumably selects between copying the text and
 * referencing the caller's buffer -- confirm in the jstreams headers.
 *
 * @param value    string handed to jstreams::StringReader<TCHAR>
 * @param length   length forwarded to jstreams::StringReader<TCHAR>
 * @param copyData copy flag forwarded to jstreams::StringReader<TCHAR>
 */
StringReader::StringReader ( const TCHAR* value, const int32_t length, bool copyData )
  : Reader(NULL, true)
{
      this->reader = new jstreams::StringReader<TCHAR>(value, length, copyData);
}
/**
 * Destructor. StringReader releases nothing itself; the body is
 * intentionally empty.
 */
StringReader::~StringReader()
{
}


/**
 * Opens `path` and wraps it in a decoding reader.
 *
 * A jstreams::FileInputStream is created for the file and stored in
 * `input`; a SimpleInputStreamReader decoding that stream with `enc` is
 * then installed as the underlying reader.
 *
 * @param path     file to open
 * @param enc      encoding name handed to SimpleInputStreamReader
 * @param cachelen forwarded to the jstreams::FileInputStream constructor
 * @param cachebuff unused (parameter intentionally unnamed)
 *
 * @throws whatever SimpleInputStreamReader's constructor throws (it
 *         rejects unsupported encodings); the file stream is released
 *         before the exception propagates.
 */
FileReader::FileReader ( const char* path, const char* enc,
                                     const int32_t cachelen, const int32_t /*cachebuff*/ ):
      Reader(NULL, true)
{
      this->input = new jstreams::FileInputStream(path, cachelen);
      try {
            this->reader = new SimpleInputStreamReader(this->input,enc); //(this is a jstream object)
      } catch (...) {
            // ~FileReader() is not run when the constructor exits via an
            // exception, so the stream must be freed here or it leaks.
            delete this->input;
            this->input = NULL;
            throw;
      }
}

/**
 * Releases the file input stream created by the constructor.
 */
FileReader::~FileReader ()
{
    // delete on a null pointer is a no-op, so no explicit test is needed.
    delete input;
}
/**
 * Forwards a read request to the underlying reader.
 *
 * @param start receives the pointer set by the underlying reader
 * @param _min  forwarded unchanged as the minimum
 * @param _max  forwarded unchanged as the maximum
 * @return the underlying reader's result, unchanged
 */
int32_t FileReader::read(const TCHAR*& start, int32_t _min, int32_t _max)
{
    const int32_t result = reader->read(start, _min, _max);
    return result;
}
/**
 * Forwards a mark request to the underlying reader.
 *
 * @param readlimit forwarded unchanged
 * @return the underlying reader's result, unchanged
 */
int64_t FileReader::mark(int32_t readlimit)
{
    const int64_t result = reader->mark(readlimit);
    return result;
}
/**
 * Forwards a reset request to the underlying reader.
 *
 * @param newpos forwarded unchanged
 * @return the underlying reader's result, unchanged
 */
int64_t FileReader::reset(int64_t newpos)
{
    const int64_t result = reader->reset(newpos);
    return result;
}



/**
 * Creates a reader that decodes the byte stream `i` into TCHARs.
 *
 * Recognised encoding names are "ASCII" and, only when _UCS2 is defined,
 * "UTF-8" and "UCS-2LE". The comparison is exact (strcmp).
 *
 * @param i   byte stream to decode; the pointer is stored, and the
 *            destructor only clears it without deleting the stream
 * @param enc encoding name; must not be NULL
 *
 * @throws CL_ERR_IllegalArgument if `enc` is NULL or names an encoding
 *         that is not supported in this build.
 */
SimpleInputStreamReader::SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* enc)
{
    finishedDecoding = false;
    input = i;
    charbuf.setSize(262);

      // strcmp() on a null pointer is undefined behaviour; report a missing
      // encoding the same way as an unknown one.
      if ( enc == NULL ) {
            _CLTHROWA(CL_ERR_IllegalArgument,"Unsupported encoding, use jstreams iconv based instead");
      }

      if ( strcmp(enc,"ASCII")==0 )
            encoding = ASCII;
#ifdef _UCS2
      else if ( strcmp(enc,"UTF-8")==0 )
            encoding = UTF8;
      else if ( strcmp(enc,"UCS-2LE")==0 )
            encoding = UCS2_LE;
#endif
      else
            _CLTHROWA(CL_ERR_IllegalArgument,"Unsupported encoding, use jstreams iconv based instead");

      // NOTE(review): 262 matches the charbuf size set above; presumably this
      // sizes the decoded-output buffer of the base class -- confirm.
      mark(262);
    charsLeft = 0;
}
/**
 * Destructor. Only clears the stored input pointer; the stream itself is
 * not deleted here.
 */
SimpleInputStreamReader::~SimpleInputStreamReader()
{
      this->input = NULL;
}
/**
 * Decodes as many bytes from charbuf as fit into the output buffer.
 *
 * Bytes are taken from [charbuf.readPos, charbuf.readPos + charbuf.avail)
 * and converted according to `encoding`. Afterwards charbuf is updated so
 * that it describes exactly the bytes that were not consumed:
 *  - output not full, input left over (incomplete trailing character):
 *    the leftover bytes are moved to the start of charbuf;
 *  - output not full, input fully consumed: charbuf is emptied and, when
 *    `input` is already NULL, finishedDecoding is set;
 *  - output full: readPos is advanced past the consumed bytes.
 *
 * @param start destination for decoded characters
 * @param space number of TCHARs available at `start`
 * @return number of TCHARs written, or -1 on an invalid multibyte sequence
 *         (in which case `error` and `status` are set)
 * @throws CL_ERR_Runtime if `encoding` holds an unexpected value
 */
int32_t SimpleInputStreamReader::decode(TCHAR* start, int32_t space){
      // decode from charbuf
    const char *inbuf = charbuf.readPos;
    const char *inbufend = charbuf.readPos + charbuf.avail;
    TCHAR *outbuf = start;
      const TCHAR *outbufend = outbuf + space;

      if ( encoding == ASCII ){
            while ( outbuf<outbufend && inbuf<inbufend ){
                  // Go through unsigned char so that bytes >= 0x80 are not
                  // sign-extended into negative/huge wide characters on
                  // platforms where plain char is signed.
                  *outbuf = static_cast<TCHAR>(static_cast<unsigned char>(*inbuf));
                  outbuf++;
                  inbuf++;
            }
      }
#ifdef _UCS2
      else if ( encoding == UCS2_LE ){
            // two bytes per character, least significant byte first; a lone
            // trailing byte is left in the buffer for the next round
            while ( outbuf<outbufend && (inbuf+1)<inbufend ){
                  uint8_t c1 = *inbuf;
                  uint8_t c2 = *(inbuf+1);
                  unsigned short c = c1 | (c2<<8);

                  *outbuf = c;
                  outbuf++;
                  inbuf+=2;
            }
      }
      else if ( encoding == UTF8 ){
            while ( outbuf<outbufend && inbuf<inbufend ){
                  size_t utflen = lucene_utf8charlen(inbuf);
                  if ( utflen==0 ){
                        error = "Invalid multibyte sequence.";
                        status = jstreams::Error;
                        return -1;
                  }else if ( inbuf+utflen > inbufend ){
                        break; //character incomplete
                  }else{
                        size_t rd = lucene_utf8towc(outbuf,inbuf,inbufend-inbuf);
                        if ( rd == 0 ){
                              error = "Invalid multibyte sequence.";
                              status = jstreams::Error;
                              return -1;
                        }else{
                              inbuf+=rd;
                              outbuf++;
                        }
                  }
            }
      }
#endif //_UCS2
      else
            _CLTHROWA(CL_ERR_Runtime,"Unexpected encoding");

      if ( outbuf < outbufend ) {
            //we had enough room to convert the entire input
            if ( inbuf < inbufend ) {
                  // last character is incomplete
                  // move from inbuf to the end to the start of
                  // the buffer
                  memmove(charbuf.start, inbuf, inbufend-inbuf);
                  charbuf.readPos = charbuf.start;
                  charbuf.avail = inbufend-inbuf;
            } else { //input sequence was completely converted
                  charbuf.readPos = charbuf.start;
                  charbuf.avail = 0;
                  if (input == NULL) {
                        // no more bytes will arrive and none are pending
                        finishedDecoding = true;
                  }
            }
      } else {
            // output is full: skip the bytes consumed so far and keep the rest
        charbuf.readPos += charbuf.avail - (inbufend-inbuf);
        charbuf.avail = inbufend-inbuf;
      }
    return outbuf-start;
}

/**
 * Tops up charbuf from the input stream when possible, then decodes.
 *
 * New bytes are only requested while an input stream is still attached
 * and charbuf.readPos sits at the start of charbuf. The bytes obtained
 * are appended behind the ones already pending.
 *
 * @param start destination for decoded characters
 * @param space number of TCHARs available at `start`
 * @return the result of decode(); -1 when the input is exhausted; or the
 *         stream's own result when it is below -1 (error/status are set
 *         and the input is detached)
 */
int32_t SimpleInputStreamReader::fillBuffer(TCHAR* start, int32_t space)
{
    const bool canRefill = (input != 0) && (charbuf.readPos == charbuf.start);
    if (canRefill) {
        const char* chunk;
        const int32_t got = input->read(chunk, 1, charbuf.size - charbuf.avail);

        if (got < -1) {
            // the underlying stream failed: take over its error and detach
            error = input->getError();
            status = jstreams::Error;
            input = 0;
            return got;
        }

        if (got < 1) {
            // nothing more to read: detach the stream
            input = 0;
            if (charbuf.avail) {
                // bytes are still pending that never formed a character
                error = "stream ends on incomplete character";
                status = jstreams::Error;
            }
            return -1;
        }

        // append the fresh bytes after those still waiting in charbuf
        memmove(charbuf.start + charbuf.avail, chunk, got);
        charbuf.avail += got;
    }

    // convert whatever is buffered into the caller's output
    return decode(start, space);
}

CL_NS_END

// Generated by Doxygen 1.6.0