// Sort.h -- source listing taken from the qt4-x11 package (bundled CLucene).

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_search_Sort_
#define _lucene_search_Sort_

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "CLucene/index/IndexReader.h"
#include "SearchHeader.h"

CL_NS_DEF(search)

 class SortField; //forward declaration; the full class is defined later in this header
 class Sort;      //forward declaration; the full class is defined later in this header

/**
 * Expert: Compares two ScoreDoc objects for sorting.
 *
 */
00026  class ScoreDocComparator:LUCENE_BASE {
 protected:
       ScoreDocComparator(){}
 public:
       virtual ~ScoreDocComparator();
//    CL_NS(util)::Comparable** cachedValues;
//    ScoreDocComparator(CL_NS(util)::Comparable** cachedValues);

      /**
       * Compares two ScoreDoc objects and returns a result indicating their
       * sort order.
       * @param i First ScoreDoc
       * @param j Second ScoreDoc
       * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
       * @see java.util.Comparator
       */
    virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;

      /**
       * Returns the value used to sort the given document.  The
       * object returned must implement the java.io.Serializable
       * interface.  This is used by multisearchers to determine how to collate results from their searchers.
       * @see FieldDoc
       * @param i Document
       * @return Serializable object
       */
    virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;

      
      /**
       * Returns the type of sort.  Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INTEGER</code>, 
       * <code>SortField::FLOAT</code> or <code>SortField.CUSTOM</code>.  It is not valid to return <code>SortField.AUTO</code>.
       * This is used by multisearchers to determine how to collate results from their searchers.
       * @return One of the constants in SortField.
       * @see SortField
       */
    virtual int32_t sortType() = 0;

      /** Special comparator for sorting hits according to computed relevance (document score). */
00065       static ScoreDocComparator* RELEVANCE;

      /** Special comparator for sorting hits according to index order (document number). */
00068       static ScoreDocComparator* INDEXORDER;
 };

/**
 * Expert: returns a comparator for sorting ScoreDocs.
 *
 */
00075 class SortComparatorSource:LUCENE_BASE {
public:
   virtual ~SortComparatorSource(){
   }

   /**
   * return a reference to a string describing the name of the comparator
   * this is used in the explanation
   */
   virtual TCHAR* getName() = 0;

   virtual size_t hashCode() = 0;

  /**
   * Creates a comparator for the field in the given index.
   * @param reader Index to create comparator for.
   * @param fieldname  Field to create comparator for.
   * @return Comparator of ScoreDoc objects.
   * @throws IOException If an error occurs reading the index.
   */
   virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = 0;
};


/**
 * Abstract base class for sorting hits returned by a Query.
 *
 * <p>This class should only be used if the other SortField
 * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
 * adequate sorting.  It maintains an internal cache of values which
 * could be quite large.  The cache is an array of Comparable,
 * one for each document in the index.  There is a distinct
 * Comparable for each unique term in the field - if
 * some documents have the same term in the field, the cache
 * array will have entries which reference the same Comparable.
 *
 */
00112 class SortComparator: public SortComparatorSource {
public:
      virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname);
  
      SortComparator();
      virtual ~SortComparator();

  /**
   * Returns an object which, when sorted according to natural order,
   * will order the Term values in the correct order.
   * <p>For example, if the Terms contained integer values, this method
   * would return <code>new Integer(termtext)</code>.  Note that this
   * might not always be the most efficient implementation - for this
   * particular example, a better implementation might be to make a
   * ScoreDocLookupComparator that uses an internal lookup table of int.
   * @param termtext The textual value of the term.
   * @return An object representing <code>termtext</code> that sorts 
   * according to the natural order of <code>termtext</code>.
   * @see Comparable
   * @see ScoreDocComparator
   */
   virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = 0;

};


/**
 * Stores information about how to sort documents by terms in an individual
 * field.  Fields must be indexed in order to sort by them.
 *
 */
00143 class SortField:LUCENE_BASE {
private:
  const TCHAR* field;
  int32_t type;  // defaults to determining type dynamically
  //Locale* locale;    // defaults to "natural order" (no Locale)
  bool reverse;  // defaults to natural order
  SortComparatorSource* factory;

protected:
  SortField (const SortField& clone);
public:
   virtual ~SortField();

  /** Sort by document score (relevancy).  Sort values are Float and higher
   * values are at the front. 
   * PORTING: this is the same as SCORE in java, it had to be renamed because
   * SCORE is a system macro on some platforms (AIX).
   */
   LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=0);
   
  /** Sort by document number (index order).  Sort values are Integer and lower
   * values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, DOC=1);

  /** Guess type of sort based on field contents.  A regular expression is used
   * to look at the first term indexed for the field and determine if it
   * represents an integer number, a floating point number, or just arbitrary
   * string characters. */
   LUCENE_STATIC_CONSTANT(int32_t, AUTO=2);

  /** Sort using term values as Strings.  Sort values are String and lower
   * values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, STRING=3);

  /** Sort using term values as encoded Integers.  Sort values are Integer and
   * lower values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, INT=4);

  /** Sort using term values as encoded Floats.  Sort values are Float and
   * lower values are at the front. */
   LUCENE_STATIC_CONSTANT(int32_t, FLOAT=5);

  /** Sort using a custom Comparator.  Sort values are any Comparable and
   * sorting is done according to natural order. */
   LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=9);

  // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
  // as the above static int values.  Any new values must not have the same value
  // as FieldCache.STRING_INDEX.

  /** Represents sorting by document score (relevancy). */
00194   static SortField* FIELD_SCORE;

  /** Represents sorting by document number (index order). */
00197   static SortField* FIELD_DOC;

  SortField (const TCHAR* field);
  //SortField (const TCHAR* field, bool reverse);
  //todo: we cannot make reverse use default field of =false.
  //because bool and int are the same type in c, overloading is not possible
  SortField (const TCHAR* field, int32_t type, bool reverse); 

  /*
   SortField (TCHAR* field, Locale* locale) {
   SortField (TCHAR* field, Locale* locale, bool reverse);*/

  SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false);

  /** Returns the name of the field.  Could return <code>null</code>
   * if the sort is by SCORE or DOC.
   * @return Name of field, possibly <code>null</code>.
   */
00215   const TCHAR* getField() const { return field; }
  
  SortField* clone() const;

  /** Returns the type of contents in the field.
   * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
   */
00222   int32_t getType() const { return type; }

  /** Returns the Locale by which term values are interpreted.
   * May return <code>null</code> if no Locale was specified.
   * @return Locale, or <code>null</code>.
   */
  /*Locale getLocale() {
    return locale;
  }*/

  /** Returns whether the sort should be reversed.
   * @return  True if natural order should be reversed.
   */
00235   bool getReverse() const { return reverse; }

  SortComparatorSource* getFactory() { return factory; }

  TCHAR* toString() const;
};



/**
 * Encapsulates sort criteria for returned hits.
 *
 * <p>The fields used to determine sort order must be carefully chosen.
 * Documents must contain a single term in such a field,
 * and the value of the term should indicate the document's relative position in
 * a given sort order.  The field must be indexed, but should not be tokenized,
 * and does not need to be stored (unless you happen to want it back with the
 * rest of your document data).  In other words:
 *
 * <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
 * </dd></dl>
 *
 * <p><h3>Valid Types of Values</h3>
 *
 * <p>There are three possible kinds of term values which may be put into
 * sorting fields: Integers, Floats, or Strings.  Unless
 * {@link SortField SortField} objects are specified, the type of value
 * in the field is determined by parsing the first term in the field.
 *
 * <p>Integer term values should contain only digits and an optional
 * preceding negative sign.  Values must be base 10 and in the range
 * <code>Integer.MIN_VALUE</code> to <code>Integer.MAX_VALUE</code> inclusive.
 * Documents which should appear first in the sort
 * should have low value integers, later documents high values
 * (i.e. the documents should be numbered <code>1..n</code> where
 * <code>1</code> is the first and <code>n</code> the last).
 *
 * <p>Float term values should conform to values accepted by
 * {@link Float Float.valueOf(String)} (except that <code>NaN</code>
 * and <code>Infinity</code> are not supported).
 * Documents which should appear first in the sort
 * should have low values, later documents high values.
 *
 * <p>String term values can contain any valid String, but should
 * not be tokenized.  The values are sorted according to their
 * {@link Comparable natural order}.  Note that using this type
 * of term value has higher memory requirements than the other
 * two types.
 *
 * <p><h3>Object Reuse</h3>
 *
 * <p>One of these objects can be
 * used multiple times and the sort order changed between usages.
 *
 * <p>This class is thread safe.
 *
 * <p><h3>Memory Usage</h3>
 *
 * <p>Sorting uses caches of term values maintained by the
 * internal HitQueue(s).  The cache is static and contains an integer
 * or float array of length <code>IndexReader.maxDoc()</code> for each field
 * name for which a sort is performed.  In other words, the size of the
 * cache in bytes is:
 *
 * <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
 *
 * <p>For String fields, the cache is larger: in addition to the
 * above array, the value of every term in the field is kept in memory.
 * If there are many unique terms in the field, this could
 * be quite large.
 *
 * <p>Note that the size of the cache is not affected by how many
 * fields are in the index and <i>might</i> be used to sort - only by
 * the ones actually used to sort a result set.
 *
 * <p>The cache is cleared each time a new <code>IndexReader</code> is
 * passed in, or if the value returned by <code>maxDoc()</code>
 * changes for the current IndexReader.  This class is not set up to
 * be able to efficiently sort hits from more than one index
 * simultaneously.
 *
 */
00317 class Sort:LUCENE_BASE {
      // internal representation of the sort criteria
      SortField** fields;
      void clear();
public:
      ~Sort();

      /** Represents sorting by computed relevance. Using this sort criteria
       * returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
       * without a sort criteria, only with slightly more overhead. */
00327       static Sort* RELEVANCE;

      /** Represents sorting by index order. */
00330       static Sort* INDEXORDER;

      Sort();
      Sort (const TCHAR* field, bool reverse=false);
      Sort (const TCHAR** fields);
      Sort (SortField* field);
      Sort (SortField** fields);
      void setSort (const TCHAR* field, bool reverse=false);
      void setSort (const TCHAR** fieldnames);
      void setSort (SortField* field);
      void setSort (SortField** fields);

    TCHAR* toString() const;
 
    /**
    * Representation of the sort criteria.
    * @return a pointer to the of SortField array used in this sort criteria
    */
00348     SortField** getSort() const{ return fields; }
};



 

CL_NS_END
#endif

// Generated by Doxygen 1.6.0