// Logo Search packages:
// Sourcecode: qt4-x11 version File versions  Download package
//
// qjiscodec.cpp

/****************************************************************************
**
** Copyright (C) 1992-2008 Trolltech ASA. All rights reserved.
**
** This file is part of the plugins of the Qt Toolkit.
**
** This file may be used under the terms of the GNU General Public
** License versions 2.0 or 3.0 as published by the Free Software
** Foundation and appearing in the files LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file.  Alternatively you may (at
** your option) use any later version of the GNU General Public
** License if such license has been publicly approved by Trolltech ASA
** (or its successors, if any) and the KDE Free Qt Foundation. In
** addition, as a special exception, Trolltech gives you certain
** additional rights. These rights are described in the Trolltech GPL
** Exception version 1.1, which can be found at
** http://www.trolltech.com/products/qt/gplexception/ and in the file
** GPL_EXCEPTION.txt in this package.
**
** Please review the following information to ensure GNU General
** Public Licensing requirements will be met:
** http://trolltech.com/products/qt/licenses/licensing/opensource/. If
** you are unsure which license is appropriate for your use, please
** review the following information:
** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
** or contact the sales department at sales@trolltech.com.
**
** In addition, as a special exception, Trolltech, as the sole
** copyright holder for Qt Designer, grants users of the Qt/Eclipse
** Integration plug-in the right for the Qt/Eclipse Integration to
** link to functionality provided by Qt Designer and its related
** libraries.
**
** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE. Trolltech reserves all rights not expressly
** granted herein.
**
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
**
****************************************************************************/

// Most of the code here was originally written by Serika Kurusugawa,
// a.k.a. Junji Takagi, and is included in Qt with the author's permission
// and the grateful thanks of the Trolltech team.

/*! \class QJisCodec
    \reentrant
    \internal
*/

#include "qjiscodec.h"
#include "qlist.h"

#ifndef QT_NO_TEXTCODEC
// Byte values the codec tests for or emits directly.
enum {
    // Locking-shift controls recognised by the decoder.
    So = 0x0E,              // Shift Out
    Si = 0x0F,              // Shift In

    // First byte of every escape sequence in Esc_SEQ.
    Esc = 0x1B,

    // 0x5c: reverse solidus in ASCII, yen sign in JIS X 0201 Latin.
    YenSign = 0x5C,
    ReverseSolidus = 0x5C,

    // 0x7e: tilde in ASCII, overline in JIS X 0201 Latin.
    Overline = 0x7E,
    Tilde = 0x7E
};

// True when c lies in the byte range 0xa1..0xdf (inclusive).
#define IsKana(c) ((0xa1 <= (c)) && ((c) <= 0xdf))
// True when c lies in the byte range 0x21..0x7e (inclusive).
#define IsJisChar(c) ((0x21 <= (c)) && ((c) <= 0x7e))

// Wraps a converter result in a QChar; a zero result becomes
// QChar::ReplacementCharacter.
#define QValidChar(u) (!(u) ? QChar(QChar::ReplacementCharacter) : QChar((ushort)(u)))

// Character set currently selected in the ISO-2022 stream.
// The values MinState..MaxState are used (minus MinState) as indices into
// the Esc_SEQ table; UnknownState has no escape sequence of its own.
enum Iso2022State {
    Ascii = 0,
    MinState = Ascii,       // first state that has an entry in Esc_SEQ
    JISX0201_Latin,
    JISX0201_Kana,
    JISX0208_1978,
    JISX0208_1983,
    JISX0212,
    MaxState = JISX0212,    // last state that has an entry in Esc_SEQ
    UnknownState
};

// Bytes accepted by the decoder directly after Esc (or after "Esc $") as the
// intermediate byte of an escape sequence.
static const char Esc_CHARS[] = "()*+-./";

// NUL-terminated escape sequences, one per selectable character set.
static const char Esc_Ascii[]          = { Esc, '(', 'B', '\0' };
static const char Esc_JISX0201_Latin[] = { Esc, '(', 'J', '\0' };
static const char Esc_JISX0201_Kana[]  = { Esc, '(', 'I', '\0' };
static const char Esc_JISX0208_1978[]  = { Esc, '$', '@', '\0' };
static const char Esc_JISX0208_1983[]  = { Esc, '$', 'B', '\0' };
static const char Esc_JISX0212[]       = { Esc, '$', '(', 'D', '\0' };

// Lookup table indexed by (Iso2022State - MinState); the entries therefore
// follow the declaration order of Iso2022State from MinState to MaxState.
static const char * const Esc_SEQ[] = {
    Esc_Ascii,
    Esc_JISX0201_Latin,
    Esc_JISX0201_Kana,
    Esc_JISX0208_1978,
    Esc_JISX0208_1983,
    Esc_JISX0212
};

/*! \internal */
00095 QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
{
}


/*! \internal */
00101 QJisCodec::~QJisCodec()
{
    delete (QJpUnicodeConv*)conv;
    conv = 0;
}

00107 QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
{
    char replacement = '?';
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = 0;
    }
    int invalid = 0;

    QByteArray result;
    Iso2022State state = Ascii;
    Iso2022State prev = Ascii;
    for (int i = 0; i < len; i++) {
        QChar ch = uc[i];
        uint j;
        if (ch.row() == 0x00 && ch.cell() < 0x80) {
            // Ascii
            if (state != JISX0201_Latin ||
                ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
                state = Ascii;
            }
            j = ch.cell();
        } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
            if (j < 0x80) {
                // JIS X 0201 Latin
                if (state != Ascii ||
                    ch.cell() == YenSign || ch.cell() == Overline) {
                    state = JISX0201_Latin;
                }
            } else {
                // JIS X 0201 Kana
                state = JISX0201_Kana;
                j &= 0x7f;
            }
        } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
            // JIS X 0208
            state = JISX0208_1983;
        } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
            // JIS X 0212
            state = JISX0212;
        } else {
            // Invalid
            state = UnknownState;
            j = replacement;
            ++invalid;
        }
        if (state != prev) {
            if (state == UnknownState) {
                result += Esc_Ascii;
            } else {
                result += Esc_SEQ[state - MinState];
            }
            prev = state;
        }
        if (j < 0x0100) {
            result += j & 0xff;
        } else {
            result += (j >> 8) & 0xff;
            result += j & 0xff;
        }
    }
    if (prev != Ascii) {
        result += Esc_Ascii;
    }

    if (cs) {
        cs->invalidChars += invalid;
    }
    return result;
}

00178 QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
{
    uchar buf[4] = {0, 0, 0, 0};
    int nbuf = 0;
    Iso2022State state = Ascii, prev = Ascii;
    bool esc = false;
    QChar replacement = QChar::ReplacementCharacter;
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = QChar::Null;
        nbuf = cs->remainingChars;
        buf[0] = (cs->state_data[0] >> 24) & 0xff;
        buf[1] = (cs->state_data[0] >> 16) & 0xff;
        buf[2] = (cs->state_data[0] >>  8) & 0xff;
        buf[3] = (cs->state_data[0] >>  0) & 0xff;
        state = (Iso2022State)((cs->state_data[1] >>  0) & 0xff);
        prev = (Iso2022State)((cs->state_data[1] >>  8) & 0xff);
        esc = cs->state_data[2];
    }
    int invalid = 0;

    QString result;
    for (int i=0; i<len; i++) {
        uchar ch = chars[i];
        if (esc) {
            // Escape sequence
            state = UnknownState;
            switch (nbuf) {
            case 0:
                if (ch == '$' || strchr(Esc_CHARS, ch)) {
                    buf[nbuf++] = ch;
                } else {
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 1:
                if (buf[0] == '$') {
                    if (strchr(Esc_CHARS, ch)) {
                        buf[nbuf++] = ch;
                    } else {
                        switch (ch) {
                        case '@':
                            state = JISX0208_1978;        // Esc $ @
                            break;
                        case 'B':
                            state = JISX0208_1983;        // Esc $ B
                            break;
                        }
                        nbuf = 0;
                        esc = false;
                    }
                } else {
                    if (buf[0] == '(') {
                        switch (ch) {
                        case 'B':
                            state = Ascii;        // Esc (B
                            break;
                        case 'I':
                            state = JISX0201_Kana;        // Esc (I
                            break;
                        case 'J':
                            state = JISX0201_Latin;        // Esc (J
                            break;
                        }
                    }
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 2:
                if (buf[1] == '(') {
                    switch (ch) {
                    case 'D':
                        state = JISX0212;        // Esc $ (D
                        break;
                    }
                }
                nbuf = 0;
                esc = false;
                break;
            }
        } else {
            if (ch == Esc) {
                // Escape sequence
                nbuf = 0;
                esc = true;
            } else if (ch == So) {
                // Shift out
                prev = state;
                state = JISX0201_Kana;
                nbuf = 0;
            } else if (ch == Si) {
                // Shift in
                if (prev == Ascii || prev == JISX0201_Latin) {
                    state = prev;
                } else {
                    state = Ascii;
                }
                nbuf = 0;
            } else {
                uint u;
                switch (nbuf) {
                case 0:
                    switch (state) {
                    case Ascii:
                        if (ch < 0x80) {
                            result += QLatin1Char(ch);
                            break;
                        }
                        /* fall through */
                    case JISX0201_Latin:
                        u = conv->jisx0201ToUnicode(ch);
                        result += QValidChar(u);
                        break;
                    case JISX0201_Kana:
                        u = conv->jisx0201ToUnicode(ch | 0x80);
                        result += QValidChar(u);
                        break;
                    case JISX0208_1978:
                    case JISX0208_1983:
                    case JISX0212:
                        buf[nbuf++] = ch;
                        break;
                    default:
                        result += QChar::ReplacementCharacter;
                        break;
                    }
                    break;
                case 1:
                    switch (state) {
                    case JISX0208_1978:
                    case JISX0208_1983:
                        u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    case JISX0212:
                        u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    default:
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    nbuf = 0;
                    break;
                }
            }
        }
    }

    if (cs) {
        cs->remainingChars = nbuf;
        cs->invalidChars += invalid;
        cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
        cs->state_data[1] = (prev << 8) + state;
        cs->state_data[2] = esc;
    }

    return result;
}



/*! \internal */
00344 int QJisCodec::_mibEnum()
{
    return 39;
}

/*! \internal */
00350 QByteArray QJisCodec::_name()
{
    return "ISO-2022-JP";
}

/*!
    Returns the codec's mime name.
*/
00358 QList<QByteArray> QJisCodec::_aliases()
{
    QList<QByteArray> list;
    list << "JIS7"; // Qt 3 compat
    return list;
}

#endif // QT_NO_TEXTCODEC

// Generated by  Doxygen 1.6.0   Back to index