UTF8Decoder

UTF-8 decoder for use with dlib.text.encodings.transcode.

Members

Functions

decode
auto decode(string s)
auto decode()

Range interface.

decodeNext
int decodeNext()

Decode next character.

eos
bool eos()

Check whether the decoder has reached the end of the input.

Variables

character
int character;

Current character index

index
size_t index;

Current index in the input string.

input
string input;

Input string. Set it before decoding.

Examples

{
    auto decoder = UTF8Decoder("Eng 日本語 Кир ©€\xF0\x90\x8D\x88");
    assert(decoder.decodeNext() == 'E');
    assert(decoder.decodeNext() == 'n');
    assert(decoder.decodeNext() == 'g');
    assert(decoder.decodeNext() == ' ');
    assert(decoder.decodeNext() == '日');
    assert(decoder.decodeNext() == '本');
    assert(decoder.decodeNext() == '語');
    assert(decoder.decodeNext() == ' ');
    assert(decoder.decodeNext() == 'К');
    assert(decoder.decodeNext() == 'и');
    assert(decoder.decodeNext() == 'р');
    assert(decoder.decodeNext() == ' ');
    assert(decoder.decodeNext() == '©');
    assert(decoder.decodeNext() == '€');
    assert(decoder.decodeNext() == 0x10348);
    assert(decoder.decodeNext() == UTF8_END);
    assert(decoder.get() == UTF8_END);
    assert(decoder.eos());
}
{
    auto decoder = UTF8Decoder("日本語"[0..$-1]);
    assert(decoder.decodeNext() == '日');
    assert(decoder.decodeNext() == '本');
    assert(decoder.decodeNext() == UTF8_ERROR);
}

Meta