25 Localization library [localization]

25.4 Standard locale categories [locale.categories]

25.4.1 The ctype category [category.ctype]

namespace std {
  class ctype_base {
  public:
    using mask = T;

    // numeric values are for exposition only.
    static const mask space = 1 << 0;
    static const mask print = 1 << 1;
    static const mask cntrl = 1 << 2;
    static const mask upper = 1 << 3;
    static const mask lower = 1 << 4;
    static const mask alpha = 1 << 5;
    static const mask digit = 1 << 6;
    static const mask punct = 1 << 7;
    static const mask xdigit = 1 << 8;
    static const mask blank = 1 << 9;
    static const mask alnum = alpha | digit;
    static const mask graph = alnum | punct;
  };
}

The type mask is a bitmask type.

25.4.1.1 Class template ctype [locale.ctype]

namespace std {
  template <class charT>
    class ctype : public locale::facet, public ctype_base {
    public:
      using char_type = charT;

      explicit ctype(size_t refs = 0);

      bool         is(mask m, charT c) const;
      const charT* is(const charT* low, const charT* high, mask* vec) const;
      const charT* scan_is(mask m, const charT* low, const charT* high) const;
      const charT* scan_not(mask m, const charT* low, const charT* high) const;
      charT        toupper(charT c) const;
      const charT* toupper(charT* low, const charT* high) const;
      charT        tolower(charT c) const;
      const charT* tolower(charT* low, const charT* high) const;

      charT        widen(char c) const;
      const char*  widen(const char* low, const char* high, charT* to) const;
      char         narrow(charT c, char dfault) const;
      const charT* narrow(const charT* low, const charT* high, char dfault, char* to) const;

      static locale::id id;

    protected:
      ~ctype();
      virtual bool         do_is(mask m, charT c) const;
      virtual const charT* do_is(const charT* low, const charT* high, mask* vec) const;
      virtual const charT* do_scan_is(mask m, const charT* low, const charT* high) const;
      virtual const charT* do_scan_not(mask m, const charT* low, const charT* high) const;
      virtual charT        do_toupper(charT) const;
      virtual const charT* do_toupper(charT* low, const charT* high) const;
      virtual charT        do_tolower(charT) const;
      virtual const charT* do_tolower(charT* low, const charT* high) const;
      virtual charT        do_widen(char) const;
      virtual const char*  do_widen(const char* low, const char* high, charT* dest) const;
      virtual char         do_narrow(charT, char dfault) const;
      virtual const charT* do_narrow(const charT* low, const charT* high,
                                     char dfault, char* dest) const;
    };
}

Class ctype encapsulates the C library <cctype> features. istream members are required to use ctype<> for character classing during input parsing.

The specializations required in Table 69 ([locale.category]), namely ctype<char> and ctype<wchar_­t>, implement character classing appropriate to the implementation's native character set.

25.4.1.1.1 ctype members [locale.ctype.members]

bool is(mask m, charT c) const; const charT* is(const charT* low, const charT* high, mask* vec) const;

Returns: do_­is(m, c) or do_­is(low, high, vec).

const charT* scan_is(mask m, const charT* low, const charT* high) const;

Returns: do_­scan_­is(m, low, high).

const charT* scan_not(mask m, const charT* low, const charT* high) const;

Returns: do_­scan_­not(m, low, high).

charT toupper(charT) const; const charT* toupper(charT* low, const charT* high) const;

Returns: do_­toupper(c) or do_­toupper(low, high).

charT tolower(charT c) const; const charT* tolower(charT* low, const charT* high) const;

Returns: do_­tolower(c) or do_­tolower(low, high).

charT widen(char c) const; const char* widen(const char* low, const char* high, charT* to) const;

Returns: do_­widen(c) or do_­widen(low, high, to).

char narrow(charT c, char dfault) const; const charT* narrow(const charT* low, const charT* high, char dfault, char* to) const;

Returns: do_­narrow(c, dfault) or do_­narrow(low, high, dfault, to).

25.4.1.1.2 ctype virtual functions [locale.ctype.virtuals]

bool do_is(mask m, charT c) const; const charT* do_is(const charT* low, const charT* high, mask* vec) const;

Effects: Classifies a character or sequence of characters. For each argument character, identifies a value M of type ctype_­base​::​mask. The second form identifies a value M of type ctype_­base​::​mask for each *p where (low <= p && p < high), and places it into vec[p - low].

Returns: The first form returns the result of the expression (M & m) != 0; i.e., true if the character has the characteristics specified. The second form returns high.

const charT* do_scan_is(mask m, const charT* low, const charT* high) const;

Effects: Locates a character in a buffer that conforms to a classification m.

Returns: The smallest pointer p in the range [low, high) such that is(m, *p) would return true; otherwise, returns high.

const charT* do_scan_not(mask m, const charT* low, const charT* high) const;

Effects: Locates a character in a buffer that fails to conform to a classification m.

Returns: The smallest pointer p, if any, in the range [low, high) such that is(m, *p) would return false; otherwise, returns high.

charT do_toupper(charT c) const; const charT* do_toupper(charT* low, const charT* high) const;

Effects: Converts a character or characters to upper case. The second form replaces each character *p in the range [low, high) for which a corresponding upper-case character exists, with that character.

Returns: The first form returns the corresponding upper-case character if it is known to exist, or its argument if not. The second form returns high.

charT do_tolower(charT c) const; const charT* do_tolower(charT* low, const charT* high) const;

Effects: Converts a character or characters to lower case. The second form replaces each character *p in the range [low, high) and for which a corresponding lower-case character exists, with that character.

Returns: The first form returns the corresponding lower-case character if it is known to exist, or its argument if not. The second form returns high.

charT do_widen(char c) const; const char* do_widen(const char* low, const char* high, charT* dest) const;

Effects: Applies the simplest reasonable transformation from a char value or sequence of char values to the corresponding charT value or values.233 The only characters for which unique transformations are required are those in the basic source character set.

For any named ctype category with a ctype <charT> facet ctc and valid ctype_­base​::​mask value M, (ctc.​is(M, c) || !is(M, do_­widen(c)) ) is true.234

The second form transforms each character *p in the range [low, high), placing the result in dest[p - low].

Returns: The first form returns the transformed value. The second form returns high.

char do_narrow(charT c, char dfault) const; const charT* do_narrow(const charT* low, const charT* high, char dfault, char* dest) const;

Effects: Applies the simplest reasonable transformation from a charT value or sequence of charT values to the corresponding char value or values.

For any character c in the basic source character set the transformation is such that

do_widen(do_narrow(c, 0)) == c

For any named ctype category with a ctype<char> facet ctc however, and ctype_­base​::​mask value M,

(is(M, c) || !ctc.is(M, do_narrow(c, dfault)) )

is true (unless do_­narrow returns dfault). In addition, for any digit character c, the expression (do_­narrow(c, dfault) - '0') evaluates to the digit value of the character. The second form transforms each character *p in the range [low, high), placing the result (or dfault if no simple transformation is readily available) in dest[p - low].

Returns: The first form returns the transformed value; or dfault if no mapping is readily available. The second form returns high.

The char argument of do_­widen is intended to accept values derived from character literals for conversion to the locale's encoding.

In other words, the transformed character is not a member of any character classification that c is not also a member of.

25.4.1.2 Class template ctype_­byname [locale.ctype.byname]

namespace std {
  template <class charT>
    class ctype_byname : public ctype<charT> {
    public:
      using mask = typename ctype<charT>::mask;
      explicit ctype_byname(const char*, size_t refs = 0);
      explicit ctype_byname(const string&, size_t refs = 0);
    protected:
      ~ctype_byname();
    };
}

25.4.1.3 ctype specializations [facet.ctype.special]

namespace std {
  template <>
    class ctype<char> : public locale::facet, public ctype_base {
    public:
      using char_type = char;

      explicit ctype(const mask* tab = 0, bool del = false,
                     size_t refs = 0);

      bool is(mask m, char c) const;
      const char* is(const char* low, const char* high, mask* vec) const;
      const char* scan_is (mask m,
                           const char* low, const char* high) const;
      const char* scan_not(mask m,
                           const char* low, const char* high) const;

      char        toupper(char c) const;
      const char* toupper(char* low, const char* high) const;
      char        tolower(char c) const;
      const char* tolower(char* low, const char* high) const;

      char  widen(char c) const;
      const char* widen(const char* low, const char* high, char* to) const;
      char  narrow(char c, char dfault) const;
      const char* narrow(const char* low, const char* high, char dfault,
                         char* to) const;

      static locale::id id;
      static const size_t table_size = implementation-defined;

      const mask* table() const noexcept;
      static const mask* classic_table() noexcept;

    protected:
      ~ctype();
      virtual char        do_toupper(char c) const;
      virtual const char* do_toupper(char* low, const char* high) const;
      virtual char        do_tolower(char c) const;
      virtual const char* do_tolower(char* low, const char* high) const;

      virtual char        do_widen(char c) const;
      virtual const char* do_widen(const char* low,
                                   const char* high,
                                   char* to) const;
      virtual char        do_narrow(char c, char dfault) const;
      virtual const char* do_narrow(const char* low,
                                    const char* high,
                                    char dfault, char* to) const;
    };
}

A specialization ctype<char> is provided so that the member functions on type char can be implemented inline.235 The implementation-defined value of member table_­size is at least 256.

Only the char (not unsigned char and signed char) form is provided. The specialization is specified in the standard, and not left as an implementation detail, because it affects the derivation interface for ctype<char>.

25.4.1.3.1 ctype<char> destructor [facet.ctype.char.dtor]

~ctype();

Effects: If the constructor's first argument was nonzero, and its second argument was true, does delete [] table().

25.4.1.3.2 ctype<char> members [facet.ctype.char.members]

In the following member descriptions, for unsigned char values v where v >= table_­size, table()[v] is assumed to have an implementation-specific value (possibly different for each such value v) without performing the array lookup.

explicit ctype(const mask* tbl = 0, bool del = false, size_t refs = 0);

Requires: tbl either 0 or an array of at least table_­size elements.

Effects: Passes its refs argument to its base class constructor.

bool is(mask m, char c) const; const char* is(const char* low, const char* high, mask* vec) const;

Effects: The second form, for all *p in the range [low, high), assigns into vec[p - low] the value table()[(unsigned char)*p].

Returns: The first form returns table()[(unsigned char)c] & m; the second form returns high.

const char* scan_is(mask m, const char* low, const char* high) const;

Returns: The smallest p in the range [low, high) such that

table()[(unsigned char) *p] & m

is true.

const char* scan_not(mask m, const char* low, const char* high) const;

Returns: The smallest p in the range [low, high) such that

table()[(unsigned char) *p] & m

is false.

char toupper(char c) const; const char* toupper(char* low, const char* high) const;

Returns: do_­toupper(c) or do_­toupper(low, high), respectively.

char tolower(char c) const; const char* tolower(char* low, const char* high) const;

Returns: do_­tolower(c) or do_­tolower(low, high), respectively.

char widen(char c) const; const char* widen(const char* low, const char* high, char* to) const;

Returns: do_­widen(c) or do_­widen(low, high, to), respectively.

char narrow(char c, char dfault) const; const char* narrow(const char* low, const char* high, char dfault, char* to) const;

Returns: do_­narrow(c, dfault) or do_­narrow(low, high, dfault, to), respectively.

const mask* table() const noexcept;

Returns: The first constructor argument, if it was nonzero, otherwise classic_­table().

25.4.1.3.3 ctype<char> static members [facet.ctype.char.statics]

static const mask* classic_table() noexcept;

Returns: A pointer to the initial element of an array of size table_­size which represents the classifications of characters in the "C" locale.

25.4.1.3.4 ctype<char> virtual functions [facet.ctype.char.virtuals]

char        do_toupper(char) const;
const char* do_toupper(char* low, const char* high) const;
char        do_tolower(char) const;
const char* do_tolower(char* low, const char* high) const;

virtual char        do_widen(char c) const;
virtual const char* do_widen(const char* low,
                             const char* high,
                             char* to) const;
virtual char        do_narrow(char c, char dfault) const;
virtual const char* do_narrow(const char* low,
                              const char* high,
                              char dfault, char* to) const;

These functions are described identically as those members of the same name in the ctype class template.

25.4.1.4 Class template codecvt [locale.codecvt]

namespace std {
  class codecvt_base {
  public:
    enum result { ok, partial, error, noconv };
  };

  template <class internT, class externT, class stateT>
    class codecvt : public locale::facet, public codecvt_base {
    public:
      using intern_type = internT;
      using extern_type = externT;
      using state_type  = stateT;

      explicit codecvt(size_t refs = 0);

      result out(
          stateT& state,
          const internT* from, const internT* from_end, const internT*& from_next,
                externT*   to,       externT*   to_end,       externT*&   to_next) const;

      result unshift(
          stateT& state,
                externT*    to,      externT*   to_end,       externT*&   to_next) const;

      result in(
          stateT& state,
          const externT* from, const externT* from_end, const externT*& from_next,
                internT*   to,       internT*   to_end,       internT*&   to_next) const;

      int encoding() const noexcept;
      bool always_noconv() const noexcept;
      int length(stateT&, const externT* from, const externT* end, size_t max) const;
      int max_length() const noexcept;

      static locale::id id;

    protected:
      ~codecvt();
      virtual result do_out(
          stateT& state,
          const internT* from, const internT* from_end, const internT*& from_next,
                externT* to,         externT*   to_end,       externT*&   to_next) const;
      virtual result do_in(
          stateT& state,
          const externT* from, const externT* from_end, const externT*& from_next,
                internT* to,         internT*   to_end,       internT*&   to_next) const;
      virtual result do_unshift(
          stateT& state,
                externT* to,         externT*   to_end,       externT*&   to_next) const;

      virtual int do_encoding() const noexcept;
      virtual bool do_always_noconv() const noexcept;
      virtual int do_length(stateT&, const externT* from, const externT* end, size_t max) const;
      virtual int do_max_length() const noexcept;
    };
}

The class codecvt<internT, externT, stateT> is for use when converting from one character encoding to another, such as from wide characters to multibyte characters or between wide character encodings such as Unicode and EUC.

The stateT argument selects the pair of character encodings being mapped between.

The specializations required in Table 69 ([locale.category]) convert the implementation-defined native character set. codecvt<char, char, mbstate_­t> implements a degenerate conversion; it does not convert at all. The specialization codecvt<char16_­t, char, mbstate_­t> converts between the UTF-16 and UTF-8 encoding forms, and the specialization codecvt <char32_­t, char, mbstate_­t> converts between the UTF-32 and UTF-8 encoding forms. codecvt<wchar_­t, char, mbstate_­t> converts between the native character sets for narrow and wide characters. Specializations on mbstate_­t perform conversion between encodings known to the library implementer. Other encodings can be converted by specializing on a user-defined stateT type. Objects of type stateT can contain any state that is useful to communicate to or from the specialized do_­in or do_­out members.

25.4.1.4.1 codecvt members [locale.codecvt.members]

result out( stateT& state, const internT* from, const internT* from_end, const internT*& from_next, externT* to, externT* to_end, externT*& to_next) const;

Returns: do_­out(state, from, from_­end, from_­next, to, to_­end, to_­next).

result unshift(stateT& state, externT* to, externT* to_end, externT*& to_next) const;

Returns: do_­unshift(state, to, to_­end, to_­next).

result in( stateT& state, const externT* from, const externT* from_end, const externT*& from_next, internT* to, internT* to_end, internT*& to_next) const;

Returns: do_­in(state, from, from_­end, from_­next, to, to_­end, to_­next).

int encoding() const noexcept;

Returns: do_­encoding().

bool always_noconv() const noexcept;

Returns: do_­always_­noconv().

int length(stateT& state, const externT* from, const externT* from_end, size_t max) const;

Returns: do_­length(state, from, from_­end, max).

int max_length() const noexcept;

Returns: do_­max_­length().

25.4.1.4.2 codecvt virtual functions [locale.codecvt.virtuals]

result do_out( stateT& state, const internT* from, const internT* from_end, const internT*& from_next, externT* to, externT* to_end, externT*& to_next) const; result do_in( stateT& state, const externT* from, const externT* from_end, const externT*& from_next, internT* to, internT* to_end, internT*& to_next) const;

Requires: (from <= from_­end && to <= to_­end) well-defined and true; state initialized, if at the beginning of a sequence, or else equal to the result of converting the preceding characters in the sequence.

Effects: Translates characters in the source range [from, from_­end), placing the results in sequential positions starting at destination to. Converts no more than (from_­end - from) source elements, and stores no more than (to_­end - to) destination elements.

Stops if it encounters a character it cannot convert. It always leaves the from_­next and to_­next pointers pointing one beyond the last element successfully converted. If returns noconv, internT and externT are the same type and the converted sequence is identical to the input sequence [from, from_next). to_­next is set equal to to, the value of state is unchanged, and there are no changes to the values in [to, to_­end).

A codecvt facet that is used by basic_­filebuf ([file.streams]) shall have the property that if

do_out(state, from, from_end, from_next, to, to_end, to_next)

would return ok, where from != from_­end, then

do_out(state, from, from + 1, from_next, to, to_end, to_next)

shall also return ok, and that if

do_in(state, from, from_end, from_next, to, to_end, to_next)

would return ok, where to != to_­end, then

do_in(state, from, from_end, from_next, to, to + 1, to_next)

shall also return ok.236 [Note: As a result of operations on state, it can return ok or partial and set from_­next == from and to_­next != to. end note]

Remarks: Its operations on state are unspecified. [Note: This argument can be used, for example, to maintain shift state, to specify conversion options (such as count only), or to identify a cache of seek offsets. end note]

Returns: An enumeration value, as summarized in Table 71.

Table 71do_­in/do_­out result values
ValueMeaning
ok completed the conversion
partial not all source characters converted
error encountered a character in [from, from_­end) that it could not convert
noconv internT and externT are the same type, and input sequence is identical to converted sequence

A return value of partial, if (from_­next == from_­end), indicates that either the destination sequence has not absorbed all the available destination elements, or that additional source elements are needed before another destination element can be produced.

result do_unshift(stateT& state, externT* to, externT* to_end, externT*& to_next) const;

Requires: (to <= to_­end) well defined and true; state initialized, if at the beginning of a sequence, or else equal to the result of converting the preceding characters in the sequence.

Effects: Places characters starting at to that should be appended to terminate a sequence when the current stateT is given by state.237 Stores no more than (to_­end - to) destination elements, and leaves the to_­next pointer pointing one beyond the last element successfully stored.

Returns: An enumeration value, as summarized in Table 72.

Table 72do_­unshift result values
ValueMeaning
ok completed the sequence
partial space for more than to_­end - to destination elements was needed to terminate a sequence given the value of state
error an unspecified error has occurred
noconv no termination is needed for this state_­type

int do_encoding() const noexcept;

Returns: -1 if the encoding of the externT sequence is state-dependent; else the constant number of externT characters needed to produce an internal character; or 0 if this number is not a constant.238

bool do_always_noconv() const noexcept;

Returns: true if do_­in() and do_­out() return noconv for all valid argument values. codecvt<char, char, mbstate_­t> returns true.

int do_length(stateT& state, const externT* from, const externT* from_end, size_t max) const;

Requires: (from <= from_­end) well-defined and true; state initialized, if at the beginning of a sequence, or else equal to the result of converting the preceding characters in the sequence.

Effects: The effect on the state argument is “as if” it called do_­in(state, from, from_­end, from, to, to+max, to) for to pointing to a buffer of at least max elements.

Returns: (from_­next-from) where from_­next is the largest value in the range [from, from_­end] such that the sequence of values in the range [from, from_­next) represents max or fewer valid complete characters of type internT. The specialization codecvt<char, char, mbstate_­t>, returns the lesser of max and (from_­end-from).

int do_max_length() const noexcept;

Returns: The maximum value that do_­length(state, from, from_­end, 1) can return for any valid range [from, from_­end) and stateT value state. The specialization codecvt<char, char, mbstate_­t>​::​do_­max_­length() returns 1.

Informally, this means that basic_­filebuf assumes that the mappings from internal to external characters is 1 to N: a codecvt facet that is used by basic_­filebuf must be able to translate characters one internal character at a time.

Typically these will be characters to return the state to stateT().

If encoding() yields -1, then more than max_­length() externT elements may be consumed when producing a single internT character, and additional externT elements may appear at the end of a sequence after those that yield the final internT character.

25.4.1.5 Class template codecvt_­byname [locale.codecvt.byname]

namespace std {
  template <class internT, class externT, class stateT>
    class codecvt_byname : public codecvt<internT, externT, stateT> {
    public:
      explicit codecvt_byname(const char*, size_t refs = 0);
      explicit codecvt_byname(const string&, size_t refs = 0);
    protected:
      ~codecvt_byname();
    };
}