8 #ifndef BOOST_LOCALE_UTF_HPP_INCLUDED 
    9 #define BOOST_LOCALE_UTF_HPP_INCLUDED 
   11 #include <boost/cstdint.hpp> 
   23     #   define BOOST_LOCALE_LIKELY(x)   __builtin_expect((x),1) 
   24     #   define BOOST_LOCALE_UNLIKELY(x) __builtin_expect((x),0) 
   26     #   define BOOST_LOCALE_LIKELY(x)   (x) 
   27     #   define BOOST_LOCALE_UNLIKELY(x) (x) 
   53         if(0xD800 <=v && v<= 0xDFFF) 
 
   58     #ifdef BOOST_LOCALE_DOXYGEN 
   59     template<
typename CharType,
int size=sizeof(CharType)>
 
   82         template<
typename Iterator>
 
  126         template<
typename Iterator>
 
  133         template<
typename Iterator>
 
  139     template<
typename CharType,
int size=sizeof(CharType)>
 
  142     template<
typename CharType>
 
  149             unsigned char c = ci;
 
  152             if(BOOST_LOCALE_UNLIKELY(c < 194))
 
  158             if(BOOST_LOCALE_LIKELY(c <=244))
 
  170             else if(value <=0x7FF) {
 
  173             else if(BOOST_LOCALE_LIKELY(value <=0xFFFF)) {
 
  184             return (c & 0xC0)==0x80;
 
  192         template<
typename Iterator>
 
  195             if(BOOST_LOCALE_UNLIKELY(p==e))
 
  198             unsigned char lead = *p++;
 
  203             if(BOOST_LOCALE_UNLIKELY(trail_size < 0))
 
  213             code_point c = lead & ((1<<(6-trail_size))-1);
 
  219                 if(BOOST_LOCALE_UNLIKELY(p==e))
 
  224                 c = (c << 6) | ( tmp & 0x3F);
 
  226                 if(BOOST_LOCALE_UNLIKELY(p==e))
 
  231                 c = (c << 6) | ( tmp & 0x3F);
 
  233                 if(BOOST_LOCALE_UNLIKELY(p==e))
 
  238                 c = (c << 6) | ( tmp & 0x3F);
 
  247             if(BOOST_LOCALE_UNLIKELY(
width(c)!=trail_size + 1))
 
  254         template<
typename Iterator>
 
  257             unsigned char lead = *p++;
 
  265             else if(BOOST_LOCALE_LIKELY(lead < 240)) 
 
  270             code_point c = lead & ((1<<(6-trail_size))-1);
 
  274                 c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
 
  276                 c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
 
  278                 c = (c << 6) | ( static_cast<unsigned char>(*p++) & 0x3F);
 
  286         template<
typename Iterator>
 
  292             else if(value <= 0x7FF) {
 
  293                 *out++ = 
static_cast<char_type>((value >> 6) | 0xC0);
 
  294                 *out++ = 
static_cast<char_type>((value & 0x3F) | 0x80);
 
  296             else if(BOOST_LOCALE_LIKELY(value <= 0xFFFF)) {
 
  297                 *out++ = 
static_cast<char_type>((value >> 12) | 0xE0);
 
  298                 *out++ = 
static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
 
  299                 *out++ = 
static_cast<char_type>((value & 0x3F) | 0x80);
 
  302                 *out++ = 
static_cast<char_type>((value >> 18) | 0xF0);
 
  303                 *out++ = 
static_cast<char_type>(((value >> 12) & 0x3F) | 0x80);
 
  304                 *out++ = 
static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
 
  305                 *out++ = 
static_cast<char_type>((value & 0x3F) | 0x80);
 
  311     template<
typename CharType>
 
  312     struct utf_traits<CharType,2> {
 
  316         static bool is_first_surrogate(uint16_t x)
 
  318             return 0xD800 <=x && x<= 0xDBFF;
 
  320         static bool is_second_surrogate(uint16_t x)
 
  322             return 0xDC00 <=x && x<= 0xDFFF;
 
  324         static code_point combine_surrogate(uint16_t w1,uint16_t w2)
 
  326             return ((
code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
 
  330             if(is_first_surrogate(c))
 
  332             if(is_second_surrogate(c))
 
  341             return is_second_surrogate(c);
 
  348             return !is_second_surrogate(c);
 
  351         template<
typename It>
 
  354             if(BOOST_LOCALE_UNLIKELY(current == last))
 
  356             uint16_t w1=*current++;
 
  357             if(BOOST_LOCALE_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
 
  364             uint16_t w2=*current++;
 
  365             if(w2 < 0xDC00 || 0xDFFF < w2)
 
  367             return combine_surrogate(w1,w2);
 
  369         template<
typename It>
 
  372             uint16_t w1=*current++;
 
  373             if(BOOST_LOCALE_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
 
  376             uint16_t w2=*current++;
 
  377             return combine_surrogate(w1,w2);
 
  383             return u>=0x10000 ? 2 : 1;
 
  385         template<
typename It>
 
  388             if(BOOST_LOCALE_LIKELY(u<=0xFFFF)) {
 
  393                 *out++ = 
static_cast<char_type>(0xD800 | (u>>10));
 
  394                 *out++ = 
static_cast<char_type>(0xDC00 | (u & 0x3FF));
 
  401     template<
typename CharType>
 
  402     struct utf_traits<CharType,4> {
 
  419         template<
typename It>
 
  425         template<
typename It>
 
  428             if(BOOST_LOCALE_UNLIKELY(current == last))
 
  440         template<
typename It>
 
static code_point decode(Iterator &p, Iterator e)
bool is_valid_codepoint(code_point v)
The function checks whether v is a valid code point.
Definition: utf.hpp:49
static Iterator encode(code_point value, Iterator out)
static const code_point incomplete
Special constant that defines an incomplete code point.
Definition: utf.hpp:44
uint32_t code_point
The integral type that can hold a Unicode code point. 
Definition: utf.hpp:34
static const code_point illegal
Special constant that defines an illegal code point.
Definition: utf.hpp:39
UTF Traits class - functions to convert UTF sequences to and from Unicode code points. 
Definition: utf.hpp:63
static int width(code_point value)
CharType char_type
Definition: utf.hpp:67
static int trail_length(char_type c)
static bool is_lead(char_type c)
static code_point decode_valid(Iterator &p)
static bool is_trail(char_type c)
static const int max_width
Definition: utf.hpp:92