tenmon/3rdparty/include/pcl/CharTraits.h

//     ____   ______ __
//    / __ \ / ____// /
//   / /_/ // /    / /
//  / ____// /___ / /___   PixInsight Class Library
// /_/     \____//_____/   PCL 2.4.23
// ----------------------------------------------------------------------------
// pcl/CharTraits.h - Released 2022-03-12T18:59:29Z
// ----------------------------------------------------------------------------
// This file is part of the PixInsight Class Library (PCL).
// PCL is a multiplatform C++ framework for development of PixInsight modules.
//
// Copyright (c) 2003-2022 Pleiades Astrophoto S.L. All Rights Reserved.
//
// Redistribution and use in both source and binary forms, with or without
// modification, is permitted provided that the following conditions are met:
//
// 1. All redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//
// 2. All redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// 3. Neither the names "PixInsight" and "Pleiades Astrophoto", nor the names
//    of their contributors, may be used to endorse or promote products derived
//    from this software without specific prior written permission. For written
//    permission, please contact info@pixinsight.com.
//
// 4. All products derived from this software, in any form whatsoever, must
//    reproduce the following acknowledgment in the end-user documentation
//    and/or other materials provided with the product:
//
//    "This product is based on software from the PixInsight project, developed
//    by Pleiades Astrophoto and its contributors (https://pixinsight.com/)."
//
//    Alternatively, if that is where third-party acknowledgments normally
//    appear, this acknowledgment must be reproduced in the product itself.
//
// THIS SOFTWARE IS PROVIDED BY PLEIADES ASTROPHOTO AND ITS CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PLEIADES ASTROPHOTO OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, BUSINESS
// INTERRUPTION; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; AND LOSS OF USE,
// DATA OR PROFITS) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// ----------------------------------------------------------------------------

#ifndef __PCL_CharTraits_h
#define __PCL_CharTraits_h

/// \file pcl/CharTraits.h

#include <pcl/Defs.h>
#include <pcl/Diagnostics.h>

#include <pcl/Utility.h>

#include <ctype.h>
#include <memory.h>
#include <string.h>

#ifdef __PCL_WINDOWS
#  include <windows.h>
#else // POSIX
#  include <wchar.h>
#  include <wctype.h>
#endif // !__PCL_WINDOWS

namespace pcl
{

// ----------------------------------------------------------------------------

// Single-character case conversion routines for char16_type code units.
// Declared here only; their definitions are not part of this header.
// NOTE(review): presumably used by the CharTraits (char16_type) case
// conversion members, which are not visible in this part of the file.
char16_type PCL_FUNC PCL_ToCaseFolded( char16_type );
char16_type PCL_FUNC PCL_ToLowercase( char16_type );
char16_type PCL_FUNC PCL_ToUppercase( char16_type );

// Case conversion lookup tables for 8-bit characters. IsoCharTraits indexes
// them with uint8( c ) to obtain the lowercase / uppercase equivalent of c.
extern const uint8* PCL_DATA PCL_toLowercaseLatin1;
extern const uint8* PCL_DATA PCL_toUppercaseLatin1;

// ----------------------------------------------------------------------------

/*!
 * \defgroup char_trait_classes Character Traits Classes
 *
 * A <em>character traits</em> class characterizes a data type to represent a
 * single element of a string, or \e character. For example, a char traits
 * class must provide primitive routines to copy, compare and classify
 * characters and sequences of characters. Those primitives are then used as
 * the building blocks of more complex structures such as character strings and
 * other text-oriented containers.
 */

// ----------------------------------------------------------------------------

/*
 * Shared body of the CompareCodePoints() member functions.
 *
 * The macro expands to a complete function body, so it must be the last
 * statement of the function that uses it. It expects the following names to
 * be visible at the point of expansion:
 *
 *    s1, n1         First character sequence and its length in characters.
 *    s2, n2         Second character sequence and its length in characters.
 *    caseSensitive  When false, both characters are passed through
 *                   ToCaseFolded() before being compared.
 *    char_type, size_type, ToCaseFolded()
 *
 * The first min(n1,n2) characters are compared with operator <; the first
 * difference yields -1 or +1. If that common prefix is identical, the shorter
 * sequence orders first, and 0 is returned only when n1 == n2.
 *
 * Note that s1 and s2 are advanced by the loops, so they no longer point to
 * the beginning of the sequences after the expansion.
 */
#define PCL_COMPARE_CODE_POINTS()                                             \
   PCL_PRECONDITION( n1 == 0 || n2 == 0 || s1 != nullptr && s2 != nullptr )   \
   if ( caseSensitive )                                                       \
   {                                                                          \
      for ( size_type n = pcl::Min( n1, n2 ); n > 0; --n, ++s1, ++s2 )        \
         if ( *s1 != *s2 )                                                    \
            return (*s1 < *s2) ? -1 : +1;                                     \
   }                                                                          \
   else                                                                       \
   {                                                                          \
      for ( size_type n = pcl::Min( n1, n2 ); n > 0; --n, ++s1, ++s2 )        \
      {                                                                       \
         char_type c1 = ToCaseFolded( *s1 ), c2 = ToCaseFolded( *s2 );        \
         if ( c1 != c2 )                                                      \
            return (c1 < c2) ? -1 : +1;                                       \
      }                                                                       \
   }                                                                          \
   return (n1 == n2) ? 0 : ((n1 < n2) ? -1 : +1)

// ----------------------------------------------------------------------------

/*
 * Wildcard string matching algorithm adapted from:
 *
 * Kirk J. Krauss (2014): Matching Wildcards: An Empirical Way to Tame an
 * Algorithm, Dr. Dobb's Magazine, October 7, 2014.
 *
 * http://www.drdobbs.com/architecture-and-design/matching-wildcards-an-empirical-way-to-t/240169123
 */

/*
 * Matches the text [t,t+nt) against the wildcard pattern [p,p+np).
 *
 * In the pattern, '*' matches any (possibly empty) run of characters and '?'
 * matches exactly one character. Any other pattern character must compare
 * equal to the corresponding text character after conversion to Tt.
 *
 * Returns true iff the whole text matches the whole pattern. By convention,
 * false is returned when either sequence is empty, even for the pattern "*".
 *
 * Both sequences are handled as counted ranges: no element at or beyond t+nt
 * or p+np is ever read, so null termination is not required.
 */
template <typename Tt, typename Tp> inline
bool WildMatch( const Tt* t, size_type nt, const Tp* p, size_type np )
{
   PCL_PRECONDITION( nt == 0 || np == 0 || t != nullptr && p != nullptr )

   if ( nt == 0 || np == 0 )
      return false;

   const Tt* const et = t + nt;
   const Tp* const ep = p + np;
   const Tt* bt = nullptr; // first text character not yet absorbed by the last '*'
   const Tp* bp = nullptr; // pattern character that follows the last '*'

   while ( t != et )
   {
      if ( p != ep )
      {
         const Tp c = *p;

         if ( c == Tp( '*' ) )
         {
            // Collapse runs of asterisks. A trailing '*' matches the rest of
            // the text, whatever it is.
            do
               if ( ++p == ep )
                  return true;
            while ( *p == Tp( '*' ) );

            // Bookmark both positions: this is where matching restarts if
            // the pattern segment that follows fails later on.
            bp = p;
            bt = t;
            continue;
         }

         if ( c == Tp( '?' ) || Tt( c ) == *t )
         {
            ++t;
            ++p;
            continue;
         }
      }

      // Mismatch, or pattern exhausted with text still pending. Without a
      // previous '*' there is nothing to retry.
      if ( bp == nullptr )
         return false;

      // Let the last '*' absorb one more text character and try the segment
      // again from its beginning. Every alignment is tried in turn, so no
      // candidate match position is skipped.
      p = bp;
      t = ++bt;
   }

   // Text exhausted: only asterisks may remain in the pattern.
   while ( p != ep && *p == Tp( '*' ) )
      ++p;
   return p == ep;
}

/*
 * Case-insensitive variant of WildMatch().
 *
 * Matches the text [t,t+nt) against the wildcard pattern [p,p+np), where '*'
 * matches any (possibly empty) run of characters and '?' matches exactly one
 * character. Literal characters are compared after folding: ut() is applied
 * to each text character and up() to each pattern character. Wildcards are
 * recognized before folding, directly on the pattern characters.
 *
 * Returns true iff the whole text matches the whole pattern. By convention,
 * false is returned when either sequence is empty, even for the pattern "*".
 *
 * Both sequences are handled as counted ranges: no element at or beyond t+nt
 * or p+np is ever read, so null termination is not required.
 */
template <typename Tt, typename Tp, class Ut, class Up> inline
bool WildMatchIC( const Tt* t, size_type nt, const Tp* p, size_type np, Ut ut, Up up )
{
   PCL_PRECONDITION( nt == 0 || np == 0 || t != nullptr && p != nullptr && ut != nullptr && up != nullptr )

   if ( nt == 0 || np == 0 )
      return false;

   const Tt* const et = t + nt;
   const Tp* const ep = p + np;
   const Tt* bt = nullptr; // first text character not yet absorbed by the last '*'
   const Tp* bp = nullptr; // pattern character that follows the last '*'

   while ( t != et )
   {
      if ( p != ep )
      {
         const Tp c = *p;

         if ( c == Tp( '*' ) )
         {
            // Collapse runs of asterisks. A trailing '*' matches the rest of
            // the text, whatever it is.
            do
               if ( ++p == ep )
                  return true;
            while ( *p == Tp( '*' ) );

            // Bookmark both positions: this is where matching restarts if
            // the pattern segment that follows fails later on.
            bp = p;
            bt = t;
            continue;
         }

         if ( c == Tp( '?' ) || Tt( up( c ) ) == Tt( ut( *t ) ) )
         {
            ++t;
            ++p;
            continue;
         }
      }

      // Mismatch, or pattern exhausted with text still pending. Without a
      // previous '*' there is nothing to retry.
      if ( bp == nullptr )
         return false;

      // Let the last '*' absorb one more text character and try the segment
      // again from its beginning.
      p = bp;
      t = ++bt;
   }

   // Text exhausted: only asterisks may remain in the pattern.
   while ( p != ep && *p == Tp( '*' ) )
      ++p;
   return p == ep;
}

// ----------------------------------------------------------------------------

/*!
 * \class GenericCharTraits
 * \brief Generic base class of character traits classes
 *
 * %GenericCharTraits defines fundamental properties and functionality common
 * to all character types.
 *
 * The purpose of a <em>character traits</em> class is to characterize a data
 * type to represent a single element of a string, or \e character. For
 * example, a char traits class must provide primitive routines to copy,
 * compare and classify characters and sequences of characters.
 *
 * %GenericCharTraits is a template class that must be instantiated for
 * suitable data types. Two instantiations of %GenericCharTraits, namely for
 * \c char and \c char16_type, originate the IsoString and String fundamental
 * PCL classes, respectively. The versatile interface provided by
 * %GenericCharTraits makes it possible to implement string classes virtually
 * for any data type with default and copy constructor semantics.
 *
 * \ingroup char_trait_classes
 * \sa CharTraits, IsoCharTraits, String, IsoString
 */
template <typename T>
class PCL_CLASS GenericCharTraits
{
public:

   /*!
    * Represents the character data type used by this traits class.
    */
   typedef T   char_type;

   /*!
    * Number of bytes per character.
    */
   static constexpr size_type BytesPerChar() noexcept
   {
      return sizeof( char_type );
   }

   /*!
    * Returns the length of a null-terminated string in characters (\e not
    * bytes).
    *
    * The returned value is the length of the initial contiguous sequence of
    * characters that are not equal to Null(). If \a s is a null pointer, this
    * function returns zero.
    */
   static size_type Length( const char_type* s ) noexcept
   {
      if ( s == nullptr )
         return 0;
      const char_type* __restrict__ t = s;
      while ( *t != Null() )
         ++t;
      return size_type( t - s );
   }

   /*!
    * Fills a contiguous region of characters with a constant value.
    *
    * \param s    Initial address of a character sequence.
    * \param c    Constant value to fill with.
    * \param n    Number of characters to fill.
    */
   static void Fill( char_type* __restrict__ s, char_type c, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( ; n > 0; --n )
         *s++ = c;
   }

   /*!
    * Copies a contiguous sequence of characters to a specified location. The
    * source and destination regions do not overlap.
    *
    * \param dst  Destination location where characters will be copied.
    * \param src  Initial address of the sequence of source characters.
    * \param n    Number of characters to copy.
    *
    * If the source and destination regions overlap, this routine will produce
    * an unpredictable result. CopyOverlapped() should be used in these cases.
    *
    * If \a n is zero this function does nothing, and \a dst and \a src can be
    * null pointers.
    */
   static void Copy( char_type* __restrict__ dst, const char_type* __restrict__ src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || (dst != nullptr && src != nullptr) )
      // memcpy() requires valid pointers even for a zero byte count, so the
      // empty case, where null pointers are allowed, must not reach it.
      if ( n > 0 )
         ::memcpy( dst, src, n*sizeof( char_type ) );
   }

   /*!
    * Copies a contiguous sequence of characters to a specified location. The
    * source and destination regions may safely overlap.
    *
    * \param dst  Destination location where characters will be copied.
    * \param src  Initial address of the sequence of source characters.
    * \param n    Number of characters to copy.
    *
    * If \a n is zero this function does nothing, and \a dst and \a src can be
    * null pointers.
    */
   static void CopyOverlapped( char_type* dst, const char_type* src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || (dst != nullptr && src != nullptr) )
      // memmove() requires valid pointers even for a zero byte count.
      if ( n > 0 )
         ::memmove( dst, src, n*sizeof( char_type ) );
   }

   /*!
    * Compares numeric character values between two strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise the comparison does not
    *                         distinguish between lowercase and uppercase
    *                         characters. The default value of this parameter
    *                         is true.
    *
    * Returns an integer code representing the comparison result:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * This function compares the numerical values of string characters. For
    * case-insensitive comparisons, this generic routine is only valid for the
    * ISO/IEC 8859-1 character set: characters in the ranges 0x41-0x5A,
    * 0xC0-0xD6 and 0xD8-0xDE are considered identical to their counterparts
    * in the ranges 0x61-0x7A, 0xE0-0xF6 and 0xF8-0xFE, respectively.
    */
   static int CompareCodePoints( const char_type* __restrict__ s1, size_type n1,
                                 const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true ) noexcept
   {
      PCL_COMPARE_CODE_POINTS();
   }

   /*!
    * Lexicographical comparison between two generic strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise the comparison does not
    *                         distinguish between lowercase and uppercase
    *                         characters. The default value of this parameter
    *                         is true.
    *
    * \param localeAware      This parameter is ignored by GenericCharTraits.
    *                         For locale-aware string comparisons, see the
    *                         reimplementations of this static function in the
    *                         IsoCharTraits and CharTraits template class
    *                         instantiations.
    *
    * Returns an integer code representing the comparison result:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * This default implementation simply calls CompareCodePoints() to compare
    * the numerical values of string characters, so it cannot be localized and
    * can't be aware of user locale settings. For more comprehensive,
    * locale-aware Unicode implementations of this static function, see the
    * IsoCharTraits and CharTraits classes.
    */
   static int Compare( const char_type* __restrict__ s1, size_type n1,
                       const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true, bool localeAware = true ) noexcept
   {
      return CompareCodePoints( s1, n1, s2, n2, caseSensitive );
   }

   /*!
    * Wildcard string matching algorithm.
    *
    * \param t    The string to be matched.
    *
    * \param nt   Length of the string to be matched in characters.
    *
    * \param p    The pattern string. May contain multiple instances of the
    *             wildcard characters '*' and '?'.
    *
    * \param np   Length of the pattern string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise the comparison does not
    *                         distinguish between lowercase and uppercase
    *                         characters. The default value of this parameter
    *                         is true.
    *
    * Returns true iff the string \a t matches the specified pattern \a p. If
    * one of the strings is empty, this function always returns false
    * conventionally, even if the pattern is a single asterisk '*'.
    */
   static bool WildMatch( const char_type* __restrict__ t, size_type nt,
                          const char_type* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
   {
      if ( caseSensitive )
         return pcl::WildMatch( t, nt, p, np );
      return pcl::WildMatchIC( t, nt, p, np, ToCaseFolded, ToCaseFolded );
   }

   /*!
    * Returns the \e case \e folded equivalent character for the specified
    * code point \a c.
    *
    * Case folding eliminates case differences, which is useful for
    * case-insensitive string comparisons.
    *
    * This default implementation is only valid for the ISO/IEC-8859-1
    * character set. For a comprehensive Unicode implementation see the
    * CharTraits class.
    */
   static char_type ToCaseFolded( char_type c ) noexcept
   {
      return ToLowercase( c );
   }

   /*!
    * Returns the lowercase equivalent character for the specified code point.
    *
    * Code points in the ranges 65-90, 192-214 and 216-222 are shifted by +32;
    * any other code point is returned unchanged.
    *
    * This default implementation is only valid for the ISO/IEC-8859-1
    * character set. For a comprehensive Unicode implementation see the
    * CharTraits class.
    */
   static constexpr char_type ToLowercase( char_type c ) noexcept
   {
      return ((c >= char_type(  65 ) && c <= char_type(  90 ))
           || (c >= char_type( 192 ) && c <= char_type( 214 ))
           || (c >= char_type( 216 ) && c <= char_type( 222 ))) ? char_type( c + 32 ) : c;
   }

   /*!
    * Returns the uppercase equivalent character for the specified code point.
    *
    * Code points in the ranges 97-122, 224-246 and 248-254 are shifted by
    * -32; any other code point is returned unchanged.
    *
    * This default implementation is only valid for the ISO/IEC-8859-1
    * character set. For a comprehensive Unicode implementation see the
    * CharTraits class.
    */
   static constexpr char_type ToUppercase( char_type c ) noexcept
   {
      return ((c >= char_type(  97 ) && c <= char_type( 122 ))
           || (c >= char_type( 224 ) && c <= char_type( 246 ))
           || (c >= char_type( 248 ) && c <= char_type( 254 ))) ? char_type( c - 32 ) : c;
   }

   /*!
    * Transforms a string to lower case.
    *
    * This default implementation is only valid for the ISO/IEC-8859-1
    * character set. For a comprehensive Unicode implementation see the
    * CharTraits class.
    */
   static void ToLowercase( char_type* __restrict__ s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( ; n > 0; --n, ++s )
         *s = ToLowercase( *s );
   }

   /*!
    * Transforms a string to upper case.
    *
    * This default implementation is only valid for the ISO/IEC-8859-1
    * character set. For a comprehensive Unicode implementation see the
    * CharTraits class.
    */
   static void ToUppercase( char_type* __restrict__ s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( ; n > 0; --n, ++s )
         *s = ToUppercase( *s );
   }

   /*!
    * Returns the null string termination character '\\0'.
    */
   static constexpr char_type Null() noexcept
   {
      return char_type( 0 );
   }

   /*!
    * Returns the blank space character (white space).
    */
   static constexpr char_type Blank() noexcept
   {
      return char_type( ' ' );
   }

   /*!
    * Returns the horizontal tab control character '\\t'.
    */
   static constexpr char_type Tab() noexcept
   {
      return char_type( '\t' );
   }

   /*!
    * Returns the carriage return control character '\\r'.
    */
   static constexpr char_type CR() noexcept
   {
      return char_type( '\r' );
   }

   /*!
    * Returns the line feed control character '\\n'.
    */
   static constexpr char_type LF() noexcept
   {
      return char_type( '\n' );
   }

   /*!
    * Returns the comma punctuator character ','.
    */
   static constexpr char_type Comma() noexcept
   {
      return char_type( ',' );
   }

   /*!
    * Returns the colon punctuator character ':'.
    */
   static constexpr char_type Colon() noexcept
   {
      return char_type( ':' );
   }

   /*!
    * Returns the semicolon punctuator character ';'.
    */
   static constexpr char_type Semicolon() noexcept
   {
      return char_type( ';' );
   }

   /*!
    * Returns the hyphen punctuator character '-'.
    */
   static constexpr char_type Hyphen() noexcept
   {
      return char_type( '-' );
   }

   /*!
    * Returns the plus sign character '+'.
    */
   static constexpr char_type PlusSign() noexcept
   {
      return char_type( '+' );
   }

   /*!
    * Returns the minus sign character '-'.
    */
   static constexpr char_type MinusSign() noexcept
   {
      return char_type( '-' );
   }

   /*!
    * Returns the decimal separator character '.'.
    */
   static constexpr char_type DecimalSeparator() noexcept
   {
      return char_type( '.' );
   }

   /*!
    * Returns the exponent delimiter character 'e'.
    */
   static constexpr char_type ExponentDelimiter() noexcept
   {
      return char_type( 'e' );
   }

   /*!
    * Returns the underscore character '_'.
    */
   static constexpr char_type Underscore() noexcept
   {
      return char_type( '_' );
   }

   /*!
    * Returns the single quote character "'".
    */
   static constexpr char_type SingleQuote() noexcept
   {
      return char_type( '\'' );
   }

   /*!
    * Returns the double quote character '"'.
    */
   static constexpr char_type DoubleQuote() noexcept
   {
      return char_type( '\"' );
   }

   /*!
    * Returns true iff a character \a c is a null string terminator.
    */
   static constexpr bool IsNull( char_type c ) noexcept
   {
      return c == Null();
   }

   /*!
    * Returns true iff a character \a c is a white space character: a blank
    * space, a horizontal tab, a carriage return or a line feed.
    */
   static constexpr bool IsSpace( char_type c ) noexcept
   {
      return c == Blank() || c == Tab() || c == CR() || c == LF();
   }

   /*!
    * Returns true iff a character \a c is a trimmable character. Generally
    * equivalent to IsSpace().
    */
   static constexpr bool IsTrimable( char_type c ) noexcept
   {
      return IsSpace( c );
   }

   /*!
    * Returns true iff a character \a c is a decimal digit. Decimal digits are
    * in the range [0-9].
    */
   static constexpr bool IsDigit( char_type c ) noexcept
   {
      return c >= char_type( '0' ) && c <= char_type( '9' );
   }

   /*!
    * Returns true iff a character \a c is a hexadecimal digit. Hexadecimal
    * digits are in the range [0-9a-fA-F].
    */
   static constexpr bool IsHexDigit( char_type c ) noexcept
   {
      return IsDigit( c ) || (c >= char_type( 'A' ) && c <= char_type( 'F' ))
                          || (c >= char_type( 'a' ) && c <= char_type( 'f' ));
   }

   /*!
    * Returns true iff a character \a c is in the range [a-zA-Z].
    */
   static constexpr bool IsAlpha( char_type c ) noexcept
   {
      return IsLowercaseAlpha( c ) || IsUppercaseAlpha( c );
   }

   /*!
    * Returns true iff a character \a c is in the range [a-z].
    */
   static constexpr bool IsLowercaseAlpha( char_type c ) noexcept
   {
      return c >= char_type( 'a' ) && c <= char_type( 'z' );
   }

   /*!
    * Returns true iff a character \a c is in the range [A-Z].
    */
   static constexpr bool IsUppercaseAlpha( char_type c ) noexcept
   {
      return c >= char_type( 'A' ) && c <= char_type( 'Z' );
   }

   /*!
    * Returns true iff a character \a c is the underscore character '_'.
    */
   static constexpr bool IsUnderscore( char_type c ) noexcept
   {
      return c == Underscore();
   }

   /*!
    * Returns true iff a character \a c is a valid symbol element. Symbol digits
    * are in the range [a-zA-Z0-9_].
    */
   static constexpr bool IsSymbolDigit( char_type c ) noexcept
   {
      return IsAlpha( c ) || IsDigit( c ) || IsUnderscore( c );
   }

   /*!
    * Returns true iff a character \a c is a valid starting symbol digit. A
    * starting symbol digit is in the range [a-zA-Z_].
    */
   static constexpr bool IsStartingSymbolDigit( char_type c ) noexcept
   {
      return IsAlpha( c ) || IsUnderscore( c );
   }

   /*!
    * Returns true iff a character \a c is a numerical sign, either '+' or '-'.
    */
   static constexpr bool IsSign( char_type c ) noexcept
   {
      return c == MinusSign() || c == PlusSign();
   }

   /*!
    * Returns true iff a character \a c is the decimal separator '.'.
    */
   static constexpr bool IsDecimalSeparator( char_type c ) noexcept
   {
      return c == DecimalSeparator();
   }

   /*!
    * Returns true iff a character \a c is an exponent delimiter. Exponent
    * delimiters are in the range [eEdD]. The [dD] pair allows for FORTRAN
    * compatibility.
    */
   static constexpr bool IsExponentDelimiter( char_type c ) noexcept
   {
      return c == char_type( 'e' ) || c == char_type( 'E' ) || c == char_type( 'd' ) || c == char_type( 'D' );
   }

   /*!
    * Returns true iff a character \a c is a wildcard. The wildcards are the
    * asterisk '*' and question mark '?' characters.
    */
   static constexpr bool IsWildcard( char_type c ) noexcept
   {
      return c == char_type( '*' ) || c == char_type( '?' );
   }

   /*!
    * Returns a pointer to the first non-trimmable character in the sequence of
    * contiguous characters defined by the range [i,j) of pointers.
    *
    * If all characters in the sequence are trimmable, or if the sequence is
    * empty, the returned pointer is equal to the ending pointer \a j.
    */
   template <typename Ptr1, typename Ptr2>
   static Ptr1 SearchTrimLeft( Ptr1 i, Ptr2 j ) noexcept
   {
      for ( ; i < j && IsTrimable( *i ); ++i ) {}
      return i;
   }

   /*!
    * Returns a pointer to the character \e after the last non-trimmable
    * character in the sequence of contiguous characters defined by the range
    * [i,j) of pointers.
    *
    * If the sequence does not end with trimmable characters, this function
    * returns the ending pointer \a j. If all characters in the sequence are
    * trimmable, the returned pointer is equal to the starting pointer \a i.
    */
   template <typename Ptr1, typename Ptr2>
   static Ptr2 SearchTrimRight( Ptr1 i, Ptr2 j ) noexcept
   {
      for ( ; i < j && IsTrimable( *(j-1) ); --j ) {}
      return j;
   }
};

// ----------------------------------------------------------------------------

/*!
 * \class IsoCharTraits
 * \brief A template instantiation of GenericCharTraits for the \c char type.
 *
 * %IsoCharTraits is the char traits class used by IsoString.
 *
 * \ingroup char_trait_classes
 */
class PCL_CLASS IsoCharTraits : public GenericCharTraits<char>
{
public:

   /*!
    * Base class of this char traits class.
    */
   typedef GenericCharTraits<char>  traits_base;

   /*!
    * Represents the character data type used by this traits class.
    */
   typedef traits_base::char_type   char_type;

   /*!
    * Returns the length of a null-terminated 8-bit string in characters
    * (\e not bytes).
    *
    * The returned value is the length of the initial contiguous sequence of
    * characters that are not equal to Null(). If \a s is a null pointer, this
    * function returns zero.
    */
   static constexpr size_type Length( const char_type* __restrict__ s ) noexcept
   {
      return (s != nullptr) ? ::strlen( s ) : 0;
   }

   /*!
    * Fills a contiguous region of characters with a constant value.
    *
    * \param s    Initial address of a character sequence.
    * \param c    Constant value to fill with.
    * \param n    Number of characters to fill.
    *
    * If \a n is zero this function does nothing, and \a s can be a null
    * pointer.
    */
   static void Fill( char_type* __restrict__ s, char_type c, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      // memset() requires a valid pointer even for a zero byte count, so the
      // empty case, where a null pointer is allowed, must not reach it.
      if ( n > 0 )
         ::memset( s, c, n );
   }

   /*!
    * Copies a contiguous sequence of characters to a specified location. The
    * source and destination regions do not overlap.
    *
    * \param dst  Destination location where characters will be copied.
    * \param src  Initial address of the sequence of source characters.
    * \param n    Number of characters to copy.
    *
    * If the source and destination regions overlap, this routine will produce
    * an unpredictable result. CopyOverlapped() should be used in these cases.
    *
    * If \a n is zero this function does nothing, and \a dst and \a src can be
    * null pointers.
    */
   static void Copy( char_type* __restrict__ dst, const char_type* __restrict__ src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || (dst != nullptr && src != nullptr) )
      // memcpy() requires valid pointers even for a zero byte count.
      if ( n > 0 )
         ::memcpy( dst, src, n );
   }

   /*!
    * Copies a contiguous sequence of characters to a specified location. The
    * source and destination regions may safely overlap.
    *
    * \param dst  Destination location where characters will be copied.
    * \param src  Initial address of the sequence of source characters.
    * \param n    Number of characters to copy.
    *
    * If \a n is zero this function does nothing, and \a dst and \a src can be
    * null pointers.
    */
   static void CopyOverlapped( char_type* dst, const char_type* src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || (dst != nullptr && src != nullptr) )
      // memmove() requires valid pointers even for a zero byte count.
      if ( n > 0 )
         ::memmove( dst, src, n );
   }

   /*!
    * Compares numeric character values between two 8-bit strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise the comparison does not
    *                         distinguish between lowercase and uppercase
    *                         characters. The default value of this parameter
    *                         is true.
    *
    * Returns an integer code representing the comparison result:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    */
   static int CompareCodePoints( const char_type* __restrict__ s1, size_type n1,
                                 const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true ) noexcept
   {
      PCL_COMPARE_CODE_POINTS();
   }

   /*!
    * Lexicographical comparison between two 8-bit strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise the comparison does not
    *                         distinguish between lowercase and uppercase
    *                         characters (as defined by the current locale).
    *                         The default value of this parameter is true.
    *
    * \param localeAware      When true, a locale-aware comparison is
    *                         performed which takes into account the currently
    *                         selected user locale (language and variants).
    *                         When false, an invariant comparison is carried
    *                         out by comparing Unicode code points (which may
    *                         be faster). The default value is true.
    *
    * Returns an integer code representing the comparison result:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * On OS X and Windows platforms this function invokes system API routines
    * to perform locale-aware string comparisons.
    *
    * On X11 (FreeBSD and Linux platforms), when the \a localeAware
    * parameter is true, the comparison uses the current locale as reported by
    * calling the setlocale POSIX function:
    *
    * \code
    * setlocale( LC_COLLATE, 0 );
    * \endcode
    *
    * In PixInsight on X11 platforms, the default user collation and case
    * comparison locales are used. A module can change them \e temporarily to
    * a custom locale by calling:
    *
    * \code
    * setlocale( LC_COLLATE, "<custom-locale>" );
    * setlocale( LC_CTYPE, "<custom-locale>" );
    * \endcode
    *
    * and then, after the necessary work has been done with the custom locales,
    * restore the default settings:
    *
    * \code
    * setlocale( LC_COLLATE, "" );
    * setlocale( LC_CTYPE, "" );
    * \endcode
    */
   static int Compare( const char_type* __restrict__ s1, size_type n1,
                       const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true, bool localeAware = true ) noexcept;

   /*!
    * Wildcard string matching algorithm.
    *
    * \param t    The string to be matched.
    *
    * \param nt   Length of the string to be matched in characters.
    *
    * \param p    The pattern string. May contain multiple instances of the
    *             wildcard characters '*' and '?'.
    *
    * \param np   Length of the pattern string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise the comparison does not
    *                         distinguish between lowercase and uppercase
    *                         characters. The default value of this parameter
    *                         is true.
    *
    * Returns true iff the string \a t matches the specified pattern \a p. If
    * one of the strings is empty, this function always returns false
    * conventionally, even if the pattern is a single asterisk '*'.
    */
   static bool WildMatch( const char_type* __restrict__ t, size_type nt,
                          const char_type* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
   {
      if ( caseSensitive )
         return pcl::WildMatch( t, nt, p, np );
      // ToCaseFolded() is overloaded in this class, so it is wrapped in
      // lambdas to select the single-character overload unambiguously.
      return pcl::WildMatchIC( t, nt, p, np,
                               []( char_type c ) { return ToCaseFolded( c ); },
                               []( char_type c ) { return ToCaseFolded( c ); } );
   }

   /*!
    * Returns the \e case \e folded equivalent character for the specified
    * ISO/IEC-8859-1 code point \a c.
    *
    * Case folding eliminates case differences, which is useful for
    * case-insensitive string comparisons.
    *
    * For more information on case folding, see Section 3.13 Default Case
    * Algorithms in The Unicode Standard.
    */
   static char_type ToCaseFolded( char_type c ) noexcept
   {
      return ToLowercase( c );
   }

   /*!
    * Returns the lowercase equivalent character for the specified
    * ISO/IEC-8859-1 code point \a c.
    */
   static char_type ToLowercase( char_type c ) noexcept
   {
      // The cast to uint8 keeps the table index in the range [0,255] where
      // char is a signed type.
      return char_type( PCL_toLowercaseLatin1[uint8( c )] );
   }

   /*!
    * Returns the uppercase equivalent character for the specified
    * ISO/IEC-8859-1 code point \a c.
    */
   static char_type ToUppercase( char_type c ) noexcept
   {
      // The cast to uint8 keeps the table index in the range [0,255] where
      // char is a signed type.
      return char_type( PCL_toUppercaseLatin1[uint8( c )] );
   }

   /*!
    * Transforms a string to case folded.
    */
   static void ToCaseFolded( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( ; n > 0; --n, ++s )
         *s = ToCaseFolded( *s );
   }

   /*!
    * Transforms a string to lowercase.
    */
   static void ToLowercase( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( ; n > 0; --n, ++s )
         *s = ToLowercase( *s );
   }

   /*!
    * Transforms a string to uppercase.
    */
   static void ToUppercase( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( ; n > 0; --n, ++s )
         *s = ToUppercase( *s );
   }
};

// ----------------------------------------------------------------------------

/*!
 * \class CharTraits
 * \brief A template instantiation of GenericCharTraits for \c char16_type.
 *
 * %CharTraits is the char traits class used by String.
 *
 * \ingroup char_trait_classes
 */
class PCL_CLASS CharTraits : public GenericCharTraits<char16_type>
{
public:

   /*!
    * The GenericCharTraits instantiation this traits class derives from.
    */
   using traits_base = GenericCharTraits<char16_type>;

   /*!
    * The character data type handled by this traits class.
    */
   using char_type = traits_base::char_type;

   /*!
    * Returns the length, in characters (\e not bytes), of a null-terminated
    * UTF-16 string.
    *
    * The result is the number of characters in the initial contiguous
    * sequence of characters that are not equal to Null().
    */
   static size_type Length( const char_type* __restrict__ s ) noexcept
   {
#ifdef __PCL_WINDOWS
      // A null pointer is treated as an empty string; otherwise the work is
      // delegated to the C runtime's wide-character routine.
      if ( s == nullptr )
         return 0u;
      return ::wcslen( reinterpret_cast<const wchar_t*>( s ) );
#else
      return traits_base::Length( s );
#endif
   }

   /*!
    * Copies a contiguous sequence of characters to a destination location.
    * The source and destination regions must not overlap.
    *
    * \param dst  Destination location where characters will be copied.
    * \param src  Initial address of the sequence of source characters.
    * \param n    Number of characters to copy.
    *
    * The result is unpredictable if the source and destination regions
    * overlap. Use CopyOverlapped() in such cases.
    */
   static void Copy( char_type* __restrict__ dst, const char_type* __restrict__ src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
      // The byte count passed to memcpy() is twice the character count.
      const size_type byteCount = n << 1;
      ::memcpy( dst, src, byteCount );
   }

   /*!
    * Copies a contiguous sequence of characters to a destination location.
    * The source and destination regions are allowed to overlap.
    *
    * \param dst  Destination location where characters will be copied.
    * \param src  Initial address of the sequence of source characters.
    * \param n    Number of characters to copy.
    */
   static void CopyOverlapped( char_type* dst, const char_type* src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
      // The byte count passed to memmove() is twice the character count.
      const size_type byteCount = n << 1;
      ::memmove( dst, src, byteCount );
   }

   /*!
    * Compares two Unicode strings by the numeric values of their characters.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise lowercase and uppercase
    *                         characters are not distinguished. The default
    *                         value of this parameter is true.
    *
    * Returns an integer code representing the comparison result:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    */
   static int CompareCodePoints( const char_type* __restrict__ s1, size_type n1,
                                 const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true ) noexcept
   {
      // The whole comparison is provided by a macro defined elsewhere in
      // this header.
      PCL_COMPARE_CODE_POINTS();
   }

   /*!
    * Lexicographical comparison of two Unicode strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise lowercase and uppercase
    *                         characters (as defined by the selected locale)
    *                         are not distinguished. The default value of this
    *                         parameter is true.
    *
    * \param localeAware      When true, a locale-aware comparison is
    *                         performed, which takes into account the
    *                         currently selected user locale (language and
    *                         variants). When false, an invariant comparison
    *                         of Unicode code points is carried out (which may
    *                         be faster). The default value is true.
    *
    * Returns an integer code representing the comparison result:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * On OS X and Windows platforms, this function invokes system API routines
    * to perform locale-aware string comparisons.
    *
    * On X11 (FreeBSD and Linux platforms), when the \a localeAware parameter
    * is true, the comparison uses the current locale as reported by calling
    * the setlocale POSIX function:
    *
    * \code
    * setlocale( LC_COLLATE, 0 );
    * \endcode
    *
    * In PixInsight on X11 platforms, the default user collation and case
    * comparison locales are used. A module can replace them \e temporarily
    * with a custom locale by calling:
    *
    * \code
    * setlocale( LC_COLLATE, "<custom-locale>" );
    * setlocale( LC_CTYPE, "<custom-locale>" );
    * \endcode
    *
    * and, once the work requiring the custom locales has been done, restore
    * the default settings:
    *
    * \code
    * setlocale( LC_COLLATE, "" );
    * setlocale( LC_CTYPE, "" );
    * \endcode
    */
   static int Compare( const char_type* __restrict__ s1, size_type n1,
                       const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true, bool localeAware = true ) noexcept;

   /*!
    * Wildcard string matching algorithm.
    *
    * \param t    The string to be matched.
    *
    * \param nt   Length of the string to be matched in characters.
    *
    * \param p    The pattern string. May contain multiple instances of the
    *             wildcard characters '*' and '?'.
    *
    * \param np   Length of the pattern string in characters.
    *
    * \param caseSensitive    When true, a case-sensitive comparison is
    *                         performed; otherwise lowercase and uppercase
    *                         characters are not distinguished. The default
    *                         value of this parameter is true.
    *
    * Returns true iff the string \a t matches the specified pattern \a p. If
    * one of the strings is empty, this function conventionally returns false,
    * even if the pattern is a single asterisk '*'.
    */
   static bool WildMatch( const char_type* __restrict__ t, size_type nt,
                          const char_type* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
   {
      if ( !caseSensitive )
      {
         // The same case folding functor is supplied for both the text and
         // the pattern characters.
         const auto fold = []( char_type c ) { return ToCaseFolded( c ); };
         return pcl::WildMatchIC( t, nt, p, np, fold, fold );
      }
      return pcl::WildMatch( t, nt, p, np );
   }

   /*!
    * Wildcard string matching algorithm - overloaded version taking an 8-bit
    * pattern string.
    *
    * NOTE(review): in the case-insensitive path the pattern functor returns
    * whatever IsoCharTraits::ToCaseFolded() returns for a plain \c char;
    * whether pattern characters outside the ASCII range compare as expected
    * against UTF-16 text depends on how pcl::WildMatchIC() compares the two
    * folded values - to be confirmed.
    */
   static bool WildMatch( const char_type* __restrict__ t, size_type nt,
                          const char* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
   {
      if ( !caseSensitive )
      {
         // Text characters are folded with this class; pattern characters
         // are folded with the 8-bit traits class.
         const auto foldText = []( char_type c ) { return ToCaseFolded( c ); };
         const auto foldPattern = []( char c ) { return IsoCharTraits::ToCaseFolded( c ); };
         return pcl::WildMatchIC( t, nt, p, np, foldText, foldPattern );
      }
      return pcl::WildMatch( t, nt, p, np );
   }

   /*!
    * Returns the \e case \e folded equivalent character for the specified
    * UTF-16 code point \a c.
    *
    * Case folding eliminates case differences, which is useful for
    * case-insensitive string comparisons.
    *
    * Only the \e simple \e case \e folding Unicode algorithm is implemented.
    * For more information on case folding, see Section 3.13 Default Case
    * Algorithms in The Unicode Standard.
    */
   static char_type ToCaseFolded( char_type c ) noexcept
   {
      // Code points from 256 onwards are delegated to PCL_ToCaseFolded().
      if ( c >= 256 )
         return PCL_ToCaseFolded( c );

      // Below 256, the code points that change are 'A'-'Z' (65-90) and the
      // ranges 192-214 and 216-222; each one maps to the code point 32
      // positions above it.
      const bool isFoldable = (c >= 65 && c <= 90)
                           || (c >= 192 && c <= 214)
                           || (c >= 216 && c <= 222);
      return isFoldable ? char_type( c + 32 ) : c;
   }

   /*!
    * Returns the lowercase equivalent character for the specified UTF-16 code
    * point \a c.
    */
   static char_type ToLowercase( char_type c ) noexcept
   {
      // Code points from 256 onwards are delegated to PCL_ToLowercase().
      if ( c >= 256 )
         return PCL_ToLowercase( c );

      // Below 256, the code points that change are 'A'-'Z' (65-90) and the
      // ranges 192-214 and 216-222; each one maps to the code point 32
      // positions above it.
      const bool isUpper = (c >= 65 && c <= 90)
                        || (c >= 192 && c <= 214)
                        || (c >= 216 && c <= 222);
      return isUpper ? char_type( c + 32 ) : c;
   }

   /*!
    * Returns the uppercase equivalent character for the specified UTF-16 code
    * point \a c.
    */
   static char_type ToUppercase( char_type c ) noexcept
   {
      // Code points from 256 onwards are delegated to PCL_ToUppercase().
      if ( c >= 256 )
         return PCL_ToUppercase( c );

      // Below 256, the code points that change are 'a'-'z' (97-122) and the
      // ranges 224-246 and 248-254; each one maps to the code point 32
      // positions below it.
      const bool isLower = (c >= 97 && c <= 122)
                        || (c >= 224 && c <= 246)
                        || (c >= 248 && c <= 254);
      return isLower ? char_type( c - 32 ) : c;
   }

   /*!
    * Transforms a string of \a n characters to case folded, in place.
    */
   static void ToCaseFolded( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( size_type i = 0; i < n; ++i )
         s[i] = ToCaseFolded( s[i] );
   }

   /*!
    * Transforms a string of \a n characters to lowercase, in place.
    */
   static void ToLowercase( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( size_type i = 0; i < n; ++i )
         s[i] = ToLowercase( s[i] );
   }

   /*!
    * Transforms a string of \a n characters to uppercase, in place.
    */
   static void ToUppercase( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( size_type i = 0; i < n; ++i )
         s[i] = ToUppercase( s[i] );
   }

   /*!
    * Returns true iff the specified UTF-16 character is a <em>high
    * surrogate</em>, that is, the most significant word of a <em>surrogate
    * pair</em> forming a UTF-32 code point. High surrogates have values
    * between 0xD800 and 0xDBFF.
    */
   static constexpr bool IsHighSurrogate( char_type c16 ) noexcept
   {
      return c16 >= 0xD800 && c16 <= 0xDBFF;
   }

   /*!
    * Returns the <em>high surrogate</em> word of a UTF-32 code point. The
    * specified UTF-32 code must be in the range from 0x010000 to 0x10FFFF,
    * since surrogates only exist outside the Basic Multilingual Plane of
    * Unicode.
    */
   static constexpr char_type HighSurrogate( char32_type c32 ) noexcept
   {
      return char_type( 0xD7C0 + (c32 >> 10) );
   }

   /*!
    * Returns true iff the specified UTF-16 character is a <em>low
    * surrogate</em>, that is, the least significant word of a <em>surrogate
    * pair</em> forming a UTF-32 code point. Low surrogates have values
    * between 0xDC00 and 0xDFFF.
    */
   static constexpr bool IsLowSurrogate( char_type c16 ) noexcept
   {
      return c16 >= 0xDC00 && c16 <= 0xDFFF;
   }

   /*!
    * Returns the <em>low surrogate</em> word of a UTF-32 code point. The
    * specified UTF-32 code must be in the range from 0x010000 to 0x10FFFF,
    * since surrogates only exist outside the Basic Multilingual Plane of
    * Unicode.
    */
   static constexpr char_type LowSurrogate( char32_type c32 ) noexcept
   {
      return char_type( 0xDC00 + (c32 % 0x400) );
   }

   /*!
    * Returns the UTF-32 code point represented by a <em>surrogate pair</em>.
    * The specified surrogate words must pertain to a valid Unicode code point
    * outside the Basic Multilingual Plane (from 0x010000 to 0x10FFFF).
    */
   static constexpr char32_type SurrogatePairToUTF32( char_type high, char_type low ) noexcept
   {
      // 0x035FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000: a single constant
      // that removes both surrogate offsets and adds the 0x10000 base.
      return low + (char32_type( high ) << 10) - 0x035FDC00;
   }
};

// ----------------------------------------------------------------------------

} // pcl

#endif   // __PCL_CharTraits_h

// ----------------------------------------------------------------------------
// EOF pcl/CharTraits.h - Released 2022-03-12T18:59:29Z