// Files
// tenmon/3rdparty/include/pcl/CharTraits.h
// T
// 2022-04-12 08:17:18 +02:00
//
// 1454 lines
// 47 KiB
// C++

// ____ ______ __
// / __ \ / ____// /
// / /_/ // / / /
// / ____// /___ / /___ PixInsight Class Library
// /_/ \____//_____/ PCL 2.4.23
// ----------------------------------------------------------------------------
// pcl/CharTraits.h - Released 2022-03-12T18:59:29Z
// ----------------------------------------------------------------------------
// This file is part of the PixInsight Class Library (PCL).
// PCL is a multiplatform C++ framework for development of PixInsight modules.
//
// Copyright (c) 2003-2022 Pleiades Astrophoto S.L. All Rights Reserved.
//
// Redistribution and use in both source and binary forms, with or without
// modification, is permitted provided that the following conditions are met:
//
// 1. All redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. All redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the names "PixInsight" and "Pleiades Astrophoto", nor the names
// of their contributors, may be used to endorse or promote products derived
// from this software without specific prior written permission. For written
// permission, please contact info@pixinsight.com.
//
// 4. All products derived from this software, in any form whatsoever, must
// reproduce the following acknowledgment in the end-user documentation
// and/or other materials provided with the product:
//
// "This product is based on software from the PixInsight project, developed
// by Pleiades Astrophoto and its contributors (https://pixinsight.com/)."
//
// Alternatively, if that is where third-party acknowledgments normally
// appear, this acknowledgment must be reproduced in the product itself.
//
// THIS SOFTWARE IS PROVIDED BY PLEIADES ASTROPHOTO AND ITS CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PLEIADES ASTROPHOTO OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, BUSINESS
// INTERRUPTION; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; AND LOSS OF USE,
// DATA OR PROFITS) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// ----------------------------------------------------------------------------
#ifndef __PCL_CharTraits_h
#define __PCL_CharTraits_h
/// \file pcl/CharTraits.h
#include <pcl/Defs.h>
#include <pcl/Diagnostics.h>
#include <pcl/Utility.h>
#include <ctype.h>
#include <memory.h>
#include <string.h>
#ifdef __PCL_WINDOWS
# include <windows.h>
#else // POSIX
# include <wchar.h>
# include <wctype.h>
#endif // !__PCL_WINDOWS
namespace pcl
{
// ----------------------------------------------------------------------------
char16_type PCL_FUNC PCL_ToCaseFolded( char16_type );
char16_type PCL_FUNC PCL_ToLowercase( char16_type );
char16_type PCL_FUNC PCL_ToUppercase( char16_type );
extern const uint8* PCL_DATA PCL_toLowercaseLatin1;
extern const uint8* PCL_DATA PCL_toUppercaseLatin1;
// ----------------------------------------------------------------------------
/*!
* \defgroup char_trait_classes Character Traits Classes
*
* A <em>character traits</em> class characterizes a data type to represent a
* single element of a string, or \e character. For example, a char traits
* class must provide primitive routines to copy, compare and classify
* characters and sequences of characters. Those primitives are then used as
* the building blocks of more complex structures such as character strings and
* other text-oriented containers.
*/
// ----------------------------------------------------------------------------
/*
 * PCL_COMPARE_CODE_POINTS() expands to the complete body of a code point
 * comparison routine. It is a textual macro, so the following identifiers
 * must be visible at the point of expansion:
 *
 *    s1, n1         First string and its length in characters.
 *    s2, n2         Second string and its length in characters.
 *    caseSensitive  Selects between a direct comparison of characters and a
 *                   comparison of their ToCaseFolded() equivalents.
 *    char_type      Character type of both strings.
 *    ToCaseFolded   Single-character case folding function.
 *
 * The first Min( n1, n2 ) characters are compared pairwise; the first
 * difference found yields -1 or +1. If that common prefix is identical, the
 * result depends on the lengths only: 0 for equal lengths, -1 if the first
 * string is shorter, +1 if it is longer.
 *
 * The expansion ends with a return statement without a trailing semicolon,
 * which must be supplied by the invoking code.
 */
#define PCL_COMPARE_CODE_POINTS() \
PCL_PRECONDITION( n1 == 0 || n2 == 0 || s1 != nullptr && s2 != nullptr ) \
if ( caseSensitive ) \
{ \
for ( size_type n = pcl::Min( n1, n2 ); n > 0; --n, ++s1, ++s2 ) \
if ( *s1 != *s2 ) \
return (*s1 < *s2) ? -1 : +1; \
} \
else \
{ \
for ( size_type n = pcl::Min( n1, n2 ); n > 0; --n, ++s1, ++s2 ) \
{ \
char_type c1 = ToCaseFolded( *s1 ), c2 = ToCaseFolded( *s2 ); \
if ( c1 != c2 ) \
return (c1 < c2) ? -1 : +1; \
} \
} \
return (n1 == n2) ? 0 : ((n1 < n2) ? -1 : +1)
// ----------------------------------------------------------------------------
/*
 * Case-sensitive wildcard string matching.
 *
 * t     The text to be matched.
 * nt    Length of the text in characters.
 * p     The pattern. An asterisk '*' matches any sequence of zero or more
 *       characters; a question mark '?' matches exactly one character.
 * np    Length of the pattern in characters.
 *
 * Returns true iff the whole text matches the whole pattern. If either string
 * is empty the function returns false by convention, even if the pattern is a
 * single asterisk.
 *
 * Pattern characters are converted to the text character type (Tt) before
 * being compared.
 *
 * Based on the backtracking scheme described in:
 *
 * Kirk J. Krauss (2014): Matching Wildcards: An Empirical Way to Tame an
 * Algorithm, Dr. Dobb's Magazine, October 7, 2014.
 *
 * http://www.drdobbs.com/architecture-and-design/matching-wildcards-an-empirical-way-to-t/240169123
 *
 * The published algorithm works on null-terminated strings and relies on being
 * able to read the terminators. This implementation works on explicit
 * lengths, so it never dereferences t + nt or p + np: both ranges can be
 * arbitrary substrings without a terminating null character.
 */
template <typename Tt, typename Tp> inline
bool WildMatch( const Tt* t, size_type nt, const Tp* p, size_type np )
{
   PCL_PRECONDITION( nt == 0 || np == 0 || t != nullptr && p != nullptr )
   if ( nt == 0 || np == 0 )
      return false;

   const Tt* const et = t + nt; // end of text
   const Tp* const ep = p + np; // end of pattern
   const Tt* bt = nullptr;      // text position where the last '*' currently ends
   const Tp* bp = nullptr;      // pattern position right after the last '*' run

   while ( t != et )
   {
      if ( p != ep )
      {
         const Tp c = *p;

         if ( c == Tp( '*' ) )
         {
            // Collapse consecutive asterisks. A trailing asterisk matches
            // whatever remains of the text.
            do
               if ( ++p == ep )
                  return true;
            while ( *p == Tp( '*' ) );

            // Fast forward to the first text character that can match the
            // literal following the asterisk. If there is none, no match is
            // possible at all.
            if ( *p != Tp( '?' ) )
               while ( Tt( *p ) != *t )
                  if ( ++t == et )
                     return false;

            // Remember where to resume from if a mismatch happens later.
            bp = p;
            bt = t;
            continue;
         }

         if ( c == Tp( '?' ) || Tt( c ) == *t )
         {
            ++t;
            ++p;
            continue;
         }
      }

      // Mismatch, or pattern exhausted with text remaining. Without a previous
      // asterisk there is nothing to retry; otherwise let the asterisk absorb
      // one more text character and restart after it.
      if ( bp == nullptr )
         return false;
      p = bp;
      t = ++bt;
   }

   // The text has been consumed: only asterisks may remain in the pattern.
   while ( p != ep && *p == Tp( '*' ) )
      ++p;
   return p == ep;
}
/*
 * Wildcard string matching with caller-supplied character transformations,
 * typically used for case-insensitive matching.
 *
 * t     The text to be matched.
 * nt    Length of the text in characters.
 * p     The pattern. An asterisk '*' matches any sequence of zero or more
 *       characters; a question mark '?' matches exactly one character.
 * np    Length of the pattern in characters.
 * ut    Callable applied to each text character before comparison.
 * up    Callable applied to each literal pattern character before comparison.
 *       Wildcards are recognized on the untransformed pattern characters.
 *
 * Returns true iff the whole text matches the whole pattern after applying
 * the transformations. If either string is empty the function returns false
 * by convention, even if the pattern is a single asterisk.
 *
 * Unlike the null-terminated algorithm it derives from (Kirk J. Krauss,
 * Matching Wildcards: An Empirical Way to Tame an Algorithm, Dr. Dobb's
 * Magazine, 2014), this routine never dereferences t + nt or p + np and never
 * calls ut() or up() on out-of-range elements.
 */
template <typename Tt, typename Tp, class Ut, class Up> inline
bool WildMatchIC( const Tt* t, size_type nt, const Tp* p, size_type np, Ut ut, Up up )
{
   PCL_PRECONDITION( nt == 0 || np == 0 || t != nullptr && p != nullptr && ut != nullptr && up != nullptr )
   if ( nt == 0 || np == 0 )
      return false;

   const Tt* const et = t + nt; // end of text
   const Tp* const ep = p + np; // end of pattern
   const Tt* bt = nullptr;      // text position where the last '*' currently ends
   const Tp* bp = nullptr;      // pattern position right after the last '*' run

   while ( t != et )
   {
      if ( p != ep )
      {
         Tp c = *p;

         if ( c == Tp( '*' ) )
         {
            // Collapse consecutive asterisks. A trailing asterisk matches
            // whatever remains of the text.
            do
               if ( ++p == ep )
                  return true;
            while ( (c = *p) == Tp( '*' ) );

            // Fast forward to the first text character that can match the
            // transformed literal following the asterisk.
            if ( c != Tp( '?' ) )
            {
               c = up( c );
               while ( Tt( c ) != ut( *t ) )
                  if ( ++t == et )
                     return false;
            }

            // Remember where to resume from if a mismatch happens later.
            bp = p;
            bt = t;
            continue;
         }

         if ( c == Tp( '?' ) || Tt( up( c ) ) == Tt( ut( *t ) ) )
         {
            ++t;
            ++p;
            continue;
         }
      }

      // Mismatch, or pattern exhausted with text remaining. Without a previous
      // asterisk there is nothing to retry; otherwise let the asterisk absorb
      // one more text character and restart after it.
      if ( bp == nullptr )
         return false;
      p = bp;
      t = ++bt;
   }

   // The text has been consumed: only asterisks may remain in the pattern.
   while ( p != ep && *p == Tp( '*' ) )
      ++p;
   return p == ep;
}
// ----------------------------------------------------------------------------
/*!
* \class GenericCharTraits
* \brief Generic base class of character traits classes
*
* %GenericCharTraits defines fundamental properties and functionality common
* to all character types.
*
* The purpose of a <em>character traits</em> class is to characterize a data
* type to represent a single element of a string, or \e character. For
* example, a char traits class must provide primitive routines to copy,
* compare and classify characters and sequences of characters.
*
* %GenericCharTraits is a template class that must be instantiated for
* suitable data types. Two instantiations of %GenericCharTraits, namely for
* \c char and \c char16_type, originate the IsoString and String fundamental
* PCL classes, respectively. The versatile interface provided by
* %GenericCharTraits makes it possible to implement string classes for
* virtually any data type with default and copy constructor semantics.
*
* \ingroup char_trait_classes
* \sa CharTraits, IsoCharTraits, String, IsoString
*/
template <typename T>
class PCL_CLASS GenericCharTraits
{
public:

   /*!
    * The character data type characterized by this traits class.
    */
   using char_type = T;

   /*!
    * Returns the size in bytes of a single character.
    */
   static constexpr size_type BytesPerChar() noexcept
   {
      return sizeof( T );
   }

   /*!
    * Returns the length in characters (\e not bytes) of a null-terminated
    * string.
    *
    * The result is the number of characters that precede the first character
    * equal to Null(). If \a s is \c nullptr, zero is returned.
    */
   static size_type Length( const char_type* s ) noexcept
   {
      if ( s == nullptr )
         return 0;
      const char_type* end = s;
      while ( *end != Null() )
         ++end;
      return size_type( end - s );
   }

   /*!
    * Assigns a constant value to every character in a contiguous sequence.
    *
    * \param s    Address of the first character to be written.
    * \param c    Value assigned to each character.
    * \param n    Number of characters to write.
    */
   static void Fill( char_type* __restrict__ s, char_type c, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( char_type* const end = s + n; s != end; ++s )
         *s = c;
   }

   /*!
    * Copies \a n characters between two regions that do not overlap.
    *
    * \param dst  Address where the characters will be written.
    * \param src  Address of the first character to be copied.
    * \param n    Number of characters to copy.
    *
    * The copy is performed with memcpy(), so the result is unpredictable if
    * the regions overlap. Use CopyOverlapped() in such cases.
    */
   static void Copy( char_type* __restrict__ dst, const char_type* __restrict__ src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
      ::memcpy( dst, src, sizeof( char_type )*n );
   }

   /*!
    * Copies \a n characters between two regions that are allowed to overlap.
    *
    * \param dst  Address where the characters will be written.
    * \param src  Address of the first character to be copied.
    * \param n    Number of characters to copy.
    *
    * The copy is performed with memmove().
    */
   static void CopyOverlapped( char_type* dst, const char_type* src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
      ::memmove( dst, src, sizeof( char_type )*n );
   }

   /*!
    * Compares two strings by the numeric values of their characters.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    If true (the default), characters are compared
    *                         directly. If false, the ToCaseFolded()
    *                         equivalents of the characters are compared.
    *
    * Returns:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * When the common prefix of both strings is identical, the shorter string
    * is the lesser one.
    *
    * Case-insensitive comparisons performed by this generic routine are only
    * valid for the ISO/IEC 8859-1 character set: characters in the ranges
    * 0x41-0x5A, 0xC0-0xD6 and 0xD8-0xDE are considered identical to their
    * counterparts in the ranges 0x61-0x7A, 0xE0-0xF6 and 0xF8-0xFE,
    * respectively.
    */
   static int CompareCodePoints( const char_type* __restrict__ s1, size_type n1,
                                 const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true ) noexcept
   {
      PCL_COMPARE_CODE_POINTS();
   }

   /*!
    * Lexicographical comparison of two generic strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    If true (the default), the comparison
    *                         distinguishes between lowercase and uppercase
    *                         characters.
    *
    * \param localeAware      Ignored by %GenericCharTraits. See the
    *                         reimplementations of this function in
    *                         IsoCharTraits and CharTraits for locale-aware
    *                         comparisons.
    *
    * Returns:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * This default implementation forwards to CompareCodePoints(), so it only
    * compares numeric character values and knows nothing about user locale
    * settings.
    */
   static int Compare( const char_type* __restrict__ s1, size_type n1,
                       const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true, bool localeAware = true ) noexcept
   {
      return CompareCodePoints( s1, n1, s2, n2, caseSensitive );
   }

   /*!
    * Wildcard string matching.
    *
    * \param t    The string to be matched.
    * \param nt   Length of the string to be matched in characters.
    * \param p    The pattern string, which may contain any number of '*' and
    *             '?' wildcard characters.
    * \param np   Length of the pattern string in characters.
    *
    * \param caseSensitive    If true (the default), characters are compared
    *                         directly. If false, both strings are compared
    *                         through ToCaseFolded().
    *
    * Returns true iff the string \a t matches the pattern \a p. By
    * convention, the result is always false when one of the strings is empty,
    * even if the pattern is a single asterisk '*'.
    */
   static bool WildMatch( const char_type* __restrict__ t, size_type nt,
                          const char_type* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
   {
      return caseSensitive ? pcl::WildMatch( t, nt, p, np )
                           : pcl::WildMatchIC( t, nt, p, np, ToCaseFolded, ToCaseFolded );
   }

   /*!
    * Returns the \e case \e folded equivalent of the code point \a c.
    *
    * Case folding eliminates case differences, which is what
    * case-insensitive string comparisons need.
    *
    * This default implementation returns ToLowercase( c ), and hence is only
    * valid for the ISO/IEC-8859-1 character set.
    */
   static char_type ToCaseFolded( char_type c ) noexcept
   {
      return ToLowercase( c );
   }

   /*!
    * Returns the lowercase equivalent of the code point \a c.
    *
    * Code points in the ranges 0x41-0x5A, 0xC0-0xD6 and 0xD8-0xDE are shifted
    * by 0x20; any other code point is returned unchanged. This is only valid
    * for the ISO/IEC-8859-1 character set.
    */
   static constexpr char_type ToLowercase( char_type c ) noexcept
   {
      return (   (c >= char_type( 0x41 ) && c <= char_type( 0x5A ))
              || (c >= char_type( 0xC0 ) && c <= char_type( 0xD6 ))
              || (c >= char_type( 0xD8 ) && c <= char_type( 0xDE )) ) ? c + 0x20 : c;
   }

   /*!
    * Returns the uppercase equivalent of the code point \a c.
    *
    * Code points in the ranges 0x61-0x7A, 0xE0-0xF6 and 0xF8-0xFE are shifted
    * by -0x20; any other code point is returned unchanged. This is only valid
    * for the ISO/IEC-8859-1 character set.
    */
   static constexpr char_type ToUppercase( char_type c ) noexcept
   {
      return (   (c >= char_type( 0x61 ) && c <= char_type( 0x7A ))
              || (c >= char_type( 0xE0 ) && c <= char_type( 0xF6 ))
              || (c >= char_type( 0xF8 ) && c <= char_type( 0xFE )) ) ? c - 0x20 : c;
   }

   /*!
    * Converts the \a n characters starting at \a s to lowercase, in place, by
    * applying the single-character ToLowercase() to each of them.
    */
   static void ToLowercase( char_type* __restrict__ s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( char_type* const end = s + n; s != end; ++s )
         *s = ToLowercase( *s );
   }

   /*!
    * Converts the \a n characters starting at \a s to uppercase, in place, by
    * applying the single-character ToUppercase() to each of them.
    */
   static void ToUppercase( char_type* __restrict__ s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( char_type* const end = s + n; s != end; ++s )
         *s = ToUppercase( *s );
   }

   /*!
    * The null string termination character '\\0'.
    */
   static constexpr char_type Null() noexcept
   {
      return char_type( 0 );
   }

   /*!
    * The blank space (white space) character.
    */
   static constexpr char_type Blank() noexcept
   {
      return char_type( ' ' );
   }

   /*!
    * The horizontal tab control character '\\t'.
    */
   static constexpr char_type Tab() noexcept
   {
      return char_type( '\t' );
   }

   /*!
    * The carriage return control character '\\r'.
    */
   static constexpr char_type CR() noexcept
   {
      return char_type( '\r' );
   }

   /*!
    * The line feed control character '\\n'.
    */
   static constexpr char_type LF() noexcept
   {
      return char_type( '\n' );
   }

   /*!
    * The comma punctuator character ','.
    */
   static constexpr char_type Comma() noexcept
   {
      return char_type( ',' );
   }

   /*!
    * The colon punctuator character ':'.
    */
   static constexpr char_type Colon() noexcept
   {
      return char_type( ':' );
   }

   /*!
    * The semicolon punctuator character ';'.
    */
   static constexpr char_type Semicolon() noexcept
   {
      return char_type( ';' );
   }

   /*!
    * The hyphen punctuator character '-'.
    */
   static constexpr char_type Hyphen() noexcept
   {
      return char_type( '-' );
   }

   /*!
    * The plus sign character '+'.
    */
   static constexpr char_type PlusSign() noexcept
   {
      return char_type( '+' );
   }

   /*!
    * The minus sign character '-'.
    */
   static constexpr char_type MinusSign() noexcept
   {
      return char_type( '-' );
   }

   /*!
    * The decimal separator character '.'.
    */
   static constexpr char_type DecimalSeparator() noexcept
   {
      return char_type( '.' );
   }

   /*!
    * The exponent delimiter character 'e'.
    */
   static constexpr char_type ExponentDelimiter() noexcept
   {
      return char_type( 'e' );
   }

   /*!
    * The underscore character '_'.
    */
   static constexpr char_type Underscore() noexcept
   {
      return char_type( '_' );
   }

   /*!
    * The single quote character "'".
    */
   static constexpr char_type SingleQuote() noexcept
   {
      return char_type( '\'' );
   }

   /*!
    * The double quote character '"'.
    */
   static constexpr char_type DoubleQuote() noexcept
   {
      return char_type( '\"' );
   }

   /*!
    * Returns true iff \a c is the null string terminator.
    */
   static constexpr bool IsNull( char_type c ) noexcept
   {
      return c == Null();
   }

   /*!
    * Returns true iff \a c is a white space character: a blank space, a
    * horizontal tab, a carriage return or a line feed.
    */
   static constexpr bool IsSpace( char_type c ) noexcept
   {
      return c == Blank() || c == Tab() || c == CR() || c == LF();
   }

   /*!
    * Returns true iff \a c is a trimmable character. In this class, the
    * result is the same as IsSpace( c ).
    */
   static constexpr bool IsTrimable( char_type c ) noexcept
   {
      return IsSpace( c );
   }

   /*!
    * Returns true iff \a c is a decimal digit, i.e. a character in the range
    * [0-9].
    */
   static constexpr bool IsDigit( char_type c ) noexcept
   {
      return c >= char_type( '0' ) && c <= char_type( '9' );
   }

   /*!
    * Returns true iff \a c is a hexadecimal digit, i.e. a character in the
    * range [0-9a-fA-F].
    */
   static constexpr bool IsHexDigit( char_type c ) noexcept
   {
      return IsDigit( c )
          || (c >= char_type( 'A' ) && c <= char_type( 'F' ))
          || (c >= char_type( 'a' ) && c <= char_type( 'f' ));
   }

   /*!
    * Returns true iff \a c is in the range [a-zA-Z].
    */
   static constexpr bool IsAlpha( char_type c ) noexcept
   {
      return IsLowercaseAlpha( c ) || IsUppercaseAlpha( c );
   }

   /*!
    * Returns true iff \a c is in the range [a-z].
    */
   static constexpr bool IsLowercaseAlpha( char_type c ) noexcept
   {
      return c >= char_type( 'a' ) && c <= char_type( 'z' );
   }

   /*!
    * Returns true iff \a c is in the range [A-Z].
    */
   static constexpr bool IsUppercaseAlpha( char_type c ) noexcept
   {
      return c >= char_type( 'A' ) && c <= char_type( 'Z' );
   }

   /*!
    * Returns true iff \a c is the underscore character '_'.
    */
   static constexpr bool IsUnderscore( char_type c ) noexcept
   {
      return c == Underscore();
   }

   /*!
    * Returns true iff \a c can be part of a symbol, i.e. it is in the range
    * [a-zA-Z0-9_].
    */
   static constexpr bool IsSymbolDigit( char_type c ) noexcept
   {
      return IsAlpha( c ) || IsDigit( c ) || IsUnderscore( c );
   }

   /*!
    * Returns true iff \a c can be the first character of a symbol, i.e. it is
    * in the range [a-zA-Z_].
    */
   static constexpr bool IsStartingSymbolDigit( char_type c ) noexcept
   {
      return IsAlpha( c ) || IsUnderscore( c );
   }

   /*!
    * Returns true iff \a c is a numerical sign, either '+' or '-'.
    */
   static constexpr bool IsSign( char_type c ) noexcept
   {
      return c == MinusSign() || c == PlusSign();
   }

   /*!
    * Returns true iff \a c is the decimal separator '.'.
    */
   static constexpr bool IsDecimalSeparator( char_type c ) noexcept
   {
      return c == DecimalSeparator();
   }

   /*!
    * Returns true iff \a c is an exponent delimiter, i.e. one of the
    * characters [eEdD]. The [dD] pair allows for FORTRAN compatibility.
    */
   static constexpr bool IsExponentDelimiter( char_type c ) noexcept
   {
      return c == char_type( 'e' )
          || c == char_type( 'E' )
          || c == char_type( 'd' )
          || c == char_type( 'D' );
   }

   /*!
    * Returns true iff \a c is a wildcard character: either an asterisk '*' or
    * a question mark '?'.
    */
   static constexpr bool IsWildcard( char_type c ) noexcept
   {
      return c == char_type( '*' ) || c == char_type( '?' );
   }

   /*!
    * Returns a pointer to the first non-trimmable character in the sequence
    * of contiguous characters defined by the range [i,j) of pointers.
    *
    * If every character in the range is trimmable, or if the range is empty,
    * the returned pointer compares equal to or greater than \a j.
    */
   template <typename Ptr1, typename Ptr2>
   static Ptr1 SearchTrimLeft( Ptr1 i, Ptr2 j ) noexcept
   {
      while ( i < j && IsTrimable( *i ) )
         ++i;
      return i;
   }

   /*!
    * Returns a pointer to the character \e after the last non-trimmable
    * character in the sequence of contiguous characters defined by the range
    * [i,j) of pointers.
    *
    * If the last character in the range is not trimmable, or if the range is
    * empty, this function returns the ending pointer \a j unchanged.
    */
   template <typename Ptr1, typename Ptr2>
   static Ptr2 SearchTrimRight( Ptr1 i, Ptr2 j ) noexcept
   {
      while ( i < j && IsTrimable( *(j-1) ) )
         --j;
      return j;
   }
};
// ----------------------------------------------------------------------------
/*!
* \class IsoCharTraits
* \brief A template instantiation of GenericCharTraits for the \c char type.
*
* %IsoCharTraits is the char traits class used by IsoString.
*
* \ingroup char_trait_classes
*/
class PCL_CLASS IsoCharTraits : public GenericCharTraits<char>
{
public:

   /*!
    * The base class of this char traits class.
    */
   using traits_base = GenericCharTraits<char>;

   /*!
    * The character data type characterized by this traits class.
    */
   using char_type = traits_base::char_type;

   /*!
    * Returns the length in characters (\e not bytes) of a null-terminated
    * 8-bit string.
    *
    * The result is obtained by calling strlen(). If \a s is \c nullptr, zero
    * is returned.
    */
   static constexpr size_type Length( const char_type* __restrict__ s ) noexcept
   {
      return (s == nullptr) ? 0 : ::strlen( s );
   }

   /*!
    * Assigns a constant value to every character in a contiguous sequence.
    *
    * \param s    Address of the first character to be written.
    * \param c    Value assigned to each character.
    * \param n    Number of characters to write.
    *
    * The sequence is filled with memset().
    */
   static void Fill( char_type* __restrict__ s, char_type c, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      // One byte per character: the character count is the byte count.
      ::memset( s, c, n );
   }

   /*!
    * Copies \a n characters between two regions that do not overlap.
    *
    * \param dst  Address where the characters will be written.
    * \param src  Address of the first character to be copied.
    * \param n    Number of characters to copy.
    *
    * The copy is performed with memcpy(), so the result is unpredictable if
    * the regions overlap. Use CopyOverlapped() in such cases.
    */
   static void Copy( char_type* __restrict__ dst, const char_type* __restrict__ src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
      // One byte per character: the character count is the byte count.
      ::memcpy( dst, src, n );
   }

   /*!
    * Copies \a n characters between two regions that are allowed to overlap.
    *
    * \param dst  Address where the characters will be written.
    * \param src  Address of the first character to be copied.
    * \param n    Number of characters to copy.
    *
    * The copy is performed with memmove().
    */
   static void CopyOverlapped( char_type* dst, const char_type* src, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
      // One byte per character: the character count is the byte count.
      ::memmove( dst, src, n );
   }

   /*!
    * Compares two 8-bit strings by the numeric values of their characters.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    If true (the default), characters are compared
    *                         directly. If false, the ToCaseFolded()
    *                         equivalents of the characters are compared.
    *
    * Returns:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    */
   static int CompareCodePoints( const char_type* __restrict__ s1, size_type n1,
                                 const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true ) noexcept
   {
      PCL_COMPARE_CODE_POINTS();
   }

   /*!
    * Lexicographical comparison of two 8-bit strings.
    *
    * \param s1   First string.
    * \param n1   Length of the first string in characters.
    * \param s2   Second string.
    * \param n2   Length of the second string in characters.
    *
    * \param caseSensitive    If true (the default), the comparison
    *                         distinguishes between lowercase and uppercase
    *                         characters, as defined by the current locale.
    *
    * \param localeAware      If true (the default), the comparison takes
    *                         into account the currently selected user locale
    *                         (language and variants). If false, an invariant
    *                         comparison of code points is carried out, which
    *                         may be faster.
    *
    * Returns:
    *
    * \li  0 if \a s1 and \a s2 are equal
    * \li -1 if \a s1 is less than \a s2
    * \li +1 if \a s1 is greater than \a s2
    *
    * On OS X and Windows platforms this function invokes system API routines
    * to perform locale-aware string comparisons.
    *
    * On X11 (FreeBSD and Linux platforms), when \a localeAware is true the
    * comparison uses the current locale, as reported by the setlocale POSIX
    * function:
    *
    * \code
    * setlocale( LC_COLLATE, 0 );
    * \endcode
    *
    * In PixInsight on X11 platforms, the default user collation and case
    * comparison locales are used. A module can replace them \e temporarily
    * with a custom locale:
    *
    * \code
    * setlocale( LC_COLLATE, "<custom-locale>" );
    * setlocale( LC_CTYPE, "<custom-locale>" );
    * \endcode
    *
    * and restore the default settings once the work that requires the custom
    * locale has been done:
    *
    * \code
    * setlocale( LC_COLLATE, "" );
    * setlocale( LC_CTYPE, "" );
    * \endcode
    */
   static int Compare( const char_type* __restrict__ s1, size_type n1,
                       const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true, bool localeAware = true ) noexcept;

   /*!
    * Wildcard string matching.
    *
    * \param t    The string to be matched.
    * \param nt   Length of the string to be matched in characters.
    * \param p    The pattern string, which may contain any number of '*' and
    *             '?' wildcard characters.
    * \param np   Length of the pattern string in characters.
    *
    * \param caseSensitive    If true (the default), characters are compared
    *                         directly. If false, both strings are compared
    *                         through ToCaseFolded().
    *
    * Returns true iff the string \a t matches the pattern \a p. By
    * convention, the result is always false when one of the strings is empty,
    * even if the pattern is a single asterisk '*'.
    */
   static bool WildMatch( const char_type* __restrict__ t, size_type nt,
                          const char_type* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
   {
      if ( !caseSensitive )
      {
         // ToCaseFolded is overloaded in this class, so it is wrapped in a
         // lambda that selects the single-character version.
         const auto fold = []( char_type c ) { return ToCaseFolded( c ); };
         return pcl::WildMatchIC( t, nt, p, np, fold, fold );
      }
      return pcl::WildMatch( t, nt, p, np );
   }

   /*!
    * Returns the \e case \e folded equivalent of the ISO/IEC-8859-1 code
    * point \a c.
    *
    * Case folding eliminates case differences, which is what
    * case-insensitive string comparisons need. This implementation returns
    * ToLowercase( c ).
    *
    * For more information on case folding, see Section 3.13 Default Case
    * Algorithms in The Unicode Standard.
    */
   static char_type ToCaseFolded( char_type c ) noexcept
   {
      return ToLowercase( c );
   }

   /*!
    * Returns the lowercase equivalent of the ISO/IEC-8859-1 code point \a c,
    * as given by the PCL_toLowercaseLatin1 lookup table.
    */
   static char_type ToLowercase( char_type c ) noexcept
   {
      // Index the table with the unsigned value of the character.
      return static_cast<char_type>( PCL_toLowercaseLatin1[static_cast<uint8>( c )] );
   }

   /*!
    * Returns the uppercase equivalent of the ISO/IEC-8859-1 code point \a c,
    * as given by the PCL_toUppercaseLatin1 lookup table.
    */
   static char_type ToUppercase( char_type c ) noexcept
   {
      // Index the table with the unsigned value of the character.
      return static_cast<char_type>( PCL_toUppercaseLatin1[static_cast<uint8>( c )] );
   }

   /*!
    * Replaces each of the \a n characters starting at \a s with its case
    * folded equivalent.
    */
   static void ToCaseFolded( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( char_type* const end = s + n; s != end; ++s )
         *s = ToCaseFolded( *s );
   }

   /*!
    * Replaces each of the \a n characters starting at \a s with its
    * lowercase equivalent.
    */
   static void ToLowercase( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( char_type* const end = s + n; s != end; ++s )
         *s = ToLowercase( *s );
   }

   /*!
    * Replaces each of the \a n characters starting at \a s with its
    * uppercase equivalent.
    */
   static void ToUppercase( char_type* s, size_type n ) noexcept
   {
      PCL_PRECONDITION( n == 0 || s != nullptr )
      for ( char_type* const end = s + n; s != end; ++s )
         *s = ToUppercase( *s );
   }
};
// ----------------------------------------------------------------------------
/*!
* \class CharTraits
* \brief A template instantiation of GenericCharTraits for \c char16_type.
*
* %CharTraits is the char traits class used by String.
*
* \ingroup char_trait_classes
*/
class PCL_CLASS CharTraits : public GenericCharTraits<char16_type>
{
public:
/*!
* Base class of this char traits class.
*/
typedef GenericCharTraits<char16_type> traits_base;
/*!
* Represents the character data type used by this traits class.
*/
typedef traits_base::char_type char_type;
/*!
* Returns the length of a null-terminated UTF-16 string in characters
* (\e not bytes).
*
* The returned value is the length of the initial contiguous sequence of
* characters that are not equal to Null().
*/
static size_type Length( const char_type* __restrict__ s ) noexcept
{
#ifdef __PCL_WINDOWS
   // The string is handed to wcslen() reinterpreted as a wchar_t string.
   // A null pointer is reported as an empty string.
   if ( s == nullptr )
      return 0u;
   return ::wcslen( reinterpret_cast<const wchar_t*>( s ) );
#else
   // Elsewhere, use the generic character-by-character scan of the base class.
   return traits_base::Length( s );
#endif
}
/*!
* Copies a contiguous sequence of characters to a specified location. The
* source and destination regions do not overlap.
*
* \param dst Destination location where characters will be copied.
* \param src Initial address of the sequence of source characters.
* \param n Number of characters to copy.
*
* If the source and destination regions overlap, this routine will produce
* an unpredictable result. CopyOverlapped() should be used in these cases.
*/
static void Copy( char_type* __restrict__ dst, const char_type* __restrict__ src, size_type n ) noexcept
{
   PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
   // The shift doubles the character count to get the byte count; this
   // assumes that char16_type is two bytes wide.
   const size_type numberOfBytes = n << 1;
   ::memcpy( dst, src, numberOfBytes );
}
/*!
* Copies a contiguous sequence of characters to a specified location. The
* source and destination regions may safely overlap.
*
* \param dst Destination location where characters will be copied.
* \param src Initial address of the sequence of source characters.
* \param n Number of characters to copy.
*/
static void CopyOverlapped( char_type* dst, const char_type* src, size_type n ) noexcept
{
PCL_PRECONDITION( n == 0 || dst != nullptr && src != nullptr )
::memmove( dst, src, n << 1 );
}
/*!
* Compares numeric character values between two Unicode strings.
*
* \param s1 First string.
* \param n1 Length of the first string in characters.
* \param s2 Second string.
* \param n2 Length of the second string in characters.
*
* \param caseSensitive When true, a case-sensitive comparison is
* performed; otherwise the comparison does not
* distinguish between lowercase and uppercase
* characters. The default value of this parameter
* is true.
*
* Returns an integer code representing the comparison result:
*
* \li 0 if \a s1 and \a s2 are equal
* \li -1 if \a s1 is less than \a s2
* \li +1 if \a s1 is greater than \a s2
*/
static int CompareCodePoints( const char_type* __restrict__ s1, size_type n1,
const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true ) noexcept
{
PCL_COMPARE_CODE_POINTS();
}
/*!
* Lexicographical comparison between two Unicode strings.
*
* \param s1 First string.
* \param n1 Length of the first string in characters.
* \param s2 Second string.
* \param n2 Length of the second string in characters.
*
* \param caseSensitive When true, a case-sensitive comparison is
* performed; otherwise the comparison does not
* distinguish between lowercase and uppercase
* characters (as defined by the selected locale).
* The default value of this parameter is true.
*
* \param localeAware When true, a locale-aware comparison is
* performed which takes into account the currently
* selected user locale (language and variants).
* When false, an invariant comparison is carried
* out by comparing Unicode code points (which may
* be faster). The default value is true.
*
* Returns an integer code representing the comparison result:
*
* \li 0 if \a s1 and \a s2 are equal
* \li -1 if \a s1 is less than \a s2
* \li +1 if \a s1 is greater than \a s2
*
* On OS X and Windows platforms this function invokes system API routines
* to perform locale-aware string comparisons.
*
* On X11 (FreeBSD and Linux platforms), when the \a localeAware
* parameter is true, the comparison uses the current locale as reported by
* calling the setlocale POSIX function:
*
* \code
* setlocale( LC_COLLATE, 0 );
* \endcode
*
* In PixInsight on X11 platforms, the default user collation and case
* comparison locales are used. A module can change them \e temporarily to
* a custom locale by calling:
*
* \code
* setlocale( LC_COLLATE, "<custom-locale>" );
* setlocale( LC_CTYPE, "<custom-locale>" );
* \endcode
*
* and then, after the necessary work has been done with the custom locales,
* restore the default settings:
*
* \code
* setlocale( LC_COLLATE, "" );
* setlocale( LC_CTYPE, "" );
* \endcode
*/
static int Compare( const char_type* __restrict__ s1, size_type n1,
const char_type* __restrict__ s2, size_type n2, bool caseSensitive = true, bool localeAware = true ) noexcept;
/*!
* Wildcard string matching algorithm.
*
* \param t The string to be matched.
*
* \param nt Length of the string to be matched in characters.
*
* \param p The pattern string. May contain multiple instances of the
* wildcard characters '*' and '?'.
*
* \param np Length of the pattern string in characters.
*
* \param caseSensitive When true, a case-sensitive comparison is
* performed; otherwise the comparison does not
* distinguish between lowercase and uppercase
* characters. The default value of this parameter
* is true.
*
* Returns true iff the string \a t matches the specified pattern \a p. If
* one of the strings is empty, this function always returns false
* conventionally, even if the pattern is a single asterisk '*'.
*/
static bool WildMatch( const char_type* __restrict__ t, size_type nt,
const char_type* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
{
if ( caseSensitive )
return pcl::WildMatch( t, nt, p, np );
return pcl::WildMatchIC( t, nt, p, np,
[]( char_type c ) { return ToCaseFolded( c ); },
[]( char_type c ) { return ToCaseFolded( c ); } );
}
/*!
* Wildcard string matching algorithm - overloaded version with 8-bit
* pattern string.
*/
static bool WildMatch( const char_type* __restrict__ t, size_type nt,
const char* __restrict__ p, size_type np, bool caseSensitive = true ) noexcept
{
if ( caseSensitive )
return pcl::WildMatch( t, nt, p, np );
return pcl::WildMatchIC( t, nt, p, np,
[]( char_type c ) { return ToCaseFolded( c ); },
[]( char c ) { return IsoCharTraits::ToCaseFolded( c ); } );
}
/*!
* Returns the \e case \e folded equivalent character for the specified
* UTF-16 code point \a c.
*
* Case folding elimitates case differences, which is useful for
* case-insensitive string comparisons.
*
* We implement the \e simple \e case \e folding Unicode algorithm
* exclusively. For more information on case folding, see Section 3.13
* Default Case Algorithms in The Unicode Standard.
*/
static char_type ToCaseFolded( char_type c ) noexcept
{
if ( c < 256 )
{
if ( c >= 65 && c <= 90 || c >= 192 && c <= 214 || c >= 216 && c <= 222 )
return c + 32;
return c;
}
return PCL_ToCaseFolded( c );
}
/*!
* Returns the lowercase equivalent character for the specified UTF-16 code
* point \a c.
*/
static char_type ToLowercase( char_type c ) noexcept
{
if ( c < 256 )
{
if ( c >= 65 && c <= 90 || c >= 192 && c <= 214 || c >= 216 && c <= 222 )
return c + 32;
return c;
}
return PCL_ToLowercase( c );
}
/*!
* Returns the uppercase equivalent character for the specified UTF-16 code
* point \a c.
*/
static char_type ToUppercase( char_type c ) noexcept
{
if ( c < 256 )
{
if ( c >= 97 && c <= 122 || c >= 224 && c <= 246 || c >= 248 && c <= 254 )
return c - 32;
return c;
}
return PCL_ToUppercase( c );
}
/*!
* Transforms a string to case folded.
*/
static void ToCaseFolded( char_type* s, size_type n ) noexcept
{
PCL_PRECONDITION( n == 0 || s != nullptr )
for ( ; n > 0; --n, ++s )
*s = ToCaseFolded( *s );
}
/*!
* Transforms a string to lowercase.
*/
static void ToLowercase( char_type* s, size_type n ) noexcept
{
PCL_PRECONDITION( n == 0 || s != nullptr )
for ( ; n > 0; --n, ++s )
*s = ToLowercase( *s );
}
/*!
* Transforms a string to uppercase.
*/
static void ToUppercase( char_type* s, size_type n ) noexcept
{
PCL_PRECONDITION( n == 0 || s != nullptr )
for ( ; n > 0; --n, ++s )
*s = ToUppercase( *s );
}
/*!
* Returns true iff the specified UTF-16 character is a <em>high
* surrogate</em> (the most significant word of a <em>surrogate pair</em>
* forming a UTF-32 code point). High surrogates have values between 0xD800
* and 0xDBFF.
*/
static constexpr bool IsHighSurrogate( char_type c16 ) noexcept
{
return (c16 & 0xFC00) == 0xD800;
}
/*!
* Returns the <em>high surrogate</em> word of a UTF-32 code point. The
* specified UTF-32 code must be in the range from 0x010000 to 0x10FFFF,
* since surrogates only exist outside the Basic Multilingual Plane of
* Unicode.
*/
static constexpr char_type HighSurrogate( char32_type c32 ) noexcept
{
return char_type( (c32 >> 10) + 0xD7C0 );
}
/*!
* Returns true iff the specified UTF-16 character is a <em>low
* surrogate</em> (the least significant word of a <em>surrogate pair</em>
* forming a UTF-32 code point). Low surrogates have values between 0xDC00
* and 0xDFFF.
*/
static constexpr bool IsLowSurrogate( char_type c16 ) noexcept
{
return (c16 & 0xFC00) == 0xDC00;
}
/*!
* Returns the <em>low surrogate</em> word of a UTF-32 code point. The
* specified UTF-32 code must be in the range from 0x010000 to 0x10FFFF,
* since surrogates only exist outside the Basic Multilingual Plane of
* Unicode.
*/
static constexpr char_type LowSurrogate( char32_type c32 ) noexcept
{
return char_type( (c32%0x400) + 0xDC00 );
}
/*!
* Returns a UTF-32 code point from its <em>surrogate pair</em>. The
* specified surrogate words must pertain to a valid Unicode code point
* outside the Basic Multilingual Plane (from 0x010000 to 0x10FFFF).
*/
static constexpr char32_type SurrogatePairToUTF32( char_type high, char_type low ) noexcept
{
return (char32_type( high ) << 10) + low - 0x035FDC00;
}
};
// ----------------------------------------------------------------------------
} // pcl
#endif // __PCL_CharTraits_h
// ----------------------------------------------------------------------------
// EOF pcl/CharTraits.h - Released 2022-03-12T18:59:29Z