Файловый менеджер - Редактировать - /home/skymarketplace/public_html/uploads/pcre2lib.tar
Назад
config.h 0000644 00000007064 15004461075 0006173 0 ustar 00 #include <php_compat.h> #ifdef PHP_WIN32 # include <config.w32.h> #else # include <php_config.h> #endif #define SUPPORT_UNICODE 1 #define SUPPORT_PCRE2_8 1 #if defined(__GNUC__) && __GNUC__ >= 4 # ifdef __cplusplus # define PCRE2_EXP_DECL extern "C" __attribute__ ((visibility("default"))) # else # define PCRE2_EXP_DECL extern __attribute__ ((visibility("default"))) # endif # define PCRE2_EXP_DEFN __attribute__ ((visibility("default"))) #endif /* Define to any value for valgrind support to find invalid memory reads. */ #ifdef HAVE_PCRE_VALGRIND_SUPPORT #define SUPPORT_VALGRIND 1 #endif /* Define to any value to enable support for Just-In-Time compiling. */ #ifdef HAVE_PCRE_JIT_SUPPORT #define SUPPORT_JIT #endif /* This limits the amount of memory that pcre2_match() may use while matching a pattern. The value is in kilobytes. */ #ifndef HEAP_LIMIT #define HEAP_LIMIT 20000000 #endif /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system stack that is used while compiling a pattern. */ #ifndef PARENS_NEST_LIMIT #define PARENS_NEST_LIMIT 250 #endif /* The value of MATCH_LIMIT determines the default number of times the pcre2_match() function can record a backtrack position during a single matching attempt. There is a runtime interface for setting a different limit. The limit exists in order to catch runaway regular expressions that take for ever to determine that they do not match. The default is set very large so that it does not accidentally catch legitimate cases. */ #ifndef MATCH_LIMIT #define MATCH_LIMIT 10000000 #endif /* The above limit applies to all backtracks, whether or not they are nested. In some environments it is desirable to limit the nesting of backtracking (that is, the depth of tree that is searched) more strictly, in order to restrict the maximum amount of heap memory that is used. The value of MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it must be less than the value of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is a runtime method for setting a different limit. */ #ifndef MATCH_LIMIT_DEPTH #define MATCH_LIMIT_DEPTH MATCH_LIMIT #endif /* This limit is parameterized just in case anybody ever wants to change it. Care must be taken if it is increased, because it guards against integer overflow caused by enormously large patterns. */ #ifndef MAX_NAME_COUNT #define MAX_NAME_COUNT 10000 #endif /* This limit is parameterized just in case anybody ever wants to change it. Care must be taken if it is increased, because it guards against integer overflow caused by enormously large patterns. */ #ifndef MAX_NAME_SIZE #define MAX_NAME_SIZE 32 #endif /* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ /* #undef NEVER_BACKSLASH_C */ /* The value of NEWLINE_DEFAULT determines the default newline character sequence. PCRE2 client programs can override this by selecting other values at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 (ANYCRLF), and 6 (NUL). */ #ifndef NEWLINE_DEFAULT #define NEWLINE_DEFAULT 2 #endif /* The value of LINK_SIZE determines the number of bytes used to store links as offsets within the compiled regex. The default is 2, which allows for compiled patterns up to 64K long. This covers the vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This allows for longer patterns in extreme cases. */ #ifndef LINK_SIZE #define LINK_SIZE 2 #endif pcre2.h 0000644 00000134020 15004461075 0005732 0 ustar 00 /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ /* This is the public header file for the PCRE library, second API, to be #included by applications that call PCRE2 functions. Copyright (c) 2016-2020 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ #ifndef PCRE2_H_IDEMPOTENT_GUARD #define PCRE2_H_IDEMPOTENT_GUARD /* The current PCRE version information. */ #define PCRE2_MAJOR 10 #define PCRE2_MINOR 35 #define PCRE2_PRERELEASE #define PCRE2_DATE 2020-05-09 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate export setting is defined in pcre2_internal.h, which includes this file. So we don't change existing definitions of PCRE2_EXP_DECL. */ #if defined(_WIN32) && !defined(PCRE2_STATIC) # ifndef PCRE2_EXP_DECL # define PCRE2_EXP_DECL extern __declspec(dllimport) # endif #endif /* By default, we use the standard "extern" declarations. */ #ifndef PCRE2_EXP_DECL # ifdef __cplusplus # define PCRE2_EXP_DECL extern "C" # else # define PCRE2_EXP_DECL extern # endif #endif /* When compiling with the MSVC compiler, it is sometimes necessary to include a "calling convention" before exported function names. (This is secondhand information; I know nothing about MSVC myself). For example, something like void __cdecl function(....) might be needed. In order so make this easy, all the exported functions have PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not set, we ensure here that it has no effect. */ #ifndef PCRE2_CALL_CONVENTION #define PCRE2_CALL_CONVENTION #endif /* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do not have stdint.h, which is why we use inttypes.h, which according to the C standard is a superset of stdint.h. If none of these headers are available, the relevant values must be provided by some other means. */ #include <limits.h> #include <stdlib.h> #include <inttypes.h> /* Allow for C++ users compiling this directly. */ #ifdef __cplusplus extern "C" { #endif /* The following option bits can be passed to pcre2_compile(), pcre2_match(), or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it is passed. Put these bits at the most significant end of the options word so others can be added next to them */ #define PCRE2_ANCHORED 0x80000000u #define PCRE2_NO_UTF_CHECK 0x40000000u #define PCRE2_ENDANCHORED 0x20000000u /* The following option bits can be passed only to pcre2_compile(). However, they may affect compilation, JIT compilation, and/or interpretive execution. The following tags indicate which: C alters what is compiled by pcre2_compile() J alters what is compiled by pcre2_jit_compile() M is inspected during pcre2_match() execution D is inspected during pcre2_dfa_match() execution */ #define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */ #define PCRE2_ALT_BSUX 0x00000002u /* C */ #define PCRE2_AUTO_CALLOUT 0x00000004u /* C */ #define PCRE2_CASELESS 0x00000008u /* C */ #define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */ #define PCRE2_DOTALL 0x00000020u /* C */ #define PCRE2_DUPNAMES 0x00000040u /* C */ #define PCRE2_EXTENDED 0x00000080u /* C */ #define PCRE2_FIRSTLINE 0x00000100u /* J M D */ #define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */ #define PCRE2_MULTILINE 0x00000400u /* C */ #define PCRE2_NEVER_UCP 0x00000800u /* C */ #define PCRE2_NEVER_UTF 0x00001000u /* C */ #define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */ #define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */ #define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */ #define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */ #define PCRE2_UCP 0x00020000u /* C J M D */ #define PCRE2_UNGREEDY 0x00040000u /* C */ #define PCRE2_UTF 0x00080000u /* C J M D */ #define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ #define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ #define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ #define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ #define PCRE2_EXTENDED_MORE 0x01000000u /* C */ #define PCRE2_LITERAL 0x02000000u /* C */ #define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ /* An additional compile options word is available in the compile context. */ #define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */ #define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */ #define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */ #define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ #define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ /* These are for pcre2_jit_compile(). */ #define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */ #define PCRE2_JIT_PARTIAL_SOFT 0x00000002u #define PCRE2_JIT_PARTIAL_HARD 0x00000004u #define PCRE2_JIT_INVALID_UTF 0x00000100u /* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and pcre2_substitute(). Some are allowed only for one of the functions, and in these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these functions (though pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */ #define PCRE2_NOTBOL 0x00000001u #define PCRE2_NOTEOL 0x00000002u #define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */ #define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */ #define PCRE2_PARTIAL_SOFT 0x00000010u #define PCRE2_PARTIAL_HARD 0x00000020u #define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */ #define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */ #define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */ #define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */ #define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */ #define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */ #define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */ #define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */ #define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u #define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */ #define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */ #define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */ /* Options for pcre2_pattern_convert(). */ #define PCRE2_CONVERT_UTF 0x00000001u #define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u #define PCRE2_CONVERT_POSIX_BASIC 0x00000004u #define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u #define PCRE2_CONVERT_GLOB 0x00000010u #define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u #define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u /* Newline and \R settings, for use in compile contexts. The newline values must be kept in step with values set in config.h and both sets must all be greater than zero. */ #define PCRE2_NEWLINE_CR 1 #define PCRE2_NEWLINE_LF 2 #define PCRE2_NEWLINE_CRLF 3 #define PCRE2_NEWLINE_ANY 4 #define PCRE2_NEWLINE_ANYCRLF 5 #define PCRE2_NEWLINE_NUL 6 #define PCRE2_BSR_UNICODE 1 #define PCRE2_BSR_ANYCRLF 2 /* Error codes for pcre2_compile(). Some of these are also used by pcre2_pattern_convert(). */ #define PCRE2_ERROR_END_BACKSLASH 101 #define PCRE2_ERROR_END_BACKSLASH_C 102 #define PCRE2_ERROR_UNKNOWN_ESCAPE 103 #define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104 #define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105 #define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106 #define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107 #define PCRE2_ERROR_CLASS_RANGE_ORDER 108 #define PCRE2_ERROR_QUANTIFIER_INVALID 109 #define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110 #define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111 #define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112 #define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113 #define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114 #define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115 #define PCRE2_ERROR_NULL_PATTERN 116 #define PCRE2_ERROR_BAD_OPTIONS 117 #define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118 #define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119 #define PCRE2_ERROR_PATTERN_TOO_LARGE 120 #define PCRE2_ERROR_HEAP_FAILED 121 #define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122 #define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123 #define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124 #define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125 #define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126 #define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127 #define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128 #define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129 #define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130 #define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131 #define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132 #define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133 #define PCRE2_ERROR_CODE_POINT_TOO_BIG 134 #define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135 #define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136 #define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137 #define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138 #define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139 #define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140 #define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141 #define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142 #define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143 #define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144 #define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145 #define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146 #define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147 #define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148 #define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149 #define PCRE2_ERROR_CLASS_INVALID_RANGE 150 #define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151 #define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152 #define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153 #define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154 #define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155 #define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156 #define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157 #define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158 /* Error 159 is obsolete and should now never occur */ #define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159 #define PCRE2_ERROR_VERB_UNKNOWN 160 #define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161 #define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162 #define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163 #define PCRE2_ERROR_INVALID_OCTAL 164 #define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165 #define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166 #define PCRE2_ERROR_INVALID_HEXADECIMAL 167 #define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168 #define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169 #define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170 #define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171 #define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172 #define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173 #define PCRE2_ERROR_UTF_IS_DISABLED 174 #define PCRE2_ERROR_UCP_IS_DISABLED 175 #define PCRE2_ERROR_VERB_NAME_TOO_LONG 176 #define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177 #define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178 #define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179 #define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180 #define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181 #define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182 #define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183 #define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184 #define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185 #define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186 #define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187 #define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188 #define PCRE2_ERROR_INTERNAL_BAD_CODE 189 #define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 #define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 #define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 #define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 #define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 #define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 /* "Expected" matching error codes: no match and partial match. */ #define PCRE2_ERROR_NOMATCH (-1) #define PCRE2_ERROR_PARTIAL (-2) /* Error codes for UTF-8 validity checks */ #define PCRE2_ERROR_UTF8_ERR1 (-3) #define PCRE2_ERROR_UTF8_ERR2 (-4) #define PCRE2_ERROR_UTF8_ERR3 (-5) #define PCRE2_ERROR_UTF8_ERR4 (-6) #define PCRE2_ERROR_UTF8_ERR5 (-7) #define PCRE2_ERROR_UTF8_ERR6 (-8) #define PCRE2_ERROR_UTF8_ERR7 (-9) #define PCRE2_ERROR_UTF8_ERR8 (-10) #define PCRE2_ERROR_UTF8_ERR9 (-11) #define PCRE2_ERROR_UTF8_ERR10 (-12) #define PCRE2_ERROR_UTF8_ERR11 (-13) #define PCRE2_ERROR_UTF8_ERR12 (-14) #define PCRE2_ERROR_UTF8_ERR13 (-15) #define PCRE2_ERROR_UTF8_ERR14 (-16) #define PCRE2_ERROR_UTF8_ERR15 (-17) #define PCRE2_ERROR_UTF8_ERR16 (-18) #define PCRE2_ERROR_UTF8_ERR17 (-19) #define PCRE2_ERROR_UTF8_ERR18 (-20) #define PCRE2_ERROR_UTF8_ERR19 (-21) #define PCRE2_ERROR_UTF8_ERR20 (-22) #define PCRE2_ERROR_UTF8_ERR21 (-23) /* Error codes for UTF-16 validity checks */ #define PCRE2_ERROR_UTF16_ERR1 (-24) #define PCRE2_ERROR_UTF16_ERR2 (-25) #define PCRE2_ERROR_UTF16_ERR3 (-26) /* Error codes for UTF-32 validity checks */ #define PCRE2_ERROR_UTF32_ERR1 (-27) #define PCRE2_ERROR_UTF32_ERR2 (-28) /* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction functions, context functions, and serializing functions. They are in numerical order. Originally they were in alphabetical order too, but now that PCRE2 is released, the numbers must not be changed. */ #define PCRE2_ERROR_BADDATA (-29) #define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */ #define PCRE2_ERROR_BADMAGIC (-31) #define PCRE2_ERROR_BADMODE (-32) #define PCRE2_ERROR_BADOFFSET (-33) #define PCRE2_ERROR_BADOPTION (-34) #define PCRE2_ERROR_BADREPLACEMENT (-35) #define PCRE2_ERROR_BADUTFOFFSET (-36) #define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */ #define PCRE2_ERROR_DFA_BADRESTART (-38) #define PCRE2_ERROR_DFA_RECURSE (-39) #define PCRE2_ERROR_DFA_UCOND (-40) #define PCRE2_ERROR_DFA_UFUNC (-41) #define PCRE2_ERROR_DFA_UITEM (-42) #define PCRE2_ERROR_DFA_WSSIZE (-43) #define PCRE2_ERROR_INTERNAL (-44) #define PCRE2_ERROR_JIT_BADOPTION (-45) #define PCRE2_ERROR_JIT_STACKLIMIT (-46) #define PCRE2_ERROR_MATCHLIMIT (-47) #define PCRE2_ERROR_NOMEMORY (-48) #define PCRE2_ERROR_NOSUBSTRING (-49) #define PCRE2_ERROR_NOUNIQUESUBSTRING (-50) #define PCRE2_ERROR_NULL (-51) #define PCRE2_ERROR_RECURSELOOP (-52) #define PCRE2_ERROR_DEPTHLIMIT (-53) #define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */ #define PCRE2_ERROR_UNAVAILABLE (-54) #define PCRE2_ERROR_UNSET (-55) #define PCRE2_ERROR_BADOFFSETLIMIT (-56) #define PCRE2_ERROR_BADREPESCAPE (-57) #define PCRE2_ERROR_REPMISSINGBRACE (-58) #define PCRE2_ERROR_BADSUBSTITUTION (-59) #define PCRE2_ERROR_BADSUBSPATTERN (-60) #define PCRE2_ERROR_TOOMANYREPLACE (-61) #define PCRE2_ERROR_BADSERIALIZEDDATA (-62) #define PCRE2_ERROR_HEAPLIMIT (-63) #define PCRE2_ERROR_CONVERT_SYNTAX (-64) #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) #define PCRE2_ERROR_DFA_UINVALID_UTF (-66) /* Request types for pcre2_pattern_info() */ #define PCRE2_INFO_ALLOPTIONS 0 #define PCRE2_INFO_ARGOPTIONS 1 #define PCRE2_INFO_BACKREFMAX 2 #define PCRE2_INFO_BSR 3 #define PCRE2_INFO_CAPTURECOUNT 4 #define PCRE2_INFO_FIRSTCODEUNIT 5 #define PCRE2_INFO_FIRSTCODETYPE 6 #define PCRE2_INFO_FIRSTBITMAP 7 #define PCRE2_INFO_HASCRORLF 8 #define PCRE2_INFO_JCHANGED 9 #define PCRE2_INFO_JITSIZE 10 #define PCRE2_INFO_LASTCODEUNIT 11 #define PCRE2_INFO_LASTCODETYPE 12 #define PCRE2_INFO_MATCHEMPTY 13 #define PCRE2_INFO_MATCHLIMIT 14 #define PCRE2_INFO_MAXLOOKBEHIND 15 #define PCRE2_INFO_MINLENGTH 16 #define PCRE2_INFO_NAMECOUNT 17 #define PCRE2_INFO_NAMEENTRYSIZE 18 #define PCRE2_INFO_NAMETABLE 19 #define PCRE2_INFO_NEWLINE 20 #define PCRE2_INFO_DEPTHLIMIT 21 #define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */ #define PCRE2_INFO_SIZE 22 #define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_HEAPLIMIT 25 #define PCRE2_INFO_EXTRAOPTIONS 26 /* Request types for pcre2_config(). */ #define PCRE2_CONFIG_BSR 0 #define PCRE2_CONFIG_JIT 1 #define PCRE2_CONFIG_JITTARGET 2 #define PCRE2_CONFIG_LINKSIZE 3 #define PCRE2_CONFIG_MATCHLIMIT 4 #define PCRE2_CONFIG_NEWLINE 5 #define PCRE2_CONFIG_PARENSLIMIT 6 #define PCRE2_CONFIG_DEPTHLIMIT 7 #define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */ #define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */ #define PCRE2_CONFIG_UNICODE 9 #define PCRE2_CONFIG_UNICODE_VERSION 10 #define PCRE2_CONFIG_VERSION 11 #define PCRE2_CONFIG_HEAPLIMIT 12 #define PCRE2_CONFIG_NEVER_BACKSLASH_C 13 #define PCRE2_CONFIG_COMPILED_WIDTHS 14 #define PCRE2_CONFIG_TABLES_LENGTH 15 /* Types for code units in patterns and subject strings. */ typedef uint8_t PCRE2_UCHAR8; typedef uint16_t PCRE2_UCHAR16; typedef uint32_t PCRE2_UCHAR32; typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; /* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2, including pattern offsets for errors and subject offsets after a match. We define special values to indicate zero-terminated strings and unset offsets in the offset vector (ovector). */ #define PCRE2_SIZE size_t #define PCRE2_SIZE_MAX SIZE_MAX #define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) #define PCRE2_UNSET (~(PCRE2_SIZE)0) /* Generic types for opaque structures and JIT callback functions. These declarations are defined in a macro that is expanded for each width later. */ #define PCRE2_TYPES_LIST \ struct pcre2_real_general_context; \ typedef struct pcre2_real_general_context pcre2_general_context; \ \ struct pcre2_real_compile_context; \ typedef struct pcre2_real_compile_context pcre2_compile_context; \ \ struct pcre2_real_match_context; \ typedef struct pcre2_real_match_context pcre2_match_context; \ \ struct pcre2_real_convert_context; \ typedef struct pcre2_real_convert_context pcre2_convert_context; \ \ struct pcre2_real_code; \ typedef struct pcre2_real_code pcre2_code; \ \ struct pcre2_real_match_data; \ typedef struct pcre2_real_match_data pcre2_match_data; \ \ struct pcre2_real_jit_stack; \ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \ \ typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *); /* The structures for passing out data via callout functions. We use structures so that new fields can be added on the end in future versions, without changing the API of the function, thereby allowing old clients to work without modification. Define the generic versions in a macro; the width-specific versions are generated from this macro below. */ /* Flags for the callout_flags field. These are cleared after a callout. */ #define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */ #define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */ #define PCRE2_STRUCTURE_LIST \ typedef struct pcre2_callout_block { \ uint32_t version; /* Identifies version of block */ \ /* ------------------------ Version 0 ------------------------------- */ \ uint32_t callout_number; /* Number compiled into pattern */ \ uint32_t capture_top; /* Max current capture */ \ uint32_t capture_last; /* Most recently closed capture */ \ PCRE2_SIZE *offset_vector; /* The offset vector */ \ PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ PCRE2_SPTR subject; /* The subject being matched */ \ PCRE2_SIZE subject_length; /* The length of the subject */ \ PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \ PCRE2_SIZE current_position; /* Where we currently are in the subject */ \ PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ /* ------------------- Added for Version 1 -------------------------- */ \ PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ PCRE2_SPTR callout_string; /* String compiled into pattern */ \ /* ------------------- Added for Version 2 -------------------------- */ \ uint32_t callout_flags; /* See above for list */ \ /* ------------------------------------------------------------------ */ \ } pcre2_callout_block; \ \ typedef struct pcre2_callout_enumerate_block { \ uint32_t version; /* Identifies version of block */ \ /* ------------------------ Version 0 ------------------------------- */ \ PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ uint32_t callout_number; /* Number compiled into pattern */ \ PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ PCRE2_SPTR callout_string; /* String compiled into pattern */ \ /* ------------------------------------------------------------------ */ \ } pcre2_callout_enumerate_block; \ \ typedef struct pcre2_substitute_callout_block { \ uint32_t version; /* Identifies version of block */ \ /* ------------------------ Version 0 ------------------------------- */ \ PCRE2_SPTR input; /* Pointer to input subject string */ \ PCRE2_SPTR output; /* Pointer to output buffer */ \ PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \ PCRE2_SIZE *ovector; /* Pointer to current ovector */ \ uint32_t oveccount; /* Count of pairs set in ovector */ \ uint32_t subscount; /* Substitution number */ \ /* ------------------------------------------------------------------ */ \ } pcre2_substitute_callout_block; /* List the generic forms of all other functions in macros, which will be expanded for each width below. Start with functions that give general information. */ #define PCRE2_GENERAL_INFO_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); /* Functions for manipulating contexts. */ #define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ *pcre2_general_context_copy(pcre2_general_context *); \ PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \ void (*)(void *, void *), void *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_general_context_free(pcre2_general_context *); #define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ *pcre2_compile_context_copy(pcre2_compile_context *); \ PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ *pcre2_compile_context_create(pcre2_general_context *);\ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_compile_context_free(pcre2_compile_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_newline(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ int (*)(uint32_t, void *), void *); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ *pcre2_match_context_copy(pcre2_match_context *); \ PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ *pcre2_match_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_match_context_free(pcre2_match_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_callout(pcre2_match_context *, \ int (*)(pcre2_callout_block *, void *), void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_substitute_callout(pcre2_match_context *, \ int (*)(pcre2_substitute_callout_block *, void *), void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_recursion_memory_management(pcre2_match_context *, \ void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *); #define PCRE2_CONVERT_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ *pcre2_convert_context_copy(pcre2_convert_context *); \ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ *pcre2_convert_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_convert_context_free(pcre2_convert_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); /* Functions concerned with compiling a pattern to PCRE internal code. */ #define PCRE2_COMPILE_FUNCTIONS \ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ pcre2_compile_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_code_free(pcre2_code *); \ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ *pcre2_code_copy(const pcre2_code *); \ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ *pcre2_code_copy_with_tables(const pcre2_code *); /* Functions that give information about a compiled pattern. */ #define PCRE2_PATTERN_INFO_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_callout_enumerate(const pcre2_code *, \ int (*)(pcre2_callout_enumerate_block *, void *), void *); /* Functions for running a match and inspecting the result. */ #define PCRE2_MATCH_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ *pcre2_match_data_create_from_pattern(const pcre2_code *, \ pcre2_general_context *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ uint32_t, pcre2_match_data *, pcre2_match_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_match_data_free(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \ pcre2_get_mark(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ pcre2_get_match_data_size(pcre2_match_data *); \ PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \ pcre2_get_ovector_count(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ *pcre2_get_ovector_pointer(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ pcre2_get_startchar(pcre2_match_data *); /* Convenience functions for handling matched substrings. */ #define PCRE2_SUBSTRING_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \ PCRE2_SIZE *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \ PCRE2_SIZE *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_substring_free(PCRE2_UCHAR *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \ PCRE2_SIZE *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \ PCRE2_SIZE *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \ PCRE2_SPTR *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_substring_list_free(PCRE2_SPTR *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); /* Functions for serializing / deserializing compiled patterns. */ #define PCRE2_SERIALIZE_FUNCTIONS \ PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \ PCRE2_SIZE *, pcre2_general_context *); \ PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \ pcre2_general_context *); \ PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ pcre2_serialize_get_number_of_codes(const uint8_t *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_serialize_free(uint8_t *); /* Convenience function for match + substitute. */ #define PCRE2_SUBSTITUTE_FUNCTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \ PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *); /* Functions for converting pattern source strings. */ #define PCRE2_CONVERT_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \ PCRE2_SIZE *, pcre2_convert_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_converted_pattern_free(PCRE2_UCHAR *); /* Functions for JIT processing */ #define PCRE2_JIT_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_jit_compile(pcre2_code *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ uint32_t, pcre2_match_data *, pcre2_match_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_jit_free_unused_memory(pcre2_general_context *); \ PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \ *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_jit_stack_free(pcre2_jit_stack *); /* Other miscellaneous functions. */ #define PCRE2_OTHER_FUNCTIONS \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \ *pcre2_maketables(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_maketables_free(pcre2_general_context *, const uint8_t *); /* Define macros that generate width-specific names from generic versions. The three-level macro scheme is necessary to get the macros expanded when we want them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for generating three versions of everything below. After that, PCRE2_SUFFIX will be re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as pcre2_compile are called by application code. */ #define PCRE2_JOIN(a,b) a ## b #define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b) #define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH) /* Data types */ #define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) #define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) #define pcre2_code PCRE2_SUFFIX(pcre2_code_) #define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) #define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) #define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) #define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) #define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) #define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_) #define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) #define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) #define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) /* Data blocks */ #define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) #define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_) #define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_) #define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) #define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) #define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_) #define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) #define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) /* Functions: the complete list in alphabetical order */ #define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_) #define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_) #define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_) #define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) #define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) #define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) #define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_) #define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) #define pcre2_config PCRE2_SUFFIX(pcre2_config_) #define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_) #define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_) #define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_) #define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_) #define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) #define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) #define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) #define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) #define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) #define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) #define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_) #define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_) #define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) #define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) #define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) #define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) #define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) #define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) #define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_) #define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) #define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) #define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_) #define pcre2_match PCRE2_SUFFIX(pcre2_match_) #define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) #define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) #define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) #define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) #define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) #define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_) #define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) #define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_) #define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_) #define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_) #define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_) #define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_) #define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) #define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_) #define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) #define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) #define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) #define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) #define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) #define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) #define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_) #define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) #define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) #define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) #define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) #define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) #define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) /* Keep this old function name for backwards compatibility */ #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) /* Keep this obsolete function for backwards compatibility: it is now a noop. */ #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) /* Now generate all three sets of width-specific structures and function prototypes. */ #define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \ PCRE2_TYPES_LIST \ PCRE2_STRUCTURE_LIST \ PCRE2_GENERAL_INFO_FUNCTIONS \ PCRE2_GENERAL_CONTEXT_FUNCTIONS \ PCRE2_COMPILE_CONTEXT_FUNCTIONS \ PCRE2_CONVERT_CONTEXT_FUNCTIONS \ PCRE2_CONVERT_FUNCTIONS \ PCRE2_MATCH_CONTEXT_FUNCTIONS \ PCRE2_COMPILE_FUNCTIONS \ PCRE2_PATTERN_INFO_FUNCTIONS \ PCRE2_MATCH_FUNCTIONS \ PCRE2_SUBSTRING_FUNCTIONS \ PCRE2_SERIALIZE_FUNCTIONS \ PCRE2_SUBSTITUTE_FUNCTION \ PCRE2_JIT_FUNCTIONS \ PCRE2_OTHER_FUNCTIONS #define PCRE2_LOCAL_WIDTH 8 PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS #undef PCRE2_LOCAL_WIDTH #define PCRE2_LOCAL_WIDTH 16 PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS #undef PCRE2_LOCAL_WIDTH #define PCRE2_LOCAL_WIDTH 32 PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS #undef PCRE2_LOCAL_WIDTH /* Undefine the list macros; they are no longer needed. */ #undef PCRE2_TYPES_LIST #undef PCRE2_STRUCTURE_LIST #undef PCRE2_GENERAL_INFO_FUNCTIONS #undef PCRE2_GENERAL_CONTEXT_FUNCTIONS #undef PCRE2_COMPILE_CONTEXT_FUNCTIONS #undef PCRE2_CONVERT_CONTEXT_FUNCTIONS #undef PCRE2_MATCH_CONTEXT_FUNCTIONS #undef PCRE2_COMPILE_FUNCTIONS #undef PCRE2_PATTERN_INFO_FUNCTIONS #undef PCRE2_MATCH_FUNCTIONS #undef PCRE2_SUBSTRING_FUNCTIONS #undef PCRE2_SERIALIZE_FUNCTIONS #undef PCRE2_SUBSTITUTE_FUNCTION #undef PCRE2_JIT_FUNCTIONS #undef PCRE2_OTHER_FUNCTIONS #undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS /* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make PCRE2_SUFFIX a no-op. Otherwise, generate an error. */ #undef PCRE2_SUFFIX #ifndef PCRE2_CODE_UNIT_WIDTH #error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h. #error Use 8, 16, or 32; or 0 for a multi-width application. #else /* PCRE2_CODE_UNIT_WIDTH is defined */ #if PCRE2_CODE_UNIT_WIDTH == 8 || \ PCRE2_CODE_UNIT_WIDTH == 16 || \ PCRE2_CODE_UNIT_WIDTH == 32 #define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH) #elif PCRE2_CODE_UNIT_WIDTH == 0 #undef PCRE2_JOIN #undef PCRE2_GLUE #define PCRE2_SUFFIX(a) a #else #error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32. #endif #endif /* PCRE2_CODE_UNIT_WIDTH is defined */ #ifdef __cplusplus } /* extern "C" */ #endif #endif /* PCRE2_H_IDEMPOTENT_GUARD */ /* End of pcre2.h */ pcre2_internal.h 0000644 00000262577 15004461075 0007651 0 ustar 00 /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ /* PCRE2 is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge New API code Copyright (c) 2016-2020 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ #ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD #define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD /* We do not support both EBCDIC and Unicode at the same time. The "configure" script prevents both being selected, but not everybody uses "configure". EBCDIC is only supported for the 8-bit library, but the check for this has to be later in this file, because the first part is not width-dependent, and is included by pcre2test.c with CODE_UNIT_WIDTH == 0. */ #if defined EBCDIC && defined SUPPORT_UNICODE #error The use of both EBCDIC and SUPPORT_UNICODE is not supported. #endif /* Standard C headers */ #include <ctype.h> #include <limits.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> /* Macros to make boolean values more obvious. The #ifndef is to pacify compiler warnings in environments where these macros are defined elsewhere. Unfortunately, there is no way to do the same for the typedef. */ typedef int BOOL; #ifndef FALSE #define FALSE 0 #define TRUE 1 #endif /* Valgrind (memcheck) support */ #ifdef SUPPORT_VALGRIND #include <valgrind/memcheck.h> #endif /* -ftrivial-auto-var-init support supports initializing all local variables to avoid some classes of bug, but this can cause an unacceptable slowdown for large on-stack arrays in hot functions. This macro lets us annotate such arrays. */ #ifdef HAVE_ATTRIBUTE_UNINITIALIZED #define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized)) #else #define PCRE2_KEEP_UNINITIALIZED #endif /* Older versions of MSVC lack snprintf(). This define allows for warning/error-free compilation and testing with MSVC compilers back to at least MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ #if defined(_MSC_VER) && (_MSC_VER < 1900) #define snprintf _snprintf #endif /* When compiling a DLL for Windows, the exported symbols have to be declared using some MS magic. I found some useful information on this web page: http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the information there, using __declspec(dllexport) without "extern" we have a definition; with "extern" we have a declaration. The settings here override the setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL, which is all that is needed for applications (they just import the symbols). We use: PCRE2_EXP_DECL for declarations PCRE2_EXP_DEFN for definitions The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test, which is an application, but needs to import this file in order to "peek" at internals, can #include pcre2.h first to get an application's-eye view. In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, special-purpose environments) might want to stick other stuff in front of exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN only if it is not already set. */ #ifndef PCRE2_EXP_DECL # ifdef _WIN32 # ifndef PCRE2_STATIC # define PCRE2_EXP_DECL extern __declspec(dllexport) # define PCRE2_EXP_DEFN __declspec(dllexport) # else # define PCRE2_EXP_DECL extern # define PCRE2_EXP_DEFN # endif # else # ifdef __cplusplus # define PCRE2_EXP_DECL extern "C" # else # define PCRE2_EXP_DECL extern # endif # ifndef PCRE2_EXP_DEFN # define PCRE2_EXP_DEFN PCRE2_EXP_DECL # endif # endif #endif /* Include the public PCRE2 header and the definitions of UCP character property values. This must follow the setting of PCRE2_EXP_DECL above. */ #include "pcre2.h" #include "pcre2_ucp.h" /* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced with a custom type. This makes it possible, for example, to allow pcre2_match() to process subject strings that are discontinuous by using a smart pointer class. It must always be possible to inspect all of the subject string in pcre2_match() because of the way it backtracks. */ /* WARNING: This is as yet untested for PCRE2. */ #ifdef CUSTOM_SUBJECT_PTR #undef PCRE2_SPTR #define PCRE2_SPTR CUSTOM_SUBJECT_PTR #endif /* When checking for integer overflow in pcre2_compile(), we need to handle large integers. If a 64-bit integer type is available, we can use that. Otherwise we have to cast to double, which of course requires floating point arithmetic. Handle this by defining a macro for the appropriate type. */ #if defined INT64_MAX || defined int64_t #define INT64_OR_DOUBLE int64_t #else #define INT64_OR_DOUBLE double #endif /* External (in the C sense) functions and tables that are private to the libraries are always referenced using the PRIV macro. This makes it possible for pcre2test.c to include some of the source files from the libraries using a different PRIV definition to avoid name clashes. It also makes it clear in the code that a non-static object is being referenced. */ #ifndef PRIV #define PRIV(name) _pcre2_##name #endif /* When compiling for use with the Virtual Pascal compiler, these functions need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT option on the command line. */ #ifdef VPCOMPAT #define strlen(s) _strlen(s) #define strncmp(s1,s2,m) _strncmp(s1,s2,m) #define memcmp(s,c,n) _memcmp(s,c,n) #define memcpy(d,s,n) _memcpy(d,s,n) #define memmove(d,s,n) _memmove(d,s,n) #define memset(s,c,n) _memset(s,c,n) #else /* VPCOMPAT */ /* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define a macro that calls an emulating function. */ #ifndef HAVE_MEMMOVE #undef memmove /* Some systems may have a macro */ #define memmove(a, b, c) PRIV(memmove)(a, b, c) #endif /* not HAVE_MEMMOVE */ #endif /* not VPCOMPAT */ /* This is an unsigned int value that no UTF character can ever have, as Unicode doesn't go beyond 0x0010ffff. */ #define NOTACHAR 0xffffffff /* This is the largest valid UTF/Unicode code point. */ #define MAX_UTF_CODE_POINT 0x10ffff /* Compile-time positive error numbers (all except UTF errors, which are negative) start at this value. It should probably never be changed, in case some application is checking for specific numbers. There is a copy of this #define in pcre2posix.c (which now no longer includes this file). Ideally, a way of having a single definition should be found, but as the number is unlikely to change, this is not a pressing issue. The original reason for having a base other than 0 was to keep the absolute values of compile-time and run-time error numbers numerically different, but in the event the code does not rely on this. */ #define COMPILE_ERROR_BASE 100 /* The initial frames vector for remembering backtracking points in pcre2_match() is allocated on the system stack, of this size (bytes). The size must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends on the number of capturing parentheses) so 20KiB handles quite a few frames. A larger vector on the heap is obtained for patterns that need more frames. The maximum size of this can be limited. */ #define START_FRAMES_SIZE 20480 /* Similarly, for DFA matching, an initial internal workspace vector is allocated on the stack. */ #define DFA_START_RWS_SIZE 30720 /* Define the default BSR convention. */ #ifdef BSR_ANYCRLF #define BSR_DEFAULT PCRE2_BSR_ANYCRLF #else #define BSR_DEFAULT PCRE2_BSR_UNICODE #endif /* ---------------- Basic UTF-8 macros ---------------- */ /* These UTF-8 macros are always defined because they are used in pcre2test for handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library is not supported. */ /* Tests whether a UTF-8 code point needs extra bytes to decode. */ #define HASUTF8EXTRALEN(c) ((c) >= 0xc0) /* The following macros were originally written in the form of loops that used data from the tables whose names start with PRIV(utf8_table). They were rewritten by a user so as not to use loops, because in some environments this gives a significant performance advantage, and it seems never to do any harm. */ /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer. */ #define GETUTF8(c, eptr) \ { \ if ((c & 0x20u) == 0) \ c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ else if ((c & 0x10u) == 0) \ c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ else if ((c & 0x08u) == 0) \ c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ else if ((c & 0x04u) == 0) \ c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ (eptr[4] & 0x3fu); \ else \ c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ } /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing the pointer. */ #define GETUTF8INC(c, eptr) \ { \ if ((c & 0x20u) == 0) \ c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \ else if ((c & 0x10u) == 0) \ { \ c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \ eptr += 2; \ } \ else if ((c & 0x08u) == 0) \ { \ c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \ ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ eptr += 3; \ } \ else if ((c & 0x04u) == 0) \ { \ c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \ ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \ (eptr[3] & 0x3fu); \ eptr += 4; \ } \ else \ { \ c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \ ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \ ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \ eptr += 5; \ } \ } /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer, incrementing the length. */ #define GETUTF8LEN(c, eptr, len) \ { \ if ((c & 0x20u) == 0) \ { \ c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ len++; \ } \ else if ((c & 0x10u) == 0) \ { \ c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ len += 2; \ } \ else if ((c & 0x08u) == 0) \ {\ c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ len += 3; \ } \ else if ((c & 0x04u) == 0) \ { \ c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ (eptr[4] & 0x3fu); \ len += 4; \ } \ else \ {\ c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ len += 5; \ } \ } /* --------------- Whitespace macros ---------------- */ /* Tests for Unicode horizontal and vertical whitespace characters must check a number of different values. Using a switch statement for this generates the fastest code (no loop, no memory access), and there are several places in the interpreter code where this happens. In order to ensure that all the case lists remain in step, we use macros so that there is only one place where the lists are defined. These values are also required as lists in pcre2_compile.c when processing \h, \H, \v and \V in a character class. The lists are defined in pcre2_tables.c, but macros that define the values are here so that all the definitions are together. The lists must be in ascending character order, terminated by NOTACHAR (which is 0xffffffff). Any changes should ensure that the various macros are kept in step with each other. NOTE: The values also appear in pcre2_jit_compile.c. */ /* -------------- ASCII/Unicode environments -------------- */ #ifndef EBCDIC /* Character U+180E (Mongolian Vowel Separator) is not included in the list of spaces in the Unicode file PropList.txt, and Perl does not recognize it as a space. However, in many other sources it is listed as a space and has been in PCRE (both APIs) for a long time. */ #define HSPACE_LIST \ CHAR_HT, CHAR_SPACE, CHAR_NBSP, \ 0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \ 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \ NOTACHAR #define HSPACE_MULTIBYTE_CASES \ case 0x1680: /* OGHAM SPACE MARK */ \ case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \ case 0x2000: /* EN QUAD */ \ case 0x2001: /* EM QUAD */ \ case 0x2002: /* EN SPACE */ \ case 0x2003: /* EM SPACE */ \ case 0x2004: /* THREE-PER-EM SPACE */ \ case 0x2005: /* FOUR-PER-EM SPACE */ \ case 0x2006: /* SIX-PER-EM SPACE */ \ case 0x2007: /* FIGURE SPACE */ \ case 0x2008: /* PUNCTUATION SPACE */ \ case 0x2009: /* THIN SPACE */ \ case 0x200A: /* HAIR SPACE */ \ case 0x202f: /* NARROW NO-BREAK SPACE */ \ case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \ case 0x3000 /* IDEOGRAPHIC SPACE */ #define HSPACE_BYTE_CASES \ case CHAR_HT: \ case CHAR_SPACE: \ case CHAR_NBSP #define HSPACE_CASES \ HSPACE_BYTE_CASES: \ HSPACE_MULTIBYTE_CASES #define VSPACE_LIST \ CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR #define VSPACE_MULTIBYTE_CASES \ case 0x2028: /* LINE SEPARATOR */ \ case 0x2029 /* PARAGRAPH SEPARATOR */ #define VSPACE_BYTE_CASES \ case CHAR_LF: \ case CHAR_VT: \ case CHAR_FF: \ case CHAR_CR: \ case CHAR_NEL #define VSPACE_CASES \ VSPACE_BYTE_CASES: \ VSPACE_MULTIBYTE_CASES /* -------------- EBCDIC environments -------------- */ #else #define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR #define HSPACE_BYTE_CASES \ case CHAR_HT: \ case CHAR_SPACE: \ case CHAR_NBSP #define HSPACE_CASES HSPACE_BYTE_CASES #ifdef EBCDIC_NL25 #define VSPACE_LIST \ CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR #else #define VSPACE_LIST \ CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR #endif #define VSPACE_BYTE_CASES \ case CHAR_LF: \ case CHAR_VT: \ case CHAR_FF: \ case CHAR_CR: \ case CHAR_NEL #define VSPACE_CASES VSPACE_BYTE_CASES #endif /* EBCDIC */ /* -------------- End of whitespace macros -------------- */ /* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF, "any" and "anycrlf" at present). The following macros are used to package up testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to indicate in which datablock the parameters exist, and what the start/end of string field names are. */ #define NLTYPE_FIXED 0 /* Newline is a fixed length string */ #define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ #define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ /* This macro checks for a newline at the given position */ #define IS_NEWLINE(p) \ ((NLBLOCK->nltype != NLTYPE_FIXED)? \ ((p) < NLBLOCK->PSEND && \ PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ &(NLBLOCK->nllen), utf)) \ : \ ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ UCHAR21TEST(p) == NLBLOCK->nl[0] && \ (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \ ) \ ) /* This macro checks for a newline immediately preceding the given position */ #define WAS_NEWLINE(p) \ ((NLBLOCK->nltype != NLTYPE_FIXED)? \ ((p) > NLBLOCK->PSSTART && \ PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ &(NLBLOCK->nllen), utf)) \ : \ ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ ) \ ) /* Private flags containing information about the compiled pattern. The first three must not be changed, because whichever is set is actually the number of bytes in a code unit in that mode. */ #define PCRE2_MODE8 0x00000001 /* compiled in 8 bit mode */ #define PCRE2_MODE16 0x00000002 /* compiled in 16 bit mode */ #define PCRE2_MODE32 0x00000004 /* compiled in 32 bit mode */ #define PCRE2_FIRSTSET 0x00000010 /* first_code unit is set */ #define PCRE2_FIRSTCASELESS 0x00000020 /* caseless first code unit */ #define PCRE2_FIRSTMAPSET 0x00000040 /* bitmap of first code units is set */ #define PCRE2_LASTSET 0x00000080 /* last code unit is set */ #define PCRE2_LASTCASELESS 0x00000100 /* caseless last code unit */ #define PCRE2_STARTLINE 0x00000200 /* start after \n for multiline */ #define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */ #define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ #define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ #define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ #define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ #define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ #define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */ #define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ #define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */ #define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ #define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ #define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ #define PCRE2_HASBKC 0x00400000 /* contains \C */ #define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) /* Values for the matchedby field in a match data block. */ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */ PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */ PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */ /* Values for the flags field in a match data block. */ #define PCRE2_MD_COPIED_SUBJECT 0x01u /* Magic number to provide a small check against being handed junk. */ #define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ /* The maximum remaining length of subject we are prepared to search for a req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is much faster than the search loop that has to be used in 16-bit and 32-bit modes. */ #if PCRE2_CODE_UNIT_WIDTH == 8 #define REQ_CU_MAX 5000 #else #define REQ_CU_MAX 2000 #endif /* Offsets for the bitmap tables in the cbits set of tables. Each table contains a set of bits for a class map. Some classes are built by combining these tables. */ #define cbit_space 0 /* [:space:] or \s */ #define cbit_xdigit 32 /* [:xdigit:] */ #define cbit_digit 64 /* [:digit:] or \d */ #define cbit_upper 96 /* [:upper:] */ #define cbit_lower 128 /* [:lower:] */ #define cbit_word 160 /* [:word:] or \w */ #define cbit_graph 192 /* [:graph:] */ #define cbit_print 224 /* [:print:] */ #define cbit_punct 256 /* [:punct:] */ #define cbit_cntrl 288 /* [:cntrl:] */ #define cbit_length 320 /* Length of the cbits table */ /* Bit definitions for entries in the ctypes table. Do not change these values without checking pcre2_jit_compile.c, which has an assertion to ensure that ctype_word has the value 16. */ #define ctype_space 0x01 #define ctype_letter 0x02 #define ctype_lcletter 0x04 #define ctype_digit 0x08 #define ctype_word 0x10 /* alphanumeric or '_' */ /* Offsets of the various tables from the base tables pointer, and total length of the tables. */ #define lcc_offset 0 /* Lower case */ #define fcc_offset 256 /* Flip case */ #define cbits_offset 512 /* Character classes */ #define ctypes_offset (cbits_offset + cbit_length) /* Character types */ #define TABLES_LENGTH (ctypes_offset + 256) /* -------------------- Character and string names ------------------------ */ /* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal character constants like '*' because the compiler would emit their EBCDIC code, which is different from their ASCII/UTF-8 code. Instead we define macros for the characters so that they always use the ASCII/UTF-8 code when UTF-8 support is enabled. When UTF-8 support is not enabled, the definitions use character literals. Both character and string versions of each character are needed, and there are some longer strings as well. This means that, on EBCDIC platforms, the PCRE2 library can handle either EBCDIC, or UTF-8, but not both. To support both in the same compiled library would need different lookups depending on whether PCRE2_UTF was set or not. This would make it impossible to use characters in switch/case statements, which would reduce performance. For a theoretical use (which nobody has asked for) in a minority area (EBCDIC platforms), this is not sensible. Any application that did need both could compile two versions of the library, using macros to give the functions distinct names. */ #ifndef SUPPORT_UNICODE /* UTF-8 support is not enabled; use the platform-dependent character literals so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF mode. Newline characters are problematic in EBCDIC. Though it has CR and LF characters, a common practice has been to use its NL (0x15) character as the line terminator in C-like processing environments. However, sometimes the LF (0x25) character is used instead, according to this Unicode document: http://unicode.org/standard/reports/tr13/tr13-5.html PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 instead. Whichever is *not* chosen is defined as NEL. In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the same code point. */ #ifdef EBCDIC #ifndef EBCDIC_NL25 #define CHAR_NL '\x15' #define CHAR_NEL '\x25' #define STR_NL "\x15" #define STR_NEL "\x25" #else #define CHAR_NL '\x25' #define CHAR_NEL '\x15' #define STR_NL "\x25" #define STR_NEL "\x15" #endif #define CHAR_LF CHAR_NL #define STR_LF STR_NL #define CHAR_ESC '\047' #define CHAR_DEL '\007' #define CHAR_NBSP ((unsigned char)'\x41') #define STR_ESC "\047" #define STR_DEL "\007" #else /* Not EBCDIC */ /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for compatibility. NEL is the Unicode newline character; make sure it is a positive value. */ #define CHAR_LF '\n' #define CHAR_NL CHAR_LF #define CHAR_NEL ((unsigned char)'\x85') #define CHAR_ESC '\033' #define CHAR_DEL '\177' #define CHAR_NBSP ((unsigned char)'\xa0') #define STR_LF "\n" #define STR_NL STR_LF #define STR_NEL "\x85" #define STR_ESC "\033" #define STR_DEL "\177" #endif /* EBCDIC */ /* The remaining definitions work in both environments. */ #define CHAR_NUL '\0' #define CHAR_HT '\t' #define CHAR_VT '\v' #define CHAR_FF '\f' #define CHAR_CR '\r' #define CHAR_BS '\b' #define CHAR_BEL '\a' #define CHAR_SPACE ' ' #define CHAR_EXCLAMATION_MARK '!' #define CHAR_QUOTATION_MARK '"' #define CHAR_NUMBER_SIGN '#' #define CHAR_DOLLAR_SIGN '$' #define CHAR_PERCENT_SIGN '%' #define CHAR_AMPERSAND '&' #define CHAR_APOSTROPHE '\'' #define CHAR_LEFT_PARENTHESIS '(' #define CHAR_RIGHT_PARENTHESIS ')' #define CHAR_ASTERISK '*' #define CHAR_PLUS '+' #define CHAR_COMMA ',' #define CHAR_MINUS '-' #define CHAR_DOT '.' #define CHAR_SLASH '/' #define CHAR_0 '0' #define CHAR_1 '1' #define CHAR_2 '2' #define CHAR_3 '3' #define CHAR_4 '4' #define CHAR_5 '5' #define CHAR_6 '6' #define CHAR_7 '7' #define CHAR_8 '8' #define CHAR_9 '9' #define CHAR_COLON ':' #define CHAR_SEMICOLON ';' #define CHAR_LESS_THAN_SIGN '<' #define CHAR_EQUALS_SIGN '=' #define CHAR_GREATER_THAN_SIGN '>' #define CHAR_QUESTION_MARK '?' #define CHAR_COMMERCIAL_AT '@' #define CHAR_A 'A' #define CHAR_B 'B' #define CHAR_C 'C' #define CHAR_D 'D' #define CHAR_E 'E' #define CHAR_F 'F' #define CHAR_G 'G' #define CHAR_H 'H' #define CHAR_I 'I' #define CHAR_J 'J' #define CHAR_K 'K' #define CHAR_L 'L' #define CHAR_M 'M' #define CHAR_N 'N' #define CHAR_O 'O' #define CHAR_P 'P' #define CHAR_Q 'Q' #define CHAR_R 'R' #define CHAR_S 'S' #define CHAR_T 'T' #define CHAR_U 'U' #define CHAR_V 'V' #define CHAR_W 'W' #define CHAR_X 'X' #define CHAR_Y 'Y' #define CHAR_Z 'Z' #define CHAR_LEFT_SQUARE_BRACKET '[' #define CHAR_BACKSLASH '\\' #define CHAR_RIGHT_SQUARE_BRACKET ']' #define CHAR_CIRCUMFLEX_ACCENT '^' #define CHAR_UNDERSCORE '_' #define CHAR_GRAVE_ACCENT '`' #define CHAR_a 'a' #define CHAR_b 'b' #define CHAR_c 'c' #define CHAR_d 'd' #define CHAR_e 'e' #define CHAR_f 'f' #define CHAR_g 'g' #define CHAR_h 'h' #define CHAR_i 'i' #define CHAR_j 'j' #define CHAR_k 'k' #define CHAR_l 'l' #define CHAR_m 'm' #define CHAR_n 'n' #define CHAR_o 'o' #define CHAR_p 'p' #define CHAR_q 'q' #define CHAR_r 'r' #define CHAR_s 's' #define CHAR_t 't' #define CHAR_u 'u' #define CHAR_v 'v' #define CHAR_w 'w' #define CHAR_x 'x' #define CHAR_y 'y' #define CHAR_z 'z' #define CHAR_LEFT_CURLY_BRACKET '{' #define CHAR_VERTICAL_LINE '|' #define CHAR_RIGHT_CURLY_BRACKET '}' #define CHAR_TILDE '~' #define STR_HT "\t" #define STR_VT "\v" #define STR_FF "\f" #define STR_CR "\r" #define STR_BS "\b" #define STR_BEL "\a" #define STR_SPACE " " #define STR_EXCLAMATION_MARK "!" #define STR_QUOTATION_MARK "\"" #define STR_NUMBER_SIGN "#" #define STR_DOLLAR_SIGN "$" #define STR_PERCENT_SIGN "%" #define STR_AMPERSAND "&" #define STR_APOSTROPHE "'" #define STR_LEFT_PARENTHESIS "(" #define STR_RIGHT_PARENTHESIS ")" #define STR_ASTERISK "*" #define STR_PLUS "+" #define STR_COMMA "," #define STR_MINUS "-" #define STR_DOT "." #define STR_SLASH "/" #define STR_0 "0" #define STR_1 "1" #define STR_2 "2" #define STR_3 "3" #define STR_4 "4" #define STR_5 "5" #define STR_6 "6" #define STR_7 "7" #define STR_8 "8" #define STR_9 "9" #define STR_COLON ":" #define STR_SEMICOLON ";" #define STR_LESS_THAN_SIGN "<" #define STR_EQUALS_SIGN "=" #define STR_GREATER_THAN_SIGN ">" #define STR_QUESTION_MARK "?" #define STR_COMMERCIAL_AT "@" #define STR_A "A" #define STR_B "B" #define STR_C "C" #define STR_D "D" #define STR_E "E" #define STR_F "F" #define STR_G "G" #define STR_H "H" #define STR_I "I" #define STR_J "J" #define STR_K "K" #define STR_L "L" #define STR_M "M" #define STR_N "N" #define STR_O "O" #define STR_P "P" #define STR_Q "Q" #define STR_R "R" #define STR_S "S" #define STR_T "T" #define STR_U "U" #define STR_V "V" #define STR_W "W" #define STR_X "X" #define STR_Y "Y" #define STR_Z "Z" #define STR_LEFT_SQUARE_BRACKET "[" #define STR_BACKSLASH "\\" #define STR_RIGHT_SQUARE_BRACKET "]" #define STR_CIRCUMFLEX_ACCENT "^" #define STR_UNDERSCORE "_" #define STR_GRAVE_ACCENT "`" #define STR_a "a" #define STR_b "b" #define STR_c "c" #define STR_d "d" #define STR_e "e" #define STR_f "f" #define STR_g "g" #define STR_h "h" #define STR_i "i" #define STR_j "j" #define STR_k "k" #define STR_l "l" #define STR_m "m" #define STR_n "n" #define STR_o "o" #define STR_p "p" #define STR_q "q" #define STR_r "r" #define STR_s "s" #define STR_t "t" #define STR_u "u" #define STR_v "v" #define STR_w "w" #define STR_x "x" #define STR_y "y" #define STR_z "z" #define STR_LEFT_CURLY_BRACKET "{" #define STR_VERTICAL_LINE "|" #define STR_RIGHT_CURLY_BRACKET "}" #define STR_TILDE "~" #define STRING_ACCEPT0 "ACCEPT\0" #define STRING_COMMIT0 "COMMIT\0" #define STRING_F0 "F\0" #define STRING_FAIL0 "FAIL\0" #define STRING_MARK0 "MARK\0" #define STRING_PRUNE0 "PRUNE\0" #define STRING_SKIP0 "SKIP\0" #define STRING_THEN "THEN" #define STRING_atomic0 "atomic\0" #define STRING_pla0 "pla\0" #define STRING_plb0 "plb\0" #define STRING_napla0 "napla\0" #define STRING_naplb0 "naplb\0" #define STRING_nla0 "nla\0" #define STRING_nlb0 "nlb\0" #define STRING_sr0 "sr\0" #define STRING_asr0 "asr\0" #define STRING_positive_lookahead0 "positive_lookahead\0" #define STRING_positive_lookbehind0 "positive_lookbehind\0" #define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0" #define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0" #define STRING_negative_lookahead0 "negative_lookahead\0" #define STRING_negative_lookbehind0 "negative_lookbehind\0" #define STRING_script_run0 "script_run\0" #define STRING_atomic_script_run "atomic_script_run" #define STRING_alpha0 "alpha\0" #define STRING_lower0 "lower\0" #define STRING_upper0 "upper\0" #define STRING_alnum0 "alnum\0" #define STRING_ascii0 "ascii\0" #define STRING_blank0 "blank\0" #define STRING_cntrl0 "cntrl\0" #define STRING_digit0 "digit\0" #define STRING_graph0 "graph\0" #define STRING_print0 "print\0" #define STRING_punct0 "punct\0" #define STRING_space0 "space\0" #define STRING_word0 "word\0" #define STRING_xdigit "xdigit" #define STRING_DEFINE "DEFINE" #define STRING_VERSION "VERSION" #define STRING_WEIRD_STARTWORD "[:<:]]" #define STRING_WEIRD_ENDWORD "[:>:]]" #define STRING_CR_RIGHTPAR "CR)" #define STRING_LF_RIGHTPAR "LF)" #define STRING_CRLF_RIGHTPAR "CRLF)" #define STRING_ANY_RIGHTPAR "ANY)" #define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" #define STRING_NUL_RIGHTPAR "NUL)" #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" #define STRING_UTF8_RIGHTPAR "UTF8)" #define STRING_UTF16_RIGHTPAR "UTF16)" #define STRING_UTF32_RIGHTPAR "UTF32)" #define STRING_UTF_RIGHTPAR "UTF)" #define STRING_UCP_RIGHTPAR "UCP)" #define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)" #define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)" #define STRING_NO_JIT_RIGHTPAR "NO_JIT)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" #define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" #define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_MARK "MARK" #else /* SUPPORT_UNICODE */ /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode only. */ #define CHAR_HT '\011' #define CHAR_VT '\013' #define CHAR_FF '\014' #define CHAR_CR '\015' #define CHAR_LF '\012' #define CHAR_NL CHAR_LF #define CHAR_NEL ((unsigned char)'\x85') #define CHAR_BS '\010' #define CHAR_BEL '\007' #define CHAR_ESC '\033' #define CHAR_DEL '\177' #define CHAR_NUL '\0' #define CHAR_SPACE '\040' #define CHAR_EXCLAMATION_MARK '\041' #define CHAR_QUOTATION_MARK '\042' #define CHAR_NUMBER_SIGN '\043' #define CHAR_DOLLAR_SIGN '\044' #define CHAR_PERCENT_SIGN '\045' #define CHAR_AMPERSAND '\046' #define CHAR_APOSTROPHE '\047' #define CHAR_LEFT_PARENTHESIS '\050' #define CHAR_RIGHT_PARENTHESIS '\051' #define CHAR_ASTERISK '\052' #define CHAR_PLUS '\053' #define CHAR_COMMA '\054' #define CHAR_MINUS '\055' #define CHAR_DOT '\056' #define CHAR_SLASH '\057' #define CHAR_0 '\060' #define CHAR_1 '\061' #define CHAR_2 '\062' #define CHAR_3 '\063' #define CHAR_4 '\064' #define CHAR_5 '\065' #define CHAR_6 '\066' #define CHAR_7 '\067' #define CHAR_8 '\070' #define CHAR_9 '\071' #define CHAR_COLON '\072' #define CHAR_SEMICOLON '\073' #define CHAR_LESS_THAN_SIGN '\074' #define CHAR_EQUALS_SIGN '\075' #define CHAR_GREATER_THAN_SIGN '\076' #define CHAR_QUESTION_MARK '\077' #define CHAR_COMMERCIAL_AT '\100' #define CHAR_A '\101' #define CHAR_B '\102' #define CHAR_C '\103' #define CHAR_D '\104' #define CHAR_E '\105' #define CHAR_F '\106' #define CHAR_G '\107' #define CHAR_H '\110' #define CHAR_I '\111' #define CHAR_J '\112' #define CHAR_K '\113' #define CHAR_L '\114' #define CHAR_M '\115' #define CHAR_N '\116' #define CHAR_O '\117' #define CHAR_P '\120' #define CHAR_Q '\121' #define CHAR_R '\122' #define CHAR_S '\123' #define CHAR_T '\124' #define CHAR_U '\125' #define CHAR_V '\126' #define CHAR_W '\127' #define CHAR_X '\130' #define CHAR_Y '\131' #define CHAR_Z '\132' #define CHAR_LEFT_SQUARE_BRACKET '\133' #define CHAR_BACKSLASH '\134' #define CHAR_RIGHT_SQUARE_BRACKET '\135' #define CHAR_CIRCUMFLEX_ACCENT '\136' #define CHAR_UNDERSCORE '\137' #define CHAR_GRAVE_ACCENT '\140' #define CHAR_a '\141' #define CHAR_b '\142' #define CHAR_c '\143' #define CHAR_d '\144' #define CHAR_e '\145' #define CHAR_f '\146' #define CHAR_g '\147' #define CHAR_h '\150' #define CHAR_i '\151' #define CHAR_j '\152' #define CHAR_k '\153' #define CHAR_l '\154' #define CHAR_m '\155' #define CHAR_n '\156' #define CHAR_o '\157' #define CHAR_p '\160' #define CHAR_q '\161' #define CHAR_r '\162' #define CHAR_s '\163' #define CHAR_t '\164' #define CHAR_u '\165' #define CHAR_v '\166' #define CHAR_w '\167' #define CHAR_x '\170' #define CHAR_y '\171' #define CHAR_z '\172' #define CHAR_LEFT_CURLY_BRACKET '\173' #define CHAR_VERTICAL_LINE '\174' #define CHAR_RIGHT_CURLY_BRACKET '\175' #define CHAR_TILDE '\176' #define CHAR_NBSP ((unsigned char)'\xa0') #define STR_HT "\011" #define STR_VT "\013" #define STR_FF "\014" #define STR_CR "\015" #define STR_NL "\012" #define STR_BS "\010" #define STR_BEL "\007" #define STR_ESC "\033" #define STR_DEL "\177" #define STR_SPACE "\040" #define STR_EXCLAMATION_MARK "\041" #define STR_QUOTATION_MARK "\042" #define STR_NUMBER_SIGN "\043" #define STR_DOLLAR_SIGN "\044" #define STR_PERCENT_SIGN "\045" #define STR_AMPERSAND "\046" #define STR_APOSTROPHE "\047" #define STR_LEFT_PARENTHESIS "\050" #define STR_RIGHT_PARENTHESIS "\051" #define STR_ASTERISK "\052" #define STR_PLUS "\053" #define STR_COMMA "\054" #define STR_MINUS "\055" #define STR_DOT "\056" #define STR_SLASH "\057" #define STR_0 "\060" #define STR_1 "\061" #define STR_2 "\062" #define STR_3 "\063" #define STR_4 "\064" #define STR_5 "\065" #define STR_6 "\066" #define STR_7 "\067" #define STR_8 "\070" #define STR_9 "\071" #define STR_COLON "\072" #define STR_SEMICOLON "\073" #define STR_LESS_THAN_SIGN "\074" #define STR_EQUALS_SIGN "\075" #define STR_GREATER_THAN_SIGN "\076" #define STR_QUESTION_MARK "\077" #define STR_COMMERCIAL_AT "\100" #define STR_A "\101" #define STR_B "\102" #define STR_C "\103" #define STR_D "\104" #define STR_E "\105" #define STR_F "\106" #define STR_G "\107" #define STR_H "\110" #define STR_I "\111" #define STR_J "\112" #define STR_K "\113" #define STR_L "\114" #define STR_M "\115" #define STR_N "\116" #define STR_O "\117" #define STR_P "\120" #define STR_Q "\121" #define STR_R "\122" #define STR_S "\123" #define STR_T "\124" #define STR_U "\125" #define STR_V "\126" #define STR_W "\127" #define STR_X "\130" #define STR_Y "\131" #define STR_Z "\132" #define STR_LEFT_SQUARE_BRACKET "\133" #define STR_BACKSLASH "\134" #define STR_RIGHT_SQUARE_BRACKET "\135" #define STR_CIRCUMFLEX_ACCENT "\136" #define STR_UNDERSCORE "\137" #define STR_GRAVE_ACCENT "\140" #define STR_a "\141" #define STR_b "\142" #define STR_c "\143" #define STR_d "\144" #define STR_e "\145" #define STR_f "\146" #define STR_g "\147" #define STR_h "\150" #define STR_i "\151" #define STR_j "\152" #define STR_k "\153" #define STR_l "\154" #define STR_m "\155" #define STR_n "\156" #define STR_o "\157" #define STR_p "\160" #define STR_q "\161" #define STR_r "\162" #define STR_s "\163" #define STR_t "\164" #define STR_u "\165" #define STR_v "\166" #define STR_w "\167" #define STR_x "\170" #define STR_y "\171" #define STR_z "\172" #define STR_LEFT_CURLY_BRACKET "\173" #define STR_VERTICAL_LINE "\174" #define STR_RIGHT_CURLY_BRACKET "\175" #define STR_TILDE "\176" #define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0" #define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0" #define STRING_F0 STR_F "\0" #define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0" #define STRING_MARK0 STR_M STR_A STR_R STR_K "\0" #define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0" #define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0" #define STRING_THEN STR_T STR_H STR_E STR_N #define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0" #define STRING_pla0 STR_p STR_l STR_a "\0" #define STRING_plb0 STR_p STR_l STR_b "\0" #define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0" #define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0" #define STRING_nla0 STR_n STR_l STR_a "\0" #define STRING_nlb0 STR_n STR_l STR_b "\0" #define STRING_sr0 STR_s STR_r "\0" #define STRING_asr0 STR_a STR_s STR_r "\0" #define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" #define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" #define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" #define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" #define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" #define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" #define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0" #define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n #define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" #define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0" #define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0" #define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0" #define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0" #define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0" #define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0" #define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0" #define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0" #define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0" #define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0" #define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0" #define STRING_word0 STR_w STR_o STR_r STR_d "\0" #define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t #define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E #define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N #define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET #define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET #define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS #define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS #define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS #define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS #define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS #define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS #define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS #define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS #define STRING_NO_JIT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS #define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_MARK STR_M STR_A STR_R STR_K #endif /* SUPPORT_UNICODE */ /* -------------------- End of character and string names -------------------*/ /* -------------------- Definitions for compiled patterns -------------------*/ /* Codes for different types of Unicode property */ #define PT_ANY 0 /* Any property - matches all chars */ #define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ #define PT_GC 2 /* Specified general characteristic (e.g. L) */ #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ #define PT_SC 4 /* Script (e.g. Han) */ #define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ #define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */ #define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ #define PT_WORD 8 /* Word - L plus N plus underscore */ #define PT_CLIST 9 /* Pseudo-property: match character list */ #define PT_UCNC 10 /* Universal Character nameable character */ #define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE2_UCP is set - in other words, for Unicode handling of these classes. They are not available via the \p or \P escapes like those in the above list, and so they do not take part in the autopossessifying table. */ #define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */ #define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ #define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ #define XCL_NOT 0x01 /* Flag: this is a negative class */ #define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ #define XCL_HASPROP 0x04 /* Flag: property checks are present. */ #define XCL_END 0 /* Marks end of individual items */ #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ #define XCL_RANGE 2 /* A range (two multibyte chars) follows */ #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 for a data character. In the escapes[] table in pcre2_compile.c their values are negated in order to distinguish them from data values. They must appear here in the same order as in the opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL mode rather than an escape sequence. It is also used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves like \N. Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in check_escape(). There are tests in the code for an escape greater than ESC_b and less than ESC_Z to detect the types that may be repeated. These are the types that consume characters. If any new escapes are put in between that don't consume a character, that code will have to change. */ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k }; /********************** Opcode definitions ******************/ /****** NOTE NOTE NOTE ****** Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in order to the list of escapes immediately above. Furthermore, values up to OP_DOLLM must not be changed without adjusting the table called autoposstab in pcre2_auto_possess.c. Whenever this list is updated, the two macro definitions that follow must be updated to match. The possessification table called "opcode_possessify" in pcre2_compile.c must also be updated, and also the tables called "coptable" and "poptable" in pcre2_dfa_match.c. ****** NOTE NOTE NOTE ******/ /* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive, are used in a table for deciding whether a repeated character type can be auto-possessified. */ #define FIRST_AUTOTAB_OP OP_NOT_DIGIT #define LAST_AUTOTAB_LEFT_OP OP_EXTUNI #define LAST_AUTOTAB_RIGHT_OP OP_DOLLM enum { OP_END, /* 0 End of pattern */ /* Values corresponding to backslashed metacharacters */ OP_SOD, /* 1 Start of data: \A */ OP_SOM, /* 2 Start of match (subject + offset): \G */ OP_SET_SOM, /* 3 Set start of match (\K) */ OP_NOT_WORD_BOUNDARY, /* 4 \B */ OP_WORD_BOUNDARY, /* 5 \b */ OP_NOT_DIGIT, /* 6 \D */ OP_DIGIT, /* 7 \d */ OP_NOT_WHITESPACE, /* 8 \S */ OP_WHITESPACE, /* 9 \s */ OP_NOT_WORDCHAR, /* 10 \W */ OP_WORDCHAR, /* 11 \w */ OP_ANY, /* 12 Match any character except newline (\N) */ OP_ALLANY, /* 13 Match any character */ OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */ OP_NOTPROP, /* 15 \P (not Unicode property) */ OP_PROP, /* 16 \p (Unicode property) */ OP_ANYNL, /* 17 \R (any newline sequence) */ OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */ OP_HSPACE, /* 19 \h (horizontal whitespace) */ OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */ OP_VSPACE, /* 21 \v (vertical whitespace) */ OP_EXTUNI, /* 22 \X (extended Unicode sequence */ OP_EODN, /* 23 End of data or \n at end of data (\Z) */ OP_EOD, /* 24 End of data (\z) */ /* Line end assertions */ OP_DOLL, /* 25 End of line - not multiline */ OP_DOLLM, /* 26 End of line - multiline */ OP_CIRC, /* 27 Start of line - not multiline */ OP_CIRCM, /* 28 Start of line - multiline */ /* Single characters; caseful must precede the caseless ones, and these must remain in this order, and adjacent. */ OP_CHAR, /* 29 Match one character, casefully */ OP_CHARI, /* 30 Match one character, caselessly */ OP_NOT, /* 31 Match one character, not the given one, casefully */ OP_NOTI, /* 32 Match one character, not the given one, caselessly */ /* The following sets of 13 opcodes must always be kept in step because the offset from the first one is used to generate the others. */ /* Repeated characters; caseful must precede the caseless ones */ OP_STAR, /* 33 The maximizing and minimizing versions of */ OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ OP_PLUS, /* 35 the minimizing one second. */ OP_MINPLUS, /* 36 */ OP_QUERY, /* 37 */ OP_MINQUERY, /* 38 */ OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/ OP_MINUPTO, /* 40 */ OP_EXACT, /* 41 Exactly n matches */ OP_POSSTAR, /* 42 Possessified star, caseful */ OP_POSPLUS, /* 43 Possessified plus, caseful */ OP_POSQUERY, /* 44 Posesssified query, caseful */ OP_POSUPTO, /* 45 Possessified upto, caseful */ /* Repeated characters; caseless must follow the caseful ones */ OP_STARI, /* 46 */ OP_MINSTARI, /* 47 */ OP_PLUSI, /* 48 */ OP_MINPLUSI, /* 49 */ OP_QUERYI, /* 50 */ OP_MINQUERYI, /* 51 */ OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */ OP_MINUPTOI, /* 53 */ OP_EXACTI, /* 54 */ OP_POSSTARI, /* 55 Possessified star, caseless */ OP_POSPLUSI, /* 56 Possessified plus, caseless */ OP_POSQUERYI, /* 57 Posesssified query, caseless */ OP_POSUPTOI, /* 58 Possessified upto, caseless */ /* The negated ones must follow the non-negated ones, and match them */ /* Negated repeated character, caseful; must precede the caseless ones */ OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ OP_NOTPLUS, /* 61 the minimizing one second. They must be in */ OP_NOTMINPLUS, /* 62 exactly the same order as those above. */ OP_NOTQUERY, /* 63 */ OP_NOTMINQUERY, /* 64 */ OP_NOTUPTO, /* 65 From 0 to n matches, caseful */ OP_NOTMINUPTO, /* 66 */ OP_NOTEXACT, /* 67 Exactly n matches */ OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */ OP_NOTPOSPLUS, /* 69 */ OP_NOTPOSQUERY, /* 70 */ OP_NOTPOSUPTO, /* 71 */ /* Negated repeated character, caseless; must follow the caseful ones */ OP_NOTSTARI, /* 72 */ OP_NOTMINSTARI, /* 73 */ OP_NOTPLUSI, /* 74 */ OP_NOTMINPLUSI, /* 75 */ OP_NOTQUERYI, /* 76 */ OP_NOTMINQUERYI, /* 77 */ OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */ OP_NOTMINUPTOI, /* 79 */ OP_NOTEXACTI, /* 80 Exactly n matches */ OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */ OP_NOTPOSPLUSI, /* 82 */ OP_NOTPOSQUERYI, /* 83 */ OP_NOTPOSUPTOI, /* 84 */ /* Character types */ OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */ OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */ OP_TYPEQUERY, /* 89 */ OP_TYPEMINQUERY, /* 90 */ OP_TYPEUPTO, /* 91 From 0 to n matches */ OP_TYPEMINUPTO, /* 92 */ OP_TYPEEXACT, /* 93 Exactly n matches */ OP_TYPEPOSSTAR, /* 94 Possessified versions */ OP_TYPEPOSPLUS, /* 95 */ OP_TYPEPOSQUERY, /* 96 */ OP_TYPEPOSUPTO, /* 97 */ /* These are used for character classes and back references; only the first six are the same as the sets above. */ OP_CRSTAR, /* 98 The maximizing and minimizing versions of */ OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */ OP_CRPLUS, /* 100 the minimizing one second. These codes must */ OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */ OP_CRQUERY, /* 102 */ OP_CRMINQUERY, /* 103 */ OP_CRRANGE, /* 104 These are different to the three sets above. */ OP_CRMINRANGE, /* 105 */ OP_CRPOSSTAR, /* 106 Possessified versions */ OP_CRPOSPLUS, /* 107 */ OP_CRPOSQUERY, /* 108 */ OP_CRPOSRANGE, /* 109 */ /* End of quantifier opcodes */ OP_CLASS, /* 110 Match a character class, chars < 256 only */ OP_NCLASS, /* 111 Same, but the bitmap was created from a negative class - the difference is relevant only when a character > 255 is encountered. */ OP_XCLASS, /* 112 Extended class for handling > 255 chars within the class. This does both positive and negative. */ OP_REF, /* 113 Match a back reference, casefully */ OP_REFI, /* 114 Match a back reference, caselessly */ OP_DNREF, /* 115 Match a duplicate name backref, casefully */ OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ OP_CALLOUT, /* 118 Call out to external function if provided */ OP_CALLOUT_STR, /* 119 Call out with string argument */ OP_ALT, /* 120 Start of alternation */ OP_KET, /* 121 End of group that doesn't have an unbounded repeat */ OP_KETRMAX, /* 122 These two must remain together and in this */ OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */ OP_KETRPOS, /* 124 Possessive unlimited repeat. */ /* The assertions must come before BRA, CBRA, ONCE, and COND. */ OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */ OP_ASSERT, /* 126 Positive lookahead */ OP_ASSERT_NOT, /* 127 Negative lookahead */ OP_ASSERTBACK, /* 128 Positive lookbehind */ OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */ OP_ASSERT_NA, /* 130 Positive non-atomic lookahead */ OP_ASSERTBACK_NA, /* 131 Positive non-atomic lookbehind */ /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. The POS versions must immediately follow the non-POS versions in each case. */ OP_ONCE, /* 132 Atomic group, contains captures */ OP_SCRIPT_RUN, /* 133 Non-capture, but check characters' scripts */ OP_BRA, /* 134 Start of non-capturing bracket */ OP_BRAPOS, /* 135 Ditto, with unlimited, possessive repeat */ OP_CBRA, /* 136 Start of capturing bracket */ OP_CBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */ OP_COND, /* 138 Conditional group */ /* These five must follow the previous five, in the same order. There's a check for >= SBRA to distinguish the two sets. */ OP_SBRA, /* 139 Start of non-capturing bracket, check empty */ OP_SBRAPOS, /* 149 Ditto, with unlimited, possessive repeat */ OP_SCBRA, /* 141 Start of capturing bracket, check empty */ OP_SCBRAPOS, /* 142 Ditto, with unlimited, possessive repeat */ OP_SCOND, /* 143 Conditional group, check empty */ /* The next two pairs must (respectively) be kept together. */ OP_CREF, /* 144 Used to hold a capture number as condition */ OP_DNCREF, /* 145 Used to point to duplicate names as a condition */ OP_RREF, /* 146 Used to hold a recursion number as condition */ OP_DNRREF, /* 147 Used to point to duplicate names as a condition */ OP_FALSE, /* 148 Always false (used by DEFINE and VERSION) */ OP_TRUE, /* 149 Always true (used by VERSION) */ OP_BRAZERO, /* 150 These two must remain together and in this */ OP_BRAMINZERO, /* 151 order. */ OP_BRAPOSZERO, /* 152 */ /* These are backtracking control verbs */ OP_MARK, /* 153 always has an argument */ OP_PRUNE, /* 154 */ OP_PRUNE_ARG, /* 155 same, but with argument */ OP_SKIP, /* 156 */ OP_SKIP_ARG, /* 157 same, but with argument */ OP_THEN, /* 158 */ OP_THEN_ARG, /* 159 same, but with argument */ OP_COMMIT, /* 160 */ OP_COMMIT_ARG, /* 161 same, but with argument */ /* These are forced failure and success verbs. FAIL and ACCEPT do accept an argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL) without the need for a special opcode. */ OP_FAIL, /* 162 */ OP_ACCEPT, /* 163 */ OP_ASSERT_ACCEPT, /* 164 Used inside assertions */ OP_CLOSE, /* 165 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ OP_SKIPZERO, /* 166 */ /* This is used to identify a DEFINE group during compilation so that it can be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ OP_DEFINE, /* 167 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been some in the past. */ OP_TABLE_LENGTH }; /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro definitions that follow must also be updated to match. There are also tables called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in pcre2_dfa_match.c that must be updated. */ /* This macro defines textual names for all the opcodes. These are used only for debugging, and some of them are only partial names. The macro is referenced only in pcre2_printint.c, which fills out the full names in many cases (and in some cases doesn't actually use these names at all). */ #define OP_NAME_LIST \ "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \ "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ "extuni", "\\Z", "\\z", \ "$", "$", "^", "^", "char", "chari", "not", "noti", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", \ "*+","++", "?+", "{", \ "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ "Recurse", "Callout", "CalloutStr", \ "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ "Reverse", "Assert", "Assert not", \ "Assert back", "Assert back not", \ "Non-atomic assert", "Non-atomic assert back", \ "Once", \ "Script run", \ "Bra", "BraPos", "CBra", "CBraPos", \ "Cond", \ "SBra", "SBraPos", "SCBra", "SCBraPos", \ "SCond", \ "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \ "Cond false", "Cond true", \ "Brazero", "Braminzero", "Braposzero", \ "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \ "*ACCEPT", "*ASSERT_ACCEPT", \ "Close", "Skip zero", "Define" /* This macro defines the length of fixed length operations in the compiled regex. The lengths are used when searching for specific things, and also in the debugging printing of a compiled regex. We use a macro so that it can be defined close to the definitions of the opcodes themselves. As things have been extended, some of these are no longer fixed lenths, but are minima instead. For example, the length of a single-character repeat may vary in UTF-8 mode. The code that uses this table must know about such things. */ #define OP_LENGTHS \ 1, /* End */ \ 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \ 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \ 1, 1, 1, /* Any, AllAny, Anybyte */ \ 3, 3, /* \P, \p */ \ 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ 1, /* \X */ \ 1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \ 2, /* Char - the minimum length */ \ 2, /* Chari - the minimum length */ \ 2, /* not */ \ 2, /* noti */ \ /* Positive single-char repeats ** These are */ \ 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \ 2+IMM2_SIZE, /* exact */ \ 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ 2+IMM2_SIZE, /* exact I */ \ 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ /* Negative single-char repeats - only for chars < 256 */ \ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ 2+IMM2_SIZE, /* NOT exact */ \ 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ 2+IMM2_SIZE, /* NOT exact I */ \ 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ /* Positive type repeats */ \ 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ 2+IMM2_SIZE, /* Type exact */ \ 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ /* Character class & ref repeats */ \ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ 1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \ 1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \ 1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \ 0, /* XCLASS - variable length */ \ 1+IMM2_SIZE, /* REF */ \ 1+IMM2_SIZE, /* REFI */ \ 1+2*IMM2_SIZE, /* DNREF */ \ 1+2*IMM2_SIZE, /* DNREFI */ \ 1+LINK_SIZE, /* RECURSE */ \ 1+2*LINK_SIZE+1, /* CALLOUT */ \ 0, /* CALLOUT_STR - variable length */ \ 1+LINK_SIZE, /* Alt */ \ 1+LINK_SIZE, /* Ket */ \ 1+LINK_SIZE, /* KetRmax */ \ 1+LINK_SIZE, /* KetRmin */ \ 1+LINK_SIZE, /* KetRpos */ \ 1+LINK_SIZE, /* Reverse */ \ 1+LINK_SIZE, /* Assert */ \ 1+LINK_SIZE, /* Assert not */ \ 1+LINK_SIZE, /* Assert behind */ \ 1+LINK_SIZE, /* Assert behind not */ \ 1+LINK_SIZE, /* NA Assert */ \ 1+LINK_SIZE, /* NA Assert behind */ \ 1+LINK_SIZE, /* ONCE */ \ 1+LINK_SIZE, /* SCRIPT_RUN */ \ 1+LINK_SIZE, /* BRA */ \ 1+LINK_SIZE, /* BRAPOS */ \ 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ 1+LINK_SIZE, /* COND */ \ 1+LINK_SIZE, /* SBRA */ \ 1+LINK_SIZE, /* SBRAPOS */ \ 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ 1+LINK_SIZE, /* SCOND */ \ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \ 1, 1, /* FALSE, TRUE */ \ 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \ 1, 3, /* COMMIT, COMMIT_ARG */ \ 1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ 1 /* DEFINE */ /* A magic value for OP_RREF to indicate the "any recursion" condition. */ #define RREF_ANY 0xffff /* ---------- Private structures that are mode-independent. ---------- */ /* Structure to hold data for custom memory management. */ typedef struct pcre2_memctl { void * (*malloc)(size_t, void *); void (*free)(void *, void *); void *memory_data; } pcre2_memctl; /* Structure for building a chain of open capturing subpatterns during compiling, so that instructions to close them can be compiled when (*ACCEPT) is encountered. */ typedef struct open_capitem { struct open_capitem *next; /* Chain link */ uint16_t number; /* Capture number */ uint16_t assert_depth; /* Assertion depth when opened */ } open_capitem; /* Layout of the UCP type table that translates property names into types and codes. Each entry used to point directly to a name, but to reduce the number of relocations in shared libraries, it now has an offset into a single string instead. */ typedef struct { uint16_t name_offset; uint16_t type; uint16_t value; } ucp_type_table; /* Unicode character database (UCD) record format */ typedef struct { uint8_t script; /* ucp_Arabic, etc. */ uint8_t chartype; /* ucp_Cc, etc. (general categories) */ uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */ uint8_t caseset; /* offset to multichar other cases or zero */ int32_t other_case; /* offset to other case, or zero if none */ int16_t scriptx; /* script extension value */ int16_t dummy; /* spare - to round to multiple of 4 bytes */ } ucd_record; /* UCD access macros */ #define UCD_BLOCK_SIZE 128 #define REAL_GET_UCD(ch) (PRIV(ucd_records) + \ PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \ UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE]) #if PCRE2_CODE_UNIT_WIDTH == 32 #define GET_UCD(ch) ((ch > MAX_UTF_CODE_POINT)? \ PRIV(dummy_ucd_record) : REAL_GET_UCD(ch)) #else #define GET_UCD(ch) REAL_GET_UCD(ch) #endif #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] #define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop #define UCD_CASESET(ch) GET_UCD(ch)->caseset #define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) #define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx /* Header for serialized pcre2 codes. */ typedef struct pcre2_serialized_data { uint32_t magic; uint32_t version; uint32_t config; int32_t number_of_codes; } pcre2_serialized_data; /* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */ /* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as 0, so the following items are omitted. */ #if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0 /* EBCDIC is supported only for the 8-bit library. */ #if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8 #error EBCDIC is not supported for the 16-bit or 32-bit libraries #endif /* This is the largest non-UTF code point. */ #define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH)) /* Internal shared data tables and variables. These are used by more than one of the exported public functions. They have to be "external" in the C sense, but are not part of the PCRE2 public API. Although the data for some of them is identical in all libraries, they must have different names so that multiple libraries can be simultaneously linked to a single application. However, UTF-8 tables are needed only when compiling the 8-bit library. */ #if PCRE2_CODE_UNIT_WIDTH == 8 extern const int PRIV(utf8_table1)[]; extern const int PRIV(utf8_table1_size); extern const int PRIV(utf8_table2)[]; extern const int PRIV(utf8_table3)[]; extern const uint8_t PRIV(utf8_table4)[]; #endif #define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_) #define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_) #define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_) #define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_) #define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_) #define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_) #define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_) #if PCRE2_CODE_UNIT_WIDTH == 32 #define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_) #endif #define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_) #define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) #define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) #define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) #define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_) #define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_) #define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_) #define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_) #define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_) #define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_) #define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_) #define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_) #define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_) #define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_) extern const uint8_t PRIV(OP_lengths)[]; extern const uint32_t PRIV(callout_end_delims)[]; extern const uint32_t PRIV(callout_start_delims)[]; extern const pcre2_compile_context PRIV(default_compile_context); extern const pcre2_convert_context PRIV(default_convert_context); extern const pcre2_match_context PRIV(default_match_context); extern const uint8_t PRIV(default_tables)[]; extern const uint32_t PRIV(hspace_list)[]; extern const uint32_t PRIV(vspace_list)[]; extern const uint32_t PRIV(ucd_caseless_sets)[]; extern const uint32_t PRIV(ucd_digit_sets)[]; extern const uint8_t PRIV(ucd_script_sets)[]; extern const ucd_record PRIV(ucd_records)[]; #if PCRE2_CODE_UNIT_WIDTH == 32 extern const ucd_record PRIV(dummy_ucd_record)[]; #endif extern const uint16_t PRIV(ucd_stage1)[]; extern const uint16_t PRIV(ucd_stage2)[]; extern const uint32_t PRIV(ucp_gbtable)[]; extern const uint32_t PRIV(ucp_gentype)[]; #ifdef SUPPORT_JIT extern const int PRIV(ucp_typerange)[]; #endif extern const char *PRIV(unicode_version); extern const ucp_type_table PRIV(utt)[]; extern const char PRIV(utt_names)[]; extern const size_t PRIV(utt_size); /* Mode-dependent macros and hidden and private structures are defined in a separate file so that pcre2test can include them at all supported widths. When compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can include them at the appropriate width, after setting up suffix macros for the private structures. */ #define branch_chain PCRE2_SUFFIX(branch_chain_) #define compile_block PCRE2_SUFFIX(compile_block_) #define dfa_match_block PCRE2_SUFFIX(dfa_match_block_) #define match_block PCRE2_SUFFIX(match_block_) #define named_group PCRE2_SUFFIX(named_group_) #include "pcre2_intmodedep.h" /* Private "external" functions. These are internal functions that are called from modules other than the one in which they are defined. They have to be "external" in the C sense, but are not part of the PCRE2 public API. They are not referenced from pcre2test, and must not be defined when no code unit width is available. */ #define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_) #define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_) #define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_) #define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_) #define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_) #define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_) #define _pcre2_jit_free PCRE2_SUFFIX(_pcre2_jit_free_) #define _pcre2_jit_get_size PCRE2_SUFFIX(_pcre2_jit_get_size_) #define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_) #define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_) #define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_) #define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_) #define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_) #define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_) #define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_) #define _pcre2_strlen PCRE2_SUFFIX(_pcre2_strlen_) #define _pcre2_strncmp PCRE2_SUFFIX(_pcre2_strncmp_) #define _pcre2_strncmp_c8 PCRE2_SUFFIX(_pcre2_strncmp_c8_) #define _pcre2_study PCRE2_SUFFIX(_pcre2_study_) #define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_) #define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) #define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) extern int _pcre2_auto_possessify(PCRE2_UCHAR *, const compile_block *); extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *, int *, uint32_t, uint32_t, BOOL, compile_block *); extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR, BOOL, int *); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); extern void _pcre2_jit_free_rodata(void *, void *); extern void _pcre2_jit_free(void *, pcre2_memctl *); extern size_t _pcre2_jit_get_size(void *); const char * _pcre2_jit_get_target(void); extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *); extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *); extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL); extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR); extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *); extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *); extern PCRE2_SIZE _pcre2_strlen(PCRE2_SPTR); extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t); extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t); extern int _pcre2_study(pcre2_real_code *); extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); /* This function is needed only when memmove() is not available. */ #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) #define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove) extern void * _pcre2_memmove(void *, const void *, size_t); #endif #endif /* PCRE2_CODE_UNIT_WIDTH */ #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ /* End of pcre2_internal.h */ pcre2_intmodedep.h 0000644 00000110633 15004461075 0010146 0 ustar 00 /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ /* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ /* This module contains mode-dependent macro and structure definitions. The file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined. These mode-dependent items are kept in a separate file so that they can also be #included multiple times for different code unit widths by pcre2test in order to have access to the hidden structures at all supported widths. Some of the mode-dependent macros are required at different widths for different parts of the pcre2test code (in particular, the included pcre_printint.c file). We undefine them here so that they can be re-defined for multiple inclusions. Not all of these are used in pcre2test, but it's easier just to undefine them all. */ #undef ACROSSCHAR #undef BACKCHAR #undef BYTES2CU #undef CHMAX_255 #undef CU2BYTES #undef FORWARDCHAR #undef FORWARDCHARTEST #undef GET #undef GET2 #undef GETCHAR #undef GETCHARINC #undef GETCHARINCTEST #undef GETCHARLEN #undef GETCHARLENTEST #undef GETCHARTEST #undef GET_EXTRALEN #undef HAS_EXTRALEN #undef IMM2_SIZE #undef MAX_255 #undef MAX_MARK #undef MAX_PATTERN_SIZE #undef MAX_UTF_SINGLE_CU #undef NOT_FIRSTCU #undef PUT #undef PUT2 #undef PUT2INC #undef PUTCHAR #undef PUTINC #undef TABLE_GET /* -------------------------- MACROS ----------------------------- */ /* PCRE keeps offsets in its compiled code as at least 16-bit quantities (always stored in big-endian order in 8-bit mode) by default. These are used, for example, to link from the start of a subpattern to its alternatives and its end. The use of 16 bits per offset limits the size of an 8-bit compiled regex to around 64K, which is big enough for almost everybody. However, I received a request for an even bigger limit. For this reason, and also to make the code easier to maintain, the storing and loading of offsets from the compiled code unit string is now handled by the macros that are defined here. The macros are controlled by the value of LINK_SIZE. This defaults to 2, but values of 3 or 4 are also supported. */ /* ------------------- 8-bit support ------------------ */ #if PCRE2_CODE_UNIT_WIDTH == 8 #if LINK_SIZE == 2 #define PUT(a,n,d) \ (a[n] = (PCRE2_UCHAR)((d) >> 8)), \ (a[(n)+1] = (PCRE2_UCHAR)((d) & 255)) #define GET(a,n) \ (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) #define MAX_PATTERN_SIZE (1 << 16) #elif LINK_SIZE == 3 #define PUT(a,n,d) \ (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ (a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \ (a[(n)+2] = (PCRE2_UCHAR)((d) & 255)) #define GET(a,n) \ (unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) #define MAX_PATTERN_SIZE (1 << 24) #elif LINK_SIZE == 4 #define PUT(a,n,d) \ (a[n] = (PCRE2_UCHAR)((d) >> 24)), \ (a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \ (a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \ (a[(n)+3] = (PCRE2_UCHAR)((d) & 255)) #define GET(a,n) \ (unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else #error LINK_SIZE must be 2, 3, or 4 #endif /* ------------------- 16-bit support ------------------ */ #elif PCRE2_CODE_UNIT_WIDTH == 16 #if LINK_SIZE == 2 #undef LINK_SIZE #define LINK_SIZE 1 #define PUT(a,n,d) \ (a[n] = (PCRE2_UCHAR)(d)) #define GET(a,n) \ (a[n]) #define MAX_PATTERN_SIZE (1 << 16) #elif LINK_SIZE == 3 || LINK_SIZE == 4 #undef LINK_SIZE #define LINK_SIZE 2 #define PUT(a,n,d) \ (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ (a[(n)+1] = (PCRE2_UCHAR)((d) & 65535)) #define GET(a,n) \ (unsigned int)(((a)[n] << 16) | (a)[(n)+1]) #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else #error LINK_SIZE must be 2, 3, or 4 #endif /* ------------------- 32-bit support ------------------ */ #elif PCRE2_CODE_UNIT_WIDTH == 32 #undef LINK_SIZE #define LINK_SIZE 1 #define PUT(a,n,d) \ (a[n] = (d)) #define GET(a,n) \ (a[n]) #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else #error Unsupported compiling mode #endif /* --------------- Other mode-specific macros ----------------- */ /* PCRE uses some other (at least) 16-bit quantities that do not change when the size of offsets changes. There are used for repeat counts and for other things such as capturing parenthesis numbers in back references. Define the number of code units required to hold a 16-bit count/offset, and macros to load and store such a value. For reasons that I do not understand, the expression in the 8-bit GET2 macro is treated by gcc as a signed expression, even when a is declared as unsigned. It seems that any kind of arithmetic results in a signed value. Hence the cast. */ #if PCRE2_CODE_UNIT_WIDTH == 8 #define IMM2_SIZE 2 #define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) #define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255 #else /* Code units are 16 or 32 bits */ #define IMM2_SIZE 1 #define GET2(a,n) a[n] #define PUT2(a,n,d) a[n] = d #endif /* Other macros that are different for 8-bit mode. The MAX_255 macro checks whether its argument, which is assumed to be one code unit, is less than 256. The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK name must fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro is used to access elements of tables containing exactly 256 items. Its argument is a code unit. When code points can be greater than 255, a check is needed before accessing these tables. */ #if PCRE2_CODE_UNIT_WIDTH == 8 #define MAX_255(c) TRUE #define MAX_MARK ((1u << 8) - 1) #define TABLE_GET(c, table, default) ((table)[c]) #ifdef SUPPORT_UNICODE #define SUPPORT_WIDE_CHARS #define CHMAX_255(c) ((c) <= 255u) #else #define CHMAX_255(c) TRUE #endif /* SUPPORT_UNICODE */ #else /* Code units are 16 or 32 bits */ #define CHMAX_255(c) ((c) <= 255u) #define MAX_255(c) ((c) <= 255u) #define MAX_MARK ((1u << 16) - 1) #define SUPPORT_WIDE_CHARS #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) #endif /* ----------------- Character-handling macros ----------------- */ /* There is a proposed future special "UTF-21" mode, in which only the lowest 21 bits of a 32-bit character are interpreted as UTF, with the remaining 11 high-order bits available to the application for other uses. In preparation for the future implementation of this mode, there are macros that load a data item and, if in this special mode, mask it to 21 bits. These macros all have names starting with UCHAR21. In all other modes, including the normal 32-bit library, the macros all have the same simple definitions. When the new mode is implemented, it is expected that these definitions will be varied appropriately using #ifdef when compiling the library that supports the special mode. */ #define UCHAR21(eptr) (*(eptr)) #define UCHAR21TEST(eptr) (*(eptr)) #define UCHAR21INC(eptr) (*(eptr)++) #define UCHAR21INCTEST(eptr) (*(eptr)++) /* When UTF encoding is being used, a character is no longer just a single byte in 8-bit mode or a single short in 16-bit mode. The macros for character handling generate simple sequences when used in the basic mode, and more complicated ones for UTF characters. GETCHARLENTEST and other macros are not used when UTF is not supported. To make sure they can never even appear when UTF support is omitted, we don't even define them. */ #ifndef SUPPORT_UNICODE /* #define MAX_UTF_SINGLE_CU */ /* #define HAS_EXTRALEN(c) */ /* #define GET_EXTRALEN(c) */ /* #define NOT_FIRSTCU(c) */ #define GETCHAR(c, eptr) c = *eptr; #define GETCHARTEST(c, eptr) c = *eptr; #define GETCHARINC(c, eptr) c = *eptr++; #define GETCHARINCTEST(c, eptr) c = *eptr++; #define GETCHARLEN(c, eptr, len) c = *eptr; #define PUTCHAR(c, p) (*p = c, 1) /* #define GETCHARLENTEST(c, eptr, len) */ /* #define BACKCHAR(eptr) */ /* #define FORWARDCHAR(eptr) */ /* #define FORWARCCHARTEST(eptr,end) */ /* #define ACROSSCHAR(condition, eptr, action) */ #else /* SUPPORT_UNICODE */ /* ------------------- 8-bit support ------------------ */ #if PCRE2_CODE_UNIT_WIDTH == 8 #define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ /* The largest UTF code point that can be encoded as a single code unit. */ #define MAX_UTF_SINGLE_CU 127 /* Tests whether the code point needs extra characters to decode. */ #define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c) /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. Otherwise it has an undefined behaviour. */ #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu]) /* Returns TRUE, if the given value is not the first code unit of a UTF sequence. */ #define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u) /* Get the next UTF-8 character, not advancing the pointer. This is called when we know we are in UTF-8 mode. */ #define GETCHAR(c, eptr) \ c = *eptr; \ if (c >= 0xc0u) GETUTF8(c, eptr); /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the pointer. */ #define GETCHARTEST(c, eptr) \ c = *eptr; \ if (utf && c >= 0xc0u) GETUTF8(c, eptr); /* Get the next UTF-8 character, advancing the pointer. This is called when we know we are in UTF-8 mode. */ #define GETCHARINC(c, eptr) \ c = *eptr++; \ if (c >= 0xc0u) GETUTF8INC(c, eptr); /* Get the next character, testing for UTF-8 mode, and advancing the pointer. This is called when we don't know if we are in UTF-8 mode. */ #define GETCHARINCTEST(c, eptr) \ c = *eptr++; \ if (utf && c >= 0xc0u) GETUTF8INC(c, eptr); /* Get the next UTF-8 character, not advancing the pointer, incrementing length if there are extra bytes. This is called when we know we are in UTF-8 mode. */ #define GETCHARLEN(c, eptr, len) \ c = *eptr; \ if (c >= 0xc0u) GETUTF8LEN(c, eptr, len); /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the pointer, incrementing length if there are extra bytes. This is called when we do not know if we are in UTF-8 mode. */ #define GETCHARLENTEST(c, eptr, len) \ c = *eptr; \ if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len); /* If the pointer is not at the start of a character, move it back until it is. This is called only in UTF-8 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-8 only code. */ #define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr-- /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++ #define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++ /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) \ while((condition) && ((*eptr) & 0xc0u) == 0x80u) action /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ PRIV(ord2utf)(c,p) : (*p = c, 1)) /* ------------------- 16-bit support ------------------ */ #elif PCRE2_CODE_UNIT_WIDTH == 16 #define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ /* The largest UTF code point that can be encoded as a single code unit. */ #define MAX_UTF_SINGLE_CU 65535 /* Tests whether the code point needs extra characters to decode. */ #define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u) /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. Otherwise it has an undefined behaviour. */ #define GET_EXTRALEN(c) 1 /* Returns TRUE, if the given value is not the first code unit of a UTF sequence. */ #define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u) /* Base macro to pick up the low surrogate of a UTF-16 character, not advancing the pointer. */ #define GETUTF16(c, eptr) \ { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; } /* Get the next UTF-16 character, not advancing the pointer. This is called when we know we are in UTF-16 mode. */ #define GETCHAR(c, eptr) \ c = *eptr; \ if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); /* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the pointer. */ #define GETCHARTEST(c, eptr) \ c = *eptr; \ if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); /* Base macro to pick up the low surrogate of a UTF-16 character, advancing the pointer. */ #define GETUTF16INC(c, eptr) \ { c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; } /* Get the next UTF-16 character, advancing the pointer. This is called when we know we are in UTF-16 mode. */ #define GETCHARINC(c, eptr) \ c = *eptr++; \ if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); /* Get the next character, testing for UTF-16 mode, and advancing the pointer. This is called when we don't know if we are in UTF-16 mode. */ #define GETCHARINCTEST(c, eptr) \ c = *eptr++; \ if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); /* Base macro to pick up the low surrogate of a UTF-16 character, not advancing the pointer, incrementing the length. */ #define GETUTF16LEN(c, eptr, len) \ { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; } /* Get the next UTF-16 character, not advancing the pointer, incrementing length if there is a low surrogate. This is called when we know we are in UTF-16 mode. */ #define GETCHARLEN(c, eptr, len) \ c = *eptr; \ if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); /* Get the next UTF-816character, testing for UTF-16 mode, not advancing the pointer, incrementing length if there is a low surrogate. This is called when we do not know if we are in UTF-16 mode. */ #define GETCHARLENTEST(c, eptr, len) \ c = *eptr; \ if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); /* If the pointer is not at the start of a character, move it back until it is. This is called only in UTF-16 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-16 only code. */ #define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr-- /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++ #define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++ /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) \ if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ PRIV(ord2utf)(c,p) : (*p = c, 1)) /* ------------------- 32-bit support ------------------ */ #else /* These are trivial for the 32-bit library, since all UTF-32 characters fit into one PCRE2_UCHAR unit. */ #define MAX_UTF_SINGLE_CU (0x10ffffu) #define HAS_EXTRALEN(c) (0) #define GET_EXTRALEN(c) (0) #define NOT_FIRSTCU(c) (0) /* Get the next UTF-32 character, not advancing the pointer. This is called when we know we are in UTF-32 mode. */ #define GETCHAR(c, eptr) \ c = *(eptr); /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the pointer. */ #define GETCHARTEST(c, eptr) \ c = *(eptr); /* Get the next UTF-32 character, advancing the pointer. This is called when we know we are in UTF-32 mode. */ #define GETCHARINC(c, eptr) \ c = *((eptr)++); /* Get the next character, testing for UTF-32 mode, and advancing the pointer. This is called when we don't know if we are in UTF-32 mode. */ #define GETCHARINCTEST(c, eptr) \ c = *((eptr)++); /* Get the next UTF-32 character, not advancing the pointer, not incrementing length (since all UTF-32 is of length 1). This is called when we know we are in UTF-32 mode. */ #define GETCHARLEN(c, eptr, len) \ GETCHAR(c, eptr) /* Get the next UTF-32character, testing for UTF-32 mode, not advancing the pointer, not incrementing the length (since all UTF-32 is of length 1). This is called when we do not know if we are in UTF-32 mode. */ #define GETCHARLENTEST(c, eptr, len) \ GETCHARTEST(c, eptr) /* If the pointer is not at the start of a character, move it back until it is. This is called only in UTF-32 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-32 only code. These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */ #define BACKCHAR(eptr) do { } while (0) /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) do { } while (0) #define FORWARDCHARTEST(eptr,end) do { } while (0) /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) do { } while (0) /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) (*p = c, 1) #endif /* UTF-32 character handling */ #endif /* SUPPORT_UNICODE */ /* Mode-dependent macros that have the same definition in all modes. */ #define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8))) #define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8))) #define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE /* ----------------------- HIDDEN STRUCTURES ----------------------------- */ /* NOTE: All these structures *must* start with a pcre2_memctl structure. The code that uses them is simpler because it assumes this. */ /* The real general context structure. At present it holds only data for custom memory control. */ typedef struct pcre2_real_general_context { pcre2_memctl memctl; } pcre2_real_general_context; /* The real compile context structure */ typedef struct pcre2_real_compile_context { pcre2_memctl memctl; int (*stack_guard)(uint32_t, void *); void *stack_guard_data; const uint8_t *tables; PCRE2_SIZE max_pattern_length; uint16_t bsr_convention; uint16_t newline_convention; uint32_t parens_nest_limit; uint32_t extra_options; } pcre2_real_compile_context; /* The real match context structure. */ typedef struct pcre2_real_match_context { pcre2_memctl memctl; #ifdef SUPPORT_JIT pcre2_jit_callback jit_callback; void *jit_callback_data; #endif int (*callout)(pcre2_callout_block *, void *); void *callout_data; int (*substitute_callout)(pcre2_substitute_callout_block *, void *); void *substitute_callout_data; PCRE2_SIZE offset_limit; uint32_t heap_limit; uint32_t match_limit; uint32_t depth_limit; } pcre2_real_match_context; /* The real convert context structure. */ typedef struct pcre2_real_convert_context { pcre2_memctl memctl; uint32_t glob_separator; uint32_t glob_escape; } pcre2_real_convert_context; /* The real compiled code structure. The type for the blocksize field is defined specially because it is required in pcre2_serialize_decode() when copying the size from possibly unaligned memory into a variable of the same type. Use a macro rather than a typedef to avoid compiler warnings when this file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field here.) */ #undef CODE_BLOCKSIZE_TYPE #define CODE_BLOCKSIZE_TYPE size_t #undef LOOKBEHIND_MAX #define LOOKBEHIND_MAX UINT16_MAX typedef struct pcre2_real_code { pcre2_memctl memctl; /* Memory control fields */ const uint8_t *tables; /* The character tables */ void *executable_jit; /* Pointer to JIT code */ uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */ uint32_t magic_number; /* Paranoid and endianness check */ uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t overall_options; /* Options after processing the pattern */ uint32_t extra_options; /* Taken from compile_context */ uint32_t flags; /* Various state flags */ uint32_t limit_heap; /* Limit set in the pattern */ uint32_t limit_match; /* Limit set in the pattern */ uint32_t limit_depth; /* Limit set in the pattern */ uint32_t first_codeunit; /* Starting code unit */ uint32_t last_codeunit; /* This codeunit must be seen */ uint16_t bsr_convention; /* What \R matches */ uint16_t newline_convention; /* What is a newline? */ uint16_t max_lookbehind; /* Longest lookbehind (characters) */ uint16_t minlength; /* Minimum length of match */ uint16_t top_bracket; /* Highest numbered group */ uint16_t top_backref; /* Highest numbered back reference */ uint16_t name_entry_size; /* Size (code units) of table entries */ uint16_t name_count; /* Number of name entries in the table */ } pcre2_real_code; /* The real match data structure. Define ovector as large as it can ever actually be so that array bound checkers don't grumble. Memory for this structure is obtained by calling pcre2_match_data_create(), which sets the size as the offset of ovector plus a pair of elements for each capturable string, so the size varies from call to call. As the maximum number of capturing subpatterns is 65535 we must allow for 65536 strings to include the overall match. (See also the heapframe structure below.) */ typedef struct pcre2_real_match_data { pcre2_memctl memctl; const pcre2_real_code *code; /* The pattern used for the match */ PCRE2_SPTR subject; /* The subject that was matched */ PCRE2_SPTR mark; /* Pointer to last mark */ PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ PCRE2_SIZE startchar; /* Offset to starting code unit */ uint8_t matchedby; /* Type of match (normal, JIT, DFA) */ uint8_t flags; /* Various flags */ uint16_t oveccount; /* Number of pairs */ int rc; /* The return code from the match */ PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } pcre2_real_match_data; /* ----------------------- PRIVATE STRUCTURES ----------------------------- */ /* These structures are not needed for pcre2test. */ #ifndef PCRE2_PCRE2TEST /* Structures for checking for mutual recursion when scanning compiled or parsed code. */ typedef struct recurse_check { struct recurse_check *prev; PCRE2_SPTR group; } recurse_check; typedef struct parsed_recurse_check { struct parsed_recurse_check *prev; uint32_t *groupptr; } parsed_recurse_check; /* Structure for building a cache when filling in recursion offsets. */ typedef struct recurse_cache { PCRE2_SPTR group; int groupnumber; } recurse_cache; /* Structure for maintaining a chain of pointers to the currently incomplete branches, for testing for left recursion while compiling. */ typedef struct branch_chain { struct branch_chain *outer; PCRE2_UCHAR *current_branch; } branch_chain; /* Structure for building a list of named groups during the first pass of compiling. */ typedef struct named_group { PCRE2_SPTR name; /* Points to the name in the pattern */ uint32_t number; /* Group number */ uint16_t length; /* Length of the name */ uint16_t isdup; /* TRUE if a duplicate */ } named_group; /* Structure for passing "static" information around between the functions doing the compiling, so that they are thread-safe. */ typedef struct compile_block { pcre2_real_compile_context *cx; /* Points to the compile context */ const uint8_t *lcc; /* Points to lower casing table */ const uint8_t *fcc; /* Points to case-flipping table */ const uint8_t *cbits; /* Points to character type table */ const uint8_t *ctypes; /* Points to table of type maps */ PCRE2_SPTR start_workspace; /* The start of working space */ PCRE2_SPTR start_code; /* The start of the compiled code */ PCRE2_SPTR start_pattern; /* The start of the pattern */ PCRE2_SPTR end_pattern; /* The end of the pattern */ PCRE2_UCHAR *name_table; /* The name/number table */ PCRE2_SIZE workspace_size; /* Size of workspace */ PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */ PCRE2_SIZE erroroffset; /* Offset of error in pattern */ uint16_t names_found; /* Number of entries so far */ uint16_t name_entry_size; /* Size of each entry */ uint16_t parens_depth; /* Depth of nested parentheses */ uint16_t assert_depth; /* Depth of nested assertions */ open_capitem *open_caps; /* Chain of open capture items */ named_group *named_groups; /* Points to vector in pre-compile */ uint32_t named_group_list_size; /* Number of entries in the list */ uint32_t external_options; /* External (initial) options */ uint32_t external_flags; /* External flag bits to be set */ uint32_t bracount; /* Count of capturing parentheses */ uint32_t lastcapture; /* Last capture encountered */ uint32_t *parsed_pattern; /* Parsed pattern buffer */ uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */ uint32_t *groupinfo; /* Group info vector */ uint32_t top_backref; /* Maximum back reference */ uint32_t backref_map; /* Bitmap of low back refs */ uint32_t nltype; /* Newline type */ uint32_t nllen; /* Newline string length */ uint32_t class_range_start; /* Overall class range start */ uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ int max_lookbehind; /* Maximum lookbehind (characters) */ int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_recurse; /* Had a recursion or subroutine call */ BOOL dupnames; /* Duplicate names exist */ } compile_block; /* Structure for keeping the properties of the in-memory stack used by the JIT matcher. */ typedef struct pcre2_real_jit_stack { pcre2_memctl memctl; void* stack; } pcre2_real_jit_stack; /* Structure for items in a linked list that represents an explicit recursive call within the pattern when running pcre_dfa_match(). */ typedef struct dfa_recursion_info { struct dfa_recursion_info *prevrec; PCRE2_SPTR subject_position; uint32_t group_num; } dfa_recursion_info; /* Structure for "stack" frames that are used for remembering backtracking positions during matching. As these are used in a vector, with the ovector item being extended, the size of the structure must be a multiple of PCRE2_SIZE. The only way to check this at compile time is to force an error by generating an array with a negative size. By putting this in a typedef (which is never used), we don't generate any code when all is well. */ typedef struct heapframe { /* The first set of fields are variables that have to be preserved over calls to RRMATCH(), but which do not need to be copied to new frames. */ PCRE2_SPTR ecode; /* The current position in the pattern */ PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */ PCRE2_SIZE length; /* Used for character, string, or code lengths */ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ uint32_t rdepth; /* "Recursion" depth */ uint32_t group_frame_type; /* Type information for group frames */ uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */ uint8_t return_id; /* Where to go on in internal "return" */ uint8_t op; /* Processing opcode */ /* At this point, the structure is 16-bit aligned. On most architectures the alignment requirement for a pointer will ensure that the eptr field below is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer that is 16-bit aligned. We must therefore ensure that what comes between here and eptr is an odd multiple of 16 bits so as to get back into 32-bit alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs fudges in the other cases. In the 32-bit case the padding comes first so that the occu field itself is 32-bit aligned. Without the padding, this structure is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */ #if PCRE2_CODE_UNIT_WIDTH == 8 PCRE2_UCHAR occu[6]; /* Used for other case code units */ #elif PCRE2_CODE_UNIT_WIDTH == 16 PCRE2_UCHAR occu[2]; /* Used for other case code units */ uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ #else uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ PCRE2_UCHAR occu[1]; /* Used for other case code units */ #endif /* The rest have to be copied from the previous frame whenever a new frame becomes current. The final field is specified as a large vector so that runtime array bound checks don't catch references to it. However, for any specific call to pcre2_match() the memory allocated for each frame structure allows for exactly the right size ovector for the number of capturing parentheses. (See also the comment for pcre2_real_match_data above.) */ PCRE2_SPTR eptr; /* MUST BE FIRST */ PCRE2_SPTR start_match; /* Can be adjusted by \K */ PCRE2_SPTR mark; /* Most recent mark on the success path */ uint32_t current_recurse; /* Current (deepest) recursion number */ uint32_t capture_last; /* Most recent capture */ PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */ PCRE2_SIZE offset_top; /* Offset after highest capture */ PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } heapframe; /* This typedef is a check that the size of the heapframe structure is a multiple of PCRE2_SIZE. See various comments above. */ typedef char check_heapframe_size[ ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; /* Structure for passing "static" information around between the functions doing traditional NFA matching (pcre2_match() and friends). */ typedef struct match_block { pcre2_memctl memctl; /* For general use */ PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */ heapframe *match_frames; /* Points to vector of frames */ heapframe *match_frames_top; /* Points after the end of the vector */ heapframe *stack_frames; /* The original vector on the stack */ PCRE2_SIZE heap_limit; /* As it says */ uint32_t match_limit; /* As it says */ uint32_t match_limit_depth; /* As it says */ uint32_t match_call_count; /* Number of times a new frame is created */ BOOL hitend; /* Hit the end of the subject at some point */ BOOL hasthen; /* Pattern contains (*THEN) */ BOOL allowemptypartial; /* Allow empty hard partial */ const uint8_t *lcc; /* Points to lower casing table */ const uint8_t *fcc; /* Points to case-flipping table */ const uint8_t *ctypes; /* Points to table of type maps */ PCRE2_SIZE start_offset; /* The start offset value */ PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */ uint16_t partial; /* PARTIAL options */ uint16_t bsr_convention; /* \R interpretation */ uint16_t name_count; /* Number of names in name table */ uint16_t name_entry_size; /* Size of entry in names table */ PCRE2_SPTR name_table; /* Table of group names */ PCRE2_SPTR start_code; /* For use when recursing */ PCRE2_SPTR start_subject; /* Start of the subject string */ PCRE2_SPTR check_subject; /* Where UTF-checked from */ PCRE2_SPTR end_subject; /* End of the subject string */ PCRE2_SPTR end_match_ptr; /* Subject position at end match */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ PCRE2_SPTR last_used_ptr; /* Latest consulted character */ PCRE2_SPTR mark; /* Mark pointer to pass back on success */ PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ PCRE2_SPTR verb_ecode_ptr; /* For passing back info */ PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */ uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */ uint32_t moptions; /* Match options */ uint32_t poptions; /* Pattern options */ uint32_t skip_arg_count; /* For counting SKIP_ARGs */ uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */ uint32_t nltype; /* Newline type */ uint32_t nllen; /* Newline string length */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */ pcre2_callout_block *cb; /* Points to a callout block */ void *callout_data; /* To pass back to callouts */ int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ } match_block; /* A similar structure is used for the same purpose by the DFA matching functions. */ typedef struct dfa_match_block { pcre2_memctl memctl; /* For general use */ PCRE2_SPTR start_code; /* Start of the compiled pattern */ PCRE2_SPTR start_subject ; /* Start of the subject string */ PCRE2_SPTR end_subject; /* End of subject string */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ PCRE2_SPTR last_used_ptr; /* Latest consulted character */ const uint8_t *tables; /* Character tables */ PCRE2_SIZE start_offset; /* The start offset value */ PCRE2_SIZE heap_limit; /* As it says */ PCRE2_SIZE heap_used; /* As it says */ uint32_t match_limit; /* As it says */ uint32_t match_limit_depth; /* As it says */ uint32_t match_call_count; /* Number of calls of internal function */ uint32_t moptions; /* Match options */ uint32_t poptions; /* Pattern options */ uint32_t nltype; /* Newline type */ uint32_t nllen; /* Newline string length */ BOOL allowemptypartial; /* Allow empty hard partial */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */ uint16_t bsr_convention; /* \R interpretation */ pcre2_callout_block *cb; /* Points to a callout block */ void *callout_data; /* To pass back to callouts */ int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ dfa_recursion_info *recursive; /* Linked list of recursion data */ } dfa_match_block; #endif /* PCRE2_PCRE2TEST */ /* End of pcre2_intmodedep.h */ pcre2_jit_neon_inc.h 0000644 00000020234 15004461075 0010451 0 ustar 00 /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ /* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel This module by Zoltan Herczeg and Sebastian Pop Original API code Copyright (c) 1997-2012 University of Cambridge New API code Copyright (c) 2016-2019 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ # if defined(FFCS) # if defined(FF_UTF) # define FF_FUN ffcs_utf # else # define FF_FUN ffcs # endif # elif defined(FFCS_2) # if defined(FF_UTF) # define FF_FUN ffcs_2_utf # else # define FF_FUN ffcs_2 # endif # elif defined(FFCS_MASK) # if defined(FF_UTF) # define FF_FUN ffcs_mask_utf # else # define FF_FUN ffcs_mask # endif # elif defined(FFCPS_0) # if defined (FF_UTF) # define FF_FUN ffcps_0_utf # else # define FF_FUN ffcps_0 # endif # elif defined (FFCPS_1) # if defined (FF_UTF) # define FF_FUN ffcps_1_utf # else # define FF_FUN ffcps_1 # endif # elif defined (FFCPS_DEFAULT) # if defined (FF_UTF) # define FF_FUN ffcps_default_utf # else # define FF_FUN ffcps_default # endif # endif static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars) #undef FF_FUN { quad_word qw; int_char ic; ic.x = chars; #if defined(FFCS) sljit_u8 c1 = ic.c.c1; vect_t vc1 = VDUPQ(c1); #elif defined(FFCS_2) sljit_u8 c1 = ic.c.c1; vect_t vc1 = VDUPQ(c1); sljit_u8 c2 = ic.c.c2; vect_t vc2 = VDUPQ(c2); #elif defined(FFCS_MASK) sljit_u8 c1 = ic.c.c1; vect_t vc1 = VDUPQ(c1); sljit_u8 mask = ic.c.c2; vect_t vmask = VDUPQ(mask); #endif #if defined(FFCPS) compare_type compare1_type = compare_match1; compare_type compare2_type = compare_match1; vect_t cmp1a, cmp1b, cmp2a, cmp2b; const sljit_u32 diff = IN_UCHARS(offs1 - offs2); PCRE2_UCHAR char1a = ic.c.c1; PCRE2_UCHAR char2a = ic.c.c3; # ifdef FFCPS_CHAR1A2A cmp1a = VDUPQ(char1a); cmp2a = VDUPQ(char2a); cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ # else PCRE2_UCHAR char1b = ic.c.c2; PCRE2_UCHAR char2b = ic.c.c4; if (char1a == char1b) { cmp1a = VDUPQ(char1a); cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ } else { sljit_u32 bit1 = char1a ^ char1b; if (is_powerof2(bit1)) { compare1_type = compare_match1i; cmp1a = VDUPQ(char1a | bit1); cmp1b = VDUPQ(bit1); } else { compare1_type = compare_match2; cmp1a = VDUPQ(char1a); cmp1b = VDUPQ(char1b); } } if (char2a == char2b) { cmp2a = VDUPQ(char2a); cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ } else { sljit_u32 bit2 = char2a ^ char2b; if (is_powerof2(bit2)) { compare2_type = compare_match1i; cmp2a = VDUPQ(char2a | bit2); cmp2b = VDUPQ(bit2); } else { compare2_type = compare_match2; cmp2a = VDUPQ(char2a); cmp2b = VDUPQ(char2b); } } # endif str_ptr += IN_UCHARS(offs1); #endif #if PCRE2_CODE_UNIT_WIDTH != 8 vect_t char_mask = VDUPQ(0xff); #endif #if defined(FF_UTF) restart:; #endif #if defined(FFCPS) sljit_u8 *p1 = str_ptr - diff; #endif sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf); str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf); vect_t data = VLD1Q(str_ptr); #if PCRE2_CODE_UNIT_WIDTH != 8 data = VANDQ(data, char_mask); #endif #if defined(FFCS) vect_t eq = VCEQQ(data, vc1); #elif defined(FFCS_2) vect_t eq1 = VCEQQ(data, vc1); vect_t eq2 = VCEQQ(data, vc2); vect_t eq = VORRQ(eq1, eq2); #elif defined(FFCS_MASK) vect_t eq = VORRQ(data, vmask); eq = VCEQQ(eq, vc1); #elif defined(FFCPS) # if defined(FFCPS_DIFF1) vect_t prev_data = data; # endif vect_t data2; if (p1 < str_ptr) { data2 = VLD1Q(str_ptr - diff); #if PCRE2_CODE_UNIT_WIDTH != 8 data2 = VANDQ(data2, char_mask); #endif } else data2 = shift_left_n_lanes(data, offs1 - offs2); if (compare1_type == compare_match1) data = VCEQQ(data, cmp1a); else data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b); if (compare2_type == compare_match1) data2 = VCEQQ(data2, cmp2a); else data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b); vect_t eq = VANDQ(data, data2); #endif VST1Q(qw.mem, eq); /* Ignore matches before the first STR_PTR. */ if (align_offset < 8) { qw.dw[0] >>= align_offset * 8; if (qw.dw[0]) { str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8; goto match; } if (qw.dw[1]) { str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8; goto match; } } else { qw.dw[1] >>= (align_offset - 8) * 8; if (qw.dw[1]) { str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8; goto match; } } str_ptr += 16; while (str_ptr < str_end) { vect_t orig_data = VLD1Q(str_ptr); #if PCRE2_CODE_UNIT_WIDTH != 8 orig_data = VANDQ(orig_data, char_mask); #endif data = orig_data; #if defined(FFCS) eq = VCEQQ(data, vc1); #elif defined(FFCS_2) eq1 = VCEQQ(data, vc1); eq2 = VCEQQ(data, vc2); eq = VORRQ(eq1, eq2); #elif defined(FFCS_MASK) eq = VORRQ(data, vmask); eq = VCEQQ(eq, vc1); #endif #if defined(FFCPS) # if defined (FFCPS_DIFF1) data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1); # else data2 = VLD1Q(str_ptr - diff); # if PCRE2_CODE_UNIT_WIDTH != 8 data2 = VANDQ(data2, char_mask); # endif # endif # ifdef FFCPS_CHAR1A2A data = VCEQQ(data, cmp1a); data2 = VCEQQ(data2, cmp2a); # else if (compare1_type == compare_match1) data = VCEQQ(data, cmp1a); else data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b); if (compare2_type == compare_match1) data2 = VCEQQ(data2, cmp2a); else data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b); # endif eq = VANDQ(data, data2); #endif VST1Q(qw.mem, eq); if (qw.dw[0]) str_ptr += __builtin_ctzll(qw.dw[0]) / 8; else if (qw.dw[1]) str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8; else { str_ptr += 16; #if defined (FFCPS_DIFF1) prev_data = orig_data; #endif continue; } match:; if (str_ptr >= str_end) /* Failed match. */ return NULL; #if defined(FF_UTF) if (utf_continue(str_ptr + IN_UCHARS(-offs1))) { /* Not a match. */ str_ptr += IN_UCHARS(1); goto restart; } #endif /* Match. */ #if defined (FFCPS) str_ptr -= IN_UCHARS(offs1); #endif return str_ptr; } /* Failed match. */ return NULL; } pcre2_jit_simd_inc.h 0000644 00000102062 15004461075 0010446 0 ustar 00 /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ /* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge New API code Copyright (c) 2016-2019 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) { #if PCRE2_CODE_UNIT_WIDTH == 8 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); #elif PCRE2_CODE_UNIT_WIDTH == 16 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); #else #error "Unknown code width" #endif } #endif static sljit_s32 character_to_int32(PCRE2_UCHAR chr) { sljit_u32 value = chr; #if PCRE2_CODE_UNIT_WIDTH == 8 #define SSE2_COMPARE_TYPE_INDEX 0 return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); #elif PCRE2_CODE_UNIT_WIDTH == 16 #define SSE2_COMPARE_TYPE_INDEX 1 return (sljit_s32)((value << 16) | value); #elif PCRE2_CODE_UNIT_WIDTH == 32 #define SSE2_COMPARE_TYPE_INDEX 2 return (sljit_s32)(value); #else #error "Unsupported unit width" #endif } static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset) { sljit_u8 instruction[5]; SLJIT_ASSERT(dst_xmm_reg < 8); SLJIT_ASSERT(src_general_reg < 8); /* MOVDQA xmm1, xmm2/m128 */ instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3; instruction[1] = 0x0f; instruction[2] = 0x6f; if (offset == 0) { instruction[3] = (dst_xmm_reg << 3) | src_general_reg; sljit_emit_op_custom(compiler, instruction, 4); return; } instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg; instruction[4] = (sljit_u8)offset; sljit_emit_op_custom(compiler, instruction, 5); } typedef enum { sse2_compare_match1, sse2_compare_match1i, sse2_compare_match2, } sse2_compare_type; static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) { sljit_u8 instruction[4]; instruction[0] = 0x66; instruction[1] = 0x0f; SLJIT_ASSERT(step >= 0 && step <= 3); if (compare_type != sse2_compare_match2) { if (step == 0) { if (compare_type == sse2_compare_match1i) { /* POR xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xeb; instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; sljit_emit_op_custom(compiler, instruction, 4); } return; } if (step != 2) return; /* PCMPEQB/W/D xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; sljit_emit_op_custom(compiler, instruction, 4); return; } switch (step) { case 0: /* MOVDQA xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x6f; instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; sljit_emit_op_custom(compiler, instruction, 4); return; case 1: /* PCMPEQB/W/D xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; sljit_emit_op_custom(compiler, instruction, 4); return; case 2: /* PCMPEQB/W/D xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; sljit_emit_op_custom(compiler, instruction, 4); return; case 3: /* POR xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xeb; instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; sljit_emit_op_custom(compiler, instruction, 4); return; } } #define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { DEFINE_COMPILER; struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; #endif struct sljit_jump *quit; struct sljit_jump *partial_quit[2]; sse2_compare_type compare_type = sse2_compare_match1; sljit_u8 instruction[8]; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; sljit_s32 tmp_ind = 1; sljit_s32 cmp1_ind = 2; sljit_s32 cmp2_ind = 3; sljit_u32 bit = 0; int i; SLJIT_UNUSED_ARG(offset); if (char1 != char2) { bit = char1 ^ char2; compare_type = sse2_compare_match1i; if (!is_powerof2(bit)) { bit = 0; compare_type = sse2_compare_match2; } } partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, &common->failed_match, partial_quit[0]); /* First part (unaligned start) */ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); SLJIT_ASSERT(tmp1_reg_ind < 8); /* MOVD xmm, r/m32 */ instruction[0] = 0x66; instruction[1] = 0x0f; instruction[2] = 0x6e; instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); if (char1 != char2) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); /* MOVD xmm, r/m32 */ instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); } OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); /* PSHUFD xmm1, xmm2/m128, imm8 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x70; instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; instruction[4] = 0; sljit_emit_op_custom(compiler, instruction, 5); if (char1 != char2) { /* PSHUFD xmm1, xmm2/m128, imm8 */ instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; sljit_emit_op_custom(compiler, instruction, 5); } #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 restart = LABEL(); #endif OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); /* PMOVMSKB reg, xmm */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xd7; instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; sljit_emit_op_custom(compiler, instruction, 4); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Second part (aligned) */ start = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, &common->failed_match, partial_quit[1]); load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); /* PMOVMSKB reg, xmm */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xd7; instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; sljit_emit_op_custom(compiler, instruction, 4); CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); JUMPHERE(quit); /* BSF r32, r/m32 */ instruction[0] = 0x0f; instruction[1] = 0xbc; instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 3); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); if (common->mode != PCRE2_JIT_COMPLETE) { JUMPHERE(partial_quit[0]); JUMPHERE(partial_quit[1]); OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) { SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); quit = jump_if_utf_char_start(compiler, TMP1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); JUMPTO(SLJIT_JUMP, restart); JUMPHERE(quit); } #endif } #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) { DEFINE_COMPILER; struct sljit_label *start; struct sljit_jump *quit; jump_list *not_found = NULL; sse2_compare_type compare_type = sse2_compare_match1; sljit_u8 instruction[8]; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; sljit_s32 tmp_ind = 1; sljit_s32 cmp1_ind = 2; sljit_s32 cmp2_ind = 3; sljit_u32 bit = 0; int i; if (char1 != char2) { bit = char1 ^ char2; compare_type = sse2_compare_match1i; if (!is_powerof2(bit)) { bit = 0; compare_type = sse2_compare_match2; } } add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* First part (unaligned start) */ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); SLJIT_ASSERT(tmp1_reg_ind < 8); /* MOVD xmm, r/m32 */ instruction[0] = 0x66; instruction[1] = 0x0f; instruction[2] = 0x6e; instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); if (char1 != char2) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); /* MOVD xmm, r/m32 */ instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); } OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); /* PSHUFD xmm1, xmm2/m128, imm8 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x70; instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; instruction[4] = 0; sljit_emit_op_custom(compiler, instruction, 5); if (char1 != char2) { /* PSHUFD xmm1, xmm2/m128, imm8 */ instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; sljit_emit_op_custom(compiler, instruction, 5); } OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); /* PMOVMSKB reg, xmm */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xd7; instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; sljit_emit_op_custom(compiler, instruction, 4); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Second part (aligned) */ start = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); /* PMOVMSKB reg, xmm */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xd7; instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; sljit_emit_op_custom(compiler, instruction, 4); CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); JUMPHERE(quit); /* BSF r32, r/m32 */ instruction[0] = 0x0f; instruction[1] = 0xbc; instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 3); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); return not_found; } #ifndef _WIN64 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) { #if PCRE2_CODE_UNIT_WIDTH == 8 return 15; #elif PCRE2_CODE_UNIT_WIDTH == 16 return 7; #elif PCRE2_CODE_UNIT_WIDTH == 32 return 3; #else #error "Unsupported unit width" #endif } #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) { DEFINE_COMPILER; sse2_compare_type compare1_type = sse2_compare_match1; sse2_compare_type compare2_type = sse2_compare_match1; sljit_u32 bit1 = 0; sljit_u32 bit2 = 0; sljit_u32 diff = IN_UCHARS(offs1 - offs2); sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data1_ind = 0; sljit_s32 data2_ind = 1; sljit_s32 tmp1_ind = 2; sljit_s32 tmp2_ind = 3; sljit_s32 cmp1a_ind = 4; sljit_s32 cmp1b_ind = 5; sljit_s32 cmp2a_ind = 6; sljit_s32 cmp2b_ind = 7; struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; #endif struct sljit_jump *jump[2]; sljit_u8 instruction[8]; int i; SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1); /* Initialize. */ if (common->match_end_ptr != 0) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); CMOV(SLJIT_LESS, STR_END, TMP1, 0); } OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); /* MOVD xmm, r/m32 */ instruction[0] = 0x66; instruction[1] = 0x0f; instruction[2] = 0x6e; if (char1a == char1b) OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); else { bit1 = char1a ^ char1b; if (is_powerof2(bit1)) { compare1_type = sse2_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); } else { compare1_type = sse2_compare_match2; bit1 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); } } instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); if (char1a != char1b) { instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); } if (char2a == char2b) OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); else { bit2 = char2a ^ char2b; if (is_powerof2(bit2)) { compare2_type = sse2_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); } else { compare2_type = sse2_compare_match2; bit2 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); } } instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); if (char2a != char2b) { instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind; sljit_emit_op_custom(compiler, instruction, 4); } /* PSHUFD xmm1, xmm2/m128, imm8 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x70; instruction[4] = 0; instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind; sljit_emit_op_custom(compiler, instruction, 5); if (char1a != char1b) { instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind; sljit_emit_op_custom(compiler, instruction, 5); } instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind; sljit_emit_op_custom(compiler, instruction, 5); if (char2a != char2b) { instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind; sljit_emit_op_custom(compiler, instruction, 5); } #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 restart = LABEL(); #endif OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff); OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); jump[1] = JUMP(SLJIT_JUMP); JUMPHERE(jump[0]); /* MOVDQA xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x6f; instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; sljit_emit_op_custom(compiler, instruction, 4); /* PSLLDQ xmm1, imm8 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0x73; instruction[3] = 0xc0 | (7 << 3) | data2_ind; instruction[4] = diff; sljit_emit_op_custom(compiler, instruction, 5); JUMPHERE(jump[1]); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); for (i = 0; i < 4; i++) { fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); } /* PAND xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xdb; instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; sljit_emit_op_custom(compiler, instruction, 4); /* PMOVMSKB reg, xmm */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xd7; instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; sljit_emit_op_custom(compiler, instruction, 4); /* Ignore matches before the first STR_PTR. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Main loop. */ start = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); for (i = 0; i < 4; i++) { fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind); fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); } /* PAND xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xdb; instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; sljit_emit_op_custom(compiler, instruction, 4); /* PMOVMSKB reg, xmm */ /* instruction[0] = 0x66; */ /* instruction[1] = 0x0f; */ instruction[2] = 0xd7; instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; sljit_emit_op_custom(compiler, instruction, 4); CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); JUMPHERE(jump[0]); /* BSF r32, r/m32 */ instruction[0] = 0x0f; instruction[1] = 0xbc; instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; sljit_emit_op_custom(compiler, instruction, 3); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) { OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); jump[0] = jump_if_utf_char_start(compiler, TMP1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); JUMPHERE(jump[0]); } #endif OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); } #endif /* !_WIN64 */ #undef SSE2_COMPARE_TYPE_INDEX #endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) #include <arm_neon.h> typedef union { unsigned int x; struct { unsigned char c1, c2, c3, c4; } c; } int_char; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 static SLJIT_INLINE int utf_continue(sljit_u8 *s) { #if PCRE2_CODE_UNIT_WIDTH == 8 return (*s & 0xc0) == 0x80; #elif PCRE2_CODE_UNIT_WIDTH == 16 return (*s & 0xfc00) == 0xdc00; #else #error "Unknown code width" #endif } #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ #if PCRE2_CODE_UNIT_WIDTH == 8 # define VECTOR_FACTOR 16 # define vect_t uint8x16_t # define VLD1Q(X) vld1q_u8((sljit_u8 *)(X)) # define VCEQQ vceqq_u8 # define VORRQ vorrq_u8 # define VST1Q vst1q_u8 # define VDUPQ vdupq_n_u8 # define VEXTQ vextq_u8 # define VANDQ vandq_u8 typedef union { uint8_t mem[16]; uint64_t dw[2]; } quad_word; #elif PCRE2_CODE_UNIT_WIDTH == 16 # define VECTOR_FACTOR 8 # define vect_t uint16x8_t # define VLD1Q(X) vld1q_u16((sljit_u16 *)(X)) # define VCEQQ vceqq_u16 # define VORRQ vorrq_u16 # define VST1Q vst1q_u16 # define VDUPQ vdupq_n_u16 # define VEXTQ vextq_u16 # define VANDQ vandq_u16 typedef union { uint16_t mem[8]; uint64_t dw[2]; } quad_word; #else # define VECTOR_FACTOR 4 # define vect_t uint32x4_t # define VLD1Q(X) vld1q_u32((sljit_u32 *)(X)) # define VCEQQ vceqq_u32 # define VORRQ vorrq_u32 # define VST1Q vst1q_u32 # define VDUPQ vdupq_n_u32 # define VEXTQ vextq_u32 # define VANDQ vandq_u32 typedef union { uint32_t mem[4]; uint64_t dw[2]; } quad_word; #endif #define FFCS #include "pcre2_jit_neon_inc.h" #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 # define FF_UTF # include "pcre2_jit_neon_inc.h" # undef FF_UTF #endif #undef FFCS #define FFCS_2 #include "pcre2_jit_neon_inc.h" #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 # define FF_UTF # include "pcre2_jit_neon_inc.h" # undef FF_UTF #endif #undef FFCS_2 #define FFCS_MASK #include "pcre2_jit_neon_inc.h" #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 # define FF_UTF # include "pcre2_jit_neon_inc.h" # undef FF_UTF #endif #undef FFCS_MASK #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { DEFINE_COMPILER; int_char ic; struct sljit_jump *partial_quit; /* Save temporary registers. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0); /* Prepare function arguments */ OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset); if (char1 == char2) { ic.c.c1 = char1; ic.c.c2 = char2; OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf)); else sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); #else sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); #endif } else { PCRE2_UCHAR mask = char1 ^ char2; if (is_powerof2(mask)) { ic.c.c1 = char1 | mask; ic.c.c2 = mask; OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf)); else sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); #else sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); #endif } else { ic.c.c1 = char1; ic.c.c2 = char2; OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf)); else sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); #else sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); #endif } } /* Restore registers. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); /* Check return value. */ partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, &common->failed_match, partial_quit); /* Fast forward STR_PTR to the result of memchr. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); if (common->mode != PCRE2_JIT_COMPLETE) JUMPHERE(partial_quit); } typedef enum { compare_match1, compare_match1i, compare_match2, } compare_type; static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2) { if (ctype == compare_match2) { vect_t tmp = dst; dst = VCEQQ(dst, cmp1); tmp = VCEQQ(tmp, cmp2); dst = VORRQ(dst, tmp); return dst; } if (ctype == compare_match1i) dst = VORRQ(dst, cmp2); dst = VCEQQ(dst, cmp1); return dst; } static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) { #if PCRE2_CODE_UNIT_WIDTH == 8 return 15; #elif PCRE2_CODE_UNIT_WIDTH == 16 return 7; #elif PCRE2_CODE_UNIT_WIDTH == 32 return 3; #else #error "Unsupported unit width" #endif } /* ARM doesn't have a shift left across lanes. */ static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n) { vect_t zero = VDUPQ(0); SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR); /* VEXTQ takes an immediate as last argument. */ #define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X); switch (n) { C(1); C(2); C(3); #if PCRE2_CODE_UNIT_WIDTH != 32 C(4); C(5); C(6); C(7); # if PCRE2_CODE_UNIT_WIDTH != 16 C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15); # endif #endif default: /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't happen. The return is still here for compilers to not warn. */ return a; } } #define FFCPS #define FFCPS_DIFF1 #define FFCPS_CHAR1A2A #define FFCPS_0 #include "pcre2_jit_neon_inc.h" #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 # define FF_UTF # include "pcre2_jit_neon_inc.h" # undef FF_UTF #endif #undef FFCPS_0 #undef FFCPS_CHAR1A2A #define FFCPS_1 #include "pcre2_jit_neon_inc.h" #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 # define FF_UTF # include "pcre2_jit_neon_inc.h" # undef FF_UTF #endif #undef FFCPS_1 #undef FFCPS_DIFF1 #define FFCPS_DEFAULT #include "pcre2_jit_neon_inc.h" #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 # define FF_UTF # include "pcre2_jit_neon_inc.h" # undef FF_UTF #endif #undef FFCPS #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) { DEFINE_COMPILER; sljit_u32 diff = IN_UCHARS(offs1 - offs2); struct sljit_jump *partial_quit; int_char ic; SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); SLJIT_ASSERT(compiler->scratches == 5); /* Save temporary register STR_PTR. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); /* Prepare arguments for the function call. */ if (common->match_end_ptr == 0) OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); else { OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0); CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0); } OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1); OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2); ic.c.c1 = char1a; ic.c.c2 = char1b; ic.c.c3 = char2a; ic.c.c4 = char2b; OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x); if (diff == 1) { if (char1a == char1b && char2a == char2b) { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf)); else #endif sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0)); } else { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf)); else #endif sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1)); } } else { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf)); else #endif sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default)); } /* Restore STR_PTR register. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* Check return value. */ partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); add_jump(compiler, &common->failed_match, partial_quit); /* Fast forward STR_PTR to the result of memchr. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); JUMPHERE(partial_quit); } #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ pcre2_ucp.h 0000644 00000017545 15004461075 0006615 0 ustar 00 /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ /* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD #define PCRE2_UCP_H_IDEMPOTENT_GUARD /* This file contains definitions of the property values that are returned by the UCD access macros. New values that are added for new releases of Unicode should always be at the end of each enum, for backwards compatibility. IMPORTANT: Note also that the specific numeric values of the enums have to be the same as the values that are generated by the maint/MultiStage2.py script, where the equivalent property descriptive names are listed in vectors. ALSO: The specific values of the first two enums are assumed for the table called catposstab in pcre2_compile.c. */ /* These are the general character categories. */ enum { ucp_C, /* Other */ ucp_L, /* Letter */ ucp_M, /* Mark */ ucp_N, /* Number */ ucp_P, /* Punctuation */ ucp_S, /* Symbol */ ucp_Z /* Separator */ }; /* These are the particular character categories. */ enum { ucp_Cc, /* Control */ ucp_Cf, /* Format */ ucp_Cn, /* Unassigned */ ucp_Co, /* Private use */ ucp_Cs, /* Surrogate */ ucp_Ll, /* Lower case letter */ ucp_Lm, /* Modifier letter */ ucp_Lo, /* Other letter */ ucp_Lt, /* Title case letter */ ucp_Lu, /* Upper case letter */ ucp_Mc, /* Spacing mark */ ucp_Me, /* Enclosing mark */ ucp_Mn, /* Non-spacing mark */ ucp_Nd, /* Decimal number */ ucp_Nl, /* Letter number */ ucp_No, /* Other number */ ucp_Pc, /* Connector punctuation */ ucp_Pd, /* Dash punctuation */ ucp_Pe, /* Close punctuation */ ucp_Pf, /* Final punctuation */ ucp_Pi, /* Initial punctuation */ ucp_Po, /* Other punctuation */ ucp_Ps, /* Open punctuation */ ucp_Sc, /* Currency symbol */ ucp_Sk, /* Modifier symbol */ ucp_Sm, /* Mathematical symbol */ ucp_So, /* Other symbol */ ucp_Zl, /* Line separator */ ucp_Zp, /* Paragraph separator */ ucp_Zs /* Space separator */ }; /* These are grapheme break properties. The Extended Pictographic property comes from the emoji-data.txt file. */ enum { ucp_gbCR, /* 0 */ ucp_gbLF, /* 1 */ ucp_gbControl, /* 2 */ ucp_gbExtend, /* 3 */ ucp_gbPrepend, /* 4 */ ucp_gbSpacingMark, /* 5 */ ucp_gbL, /* 6 Hangul syllable type L */ ucp_gbV, /* 7 Hangul syllable type V */ ucp_gbT, /* 8 Hangul syllable type T */ ucp_gbLV, /* 9 Hangul syllable type LV */ ucp_gbLVT, /* 10 Hangul syllable type LVT */ ucp_gbRegionalIndicator, /* 11 */ ucp_gbOther, /* 12 */ ucp_gbZWJ, /* 13 */ ucp_gbExtended_Pictographic /* 14 */ }; /* These are the script identifications. */ enum { ucp_Unknown, ucp_Arabic, ucp_Armenian, ucp_Bengali, ucp_Bopomofo, ucp_Braille, ucp_Buginese, ucp_Buhid, ucp_Canadian_Aboriginal, ucp_Cherokee, ucp_Common, ucp_Coptic, ucp_Cypriot, ucp_Cyrillic, ucp_Deseret, ucp_Devanagari, ucp_Ethiopic, ucp_Georgian, ucp_Glagolitic, ucp_Gothic, ucp_Greek, ucp_Gujarati, ucp_Gurmukhi, ucp_Han, ucp_Hangul, ucp_Hanunoo, ucp_Hebrew, ucp_Hiragana, ucp_Inherited, ucp_Kannada, ucp_Katakana, ucp_Kharoshthi, ucp_Khmer, ucp_Lao, ucp_Latin, ucp_Limbu, ucp_Linear_B, ucp_Malayalam, ucp_Mongolian, ucp_Myanmar, ucp_New_Tai_Lue, ucp_Ogham, ucp_Old_Italic, ucp_Old_Persian, ucp_Oriya, ucp_Osmanya, ucp_Runic, ucp_Shavian, ucp_Sinhala, ucp_Syloti_Nagri, ucp_Syriac, ucp_Tagalog, ucp_Tagbanwa, ucp_Tai_Le, ucp_Tamil, ucp_Telugu, ucp_Thaana, ucp_Thai, ucp_Tibetan, ucp_Tifinagh, ucp_Ugaritic, ucp_Yi, /* New for Unicode 5.0 */ ucp_Balinese, ucp_Cuneiform, ucp_Nko, ucp_Phags_Pa, ucp_Phoenician, /* New for Unicode 5.1 */ ucp_Carian, ucp_Cham, ucp_Kayah_Li, ucp_Lepcha, ucp_Lycian, ucp_Lydian, ucp_Ol_Chiki, ucp_Rejang, ucp_Saurashtra, ucp_Sundanese, ucp_Vai, /* New for Unicode 5.2 */ ucp_Avestan, ucp_Bamum, ucp_Egyptian_Hieroglyphs, ucp_Imperial_Aramaic, ucp_Inscriptional_Pahlavi, ucp_Inscriptional_Parthian, ucp_Javanese, ucp_Kaithi, ucp_Lisu, ucp_Meetei_Mayek, ucp_Old_South_Arabian, ucp_Old_Turkic, ucp_Samaritan, ucp_Tai_Tham, ucp_Tai_Viet, /* New for Unicode 6.0.0 */ ucp_Batak, ucp_Brahmi, ucp_Mandaic, /* New for Unicode 6.1.0 */ ucp_Chakma, ucp_Meroitic_Cursive, ucp_Meroitic_Hieroglyphs, ucp_Miao, ucp_Sharada, ucp_Sora_Sompeng, ucp_Takri, /* New for Unicode 7.0.0 */ ucp_Bassa_Vah, ucp_Caucasian_Albanian, ucp_Duployan, ucp_Elbasan, ucp_Grantha, ucp_Khojki, ucp_Khudawadi, ucp_Linear_A, ucp_Mahajani, ucp_Manichaean, ucp_Mende_Kikakui, ucp_Modi, ucp_Mro, ucp_Nabataean, ucp_Old_North_Arabian, ucp_Old_Permic, ucp_Pahawh_Hmong, ucp_Palmyrene, ucp_Psalter_Pahlavi, ucp_Pau_Cin_Hau, ucp_Siddham, ucp_Tirhuta, ucp_Warang_Citi, /* New for Unicode 8.0.0 */ ucp_Ahom, ucp_Anatolian_Hieroglyphs, ucp_Hatran, ucp_Multani, ucp_Old_Hungarian, ucp_SignWriting, /* New for Unicode 10.0.0 (no update since 8.0.0) */ ucp_Adlam, ucp_Bhaiksuki, ucp_Marchen, ucp_Newa, ucp_Osage, ucp_Tangut, ucp_Masaram_Gondi, ucp_Nushu, ucp_Soyombo, ucp_Zanabazar_Square, /* New for Unicode 11.0.0 */ ucp_Dogra, ucp_Gunjala_Gondi, ucp_Hanifi_Rohingya, ucp_Makasar, ucp_Medefaidrin, ucp_Old_Sogdian, ucp_Sogdian, /* New for Unicode 12.0.0 */ ucp_Elymaic, ucp_Nandinagari, ucp_Nyiakeng_Puachue_Hmong, ucp_Wancho, /* New for Unicode 13.0.0 */ ucp_Chorasmian, ucp_Dives_Akuru, ucp_Khitan_Small_Script, ucp_Yezidi }; #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ /* End of pcre2_ucp.h */
| ver. 1.4 |
Github
|
.
| PHP 8.1.31 | Генерация страницы: 0.01 |
proxy
|
phpinfo
|
Настройка