1 | /************************************************* |
---|
2 | * Perl-Compatible Regular Expressions * |
---|
3 | *************************************************/ |
---|
4 | |
---|
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
---|
6 | and semantics are as close as possible to those of the Perl 5 language. |
---|
7 | |
---|
8 | Written by Philip Hazel |
---|
9 | Copyright (c) 1997-2008 University of Cambridge |
---|
10 | |
---|
11 | ----------------------------------------------------------------------------- |
---|
12 | Redistribution and use in source and binary forms, with or without |
---|
13 | modification, are permitted provided that the following conditions are met: |
---|
14 | |
---|
15 | * Redistributions of source code must retain the above copyright notice, |
---|
16 | this list of conditions and the following disclaimer. |
---|
17 | |
---|
18 | * Redistributions in binary form must reproduce the above copyright |
---|
19 | notice, this list of conditions and the following disclaimer in the |
---|
20 | documentation and/or other materials provided with the distribution. |
---|
21 | |
---|
22 | * Neither the name of the University of Cambridge nor the names of its |
---|
23 | contributors may be used to endorse or promote products derived from |
---|
24 | this software without specific prior written permission. |
---|
25 | |
---|
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
---|
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
---|
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
---|
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
---|
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
---|
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
---|
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
---|
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
---|
36 | POSSIBILITY OF SUCH DAMAGE. |
---|
37 | ----------------------------------------------------------------------------- |
---|
38 | */ |
---|
39 | |
---|
40 | |
---|
41 | /* This module contains internal functions for testing newlines when more than |
---|
42 | one kind of newline is to be recognized. When a newline is found, its length is |
---|
43 | returned. In principle, we could implement several newline "types", each |
---|
44 | referring to a different set of newline characters. At present, PCRE supports |
---|
45 | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, |
---|
46 | and NLTYPE_ANY. The full list of Unicode newline characters is taken from |
---|
47 | http://unicode.org/unicode/reports/tr18/. */ |
---|
48 | |
---|
49 | |
---|
50 | #include "pcre_config.h" |
---|
51 | #include "pcre_internal.h" |
---|
52 | |
---|
53 | |
---|
54 | |
---|
55 | /************************************************* |
---|
56 | * Check for newline at given position * |
---|
57 | *************************************************/ |
---|
58 | |
---|
59 | /* It is guaranteed that the initial value of ptr is less than the end of the |
---|
60 | string that is being processed. |
---|
61 | |
---|
62 | Arguments: |
---|
63 | ptr pointer to possible newline |
---|
64 | type the newline type |
---|
65 | endptr pointer to the end of the string |
---|
66 | lenptr where to return the length |
---|
67 | utf8 TRUE if in utf8 mode |
---|
68 | |
---|
69 | Returns: TRUE or FALSE |
---|
70 | */ |
---|
71 | |
---|
72 | BOOL |
---|
73 | _pcre_is_newline(const uschar *ptr, int type, const uschar *endptr, |
---|
74 | int *lenptr, BOOL utf8) |
---|
75 | { |
---|
76 | int c; |
---|
77 | if (utf8) { GETCHAR(c, ptr); } else c = *ptr; |
---|
78 | |
---|
79 | if (type == NLTYPE_ANYCRLF) switch(c) |
---|
80 | { |
---|
81 | case 0x000a: *lenptr = 1; return TRUE; /* LF */ |
---|
82 | case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; |
---|
83 | return TRUE; /* CR */ |
---|
84 | default: return FALSE; |
---|
85 | } |
---|
86 | |
---|
87 | /* NLTYPE_ANY */ |
---|
88 | |
---|
89 | else switch(c) |
---|
90 | { |
---|
91 | case 0x000a: /* LF */ |
---|
92 | case 0x000b: /* VT */ |
---|
93 | case 0x000c: *lenptr = 1; return TRUE; /* FF */ |
---|
94 | case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; |
---|
95 | return TRUE; /* CR */ |
---|
96 | case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ |
---|
97 | case 0x2028: /* LS */ |
---|
98 | case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
---|
99 | default: return FALSE; |
---|
100 | } |
---|
101 | } |
---|
102 | |
---|
103 | |
---|
104 | |
---|
105 | /************************************************* |
---|
106 | * Check for newline at previous position * |
---|
107 | *************************************************/ |
---|
108 | |
---|
109 | /* It is guaranteed that the initial value of ptr is greater than the start of |
---|
110 | the string that is being processed. |
---|
111 | |
---|
112 | Arguments: |
---|
113 | ptr pointer to possible newline |
---|
114 | type the newline type |
---|
115 | startptr pointer to the start of the string |
---|
116 | lenptr where to return the length |
---|
117 | utf8 TRUE if in utf8 mode |
---|
118 | |
---|
119 | Returns: TRUE or FALSE |
---|
120 | */ |
---|
121 | |
---|
122 | BOOL |
---|
123 | _pcre_was_newline(const uschar *ptr, int type, const uschar *startptr, |
---|
124 | int *lenptr, BOOL utf8) |
---|
125 | { |
---|
126 | int c; |
---|
127 | ptr--; |
---|
128 | #ifdef SUPPORT_UTF8 |
---|
129 | if (utf8) |
---|
130 | { |
---|
131 | BACKCHAR(ptr); |
---|
132 | GETCHAR(c, ptr); |
---|
133 | } |
---|
134 | else c = *ptr; |
---|
135 | #else /* no UTF-8 support */ |
---|
136 | c = *ptr; |
---|
137 | #endif /* SUPPORT_UTF8 */ |
---|
138 | |
---|
139 | if (type == NLTYPE_ANYCRLF) switch(c) |
---|
140 | { |
---|
141 | case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; |
---|
142 | return TRUE; /* LF */ |
---|
143 | case 0x000d: *lenptr = 1; return TRUE; /* CR */ |
---|
144 | default: return FALSE; |
---|
145 | } |
---|
146 | |
---|
147 | else switch(c) |
---|
148 | { |
---|
149 | case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; |
---|
150 | return TRUE; /* LF */ |
---|
151 | case 0x000b: /* VT */ |
---|
152 | case 0x000c: /* FF */ |
---|
153 | case 0x000d: *lenptr = 1; return TRUE; /* CR */ |
---|
154 | case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ |
---|
155 | case 0x2028: /* LS */ |
---|
156 | case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
---|
157 | default: return FALSE; |
---|
158 | } |
---|
159 | } |
---|
160 | |
---|
161 | /* End of pcre_newline.c */ |
---|