source: XMLIO_V2/external/include/Poco/RegularExpression.h @ 80

Last change on this file since 80 was 80, checked in by ymipsl, 14 years ago

ajout lib externe

  • Property svn:eol-style set to native
File size: 12.4 KB
Line 
1//
2// RegularExpression.h
3//
4// $Id: //poco/1.3/Foundation/include/Poco/RegularExpression.h#2 $
5//
6// Library: Foundation
7// Package: RegExp
8// Module:  RegularExpression
9//
10// Definitions of class RegularExpression.
11//
12// A wrapper class for Philip Hazel's PCRE - Perl Compatible Regular Expressions
13// library (http://www.pcre.org).
14//
15// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
16// and Contributors.
17//
18// Permission is hereby granted, free of charge, to any person or organization
19// obtaining a copy of the software and accompanying documentation covered by
20// this license (the "Software") to use, reproduce, display, distribute,
21// execute, and transmit the Software, and to prepare derivative works of the
22// Software, and to permit third-parties to whom the Software is furnished to
23// do so, all subject to the following:
24//
25// The copyright notices in the Software and this entire statement, including
26// the above license grant, this restriction and the following disclaimer,
27// must be included in all copies of the Software, in whole or in part, and
28// all derivative works of the Software, unless such copies or derivative
29// works are solely in the form of machine-executable object code generated by
30// a source language processor.
31//
32// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
35// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
36// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
37// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
38// DEALINGS IN THE SOFTWARE.
39//
40
41
42#ifndef Foundation_RegularExpression_INCLUDED
43#define Foundation_RegularExpression_INCLUDED
44
45
46#include "Poco/Foundation.h"
47#include <vector>
48
49
//
// Forward declarations of the opaque PCRE types, mirroring pcre.h,
// so that this header does not have to include pcre.h itself.
//
extern "C"
{
	struct pcre_extra;
	struct real_pcre;
	typedef struct real_pcre pcre;
}
60
61
62namespace Poco {
63
64
65class Foundation_API RegularExpression
66        /// A class for working with regular expressions.
67        /// Implemented using PCRE, the Perl Compatible
68        /// Regular Expressions library by Philip Hazel
69        /// (see http://www.pcre.org).
70{
71public:
72        enum Options // These must match the corresponsing options in pcre.h!
73                /// Some of the following options can only be passed to the constructor;
74                /// some can be passed only to matching functions, and some can be used
75                /// everywhere.
76                ///
77                ///   * Options marked [ctor] can be passed to the constructor.
78                ///   * Options marked [match] can be passed to match, extract, split and subst.
79                ///   * Options marked [subst] can be passed to subst.
80                ///
81                /// See the PCRE documentation for more information.
82        {
83                RE_CASELESS        = 0x00000001, /// case insensitive matching (/i) [ctor]
84                RE_MULTILINE       = 0x00000002, /// enable multi-line mode; affects ^ and $ (/m) [ctor]
85                RE_DOTALL          = 0x00000004, /// dot matches all characters, including newline (/s) [ctor]
86                RE_EXTENDED        = 0x00000004, /// totally ignore whitespace (/x) [ctor]
87                RE_ANCHORED        = 0x00000010, /// treat pattern as if it starts with a ^ [ctor, match]
88                RE_DOLLAR_ENDONLY  = 0x00000020, /// dollar matches end-of-string only, not last newline in string [ctor]
89                RE_EXTRA           = 0x00000040, /// enable optional PCRE functionality [ctor]
90                RE_NOTBOL          = 0x00000080, /// circumflex does not match beginning of string [match]
91                RE_NOTEOL          = 0x00000100, /// $ does not match end of string [match]
92                RE_UNGREEDY        = 0x00000200, /// make quantifiers ungreedy [ctor]
93                RE_NOTEMPTY        = 0x00000400, /// empty string never matches [match]
94                RE_UTF8            = 0x00000800, /// assume pattern and subject is UTF-8 encoded [ctor]
95                RE_NO_AUTO_CAPTURE = 0x00001000, /// disable numbered capturing parentheses [ctor, match]
96                RE_NO_UTF8_CHECK   = 0x00002000, /// do not check validity of UTF-8 code sequences [match]
97                RE_FIRSTLINE       = 0x00040000, /// an  unanchored  pattern  is  required  to  match
98                                                 /// before  or  at  the  first  newline  in  the subject string,
99                                                 /// though the matched text may continue over the newline [ctor]
100                RE_DUPNAMES        = 0x00080000, /// names used to identify capturing  subpatterns  need not be unique [ctor]
101                RE_NEWLINE_CR      = 0x00100000, /// assume newline is CR ('\r'), the default [ctor]
102                RE_NEWLINE_LF      = 0x00200000, /// assume newline is LF ('\n') [ctor]
103                RE_NEWLINE_CRLF    = 0x00300000, /// assume newline is CRLF ("\r\n") [ctor]
104                RE_NEWLINE_ANY     = 0x00400000, /// assume newline is any valid Unicode newline character [ctor]
105                RE_NEWLINE_ANYCRLF = 0x00500000, /// assume newline is any of CR, LF, CRLF [ctor]
106                RE_GLOBAL          = 0x10000000, /// replace all occurences (/g) [subst]
107                RE_NO_VARS         = 0x20000000  /// treat dollar in replacement string as ordinary character [subst]
108        };
109       
110        struct Match
111        {
112                std::string::size_type offset; /// zero based offset (std::string::npos if subexpr does not match)
113                std::string::size_type length; /// length of substring
114        };
115        typedef std::vector<Match> MatchVec;
116       
117        RegularExpression(const std::string& pattern, int options = 0, bool study = true);
118                /// Creates a regular expression and parses the given pattern.
119                /// If study is true, the pattern is analyzed and optimized. This
120                /// is mainly useful if the pattern is used more than once.
121                /// For a description of the options, please see the PCRE documentation.
122                /// Throws a RegularExpressionException if the patter cannot be compiled.
123               
124        ~RegularExpression();
125                /// Destroys the regular expression.
126
127        int match(const std::string& subject, Match& mtch, int options = 0) const;
128                /// Matches the given subject string against the pattern. Returns the position
129                /// of the first captured substring in mtch.
130                /// If no part of the subject matches the pattern, mtch.offset is std::string::npos and
131                /// mtch.length is 0.
132                /// Throws a RegularExpressionException in case of an error.
133                /// Returns the number of matches.
134
135        int match(const std::string& subject, std::string::size_type offset, Match& mtch, int options = 0) const;
136                /// Matches the given subject string, starting at offset, against the pattern.
137                /// Returns the position of the captured substring in mtch.
138                /// If no part of the subject matches the pattern, mtch.offset is std::string::npos and
139                /// mtch.length is 0.
140                /// Throws a RegularExpressionException in case of an error.
141                /// Returns the number of matches.
142
143        int match(const std::string& subject, std::string::size_type offset, MatchVec& matches, int options = 0) const;
144                /// Matches the given subject string against the pattern.
145                /// The first entry in matches contains the position of the captured substring.
146                /// The following entries identify matching subpatterns. See the PCRE documentation
147                /// for a more detailed explanation.
148                /// If no part of the subject matches the pattern, matches is empty.
149                /// Throws a RegularExpressionException in case of an error.
150                /// Returns the number of matches.
151
152        bool match(const std::string& subject, std::string::size_type offset = 0) const;
153                /// Returns true if and only if the subject matches the regular expression.
154                ///
155                /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
156                /// matching, which means that the empty string will never match and
157                /// the pattern is treated as if it starts with a ^.
158
159        bool match(const std::string& subject, std::string::size_type offset, int options) const;
160                /// Returns true if and only if the subject matches the regular expression.
161
162        bool operator == (const std::string& subject) const;
163                /// Returns true if and only if the subject matches the regular expression.
164                ///
165                /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
166                /// matching, which means that the empty string will never match and
167                /// the pattern is treated as if it starts with a ^.
168
169        bool operator != (const std::string& subject) const;
170                /// Returns true if and only if the subject does not match the regular expression.
171                ///
172                /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
173                /// matching, which means that the empty string will never match and
174                /// the pattern is treated as if it starts with a ^.
175
176        int extract(const std::string& subject, std::string& str, int options = 0) const;
177                /// Matches the given subject string against the pattern.
178                /// Returns the captured string.
179                /// Throws a RegularExpressionException in case of an error.
180                /// Returns the number of matches.
181
182        int extract(const std::string& subject, std::string::size_type offset, std::string& str, int options = 0) const;
183                /// Matches the given subject string, starting at offset, against the pattern.
184                /// Returns the captured string.
185                /// Throws a RegularExpressionException in case of an error.
186                /// Returns the number of matches.
187
188        int split(const std::string& subject, std::vector<std::string>& strings, int options = 0) const;
189                /// Matches the given subject string against the pattern.
190                /// The first entry in captured is the captured substring.
191                /// The following entries contain substrings matching subpatterns. See the PCRE documentation
192                /// for a more detailed explanation.
193                /// If no part of the subject matches the pattern, captured is empty.
194                /// Throws a RegularExpressionException in case of an error.
195                /// Returns the number of matches.
196
197        int split(const std::string& subject, std::string::size_type offset, std::vector<std::string>& strings, int options = 0) const;
198                /// Matches the given subject string against the pattern.
199                /// The first entry in captured is the captured substring.
200                /// The following entries contain substrings matching subpatterns. See the PCRE documentation
201                /// for a more detailed explanation.
202                /// If no part of the subject matches the pattern, captured is empty.
203                /// Throws a RegularExpressionException in case of an error.
204                /// Returns the number of matches.
205       
206        int subst(std::string& subject, const std::string& replacement, int options = 0) const;
207                /// Substitute in subject all matches of the pattern with replacement.
208                /// If RE_GLOBAL is specified as option, all matches are replaced. Otherwise,
209                /// only the first match is replaced.
210                /// Occurences of $<n> (for example, $1, $2, ...) in replacement are replaced
211                /// with the corresponding captured string. $0 is the original subject string.
212                /// Returns the number of replaced occurences.
213
214        int subst(std::string& subject, std::string::size_type offset, const std::string& replacement, int options = 0) const;
215                /// Substitute in subject all matches of the pattern with replacement,
216                /// starting at offset.
217                /// If RE_GLOBAL is specified as option, all matches are replaced. Otherwise,
218                /// only the first match is replaced.
219                /// Unless RE_NO_VARS is specified, occurences of $<n> (for example, $0, $1, $2, ... $9)
220                /// in replacement are replaced with the corresponding captured string.
221                /// $0 is the captured substring. $1 ... $n are the substrings maching the subpatterns.
222                /// Returns the number of replaced occurences.
223
224        static bool match(const std::string& subject, const std::string& pattern, int options = 0);
225                /// Matches the given subject string against the regular expression given in pattern,
226                /// using the given options.
227
228protected:
229        std::string::size_type substOne(std::string& subject, std::string::size_type offset, const std::string& replacement, int options) const;
230
231private:
232        pcre*       _pcre;
233        pcre_extra* _extra;
234       
235        static const int OVEC_SIZE;
236       
237        RegularExpression();
238        RegularExpression(const RegularExpression&);
239        RegularExpression& operator = (const RegularExpression&);
240};
241
242
243//
244// inlines
245//
246inline int RegularExpression::match(const std::string& subject, Match& mtch, int options) const
247{
248        return match(subject, 0, mtch, options);
249}
250
251
252inline int RegularExpression::split(const std::string& subject, std::vector<std::string>& strings, int options) const
253{
254        return split(subject, 0, strings, options);
255}
256
257
258inline int RegularExpression::subst(std::string& subject, const std::string& replacement, int options) const
259{
260        return subst(subject, 0, replacement, options);
261}
262
263
264inline bool RegularExpression::operator == (const std::string& subject) const
265{
266        return match(subject);
267}
268
269
270inline bool RegularExpression::operator != (const std::string& subject) const
271{
272        return !match(subject);
273}
274
275
276} // namespace Poco
277
278
279#endif // Foundation_RegularExpression_INCLUDED
Note: See TracBrowser for help on using the repository browser.