source: XMLIO_V2/external/src/POCO/Foundation.save/Poco/TextEncoding.h @ 80

Last change on this file since 80 was 80, checked in by ymipsl, 14 years ago

ajout lib externe

  • Property svn:eol-style set to native
File size: 8.1 KB
Line 
1//
2// TextEncoding.h
3//
4// $Id: //poco/1.3/Foundation/include/Poco/TextEncoding.h#4 $
5//
6// Library: Foundation
7// Package: Text
8// Module:  TextEncoding
9//
10// Definition of the abstract TextEncoding class.
11//
12// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
13// and Contributors.
14//
15// Permission is hereby granted, free of charge, to any person or organization
16// obtaining a copy of the software and accompanying documentation covered by
17// this license (the "Software") to use, reproduce, display, distribute,
18// execute, and transmit the Software, and to prepare derivative works of the
19// Software, and to permit third-parties to whom the Software is furnished to
20// do so, all subject to the following:
21//
22// The copyright notices in the Software and this entire statement, including
23// the above license grant, this restriction and the following disclaimer,
24// must be included in all copies of the Software, in whole or in part, and
25// all derivative works of the Software, unless such copies or derivative
26// works are solely in the form of machine-executable object code generated by
27// a source language processor.
28//
29// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
32// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
33// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
34// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
35// DEALINGS IN THE SOFTWARE.
36//
37
38
39#ifndef Foundation_TextEncoding_INCLUDED
40#define Foundation_TextEncoding_INCLUDED
41
42
43#include "Poco/Foundation.h"
44#include "Poco/SharedPtr.h"
45
46
47namespace Poco {
48
49
50class TextEncodingManager; // Forward declaration: this header only uses the type by reference, as the return type of TextEncoding::manager().
51
52
53class Foundation_API TextEncoding
54        /// An abstract base class for implementing text encodings
55        /// like UTF-8 or ISO 8859-1.
56        ///
57        /// Subclasses must override the pure virtual canonicalName(),
58        /// isA() and characterMap() methods as well as convert(), and
59        /// need to be thread safe and stateless.
60        ///
61        /// TextEncoding also provides static member functions
62        /// for managing mappings from encoding names to
63        /// TextEncoding objects.
64{
65public:
66        typedef SharedPtr<TextEncoding> Ptr;
67       
68        enum
69        {
70                MAX_SEQUENCE_LENGTH = 6 /// The maximum character byte sequence length supported.
71        };
72       
73        typedef int CharacterMap[256];
74                /// The map[b] member gives information about byte sequences
75                /// whose first byte is b.
76                /// If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c.
77                /// If map[b] is -1, then the byte sequence is malformed.
78                /// If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
79                /// sequence that encodes a single Unicode scalar value. Byte sequences up
80                /// to 6 bytes in length (MAX_SEQUENCE_LENGTH) are supported.
81
82        virtual ~TextEncoding();
83                /// Destroys the encoding.
84
85        virtual const char* canonicalName() const = 0;
86                /// Returns the canonical name of this encoding,
87                /// e.g. "ISO-8859-1". Encoding name comparisons are case
88                /// insensitive.
89
90        virtual bool isA(const std::string& encodingName) const = 0;
91                /// Returns true if the given name is one of the names of this encoding.
92                /// For example, the "ISO-8859-1" encoding is also known as "Latin-1".
93                ///
94                /// Encoding name comparisons are case insensitive.
95                       
96        virtual const CharacterMap& characterMap() const = 0;
97                /// Returns the CharacterMap for the encoding.
98                /// The CharacterMap should be kept in a static member. As
99                /// characterMap() can be called frequently, it should be
100                /// implemented in such a way that it just returns a static
101                /// map. If the map is built at runtime, this should be
102                /// done in the constructor.
103               
104        virtual int convert(const unsigned char* bytes) const;
105                /// The convert function is used to convert multibyte sequences;
106                /// bytes will point to a byte sequence of n bytes where
107                /// sequenceLength(bytes, length) == -n, with length >= n.
108                /// NOTE(review): sequenceLength() documents -n as "length is too short"; confirm this condition.
109                ///
110                /// The convert function must return the Unicode scalar value represented by this
111                /// byte sequence or -1 if it is malformed. The default implementation returns (int) bytes[0].
112
113        virtual int queryConvert(const unsigned char* bytes, int length) const;
114                /// The queryConvert function is used to convert single byte characters
115                /// or multibyte sequences;
116                /// bytes will point to a byte sequence of length bytes.
117                ///
118                /// The queryConvert function must return the Unicode scalar value
119                /// represented by this byte sequence or -1 if the byte sequence is malformed
120                /// or -n where n is the number of bytes requested for the sequence, if length is
121                /// shorter than the sequence.
122                /// The length of the sequence might not be determined by the first byte,
123                /// in which case the conversion becomes an iterative process:
124                /// The first call with length == 1 might return -2,
125                /// then a second call with length == 2 might return -4.
126                /// Eventually, the third call with length == 4 should return either a
127                /// Unicode scalar value, or -1 if the byte sequence is malformed.
128                /// The default implementation returns (int) bytes[0].
129
130        virtual int sequenceLength(const unsigned char* bytes, int length) const;
131                /// The sequenceLength function is used to get the length of the sequence pointed
132                /// to by bytes. The length parameter should be greater than or equal to the length of
133                /// the sequence.
134                ///
135                /// The sequenceLength function must return the length of the sequence
136                /// represented by this byte sequence or a negative value -n if length is
137                /// shorter than the sequence, where n is the number of bytes requested
138                /// to determine the length of the sequence.
139                /// The length of the sequence might not be determined by the first byte,
140                /// in which case the conversion becomes an iterative process as long as the
141                /// result is negative:
142                /// The first call with length == 1 might return -2,
143                /// then a second call with length == 2 might return -4.
144                /// Eventually, the third call with length == 4 should return 4.
145                /// The default implementation returns 1.
146
147        virtual int convert(int ch, unsigned char* bytes, int length) const;
148                /// Transforms the Unicode character ch into the encoding's
149                /// byte sequence. The method returns the number of bytes
150                /// used. The method must not use more than length bytes.
151                /// bytes can also be a null pointer and length zero - in this case only the
152                /// number of bytes required to represent ch is returned.
153                /// If the character cannot be converted, 0 is returned and
154                /// the byte sequence remains unchanged.
155                /// The default implementation simply returns 0.
156
157        static TextEncoding& byName(const std::string& encodingName);
158                /// Returns the TextEncoding object for the given encoding name.
159                ///
160                /// Throws a NotFoundException if the encoding with the given name is not available.
161               
162        static TextEncoding::Ptr find(const std::string& encodingName);
163                /// Returns a pointer to the TextEncoding object for the given encodingName,
164                /// or NULL if no such TextEncoding object exists.
165
166        static void add(TextEncoding::Ptr encoding);
167                /// Adds the given TextEncoding to the table of text encodings,
168                /// under the encoding's canonical name.
169                ///
170                /// If an encoding with the given name is already registered,
171                /// it is replaced.
172
173        static void add(TextEncoding::Ptr encoding, const std::string& name);
174                /// Adds the given TextEncoding to the table of text encodings,
175                /// under the given name.
176                ///
177                /// If an encoding with the given name is already registered,
178                /// it is replaced.
179
180        static void remove(const std::string& encodingName);
181                /// Removes the encoding with the given name from the table
182                /// of text encodings.
183
184        static TextEncoding::Ptr global(TextEncoding::Ptr encoding);
185                /// Sets the global TextEncoding object.
186                ///
187                /// This function sets the global encoding to the argument and returns a
188                /// shared pointer (TextEncoding::Ptr) to the previous global encoding.
189
190        static TextEncoding& global();
191                /// Returns the current global TextEncoding object.
192
193        static const std::string GLOBAL;
194                /// Name of the global TextEncoding, which is the empty string.
195               
196protected:
197        static TextEncodingManager& manager();
198                /// Returns the TextEncodingManager.
199};
200
201
202} // namespace Poco
203
204
205#endif // Foundation_TextEncoding_INCLUDED
Note: See TracBrowser for help on using the repository browser.