1 | module m_fsm |
---|
2 | ! |
---|
3 | use m_buffer |
---|
4 | use m_dictionary |
---|
5 | use m_charset |
---|
6 | use m_entities |
---|
7 | use m_elstack |
---|
8 | |
---|
9 | private |
---|
10 | |
---|
11 | type, public :: fsm_t |
---|
12 | ! |
---|
13 | ! Contains information about the "finite state machine" |
---|
14 | ! Some of the components (marked *) could at this point be made into |
---|
15 | ! saved module variables. |
---|
16 | ! |
---|
17 | ! |
---|
18 | integer :: state |
---|
19 | integer :: context |
---|
20 | integer :: nbrackets !* |
---|
21 | integer :: nlts !* |
---|
22 | character(len=1) :: quote_char !* |
---|
23 | type(buffer_t) :: buffer !* |
---|
24 | type(buffer_t) :: element_name |
---|
25 | type(dictionary_t) :: attributes |
---|
26 | type(buffer_t) :: pcdata |
---|
27 | type(elstack_t) :: element_stack |
---|
28 | logical :: root_element_seen |
---|
29 | type(buffer_t) :: root_element_name |
---|
30 | character(len=150) :: action |
---|
31 | logical :: debug |
---|
32 | end type fsm_t |
---|
33 | |
---|
34 | public :: init_fsm, reset_fsm, evolve_fsm |
---|
35 | |
---|
36 | ! |
---|
37 | ! State parameters |
---|
38 | ! |
---|
39 | integer, parameter, public :: ERROR = -1 |
---|
40 | integer, parameter, public :: INIT = 1 |
---|
41 | integer, parameter, private :: START_TAG_MARKER = 2 |
---|
42 | integer, parameter, private :: END_TAG_MARKER = 3 |
---|
43 | integer, parameter, private :: IN_NAME = 4 |
---|
44 | integer, parameter, private :: WHITESPACE_IN_TAG = 5 |
---|
45 | integer, parameter, private :: IN_PCDATA = 6 |
---|
46 | integer, parameter, private :: SINGLETAG_MARKER = 7 |
---|
47 | integer, parameter, private :: CLOSINGTAG_MARKER = 8 |
---|
48 | integer, parameter, private :: IN_COMMENT = 9 |
---|
49 | integer, parameter, private :: IN_ATT_NAME = 10 |
---|
50 | integer, parameter, private :: IN_ATT_VALUE = 11 |
---|
51 | integer, parameter, private :: EQUAL = 12 |
---|
52 | integer, parameter, private :: SPACE_AFTER_EQUAL = 13 |
---|
53 | integer, parameter, private :: SPACE_BEFORE_EQUAL = 14 |
---|
54 | integer, parameter, private :: START_QUOTE = 15 |
---|
55 | integer, parameter, private :: END_QUOTE = 16 |
---|
56 | integer, parameter, private :: BANG = 17 |
---|
57 | integer, parameter, private :: BANG_HYPHEN = 18 |
---|
58 | integer, parameter, private :: ONE_HYPHEN = 19 |
---|
59 | integer, parameter, private :: TWO_HYPHEN = 20 |
---|
60 | integer, parameter, private :: QUESTION_MARK = 21 |
---|
61 | integer, parameter, private :: START_XML_DECLARATION = 22 |
---|
62 | integer, parameter, private :: IN_SGML_DECLARATION = 23 |
---|
63 | integer, parameter, private :: IN_CDATA_SECTION = 24 |
---|
64 | integer, parameter, private :: ONE_BRACKET = 25 |
---|
65 | integer, parameter, private :: TWO_BRACKET = 26 |
---|
66 | integer, parameter, private :: CDATA_PREAMBLE = 27 |
---|
67 | integer, parameter, private :: IN_PCDATA_AT_EOL = 30 |
---|
68 | ! |
---|
69 | ! Context parameters |
---|
70 | ! |
---|
71 | integer, parameter, public :: OPENING_TAG = 100 |
---|
72 | integer, parameter, public :: CLOSING_TAG = 110 |
---|
73 | integer, parameter, public :: SINGLE_TAG = 120 |
---|
74 | integer, parameter, public :: COMMENT_TAG = 130 |
---|
75 | integer, parameter, public :: XML_DECLARATION_TAG = 140 |
---|
76 | integer, parameter, public :: SGML_DECLARATION_TAG = 150 |
---|
77 | integer, parameter, public :: CDATA_SECTION_TAG = 160 |
---|
78 | integer, parameter, public :: NULL_CONTEXT = 200 |
---|
79 | ! |
---|
80 | ! Signal parameters |
---|
81 | ! |
---|
82 | integer, parameter, public :: QUIET = 1000 |
---|
83 | integer, parameter, public :: END_OF_TAG = 1100 |
---|
84 | integer, parameter, public :: CHUNK_OF_PCDATA = 1200 |
---|
85 | integer, parameter, public :: EXCEPTION = 1500 |
---|
86 | |
---|
87 | CONTAINS |
---|
88 | |
---|
89 | !------------------------------------------------------------ |
---|
90 | ! Initialize once and for all the derived types (Fortran90 restriction) |
---|
91 | ! |
---|
92 | subroutine init_fsm(fx) |
---|
93 | type(fsm_t), intent(inout) :: fx |
---|
94 | |
---|
95 | fx%state = INIT |
---|
96 | call setup_xml_charsets() |
---|
97 | fx%context = NULL_CONTEXT |
---|
98 | call init_elstack(fx%element_stack) |
---|
99 | fx%root_element_seen = .false. |
---|
100 | fx%debug = .false. |
---|
101 | fx%action = "" |
---|
102 | call init_buffer(fx%buffer) |
---|
103 | call init_buffer(fx%element_name) |
---|
104 | call init_buffer(fx%pcdata) |
---|
105 | call init_buffer(fx%root_element_name) |
---|
106 | call init_dict(fx%attributes) |
---|
107 | end subroutine init_fsm |
---|
108 | !------------------------------------------------------------ |
---|
109 | subroutine reset_fsm(fx) |
---|
110 | type(fsm_t), intent(inout) :: fx |
---|
111 | |
---|
112 | fx%state = INIT |
---|
113 | call setup_xml_charsets() |
---|
114 | fx%context = NULL_CONTEXT |
---|
115 | call reset_elstack(fx%element_stack) |
---|
116 | fx%action = "" |
---|
117 | fx%root_element_seen = .false. |
---|
118 | call reset_buffer(fx%buffer) |
---|
119 | call reset_buffer(fx%element_name) |
---|
120 | call reset_buffer(fx%pcdata) |
---|
121 | call reset_buffer(fx%root_element_name) |
---|
122 | call reset_dict(fx%attributes) |
---|
123 | end subroutine reset_fsm |
---|
124 | |
---|
125 | !------------------------------------------------------------ |
---|
126 | subroutine evolve_fsm(fx,c,signal) |
---|
127 | ! |
---|
128 | ! Finite-state machine evolution rules for XML parsing. |
---|
129 | ! |
---|
130 | type(fsm_t), intent(inout) :: fx ! Internal state |
---|
131 | character(len=1), intent(in) :: c |
---|
132 | integer, intent(out) :: signal |
---|
133 | |
---|
134 | ! |
---|
135 | ! Reset signal |
---|
136 | ! |
---|
137 | signal = QUIET |
---|
138 | ! |
---|
139 | |
---|
140 | if (.not. (c .in. valid_chars)) then |
---|
141 | ! |
---|
142 | ! Let it pass (in case the underlying encoding is UTF-8) |
---|
143 | ! But this chars in a name will cause havoc |
---|
144 | ! |
---|
145 | ! signal = EXCEPTION |
---|
146 | ! fx%state = ERROR |
---|
147 | ! fx%action = trim("Not a valid character in simple encoding: "//c) |
---|
148 | ! RETURN |
---|
149 | endif |
---|
150 | |
---|
151 | select case(fx%state) |
---|
152 | |
---|
153 | case (INIT) |
---|
154 | if (c == "<") then |
---|
155 | fx%state = START_TAG_MARKER |
---|
156 | if (fx%debug) fx%action = ("Starting tag") |
---|
157 | else if (c == ">") then |
---|
158 | fx%state = ERROR |
---|
159 | fx%action = ("Ending tag without being in one!") |
---|
160 | else |
---|
161 | if (fx%debug) fx%action = ("Reading garbage chars") |
---|
162 | endif |
---|
163 | |
---|
164 | case (START_TAG_MARKER) |
---|
165 | if (c == ">") then |
---|
166 | fx%state = ERROR |
---|
167 | fx%action = ("Tag empty!") |
---|
168 | else if (c == "<") then |
---|
169 | fx%state = ERROR |
---|
170 | fx%action = ("Double opening of tag!!") |
---|
171 | else if (c == "/") then |
---|
172 | fx%state = CLOSINGTAG_MARKER |
---|
173 | if (fx%debug) fx%action = ("Starting endtag: ") |
---|
174 | fx%context = CLOSING_TAG |
---|
175 | else if (c == "?") then |
---|
176 | fx%state = START_XML_DECLARATION |
---|
177 | if (fx%debug) fx%action = ("Starting XML declaration ") |
---|
178 | fx%context = XML_DECLARATION_TAG |
---|
179 | else if (c == "!") then |
---|
180 | fx%state = BANG |
---|
181 | if (fx%debug) fx%action = ("Saw ! -- comment or SGML declaration expected...") |
---|
182 | else if (c .in. whitespace) then |
---|
183 | fx%state = ERROR |
---|
184 | fx%action = ("Cannot have whitespace after <") |
---|
185 | else if (c .in. initial_name_chars) then |
---|
186 | fx%context = OPENING_TAG |
---|
187 | fx%state = IN_NAME |
---|
188 | call add_to_buffer(c,fx%buffer) |
---|
189 | if (fx%debug) fx%action = ("Starting to read name in tag") |
---|
190 | else |
---|
191 | fx%state = ERROR |
---|
192 | fx%action = ("Illegal initial character for name") |
---|
193 | endif |
---|
194 | |
---|
195 | |
---|
196 | case (BANG) |
---|
197 | if (c == "-") then |
---|
198 | fx%state = BANG_HYPHEN |
---|
199 | if (fx%debug) fx%action = ("Almost ready to start comment ") |
---|
200 | else if (c .in. uppercase_chars) then |
---|
201 | fx%state = IN_SGML_DECLARATION |
---|
202 | fx%nlts = 0 |
---|
203 | fx%nbrackets = 0 |
---|
204 | if (fx%debug) fx%action = ("SGML declaration ") |
---|
205 | fx%context = SGML_DECLARATION_TAG |
---|
206 | call add_to_buffer(c,fx%buffer) |
---|
207 | else if (c == "[") then |
---|
208 | fx%state = CDATA_PREAMBLE |
---|
209 | if (fx%debug) fx%action = ("Declaration with [ ") |
---|
210 | fx%context = CDATA_SECTION_TAG |
---|
211 | else |
---|
212 | fx%state = ERROR |
---|
213 | fx%action = ("Wrong character after ! ") |
---|
214 | endif |
---|
215 | |
---|
216 | case (CDATA_PREAMBLE) |
---|
217 | ! We assume a CDATA[ is forthcoming, we do not check |
---|
218 | if (c == "[") then |
---|
219 | fx%state = IN_CDATA_SECTION |
---|
220 | if (fx%debug) fx%action = ("About to start reading CDATA contents") |
---|
221 | else if (c == "]") then |
---|
222 | fx%state = ERROR |
---|
223 | fx%action = ("Unexpected ] in CDATA preamble") |
---|
224 | else |
---|
225 | if (fx%debug) fx%action = ("Reading CDATA preamble") |
---|
226 | endif |
---|
227 | |
---|
228 | case (IN_CDATA_SECTION) |
---|
229 | if (c == "]") then |
---|
230 | fx%state = ONE_BRACKET |
---|
231 | if (fx%debug) fx%action = ("Saw a ] in CDATA section") |
---|
232 | else |
---|
233 | call add_to_buffer(c,fx%buffer) |
---|
234 | if (fx%debug) fx%action = ("Reading contents of CDATA section") |
---|
235 | endif |
---|
236 | |
---|
237 | case (ONE_BRACKET) |
---|
238 | if (c == "]") then |
---|
239 | fx%state = TWO_BRACKET |
---|
240 | if (fx%debug) fx%action = ("Maybe finish a CDATA section") |
---|
241 | else |
---|
242 | fx%state = IN_CDATA_SECTION |
---|
243 | call add_to_buffer("]",fx%buffer) |
---|
244 | if (fx%debug) fx%action = ("Continue reading contents of CDATA section") |
---|
245 | endif |
---|
246 | |
---|
247 | case (TWO_BRACKET) |
---|
248 | if (c == ">") then |
---|
249 | fx%state = END_TAG_MARKER |
---|
250 | signal = END_OF_TAG |
---|
251 | if (fx%debug) fx%action = ("End of CDATA section") |
---|
252 | fx%pcdata = fx%buffer ! Not quite the same behavior |
---|
253 | ! as pcdata... (not filtered) |
---|
254 | call reset_buffer(fx%buffer) |
---|
255 | else |
---|
256 | fx%state = IN_CDATA_SECTION |
---|
257 | call add_to_buffer("]",fx%buffer) |
---|
258 | if (fx%debug) fx%action = ("Continue reading contents of CDATA section") |
---|
259 | endif |
---|
260 | |
---|
261 | case (IN_SGML_DECLARATION) |
---|
262 | if (c == "<") then |
---|
263 | fx%nlts = fx%nlts + 1 |
---|
264 | call add_to_buffer("<",fx%buffer) |
---|
265 | fx%action = "Read an intermediate < in SGML declaration" |
---|
266 | else if (c == "[") then |
---|
267 | fx%nbrackets = fx%nbrackets + 1 |
---|
268 | call add_to_buffer("[",fx%buffer) |
---|
269 | fx%action = "Read a [ in SGML declaration" |
---|
270 | else if (c == "]") then |
---|
271 | fx%nbrackets = fx%nbrackets - 1 |
---|
272 | call add_to_buffer("]",fx%buffer) |
---|
273 | fx%action = "Read a ] in SGML declaration" |
---|
274 | else if (c == ">") then |
---|
275 | if (fx%nlts == 0) then |
---|
276 | if (fx%nbrackets == 0) then |
---|
277 | fx%state = END_TAG_MARKER |
---|
278 | signal = END_OF_TAG |
---|
279 | if (fx%debug) fx%action = ("Ending SGML declaration tag") |
---|
280 | fx%pcdata = fx%buffer ! Same behavior as pcdata |
---|
281 | call reset_buffer(fx%buffer) |
---|
282 | else |
---|
283 | fx%state = ERROR |
---|
284 | fx%action = ("Unmatched ] in SGML declaration") |
---|
285 | endif |
---|
286 | else |
---|
287 | fx%nlts = fx%nlts -1 |
---|
288 | call add_to_buffer(">",fx%buffer) |
---|
289 | fx%action = "Read an intermediate > in SGML declaration" |
---|
290 | endif |
---|
291 | else |
---|
292 | if (fx%debug) fx%action = ("Keep reading SGML declaration") |
---|
293 | call add_to_buffer(c,fx%buffer) |
---|
294 | endif |
---|
295 | |
---|
296 | case (BANG_HYPHEN) |
---|
297 | if (c == "-") then |
---|
298 | fx%state = IN_COMMENT |
---|
299 | fx%context = COMMENT_TAG |
---|
300 | if (fx%debug) fx%action = ("In comment ") |
---|
301 | else |
---|
302 | fx%state = ERROR |
---|
303 | fx%action = ("Wrong character after <!- ") |
---|
304 | endif |
---|
305 | |
---|
306 | case (START_XML_DECLARATION) |
---|
307 | if (c .in. initial_name_chars) then |
---|
308 | fx%state = IN_NAME |
---|
309 | call add_to_buffer(c,fx%buffer) |
---|
310 | if (fx%debug) fx%action = ("Starting to read name in XML declaration") |
---|
311 | else |
---|
312 | fx%state = ERROR |
---|
313 | fx%action = "Wrong character after ? in start of XML declaration" |
---|
314 | endif |
---|
315 | |
---|
316 | case (CLOSINGTAG_MARKER) |
---|
317 | if (c == ">") then |
---|
318 | fx%state = ERROR |
---|
319 | fx%action = ("Closing tag empty!") |
---|
320 | else if (c == "<") then |
---|
321 | fx%state = ERROR |
---|
322 | fx%action = ("Double opening of closing tag!!") |
---|
323 | else if (c == "/") then |
---|
324 | fx%state = ERROR |
---|
325 | fx%action = ("Syntax error (<//)") |
---|
326 | else if (c .in. whitespace) then |
---|
327 | fx%state = ERROR |
---|
328 | fx%action = ("Cannot have whitespace after </") |
---|
329 | else if (c .in. initial_name_chars) then |
---|
330 | fx%state = IN_NAME |
---|
331 | if (fx%debug) fx%action = ("Starting to read name inside endtag") |
---|
332 | call add_to_buffer(c,fx%buffer) |
---|
333 | else |
---|
334 | fx%state = ERROR |
---|
335 | fx%action = ("Illegal initial character for name") |
---|
336 | endif |
---|
337 | |
---|
338 | case (IN_NAME) |
---|
339 | if (c == "<") then |
---|
340 | fx%state = ERROR |
---|
341 | fx%action = ("Starting tag within tag") |
---|
342 | else if (c == ">") then |
---|
343 | fx%state = END_TAG_MARKER |
---|
344 | signal = END_OF_TAG |
---|
345 | if (fx%debug) fx%action = ("Ending tag") |
---|
346 | ! call set_element_name(fx%buffer,fx%element) |
---|
347 | fx%element_name = fx%buffer |
---|
348 | call reset_buffer(fx%buffer) |
---|
349 | call reset_dict(fx%attributes) |
---|
350 | else if (c == "/") then |
---|
351 | if (fx%context /= OPENING_TAG) then |
---|
352 | fx%state = ERROR |
---|
353 | fx%action = ("Single tag did not open as start tag") |
---|
354 | else |
---|
355 | fx%state = SINGLETAG_MARKER |
---|
356 | fx%context = SINGLE_TAG |
---|
357 | if (fx%debug) fx%action = ("Almost ending single tag") |
---|
358 | ! call set_element_name(fx%buffer,fx%element) |
---|
359 | fx%element_name = fx%buffer |
---|
360 | call reset_buffer(fx%buffer) |
---|
361 | call reset_dict(fx%attributes) |
---|
362 | endif |
---|
363 | else if (c .in. whitespace) then |
---|
364 | fx%state = WHITESPACE_IN_TAG |
---|
365 | if (fx%debug) fx%action = ("Ending name chars") |
---|
366 | ! call set_element_name(fx%buffer,fx%element) |
---|
367 | fx%element_name = fx%buffer |
---|
368 | call reset_buffer(fx%buffer) |
---|
369 | call reset_dict(fx%attributes) |
---|
370 | else if (c .in. name_chars) then |
---|
371 | if (fx%debug) fx%action = ("Reading name chars in tag") |
---|
372 | call add_to_buffer(c,fx%buffer) |
---|
373 | else |
---|
374 | fx%state = ERROR |
---|
375 | fx%action = ("Illegal character for name") |
---|
376 | endif |
---|
377 | |
---|
378 | case (IN_ATT_NAME) |
---|
379 | if (c == "<") then |
---|
380 | fx%state = ERROR |
---|
381 | fx%action = ("Starting tag within tag") |
---|
382 | else if (c == ">") then |
---|
383 | fx%state = ERROR |
---|
384 | fx%action = ("Ending tag in the middle of an attribute") |
---|
385 | else if (c == "/") then |
---|
386 | fx%state = ERROR |
---|
387 | fx%action = ("Ending tag in the middle of an attribute") |
---|
388 | else if (c .in. whitespace) then |
---|
389 | fx%state = SPACE_BEFORE_EQUAL |
---|
390 | if (fx%debug) fx%action = ("Whitespace after attr. name (specs?)") |
---|
391 | call add_key_to_dict(fx%buffer,fx%attributes) |
---|
392 | call reset_buffer(fx%buffer) |
---|
393 | else if ( c == "=" ) then |
---|
394 | fx%state = EQUAL |
---|
395 | if (fx%debug) fx%action = ("End of attr. name") |
---|
396 | call add_key_to_dict(fx%buffer,fx%attributes) |
---|
397 | call reset_buffer(fx%buffer) |
---|
398 | else if (c .in. name_chars) then |
---|
399 | if (fx%debug) fx%action = ("Reading attribute name chars") |
---|
400 | call add_to_buffer(c,fx%buffer) |
---|
401 | else |
---|
402 | fx%state = ERROR |
---|
403 | fx%action = ("Illegal character for attribute name") |
---|
404 | endif |
---|
405 | |
---|
406 | case (EQUAL) |
---|
407 | if ( (c == """") .or. (c == "'") ) then |
---|
408 | fx%state = START_QUOTE |
---|
409 | if (fx%debug) fx%action = ("Found beginning quote") |
---|
410 | fx%quote_char = c |
---|
411 | else if (c .in. whitespace) then |
---|
412 | fx%state = SPACE_AFTER_EQUAL |
---|
413 | if (fx%debug) fx%action = ("Whitespace after equal sign...") |
---|
414 | else |
---|
415 | fx%state = ERROR |
---|
416 | fx%action = ("Must use quotes for attribute values") |
---|
417 | endif |
---|
418 | |
---|
419 | case (SPACE_BEFORE_EQUAL) |
---|
420 | if ( c == "=" ) then |
---|
421 | fx%state = EQUAL |
---|
422 | if (fx%debug) fx%action = ("Equal sign") |
---|
423 | else if (c .in. whitespace) then |
---|
424 | if (fx%debug) fx%action = ("More whitespace before equal sign...") |
---|
425 | else |
---|
426 | fx%state = ERROR |
---|
427 | fx%action = ("Must use equal sign for attribute values") |
---|
428 | endif |
---|
429 | |
---|
430 | case (SPACE_AFTER_EQUAL) |
---|
431 | if ( c == "=" ) then |
---|
432 | fx%state = ERROR |
---|
433 | fx%action = ("Duplicate Equal sign") |
---|
434 | else if (c .in. whitespace) then |
---|
435 | if (fx%debug) fx%action = ("More whitespace after equal sign...") |
---|
436 | else if ( (c == """") .or. (c == "'") ) then |
---|
437 | fx%state = START_QUOTE |
---|
438 | fx%quote_char = c |
---|
439 | if (fx%debug) fx%action = ("Found beginning quote") |
---|
440 | else |
---|
441 | fx%state = ERROR |
---|
442 | fx%action = ("Must use quotes for attribute values") |
---|
443 | endif |
---|
444 | |
---|
445 | case (START_QUOTE) |
---|
446 | if (c == fx%quote_char) then |
---|
447 | fx%state = END_QUOTE |
---|
448 | if (fx%debug) fx%action = ("Emtpy attribute value...") |
---|
449 | call add_value_to_dict(fx%buffer,fx%attributes) |
---|
450 | call reset_buffer(fx%buffer) |
---|
451 | else if (c == "<") then |
---|
452 | fx%state = ERROR |
---|
453 | fx%action = ("Attribute value cannot contain <") |
---|
454 | else ! actually allowed chars in att values... Specs: No "<" |
---|
455 | fx%state = IN_ATT_VALUE |
---|
456 | if (fx%debug) fx%action = ("Starting to read attribute value") |
---|
457 | call add_to_buffer(c,fx%buffer) |
---|
458 | endif |
---|
459 | |
---|
460 | case (IN_ATT_VALUE) |
---|
461 | if (c == fx%quote_char) then |
---|
462 | fx%state = END_QUOTE |
---|
463 | if (fx%debug) fx%action = ("End of attribute value") |
---|
464 | call add_value_to_dict(fx%buffer,fx%attributes) |
---|
465 | call reset_buffer(fx%buffer) |
---|
466 | else if (c == "<") then |
---|
467 | fx%state = ERROR |
---|
468 | fx%action = ("Attribute value cannot contain <") |
---|
469 | else if ( (c == char(10)) ) then |
---|
470 | fx%state = ERROR |
---|
471 | ! |
---|
472 | ! Aparently other whitespace is allowed... |
---|
473 | ! |
---|
474 | fx%action = ("No newline allowed in attr. value (specs?)") |
---|
475 | else ! all other chars allowed in attr value |
---|
476 | if (fx%debug) fx%action = ("Reading attribute value chars") |
---|
477 | call add_to_buffer(c,fx%buffer) |
---|
478 | endif |
---|
479 | |
---|
480 | case (END_QUOTE) |
---|
481 | if ((c == """") .or. (c == "'")) then |
---|
482 | fx%state = ERROR |
---|
483 | fx%action = ("Duplicate end quote") |
---|
484 | else if (c .in. whitespace) then |
---|
485 | fx%state = WHITESPACE_IN_TAG |
---|
486 | if (fx%debug) fx%action = ("Space in between attributes or to end of tag") |
---|
487 | else if (c == "<") then |
---|
488 | fx%state = ERROR |
---|
489 | fx%action = ("Starting tag within tag") |
---|
490 | else if (c == ">") then |
---|
491 | if (fx%context == XML_DECLARATION_TAG) then |
---|
492 | fx%state = ERROR |
---|
493 | fx%action = "End of XML declaration without ?" |
---|
494 | else |
---|
495 | fx%state = END_TAG_MARKER |
---|
496 | signal = END_OF_TAG |
---|
497 | if (fx%debug) fx%action = ("Ending tag after some attributes") |
---|
498 | endif |
---|
499 | else if (c == "/") then |
---|
500 | if (fx%context /= OPENING_TAG) then |
---|
501 | fx%state = ERROR |
---|
502 | fx%action = ("Single tag did not open as start tag") |
---|
503 | else |
---|
504 | fx%state = SINGLETAG_MARKER |
---|
505 | fx%context = SINGLE_TAG |
---|
506 | if (fx%debug) fx%action = ("Almost ending single tag after some attributes") |
---|
507 | endif |
---|
508 | else if (c == "?") then |
---|
509 | if (fx%context /= XML_DECLARATION_TAG) then |
---|
510 | fx%state = ERROR |
---|
511 | fx%action = "Wrong lone ? in tag" |
---|
512 | else |
---|
513 | fx%state = QUESTION_MARK |
---|
514 | if (fx%debug) fx%action = ("About to end XML declaration") |
---|
515 | endif |
---|
516 | else |
---|
517 | fx%state = ERROR |
---|
518 | fx%action = ("Must have some whitespace after att. value") |
---|
519 | endif |
---|
520 | |
---|
521 | |
---|
522 | case (WHITESPACE_IN_TAG) |
---|
523 | if ( c .in. whitespace) then |
---|
524 | if (fx%debug) fx%action = ("Reading whitespace in tag") |
---|
525 | else if (c == "<") then |
---|
526 | fx%state = ERROR |
---|
527 | fx%action = ("Starting tag within tag") |
---|
528 | else if (c == ">") then |
---|
529 | if (fx%context == XML_DECLARATION_TAG) then |
---|
530 | fx%state = ERROR |
---|
531 | fx%action = "End of XML declaration without ?" |
---|
532 | else |
---|
533 | fx%state = END_TAG_MARKER |
---|
534 | signal = END_OF_TAG |
---|
535 | if (fx%debug) fx%action = ("End whitespace in tag") |
---|
536 | endif |
---|
537 | else if (c == "/") then |
---|
538 | if (fx%context /= OPENING_TAG) then |
---|
539 | fx%state = ERROR |
---|
540 | fx%action = ("Single tag did not open as start tag") |
---|
541 | else |
---|
542 | fx%state = SINGLETAG_MARKER |
---|
543 | fx%context = SINGLE_TAG |
---|
544 | if (fx%debug) fx%action = ("End whitespace in single tag") |
---|
545 | endif |
---|
546 | else if (c .in. initial_name_chars) then |
---|
547 | fx%state = IN_ATT_NAME |
---|
548 | if (fx%debug) fx%action = ("Starting Attribute name in tag") |
---|
549 | call add_to_buffer(c,fx%buffer) |
---|
550 | else if (c == "?") then |
---|
551 | if (fx%context /= XML_DECLARATION_TAG) then |
---|
552 | fx%state = ERROR |
---|
553 | fx%action = "Wrong lone ? in tag" |
---|
554 | else |
---|
555 | fx%state = QUESTION_MARK |
---|
556 | if (fx%debug) fx%action = ("About to end XML declaration after whitespace") |
---|
557 | endif |
---|
558 | else |
---|
559 | fx%state = ERROR |
---|
560 | fx%action = ("Illegal initial character for attribute") |
---|
561 | endif |
---|
562 | |
---|
563 | case (QUESTION_MARK) |
---|
564 | if (c == ">") then |
---|
565 | fx%state = END_TAG_MARKER |
---|
566 | signal = END_OF_TAG |
---|
567 | if (fx%debug) fx%action = ("End of XML declaration tag") |
---|
568 | else |
---|
569 | fx%state = ERROR |
---|
570 | fx%action = "No > after ? in XML declaration tag" |
---|
571 | endif |
---|
572 | |
---|
573 | case (IN_COMMENT) |
---|
574 | ! |
---|
575 | ! End of comment is "-->", and ">" can appear inside comments |
---|
576 | ! |
---|
577 | if (c == "-") then |
---|
578 | fx%state = ONE_HYPHEN |
---|
579 | if (fx%debug) fx%action = ("Saw - in Comment") |
---|
580 | else |
---|
581 | if (fx%debug) fx%action = ("Reading comment") |
---|
582 | call add_to_buffer(c,fx%buffer) |
---|
583 | endif |
---|
584 | |
---|
585 | case (ONE_HYPHEN) |
---|
586 | if (c == "-") then |
---|
587 | fx%state = TWO_HYPHEN |
---|
588 | if (fx%debug) fx%action = ("About to end comment") |
---|
589 | else |
---|
590 | fx%state = IN_COMMENT |
---|
591 | if (fx%debug) fx%action = ("Keep reading comment after -: ") |
---|
592 | call add_to_buffer("-",fx%buffer) |
---|
593 | call add_to_buffer(c,fx%buffer) |
---|
594 | endif |
---|
595 | |
---|
596 | case (TWO_HYPHEN) |
---|
597 | if (c == ">") then |
---|
598 | fx%state = END_TAG_MARKER |
---|
599 | signal = END_OF_TAG |
---|
600 | if (fx%debug) fx%action = ("End of Comment") |
---|
601 | fx%pcdata = fx%buffer ! Same behavior as pcdata |
---|
602 | call reset_buffer(fx%buffer) |
---|
603 | else |
---|
604 | fx%state = ERROR |
---|
605 | fx%action = ("Cannot have -- in comment") |
---|
606 | endif |
---|
607 | |
---|
608 | case (SINGLETAG_MARKER) |
---|
609 | |
---|
610 | if (c == ">") then |
---|
611 | fx%state = END_TAG_MARKER |
---|
612 | signal = END_OF_TAG |
---|
613 | if (fx%debug) fx%action = ("Ending tag") |
---|
614 | ! We have to call begin_element AND end_element |
---|
615 | else |
---|
616 | fx%state = ERROR |
---|
617 | fx%action = ("Wrong ending of single tag") |
---|
618 | endif |
---|
619 | |
---|
620 | case (IN_PCDATA) |
---|
621 | if (c == "<") then |
---|
622 | fx%state = START_TAG_MARKER |
---|
623 | signal = CHUNK_OF_PCDATA |
---|
624 | if (fx%debug) fx%action = ("End of pcdata -- Starting tag") |
---|
625 | fx%pcdata = fx%buffer |
---|
626 | call reset_buffer(fx%buffer) |
---|
627 | else if (c == ">") then |
---|
628 | fx%state = ERROR |
---|
629 | fx%action = ("Ending tag without starting it!") |
---|
630 | else if (c == char(10)) then |
---|
631 | fx%state = IN_PCDATA_AT_EOL |
---|
632 | signal = CHUNK_OF_PCDATA |
---|
633 | if (fx%debug) fx%action = ("Resetting PCDATA buffer at newline") |
---|
634 | call add_to_buffer(c,fx%buffer) |
---|
635 | fx%pcdata = fx%buffer |
---|
636 | call reset_buffer(fx%buffer) |
---|
637 | else |
---|
638 | call add_to_buffer(c,fx%buffer) |
---|
639 | if (fx%debug) fx%action = ("Reading chars outside tags") |
---|
640 | ! |
---|
641 | ! Check whether we are close to the end of the buffer. |
---|
642 | ! If so, make a chunk and reset the buffer |
---|
643 | if (c .in. whitespace) then |
---|
644 | if (buffer_nearly_full(fx%buffer)) then |
---|
645 | signal = CHUNK_OF_PCDATA |
---|
646 | if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer") |
---|
647 | fx%pcdata = fx%buffer |
---|
648 | call reset_buffer(fx%buffer) |
---|
649 | endif |
---|
650 | endif |
---|
651 | endif |
---|
652 | |
---|
653 | case (IN_PCDATA_AT_EOL) |
---|
654 | ! |
---|
655 | ! Avoid triggering an extra pcdata event |
---|
656 | ! |
---|
657 | if (c == "<") then |
---|
658 | fx%state = START_TAG_MARKER |
---|
659 | if (fx%debug) fx%action = ("No more pcdata after eol-- Starting tag") |
---|
660 | else if (c == ">") then |
---|
661 | fx%state = ERROR |
---|
662 | fx%action = ("Ending tag without starting it!") |
---|
663 | else if (c == char(10)) then |
---|
664 | fx%state = IN_PCDATA_AT_EOL |
---|
665 | signal = CHUNK_OF_PCDATA |
---|
666 | if (fx%debug) fx%action = ("Resetting PCDATA buffer at repeated newline") |
---|
667 | call add_to_buffer(c,fx%buffer) |
---|
668 | fx%pcdata = fx%buffer |
---|
669 | call reset_buffer(fx%buffer) |
---|
670 | else |
---|
671 | fx%state = IN_PCDATA |
---|
672 | call add_to_buffer(c,fx%buffer) |
---|
673 | if (fx%debug) fx%action = ("Reading chars outside tags") |
---|
674 | ! |
---|
675 | ! Check whether we are close to the end of the buffer. |
---|
676 | ! If so, make a chunk and reset the buffer |
---|
677 | if (c .in. whitespace) then |
---|
678 | if (buffer_nearly_full(fx%buffer)) then |
---|
679 | signal = CHUNK_OF_PCDATA |
---|
680 | if (fx%debug) fx%action = ("Resetting almost full PCDATA buffer") |
---|
681 | fx%pcdata = fx%buffer |
---|
682 | call reset_buffer(fx%buffer) |
---|
683 | endif |
---|
684 | endif |
---|
685 | endif |
---|
686 | |
---|
687 | |
---|
688 | |
---|
689 | case (END_TAG_MARKER) |
---|
690 | ! |
---|
691 | if (c == "<") then |
---|
692 | fx%state = START_TAG_MARKER |
---|
693 | if (fx%debug) fx%action = ("Starting tag") |
---|
694 | else if (c == ">") then |
---|
695 | fx%state = ERROR |
---|
696 | fx%action = ("Double ending of tag!") |
---|
697 | ! |
---|
698 | ! We should make this whitespace in general (maybe not? |
---|
699 | ! how about indentation in text chunks?) |
---|
700 | ! See specs. |
---|
701 | ! |
---|
702 | else if (c == char(10)) then |
---|
703 | ! Ignoring LF after end of tag is probably non standard... |
---|
704 | |
---|
705 | if (fx%debug) & |
---|
706 | fx%action = ("---------Discarding newline after end of tag") |
---|
707 | |
---|
708 | !!! New code for full compliance |
---|
709 | ! fx%state = IN_PCDATA_AT_EOL |
---|
710 | ! call add_to_buffer(c,fx%buffer) |
---|
711 | ! if (fx%debug) & |
---|
712 | ! fx%action = ("Found LF after end of tag. Emitting PCDATA event") |
---|
713 | ! signal = CHUNK_OF_PCDATA |
---|
714 | ! fx%pcdata = fx%buffer |
---|
715 | ! call reset_buffer(fx%buffer) |
---|
716 | else |
---|
717 | fx%state = IN_PCDATA |
---|
718 | call add_to_buffer(c,fx%buffer) |
---|
719 | if (fx%debug) fx%action = ("End of Tag. Starting to read PCDATA") |
---|
720 | endif |
---|
721 | |
---|
722 | case (ERROR) |
---|
723 | |
---|
724 | stop "Cannot continue after parsing errors!" |
---|
725 | |
---|
726 | end select |
---|
727 | |
---|
728 | if (fx%state == ERROR) signal = EXCEPTION |
---|
729 | |
---|
730 | end subroutine evolve_fsm |
---|
731 | |
---|
732 | end module m_fsm |
---|
733 | |
---|
734 | |
---|
735 | |
---|
736 | |
---|
737 | |
---|
738 | |
---|
739 | |
---|
740 | |
---|
741 | |
---|
742 | |
---|
743 | |
---|
744 | |
---|
745 | |
---|