Complete a program that is designed to read an XML file, follow an XPath to a selected node, and then list all of the text content (ignoring attributes) in the subtree rooted at the selected node, in the order that the text appeared within the original XML file.
You will be provided with the bulk of the code for this program, including the input processing to read XML and convert it into a tree structure (declared in node.h).
Your task is to supply the functions declared in extraction.h:
Your bodies for these functions should be written in extraction.cpp.
To run the application program, supply two command line parameters. The first will designate an XML file and the second will be the XPath to the desired node.
Example 1
./xmlextract test0.html /html/body
will print
Hello world!
Example 2
./xmlextract test1.html /html/body/p[2]
will print
world!
Example 3
./xmlextract books1.xml /rdf:RDF/pgterms:etext/dc:creator
will print
Twain, Mark, 1835-1910
Example 4
./xmlextract books1.xml /rdf:RDF/pgterms:etext[3]
will print
&pg; A History of the Early Part of the Reign of James the Second Fox, Charles
James, 1749-1806 Morley, Henry, 1822-1894 [Editor] A History of the Early Part
of the Reign of James en Great Britain -- History -- James II, 1685-1688 DA
2003-07-01 17
Please submit only the new extraction.cpp file, with the bodies for the two missing functions!
FlexLexer.h
// -*-C++-*-
// FlexLexer.h — define interfaces for lexical analyzer classes generated
// by flex
// Copyright (c) 1993 The Regents of the University of California.
// All rights reserved.
//
// This code is derived from software contributed to Berkeley by
// Kent Williams and Tom Epperly.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the University nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED “AS IS” AND WITHOUT ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE.
// This file defines FlexLexer, an abstract class which specifies the
// external interface provided to flex C++ lexer objects, and yyFlexLexer,
// which defines a particular lexer class.
//
// If you want to create multiple lexer classes, you use the -P flag
// to rename each yyFlexLexer to some other xxFlexLexer. You then
// include
//
// #undef yyFlexLexer
// #define yyFlexLexer xxFlexLexer
// #include
//
// #undef yyFlexLexer
// #define yyFlexLexer zzFlexLexer
// #include
// …
#ifndef __FLEX_LEXER_H
// Never included before - need to define base class.
#define __FLEX_LEXER_H

#include <iostream>

extern "C++" {

struct yy_buffer_state;
typedef int yy_state_type;

/**
 * Abstract base class describing the external interface of a flex C++
 * scanner object. Buffer management, stream switching and the scanning
 * entry point are pure virtual; the accessors and the stream-taking
 * yylex() convenience overloads are implemented here.
 */
class FlexLexer
{
public:
  virtual ~FlexLexer() { }

  /// Returns the yytext pointer.
  const char* YYText() const { return yytext; }
  /// Returns yyleng.
  int YYLeng() const { return yyleng; }

  virtual void
  yy_switch_to_buffer( yy_buffer_state* new_buffer ) = 0;
  virtual yy_buffer_state* yy_create_buffer( std::istream* s, int size ) = 0;
  virtual yy_buffer_state* yy_create_buffer( std::istream& s, int size ) = 0;
  virtual void yy_delete_buffer( yy_buffer_state* b ) = 0;
  virtual void yyrestart( std::istream* s ) = 0;
  virtual void yyrestart( std::istream& s ) = 0;

  /// Scanning entry point supplied by the derived scanner class.
  virtual int yylex() = 0;

  // Call yylex with new input/output sources.
  int yylex( std::istream& new_in, std::ostream& new_out )
  {
    switch_streams( new_in, new_out );
    return yylex();
  }

  int yylex( std::istream* new_in, std::ostream* new_out = 0)
  {
    switch_streams( new_in, new_out );
    return yylex();
  }

  // Switch to new input/output streams. A nil stream pointer
  // indicates "keep the current one".
  virtual void switch_streams( std::istream* new_in,
                               std::ostream* new_out ) = 0;
  virtual void switch_streams( std::istream& new_in,
                               std::ostream& new_out ) = 0;

  /// Returns yylineno.
  int lineno() const { return yylineno; }

  /// Returns the current value of the debug flag.
  int debug() const { return yy_flex_debug; }
  /// Stores flag as the new value of the debug flag.
  void set_debug( int flag ) { yy_flex_debug = flag; }

protected:
  char* yytext;
  int yyleng;
  int yylineno;       // only maintained if you use %option yylineno
  int yy_flex_debug;  // only has effect with -d or "%option debug"
};

}
#endif // __FLEX_LEXER_H
#if defined(yyFlexLexer) || ! defined(yyFlexLexerOnce)
// Either this is the first time through (yyFlexLexerOnce not defined),
// or this is a repeated include to define a different flavor of
// yyFlexLexer, as discussed in the flex manual.
# define yyFlexLexerOnce
extern “C++” {
class yyFlexLexer : public FlexLexer {
public:
// arg_yyin and arg_yyout default to the cin and cout, but we
// only make that assignment when initializing in yylex().
yyFlexLexer( std::istream& arg_yyin, std::ostream& arg_yyout );
yyFlexLexer( std::istream* arg_yyin = 0, std::ostream* arg_yyout = 0 );
private:
void ctor_common();
public:
virtual ~yyFlexLexer();
void yy_switch_to_buffer( yy_buffer_state* new_buffer );
yy_buffer_state* yy_create_buffer( std::istream* s, int size );
yy_buffer_state* yy_create_buffer( std::istream& s, int size );
void yy_delete_buffer( yy_buffer_state* b );
void yyrestart( std::istream* s );
void yyrestart( std::istream& s );
void yypush_buffer_state( yy_buffer_state* new_buffer );
void yypop_buffer_state();
virtual int yylex();
virtual void switch_streams( std::istream& new_in, std::ostream& new_out );
virtual void switch_streams( std::istream* new_in = 0, std::ostream* new_out = 0 );
virtual int yywrap();
protected:
virtual int LexerInput( char* buf, int max_size );
virtual void LexerOutput( const char* buf, int size );
virtual void LexerError( const char* msg );
void yyunput( int c, char* buf_ptr );
int yyinput();
void yy_load_buffer_state();
void yy_init_buffer( yy_buffer_state* b, std::istream& s );
void yy_flush_buffer( yy_buffer_state* b );
int yy_start_stack_ptr;
int yy_start_stack_depth;
int* yy_start_stack;
void yy_push_state( int new_state );
void yy_pop_state();
int yy_top_state();
yy_state_type yy_get_previous_state();
yy_state_type yy_try_NUL_trans( yy_state_type current_state );
int yy_get_next_buffer();
std::istream yyin; // input source for default LexerInput
std::ostream yyout; // output sink for default LexerOutput
// yy_hold_char holds the character lost when yytext is formed.
char yy_hold_char;
// Number of characters read into yy_ch_buf.
int yy_n_chars;
// Points to current character in buffer.
char* yy_c_buf_p;
int yy_init; // whether we need to initialize
int yy_start; // start state number
// Flag which is used to allow yywrap()’s to do buffer switches
// instead of setting up a fresh yyin. A bit of a hack …
int yy_did_buffer_switch_on_eof;
size_t yy_buffer_stack_top; /**< index of top of stack. */
size_t yy_buffer_stack_max; /**< capacity of stack. */
yy_buffer_state ** yy_buffer_stack; /**< Stack as an array. */
void yyensure_buffer_stack(void);
// The following are not always needed, but may be depending
// on use of certain flex features (like REJECT or yymore()).
yy_state_type yy_last_accepting_state;
char* yy_last_accepting_cpos;
yy_state_type* yy_state_buf;
yy_state_type* yy_state_ptr;
char* yy_full_match;
int* yy_full_state;
int yy_full_lp;
int yy_lp;
int yy_looking_for_trail_begin;
int yy_more_flag;
int yy_more_len;
int yy_more_offset;
int yy_prev_more_offset;
};
}
#endif // yyFlexLexer || ! yyFlexLexerOnce
bin/Linux/xmlextract
books1.xml
2014-04-15
Project Gutenberg
Roughing It
Twain, Mark, 1835-1910
Roughing It by Mark Twain
en
Authors, American -- Homes and haunts -- West (U.S.)
Twain, Mark, 1835-1910 -- Travel -- West (U.S.)
West (U.S.) -- Intellectual life -- 19th century
West (U.S.) -- Description and travel
PS
E660
2004-07-03
3926
Project Gutenberg
Queen Victoria, her girlhood and womanhood
Greenwood, Grace, 1823-1904
Queen Victoria, her girlhood and womanhood
en
Victoria, Queen of Great Britain, 1819-1901
DA
2004-09-01
23
Project Gutenberg
A History of the Early Part of the Reign of James the Second
Fox, Charles James, 1749-1806
Morley, Henry, 1822-1894 [Editor]
A History of the Early Part of the Reign of James
en
Great Britain -- History -- James II, 1685-1688
DA
2003-07-01
17
Project Gutenberg
A History of the Four Georges, Volume I
McCarthy, Justin, 1830-1912
A History of the Four Georges, Volume I
en
Great Britain -- Politics and government -- 1714-1837
Great Britain -- History -- 1714-1837
DA
2007-11-13
3
extraction.cpp
#include "extraction.h"
#include
#include
#include
using namespace std;
/**
 * Examine an xpath step of the form "/tagName[k]" and pull out the tag name
 * and index. The index part may be omitted, in which case it is assumed to
 * be 1. The leading '/' is optional.
 *
 * Examples: "/p[2]" gives ("p", 2); "/html" gives ("html", 1).
 *
 * @param xpathStep the string containing one step in an xpath.
 * @param tagName the tag name that must be matched in the step (output)
 * @param index the index of the desired child with that tagName (output)
 * @throws std::invalid_argument or std::out_of_range (from std::stoi) when
 *         the text between '[' and ']' is not a representable integer.
 */
void interpretXPathStep(std::string xpathStep, std::string &tagName, unsigned &index)
{
    index = 1;

    // Drop the optional leading '/' before searching for '[' so that every
    // position computed below refers to the same string the tag name is cut
    // from. Mixing offsets of the slashed and unslashed strings would leave
    // the '[' attached to the tag name (e.g. "/p[2]" -> "p[").
    if (!xpathStep.empty() && xpathStep[0] == '/')
        xpathStep.erase(0, 1);

    const std::string::size_type indexStart = xpathStep.find('[');
    if (indexStart == std::string::npos)
    {
        // No "[k]" suffix: the whole step is the tag name, index stays 1.
        tagName = xpathStep;
        return;
    }

    tagName = xpathStep.substr(0, indexStart);

    // If the closing ']' is missing, hand everything after '[' to stoi,
    // which stops at the first non-digit character.
    const std::string::size_type indexStop = xpathStep.find(']', indexStart + 1);
    const std::string::size_type digitCount =
        (indexStop == std::string::npos) ? std::string::npos
                                         : indexStop - indexStart - 1;

    index = static_cast<unsigned>(
        std::stoi(xpathStep.substr(indexStart + 1, digitCount)));
}
/**
* Find a node in an XML tree using a subset of XPATH:
* /tag1[k1]/tag2[k2]/.../tagn[kn]
* Each tagi is an XML tag name. The [ki] give an integer index indicating
* which child with the given tag name should be selected. The "[ki]" portion
* may be omitted when ki==1.
*
* NOTE(review): this is still the unimplemented assignment stub. As written
* it ignores both arguments and always returns nullptr, i.e. it reports
* "no matching node" for every path.
*
* @param root the root of the tree from which the selection should be made
* @param xpath the path to follow in selecting the desired node.
* @return the desired node from within the tree, or nullptr if no node matching
* the given path can be found.
*/
Node *selectByPath(Node *root, std::string xpath)
{
//*** To be implemented
// Placeholder result until the path-following logic is written.
return nullptr;
}
/**
 * Given an XML (sub)tree, extract and concatenate the text leaves from
 * that tree in the order they would be encountered in an XML listing,
 * separating text from different leaf nodes by one or more blanks.
 *
 * NOTE(review): this is still the unimplemented assignment stub. As written
 * it ignores its argument and always returns an empty string.
 *
 * @param tree the root of the tree from which the text is to be extracted.
 * @return the concatenated text (currently always empty).
 */
std::string extractText(const Node *tree)
{
    //*** To be implemented
    // Placeholder result until the traversal is written.
    return "";
}
extraction.h
#ifndef EXTRACTION_H
#define EXTRACTION_H
#include
#include “node.h”
/**
* Find a node in an XML tree usign a subset of XPATH:
* /tag1[k1]/tag2[k2]/…/tagn[kn]
* Each tagi is an XML tag name. The [ki] give an integer index indicating
* which child with the given tag name should be selected. The “[ki]” portion
* may be omitted when ki==1.
*
* @param root the root of the tree from which the selection should be made
* @param xpath the path to follow in selectign the desired node.
* @return the desired node from within the tree, or nullptr if no node matching
* the given path can be found.
*/
Node* selectByPath (Node* root, std::string xpath);
/**
* Given an XML (sub)tree, extract and concatenate the text leaves from
* that tree in the order they would be encountered in an XML listing,
* separating text from different leaf nodes by one or more blanks.
*
* @param tree the root of the tree from which the text is to be extracted.
*/
std::string extractText(const Node* tree);
#endif
lex.yy.cpp
lex.yy.cpp
#line
2
“lex.yy.cpp”
#line
4
“lex.yy.cpp”
#define
YY_INT_ALIGNED
short
int
/* A lexical scanner generated by flex */
#define
FLEX_SCANNER
#define
YY_FLEX_MAJOR_VERSION
2
#define
YY_FLEX_MINOR_VERSION
6
#define
YY_FLEX_SUBMINOR_VERSION
4
#if
YY_FLEX_SUBMINOR_VERSION
>
0
#define
FLEX_BETA
#endif
/* The c++ scanner is a mess. The FlexLexer.h header file relies on the
* following macro. This is required in order to pass the c++-multiple-scanners
* test in the regression suite. We get reports that it breaks inheritance.
* We will address this in a future release of flex, or omit the C++ scanner
* altogether.
*/
#define
yyFlexLexer yyFlexLexer
/* First, we deal with platform-specific or compiler-specific issues. */
/* begin standard C headers. */
/* end standard C headers. */
/* flex integer type definitions */
#ifndef
FLEXINT_H
#define
FLEXINT_H
/* C99 systems have
#if
defined
(
__STDC_VERSION__
)
&&
__STDC_VERSION__
>=
199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
* if you want the limit (max/min) macros for int types.
*/
#ifndef
__STDC_LIMIT_MACROS
#define
__STDC_LIMIT_MACROS
1
#endif
#include
<
inttypes
.
h
>
typedef
int8_t flex_int8_t
;
typedef
uint8_t flex_uint8_t
;
typedef
int16_t flex_int16_t
;
typedef
uint16_t flex_uint16_t
;
typedef
int32_t flex_int32_t
;
typedef
uint32_t flex_uint32_t
;
#else
typedef
signed
char
flex_int8_t
;
typedef
short
int
flex_int16_t
;
typedef
int
flex_int32_t
;
typedef
unsigned
char
flex_uint8_t
;
typedef
unsigned
short
int
flex_uint16_t
;
typedef
unsigned
int
flex_uint32_t
;
/* Limits of integral types. */
#ifndef
INT8_MIN
#define
INT8_MIN
(
–
128
)
#endif
#ifndef
INT16_MIN
#define
INT16_MIN
(
–
32767
–
1
)
#endif
#ifndef
INT32_MIN
#define
INT32_MIN
(
–
2147483647
–
1
)
#endif
#ifndef
INT8_MAX
#define
INT8_MAX
(
127
)
#endif
#ifndef
INT16_MAX
#define
INT16_MAX
(
32767
)
#endif
#ifndef
INT32_MAX
#define
INT32_MAX
(
2147483647
)
#endif
#ifndef
UINT8_MAX
#define
UINT8_MAX
(
255U
)
#endif
#ifndef
UINT16_MAX
#define
UINT16_MAX
(
65535U
)
#endif
#ifndef
UINT32_MAX
#define
UINT32_MAX
(
4294967295U
)
#endif
#ifndef
SIZE_MAX
#define
SIZE_MAX
(
~
(
size_t
)
0
)
#endif
#endif
/* ! C99 */
#endif
/* ! FLEXINT_H */
/* begin standard C++ headers. */
#include
<
iostream
>
#include
<
errno
.
h
>
#include
<
cstdlib
>
#include
<
cstdio
>
#include
<
cstring
>
/* end standard C++ headers. */
/* TODO: this is always defined, so inline it */
#define
yyconst
const
#if
defined
(
__GNUC__
)
&&
__GNUC__
>=
3
#define
yynoreturn __attribute__
((
__noreturn__
))
#else
#define
yynoreturn
#endif
/* Returned upon end-of-file. */
#define
YY_NULL
0
/* Promotes a possibly negative, possibly signed char to an
* integer in range [0..255] for use as an array index.
*/
#define
YY_SC_TO_UI
(
c
)
((
YY_CHAR
)
(
c
))
/* Enter a start condition. This macro really ought to take a parameter,
* but we do it the disgusting crufty way forced on us by the ()-less
* definition of BEGIN.
*/
#define
BEGIN
(
yy_start
)
=
1
+
2
*
/* Translate the current start state into a value that can be later handed
* to BEGIN to return to the state. The YYSTATE alias is for lex
* compatibility.
*/
#define
YY_START
(((
yy_start
)
–
1
)
/
2
)
#define
YYSTATE YY_START
/* Action number for EOF rule of a given start state. */
#define
YY_STATE_EOF
(
state
)
(
YY_END_OF_BUFFER
+
state
+
1
)
/* Special action meaning “start processing a new file”. */
#define
YY_NEW_FILE yyrestart
(
yyin
)
#define
YY_END_OF_BUFFER_CHAR
0
/* Size of default input buffer. */
#ifndef
YY_BUF_SIZE
#ifdef
__ia64__
/* On IA-64, the buffer size is 16k, not 8k.
* Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
* Ditto for the __ia64__ case accordingly.
*/
#define
YY_BUF_SIZE
32768
#else
#define
YY_BUF_SIZE
16384
#endif
/* __ia64__ */
#endif
/* The state buf must be large enough to hold one state per character in the main buffer.
*/
#define
YY_STATE_BUF_SIZE
((
YY_BUF_SIZE
+
2
)
*
sizeof
(
yy_state_type
))
#ifndef
YY_TYPEDEF_YY_BUFFER_STATE
#define
YY_TYPEDEF_YY_BUFFER_STATE
typedef
struct
yy_buffer_state
*
YY_BUFFER_STATE
;
#endif
#ifndef
YY_TYPEDEF_YY_SIZE_T
#define
YY_TYPEDEF_YY_SIZE_T
typedef
size_t yy_size_t
;
#endif
extern
int
yyleng
;
#define
EOB_ACT_CONTINUE_SCAN
0
#define
EOB_ACT_END_OF_FILE
1
#define
EOB_ACT_LAST_MATCH
2
#define
YY_LESS_LINENO
(
n
)
#define
YY_LINENO_REWIND_TO
(
ptr
)
/* Return all but the first “n” matched characters back to the input stream. */
#define
yyless
(
n
)
\
do
\
{
\
/* Undo effects of setting up yytext. */
\
int
yyless_macro_arg
=
(
n
);
\
YY_LESS_LINENO
(
yyless_macro_arg
);
\
*
yy_cp
=
(
yy_hold_char
);
\
YY_RESTORE_YY_MORE_OFFSET \
(
yy_c_buf_p
)
=
yy_cp
=
yy_bp
+
yyless_macro_arg
–
YY_MORE_ADJ
;
\
YY_DO_BEFORE_ACTION
;
/* set up yytext again */
\
}
\
while
(
0
)
#define
unput
(
c
)
yyunput
(
c
,
(
yytext_ptr
)
)
#ifndef
YY_STRUCT_YY_BUFFER_STATE
#define
YY_STRUCT_YY_BUFFER_STATE
struct
yy_buffer_state
{
std
::
streambuf
*
yy_input_file
;
char
*
yy_ch_buf
;
/* input buffer */
char
*
yy_buf_pos
;
/* current position in input buffer */
/* Size of input buffer in bytes, not including room for EOB
* characters.
*/
int
yy_buf_size
;
/* Number of characters read into yy_ch_buf, not including EOB
* characters.
*/
int
yy_n_chars
;
/* Whether we “own” the buffer – i.e., we know we created it,
* and can realloc() it to grow it, and should free() it to
* delete it.
*/
int
yy_is_our_buffer
;
/* Whether this is an “interactive” input source; if so, and
* if we’re using stdio for input, then we want to use getc()
* instead of fread(), to make sure we stop fetching input after
* each newline.
*/
int
yy_is_interactive
;
/* Whether we’re considered to be at the beginning of a line.
* If so, ‘^’ rules will be active on the next match, otherwise
* not.
*/
int
yy_at_bol
;
int
yy_bs_lineno
;
/**< The line count. */
int
yy_bs_column
;
/**< The column count. */
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
int
yy_fill_buffer
;
int
yy_buffer_status
;
#define
YY_BUFFER_NEW
0
#define
YY_BUFFER_NORMAL
1
/* When an EOF's been seen but there's still some text to process
* then we mark the buffer as YY_EOF_PENDING, to indicate that we
* shouldn't try reading from the input source any more. We might
* still have a bunch of tokens to match, though, because of
* possible backing-up.
*
* When we actually see the EOF, we change the status to "new"
* (via yyrestart()), so that the user can continue scanning by
* just pointing yyin at a new input file.
*/
#define
YY_BUFFER_EOF_PENDING
2
};
#endif
/* !YY_STRUCT_YY_BUFFER_STATE */
/* We provide macros for accessing buffer states in case in the
* future we want to put the buffer states in a more general
* "scanner state".
*
* Returns the top of the stack, or NULL.
*/
#define
YY_CURRENT_BUFFER
(
(
yy_buffer_stack
)
\
?
(
yy_buffer_stack
)[(
yy_buffer_stack_top
)]
\
:
NULL
)
/* Same as previous macro, but useful when we know that the buffer stack is not
* NULL or when we need an lvalue. For internal use only.
*/
#define
YY_CURRENT_BUFFER_LVALUE
(
yy_buffer_stack
)[(
yy_buffer_stack_top
)]
void
*
yyalloc
(
yy_size_t
);
void
*
yyrealloc
(
void
*
,
yy_size_t
);
void
yyfree
(
void
*
);
#define
yy_new_buffer yy_create_buffer
#define
yy_set_interactive
(
is_interactive
)
\
{
\
if
(
!
YY_CURRENT_BUFFER
){
\
yyensure_buffer_stack
();
\
YY_CURRENT_BUFFER_LVALUE
=
\
yy_create_buffer
(
yyin
,
YY_BUF_SIZE
);
\
}
\
YY_CURRENT_BUFFER_LVALUE
->
yy_is_interactive
=
is_interactive
;
\
}
#define
yy_set_bol
(
at_bol
)
\
{
\
if
(
!
YY_CURRENT_BUFFER
){
\
yyensure_buffer_stack
();
\
YY_CURRENT_BUFFER_LVALUE
=
\
yy_create_buffer
(
yyin
,
YY_BUF_SIZE
);
\
}
\
YY_CURRENT_BUFFER_LVALUE
->
yy_at_bol
=
at_bol
;
\
}
#define
YY_AT_BOL
()
(
YY_CURRENT_BUFFER_LVALUE
->
yy_at_bol
)
/* Begin user sect3 */
#define
YY_SKIP_YYWRAP
typedef
flex_uint8_t YY_CHAR
;
#define
yytext_ptr yytext
#define
YY_INTERACTIVE
#include
“FlexLexer.h”
int
yyFlexLexer
::
yywrap
()
{
return
1
;
}
/* Done after the current pattern has been matched and before the
* corresponding action – sets up yytext.
*/
#define
YY_DO_BEFORE_ACTION \
(
yytext_ptr
)
=
yy_bp
;
\
yyleng
=
(
int
)
(
yy_cp
–
yy_bp
);
\
(
yy_hold_char
)
=
*
yy_cp
;
\
*
yy_cp
=
‘\0’
;
\
(
yy_c_buf_p
)
=
yy_cp
;
#define
YY_NUM_RULES
10
#define
YY_END_OF_BUFFER
11
/* This struct is not used in this scanner,
but its presence is necessary. */
struct
yy_trans_info
{
flex_int32_t yy_verify
;
flex_int32_t yy_nxt
;
};
static
const
flex_int16_t yy_accept
[
34
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
11
,
6
,
1
,
1
,
9
,
9
,
7
,
10
,
7
,
6
,
6
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
3
,
8
,
2
,
0
,
0
,
5
,
0
}
;
static
const
YY_CHAR yy_ec
[
256
]
=
{
0
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
3
,
1
,
1
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
4
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
5
,
1
,
6
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
1
,
8
,
1
,
9
,
1
,
1
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
1
,
1
,
1
,
1
,
1
,
1
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
10
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
}
;
static
const
YY_CHAR yy_meta
[
11
]
=
{
0
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
3
,
3
,
2
}
;
static
const
flex_int16_t yy_base
[
41
]
=
{
0
,
0
,
8
,
15
,
18
,
0
,
0
,
29
,
22
,
24
,
0
,
24
,
70
,
70
,
70
,
17
,
33
,
0
,
0
,
14
,
0
,
30
,
6
,
9
,
34
,
4
,
38
,
70
,
70
,
70
,
3
,
42
,
70
,
70
,
52
,
55
,
58
,
61
,
2
,
63
,
66
}
;
static
const
flex_int16_t yy_def
[
41
]
=
{
0
,
34
,
34
,
35
,
35
,
36
,
36
,
33
,
37
,
37
,
9
,
33
,
33
,
33
,
33
,
33
,
9
,
16
,
9
,
33
,
38
,
39
,
33
,
33
,
40
,
39
,
39
,
33
,
33
,
33
,
40
,
40
,
33
,
0
,
33
,
33
,
33
,
33
,
33
,
33
,
33
}
;
static
const
flex_int16_t yy_nxt
[
81
]
=
{
0
,
33
,
9
,
10
,
24
,
33
,
33
,
33
,
11
,
12
,
9
,
10
,
32
,
27
,
29
,
28
,
11
,
12
,
14
,
23
,
15
,
14
,
22
,
15
,
17
,
17
,
18
,
18
,
19
,
33
,
20
,
33
,
33
,
33
,
21
,
17
,
17
,
26
,
33
,
27
,
26
,
31
,
33
,
32
,
31
,
26
,
33
,
27
,
26
,
31
,
33
,
32
,
31
,
8
,
8
,
8
,
13
,
13
,
13
,
14
,
14
,
14
,
16
,
16
,
25
,
25
,
25
,
30
,
30
,
30
,
7
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
}
;
static
const
flex_int16_t yy_chk
[
81
]
=
{
0
,
0
,
1
,
1
,
38
,
0
,
0
,
0
,
1
,
1
,
2
,
2
,
30
,
25
,
23
,
22
,
2
,
2
,
3
,
19
,
3
,
4
,
15
,
4
,
8
,
8
,
9
,
9
,
11
,
7
,
11
,
0
,
0
,
0
,
11
,
16
,
16
,
21
,
0
,
21
,
21
,
24
,
0
,
24
,
24
,
26
,
0
,
26
,
26
,
31
,
0
,
31
,
31
,
34
,
34
,
34
,
35
,
35
,
35
,
36
,
36
,
36
,
37
,
37
,
39
,
39
,
39
,
40
,
40
,
40
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
,
33
}
;
/* The intent behind this definition is that it’ll catch
* any uses of REJECT which flex missed.
*/
#define
REJECT reject_used_but_not_detected
#define
yymore
()
yymore_used_but_not_detected
#define
YY_MORE_ADJ
0
#define
YY_RESTORE_YY_MORE_OFFSET
#line
1
“scanner.l”
#line
4
“scanner.l”
#include
<
fstream
>
#include
“scanner.h”
using
namespace
std
;
int
savedState
,
hrefMode
;
std
::
string lexeme
;
#undef
ECHO
//#define ECHO cerr << yytext << flush; #define ECHO #undef LEXEME #define LEXEME lexeme = yytext ; int yy_more_offset , yy_prev_more_offset ; #line 442 "lex.yy.cpp" /* regular definitions */ #line 445 "lex.yy.cpp" #define INITIAL 0 #define INCOMMENT 1 #define HTMLTAG 2 #ifndef YY_NO_UNISTD_H /* Special case for "unistd.h", since it is non-ANSI. We include it way * down here because we want the user's section 1 to have been scanned first. * The user has a chance to override it with an option. */ #include < unistd . h >
#endif
#ifndef
YY_EXTRA_TYPE
#define
YY_EXTRA_TYPE
void
*
#endif
#ifndef
yytext_ptr
static
void
yy_flex_strncpy
(
char
*
,
const
char
*
,
int
);
#endif
#ifdef
YY_NEED_STRLEN
static
int
yy_flex_strlen
(
const
char
*
);
#endif
#ifndef
YY_NO_INPUT
#endif
/* Amount of stuff to slurp up with each read. */
#ifndef
YY_READ_BUF_SIZE
#ifdef
__ia64__
/* On IA-64, the buffer size is 16k, not 8k */
#define
YY_READ_BUF_SIZE
16384
#else
#define
YY_READ_BUF_SIZE
8192
#endif
/* __ia64__ */
#endif
/* Copy whatever the last rule matched to the standard output. */
#ifndef
ECHO
#define
ECHO
LexerOutput
(
yytext
,
yyleng
)
#endif
/* Gets input and stuffs it into “buf”. number of characters read, or YY_NULL,
* is returned in “result”.
*/
#ifndef
YY_INPUT
#define
YY_INPUT
(
buf
,
result
,
max_size
)
\
\
if
(
(
int
)(
result
=
LexerInput
(
(
char
*
)
buf
,
max_size
))
<
0
)
\
YY_FATAL_ERROR
(
"input in flex scanner failed"
);
#endif
/* No semi-colon after return; correct usage is to write "yyterminate();" -
* we don't want an extra ';' after the "return" because that will cause
* some compilers to complain about unreachable statements.
*/
#ifndef
yyterminate
#define
yyterminate
()
return
YY_NULL
#endif
/* Number of entries by which start-condition stack grows. */
#ifndef
YY_START_STACK_INCR
#define
YY_START_STACK_INCR
25
#endif
/* Report a fatal error. */
#ifndef
YY_FATAL_ERROR
#define
YY_FATAL_ERROR
(
msg
)
LexerError
(
msg
)
#endif
/* end tables serialization structures and prototypes */
/* Default declaration of generated scanner - a define so the user can
* easily add parameters.
*/
#ifndef
YY_DECL
#define
YY_DECL_IS_OURS
1
#define
YY_DECL
int
yyFlexLexer
::
yylex
()
#endif
/* !YY_DECL */
/* Code executed at the beginning of each rule, after yytext and yyleng
* have been set up.
*/
#ifndef
YY_USER_ACTION
#define
YY_USER_ACTION
#endif
/* Code executed at the end of each rule. */
#ifndef
YY_BREAK
#define
YY_BREAK
/*LINTED*/
break
;
#endif
#define
YY_RULE_SETUP \
YY_USER_ACTION
/** The main scanner function which does all the work.
*/
YY_DECL
{
yy_state_type yy_current_state
;
char
*
yy_cp
,
*
yy_bp
;
int
yy_act
;
if
(
!
(
yy_init
)
)
{
(
yy_init
)
=
1
;
#ifdef
YY_USER_INIT
YY_USER_INIT
;
#endif
if
(
!
(
yy_start
)
)
(
yy_start
)
=
1
;
/* first start state */
if
(
!
yyin
)
yyin
.
rdbuf
(
std
::
cin
.
rdbuf
());
if
(
!
yyout
)
yyout
.
rdbuf
(
std
::
cout
.
rdbuf
());
if
(
!
YY_CURRENT_BUFFER
)
{
yyensure_buffer_stack
();
YY_CURRENT_BUFFER_LVALUE
=
yy_create_buffer
(
yyin
,
YY_BUF_SIZE
);
}
yy_load_buffer_state
(
);
}
{
#line
44
"scanner.l"
#line
582
"lex.yy.cpp"
while
(
/*CONSTCOND*/
1
)
/* loops until end-of-file is reached */
{
yy_cp
=
(
yy_c_buf_p
);
/* Support of yytext. */
*
yy_cp
=
(
yy_hold_char
);
/* yy_bp points to the position in yy_ch_buf of the start of
* the current run.
*/
yy_bp
=
yy_cp
;
yy_current_state
=
(
yy_start
);
yy_match
:
do
{
YY_CHAR yy_c
=
yy_ec
[
YY_SC_TO_UI
(
*
yy_cp
)]
;
if
(
yy_accept
[
yy_current_state
]
)
{
(
yy_last_accepting_state
)
=
yy_current_state
;
(
yy_last_accepting_cpos
)
=
yy_cp
;
}
while
(
yy_chk
[
yy_base
[
yy_current_state
]
+
yy_c
]
!=
yy_current_state
)
{
yy_current_state
=
(
int
)
yy_def
[
yy_current_state
];
if
(
yy_current_state
>=
34
)
yy_c
=
yy_meta
[
yy_c
];
}
yy_current_state
=
yy_nxt
[
yy_base
[
yy_current_state
]
+
yy_c
];
++
yy_cp
;
}
while
(
yy_base
[
yy_current_state
]
!=
70
);
yy_find_action
:
yy_act
=
yy_accept
[
yy_current_state
];
if
(
yy_act
==
0
)
{
/* have to back up */
yy_cp
=
(
yy_last_accepting_cpos
);
yy_current_state
=
(
yy_last_accepting_state
);
yy_act
=
yy_accept
[
yy_current_state
];
}
YY_DO_BEFORE_ACTION
;
do_action
:
/* This label is used only to access EOF actions. */
switch
(
yy_act
)
{
/* beginning of action switch */
case
0
:
/* must back up */
/* undo the effects of YY_DO_BEFORE_ACTION */
*
yy_cp
=
(
yy_hold_char
);
yy_cp
=
(
yy_last_accepting_cpos
);
yy_current_state
=
(
yy_last_accepting_state
);
goto
yy_find_action
;
case
1
:
/* rule 1 can match eol */
YY_RULE_SETUP
#line
46
“scanner.l”
{}
YY_BREAK
case
2
:
YY_RULE_SETUP
#line
48
“scanner.l”
{
savedState
=
YY_START
;
BEGIN
(
INCOMMENT
);}
YY_BREAK
case
3
:
/* rule 3 can match eol */
YY_RULE_SETUP
#line
50
“scanner.l”
{
lexeme
=
yytext
;
return
TAG
;}
YY_BREAK
case
4
:
/* rule 4 can match eol */
*
yy_cp
=
(
yy_hold_char
);
/* undo effects of setting up yytext */
YY_LINENO_REWIND_TO
(
yy_cp
–
1
);
(
yy_c_buf_p
)
=
yy_cp
-=
1
;
YY_DO_BEFORE_ACTION
;
/* set up yytext again */
YY_RULE_SETUP
#line
51
“scanner.l”
{
lexeme
=
yytext
;
return
TAG
;}
YY_BREAK
case
5
:
/* rule 5 can match eol */
YY_RULE_SETUP
#line
52
“scanner.l”
{
lexeme
=
yytext
;
return
TAG
;}
YY_BREAK
case
6
:
/* rule 6 can match eol */
YY_RULE_SETUP
#line
53
“scanner.l”
{
LEXEME
;
return
TEXT
;}
YY_BREAK
case
7
:
YY_RULE_SETUP
#line
55
“scanner.l”
{}
YY_BREAK
case
8
:
YY_RULE_SETUP
#line
56
“scanner.l”
{
BEGIN
(
savedState
);}
YY_BREAK
case
YY_STATE_EOF
(
INITIAL
)
:
case YY_STATE_EOF(INCOMMENT):
case YY_STATE_EOF(HTMLTAG):
#line 59 “scanner.l”
{return ENDOFFILE;}
YY_BREAKcase 9:
YY_RULE_SETUP#line 60 “scanner.l”
{}
YY_BREAKcase 10:
YY_RULE_SETUP#line 62 “scanner.l”
ECHO;
YY_BREAK#line 704 “lex.yy.cpp”
case YY_END_OF_BUFFER:
{
/* Amount of text matched not including the EOB char. */
int yy_amount_of_matched_text = (int) (yy_cp – (yytext_ptr)) – 1;
/* Undo the effects of YY_DO_BEFORE_ACTION. */
*yy_cp = (yy_hold_char);
YY_RESTORE_YY_MORE_OFFSET
if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
{
/* We’re scanning a new file or input source. It’s * possible that this happened because the user * just pointed yyin at a new source and called * yylex(). If so, then we have to assure * consistency between YY_CURRENT_BUFFER and our * globals. Here is the right place to do so, because * this is the first action (other than possibly a * back-up) that will match for the new input source. */
(yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin.rdbuf();
YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
}
/* Note that here we test for yy_c_buf_p “<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character * (since all states make transitions on EOB to the * end-of-buffer state). Contrast this with the test * in input(). */ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
{ /* This was really a NUL. */
yy_state_type yy_next_state;
(yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
yy_current_state = yy_get_previous_state( );
/* Okay, we’re now positioned to make the NUL * transition. We couldn’t have * yy_get_previous_state() go ahead and do it * for us because it doesn’t know how to deal * with the possibility of jamming (and we don’t * want to build jamming into it because then it * will run more slowly). */
yy_next_state = yy_try_NUL_trans( yy_current_state );
yy_bp = (yytext_ptr) + YY_MORE_ADJ;
if ( yy_next_state )
{
/* Consume the NUL. */
yy_cp = ++(yy_c_buf_p);
yy_current_state = yy_next_state;
goto yy_match;
}
else
{
yy_cp = (yy_c_buf_p);
goto yy_find_action;
}
}
else switch ( yy_get_next_buffer( ) )
{
case EOB_ACT_END_OF_FILE:
{
(yy_did_buffer_switch_on_eof) = 0;
if ( yywrap( ) )
{
/* Note: because we’ve taken care in * yy_get_next_buffer() to have set up * yytext, we can now set up * yy_c_buf_p so that if some total * hoser (like flex itself) wants to * call the scanner after we return the * YY_NULL, it’ll still work – another * YY_NULL will get returned. */
(yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
yy_act = YY_STATE_EOF(YY_START);
goto do_action;
}
else
{
if ( ! (yy_did_buffer_switch_on_eof) )
YY_NEW_FILE;
}
break;
}
case EOB_ACT_CONTINUE_SCAN:
(yy_c_buf_p) =
(yytext_ptr) + yy_amount_of_matched_text;
yy_current_state = yy_get_previous_state( );
yy_cp = (yy_c_buf_p);
yy_bp = (yytext_ptr) + YY_MORE_ADJ;
goto yy_match;
case EOB_ACT_LAST_MATCH:
(yy_c_buf_p) =
&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
yy_current_state = yy_get_previous_state( );
yy_cp = (yy_c_buf_p);
yy_bp = (yytext_ptr) + YY_MORE_ADJ;
goto yy_find_action;
}
break;
}
default:
YY_FATAL_ERROR(
“fatal flex scanner internal error–no action found” );
} /* end of action switch */
} /* end of scanning one token */
} /* end of user’s declarations */
} /* end of yylex */
/* Pointer-taking constructor, retained for backward compatibility
 * (DEPRECATED). A null stream pointer selects the corresponding standard
 * stream: std::cin for input, std::cout for output. The scanner's own
 * yyin/yyout are built on the chosen stream's buffer, after which the
 * shared initialisation runs.
 */
yyFlexLexer::yyFlexLexer( std::istream* arg_yyin, std::ostream* arg_yyout )
	: yyin( ( arg_yyin ? *arg_yyin : std::cin ).rdbuf() ),
	  yyout( ( arg_yyout ? *arg_yyout : std::cout ).rdbuf() )
{
	ctor_common();
}
/* The contents of this function are C++ specific, so the () macro is not used. */
yyFlexLexer::yyFlexLexer( std::istream& arg_yyin, std::ostream& arg_yyout ):
yyin(arg_yyin.rdbuf()),
yyout(arg_yyout.rdbuf())
{
ctor_common();
}
/* Initialisation shared by both constructors: every pointer member set here
 * starts out null, every counter and flag starts at zero, and the line
 * counter starts at 1.
 */
void yyFlexLexer::ctor_common()
{
	/* Pointers into scanner-owned storage: nothing is allocated yet. */
	yy_c_buf_p = NULL;
	yy_state_buf = NULL;
	yy_start_stack = NULL;
	yy_buffer_stack = NULL;

	/* Buffer stack bookkeeping. */
	yy_buffer_stack_top = 0;
	yy_buffer_stack_max = 0;

	/* Start-condition stack bookkeeping. */
	yy_start_stack_ptr = 0;
	yy_start_stack_depth = 0;

	/* Scanner state and flags. */
	yy_init = 0;
	yy_start = 0;
	yy_flex_debug = 0;
	yy_did_buffer_switch_on_eof = 0;
	yy_looking_for_trail_begin = 0;

	/* yymore() bookkeeping. */
	yy_more_flag = 0;
	yy_more_len = 0;
	yy_more_offset = 0;
	yy_prev_more_offset = 0;

	yylineno = 1;	// this will only get updated if %option yylineno
}
/* Release everything the scanner owns: the current input buffer, the buffer
 * stack, the start-condition stack and the state buffer.
 * C++ specific, so the () macro is not used.
 */
yyFlexLexer::~yyFlexLexer()
{
    /* The current buffer is looked up through the buffer stack, so it has
     * to be deleted before the stack itself is freed. */
    yy_delete_buffer( YY_CURRENT_BUFFER );
    yyfree( yy_buffer_stack );

    yyfree( yy_start_stack );
    delete [] yy_state_buf;
}
/* The contents of this function are C++ specific, so the () macro is not used. */
void yyFlexLexer::switch_streams( std::istream& new_in, std::ostream& new_out )
{
// was if( new_in ) yy_delete_buffer( YY_CURRENT_BUFFER );
yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE ) );
// was if( new_out ) yyout.rdbuf(new_out.rdbuf());
}
/* Pointer flavour of switch_streams(): a null pointer means "keep using the
 * scanner's own yyin / yyout".  Forwards to the reference overload.
 * C++ specific, so the () macro is not used.
 */
void yyFlexLexer::switch_streams( std::istream* new_in, std::ostream* new_out )
{
    std::istream& in_stream = new_in ? *new_in : yyin;
    std::ostream& out_stream = new_out ? *new_out : yyout;

    switch_streams( in_stream, out_stream );
}
/* Read input for the scanner from yyin into buf.
 *
 * With YY_INTERACTIVE defined a single character is read per call;
 * otherwise up to max_size characters are read in one block.
 *
 * @param buf      destination buffer.
 * @param max_size capacity of buf (unused in the interactive variant).
 * @return number of characters stored, 0 when yyin is at EOF or has failed,
 *         -1 when the stream is in the "bad" state after the read.
 */
#ifdef YY_INTERACTIVE
int yyFlexLexer::LexerInput( char* buf, int /* max_size */ )
#else
int yyFlexLexer::LexerInput( char* buf, int max_size )
#endif
{
    if ( yyin.eof() || yyin.fail() )
        return 0;

#ifdef YY_INTERACTIVE
    yyin.get( buf[0] );

    if ( yyin.eof() )
        return 0;

    if ( yyin.bad() )
        return -1;

    return 1;

#else
    (void) yyin.read( buf, max_size );

    if ( yyin.bad() )
        return -1;
    else
        /* gcount() is bounded by max_size, so it fits in an int. */
        return static_cast<int>( yyin.gcount() );
#endif
}
/* Write the first `size` characters of buf to yyout.  The stream returned
 * by write() is deliberately ignored.
 */
void yyFlexLexer::LexerOutput( const char* buf, int size )
{
    static_cast<void>( yyout.write( buf, size ) );
}
/* yy_get_next_buffer – try to read in a new buffer * * Returns a code representing an action: * EOB_ACT_LAST_MATCH – * EOB_ACT_CONTINUE_SCAN – continue scanning from current position * EOB_ACT_END_OF_FILE – end of file */
int yyFlexLexer::yy_get_next_buffer()
{
char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
char *source = (yytext_ptr);
int number_to_move, i;
int ret_val;
if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
YY_FATAL_ERROR(
“fatal flex scanner internal error–end of buffer missed” );
if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
{ /* Don’t try to fill the buffer, so this is an EOF. */
if ( (yy_c_buf_p) – (yytext_ptr) – YY_MORE_ADJ == 1 )
{
/* We matched a single character, the EOB, so * treat this as a final EOF. */
return EOB_ACT_END_OF_FILE;
}
else
{
/* We matched some text prior to the EOB, first * process it. */
return EOB_ACT_LAST_MATCH;
}
}
/* Try to read more data. */
/* First move last chars to start of buffer. */
number_to_move = (int) ((yy_c_buf_p) – (yytext_ptr) – 1);
for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
/* don’t do the read, it’s not guaranteed to return an EOF, * just force an EOF */
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
else
{
int num_to_read =
YY_CURRENT_BUFFER_LVALUE->yy_buf_size – number_to_move – 1;
while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. */ /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; int yy_c_buf_p_offset = (int) ((yy_c_buf_p) - b->yy_ch_buf);
if ( b->yy_is_our_buffer )
{
int new_size = b->yy_buf_size * 2;
if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8;
else
b->yy_buf_size *= 2;
b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
yyrealloc( (void *) b->yy_ch_buf,
(yy_size_t) (b->yy_buf_size + 2) );
}
else
/* Can’t grow it, we don’t own it. */
b->yy_ch_buf = NULL;
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR(
“fatal error – scanner input buffer overflow” );
(yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size –
number_to_move – 1;
}
if ( num_to_read > YY_READ_BUF_SIZE )
num_to_read = YY_READ_BUF_SIZE;
/* Read in more data. */
YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
(yy_n_chars), num_to_read );
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
}
if ( (yy_n_chars) == 0 )
{
if ( number_to_move == YY_MORE_ADJ )
{
ret_val = EOB_ACT_END_OF_FILE;
yyrestart( yyin );
}
else
{
ret_val = EOB_ACT_LAST_MATCH;
YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
YY_BUFFER_EOF_PENDING;
}
}
else
ret_val = EOB_ACT_CONTINUE_SCAN;
if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc(
(void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( “out of dynamic memory in yy_get_next_buffer()” );
/* “- 2” to take care of EOB’s */
YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size – 2);
}
(yy_n_chars) += number_to_move;
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
(yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
return ret_val;
}
/* yy_get_previous_state - get the state just before the EOB char was reached
 *
 * Replays the DFA from the start state over the characters of the current
 * token (from yytext_ptr + YY_MORE_ADJ up to, not including, yy_c_buf_p),
 * recording the last accepting state/position along the way.
 */
yy_state_type yyFlexLexer::yy_get_previous_state()
{
    yy_state_type yy_current_state = (yy_start);

    char *yy_cp = (yytext_ptr) + YY_MORE_ADJ;
    while ( yy_cp < (yy_c_buf_p) )
    {
        /* A NUL in the input maps to equivalence class 1. */
        YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);

        if ( yy_accept[yy_current_state] )
        {
            (yy_last_accepting_state) = yy_current_state;
            (yy_last_accepting_cpos) = yy_cp;
        }

        /* Follow default transitions until a table entry owned by the
         * current state is found. */
        while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
        {
            yy_current_state = (int) yy_def[yy_current_state];
            if ( yy_current_state >= 34 )
                yy_c = yy_meta[yy_c];
        }
        yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];

        ++yy_cp;
    }

    return yy_current_state;
}
/* yy_try_NUL_trans - try to make a transition on the NUL character
 *
 * synopsis
 *	next_state = yy_try_NUL_trans( current_state );
 *
 * Returns 0 when the transition jams (lands in state 33), otherwise the
 * state reached.
 */
yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state )
{
    /* NUL uses equivalence class 1. */
    YY_CHAR yy_c = 1;

    if ( yy_accept[yy_current_state] )
    {
        (yy_last_accepting_state) = yy_current_state;
        (yy_last_accepting_cpos) = (yy_c_buf_p);
    }

    /* Follow default transitions until a table entry owned by the current
     * state is found. */
    while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
    {
        yy_current_state = (int) yy_def[yy_current_state];
        if ( yy_current_state >= 34 )
            yy_c = yy_meta[yy_c];
    }
    yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];

    if ( yy_current_state == 33 )
        return 0;	/* jammed */

    return yy_current_state;
}
#ifndef YY_NO_UNPUT void yyFlexLexer::yyunput( int c, char* yy_bp)
{
char *yy_cp;
yy_cp = (yy_c_buf_p);
/* undo effects of setting up yytext */
*yy_cp = (yy_hold_char);
if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
{ /* need to shift things up to make room */
/* +2 for EOB chars. */
int number_to_move = (yy_n_chars) + 2;
char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
char *source =
&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
*–dest = *–source;
yy_cp += (int) (dest – source);
yy_bp += (int) (dest – source);
YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
(yy_n_chars) = (int) YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
YY_FATAL_ERROR( “flex scanner push-back overflow” );
}
*–yy_cp = (char) c;
(yytext_ptr) = yy_bp;
(yy_hold_char) = *yy_cp;
(yy_c_buf_p) = yy_cp;
}
#endif
/* Read and return the next input character directly, bypassing pattern
 * matching.  Refills the buffer when the end-of-buffer sentinel is reached.
 *
 * @return the character as an unsigned value, or 0 when input is exhausted
 *         and yywrap() reports that there is no further input.
 */
int yyFlexLexer::yyinput()
{
    int c;

    *(yy_c_buf_p) = (yy_hold_char);

    if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
    {
        /* yy_c_buf_p now points to the character we want to return.
         * If this occurs *before* the EOB characters, then it's a
         * valid NUL; if not, then we've hit the end of the buffer.
         */
        if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
            /* This was really a NUL. */
            *(yy_c_buf_p) = '\0';

        else
        { /* need more input */
            int offset = (int) ((yy_c_buf_p) - (yytext_ptr));
            ++(yy_c_buf_p);

            switch ( yy_get_next_buffer( ) )
            {
                case EOB_ACT_LAST_MATCH:
                    /* This happens because yy_g_n_b()
                     * sees that we've accumulated a
                     * token and flags that we need to
                     * try matching the token before
                     * proceeding.  But for input(),
                     * there's no matching to consider.
                     * So convert the EOB_ACT_LAST_MATCH
                     * to EOB_ACT_END_OF_FILE.
                     */

                    /* Reset buffer status. */
                    yyrestart( yyin );

                    /*FALLTHROUGH*/

                case EOB_ACT_END_OF_FILE:
                {
                    if ( yywrap( ) )
                        return 0;

                    if ( ! (yy_did_buffer_switch_on_eof) )
                        YY_NEW_FILE;
#ifdef __cplusplus
                    return yyinput();
#else
                    return input();
#endif
                }

                case EOB_ACT_CONTINUE_SCAN:
                    (yy_c_buf_p) = (yytext_ptr) + offset;
                    break;
            }
        }
    }

    c = *(unsigned char *) (yy_c_buf_p);	/* cast for 8-bit char's */
    *(yy_c_buf_p) = '\0';	/* preserve yytext */
    (yy_hold_char) = *++(yy_c_buf_p);

    return c;
}
/** Immediately switch to a different input stream.
 * Creates a current buffer first if the scanner does not have one yet, then
 * re-initialises that buffer for @a input_file and reloads the scan state.
 * @param input_file A readable stream.
 *
 * @note This function does not reset the start condition to @c INITIAL .
 */
void yyFlexLexer::yyrestart( std::istream& input_file )
{
    if ( YY_CURRENT_BUFFER == NULL )
    {
        /* No buffer yet: make room on the stack and create one. */
        yyensure_buffer_stack ();
        YY_CURRENT_BUFFER_LVALUE = yy_create_buffer( yyin, YY_BUF_SIZE );
    }

    yy_init_buffer( YY_CURRENT_BUFFER, input_file );
    yy_load_buffer_state( );
}
/** Pointer flavour of yyrestart(); forwards to the reference overload.
 * @param input_file A readable stream, or null to restart on the scanner's
 *                   own @c yyin.
 *
 * @note This function does not reset the start condition to @c INITIAL .
 */
void yyFlexLexer::yyrestart( std::istream* input_file )
{
    yyrestart( input_file ? *input_file : yyin );
}
/** Switch to a different input buffer.
 * Does nothing (beyond ensuring the buffer stack exists) when @a new_buffer
 * is already the current buffer.
 * @param new_buffer The new input buffer.
 */
void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
{
    /* TODO. We should be able to replace this entire function body
     * with
     *		yypop_buffer_state();
     *		yypush_buffer_state(new_buffer);
     */
    yyensure_buffer_stack ();

    if ( YY_CURRENT_BUFFER != new_buffer )
    {
        if ( YY_CURRENT_BUFFER )
        {
            /* Save the live scan state back into the outgoing buffer. */
            *(yy_c_buf_p) = (yy_hold_char);
            YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
            YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
        }

        YY_CURRENT_BUFFER_LVALUE = new_buffer;
        yy_load_buffer_state( );

        /* We don't actually know whether we did this switch during
         * EOF (yywrap()) processing, but the only time this flag
         * is looked at is after yywrap() is called, so it's safe
         * to go ahead and always set it.
         */
        (yy_did_buffer_switch_on_eof) = 1;
    }
}
/* Copy the current buffer's saved state (character count, scan position,
 * input stream buffer) into the scanner's working variables.
 */
void yyFlexLexer::yy_load_buffer_state()
{
    (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;

    (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
    (yytext_ptr) = (yy_c_buf_p);

    yyin.rdbuf(YY_CURRENT_BUFFER_LVALUE->yy_input_file);

    /* Remember the character at the scan position. */
    (yy_hold_char) = *(yy_c_buf_p);
}
/** Allocate and initialize an input buffer state.
 * @param file A readable stream.
 * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
 *
 * @return the allocated buffer state.
 */
YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( std::istream& file, int size )
{
    YY_BUFFER_STATE b;

    b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) );
    if ( ! b )
        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );

    b->yy_buf_size = size;

    /* yy_ch_buf has to be 2 characters longer than the size given because
     * we need to put in 2 end-of-buffer characters.
     */
    b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) );
    if ( ! b->yy_ch_buf )
        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );

    /* The scanner allocated the character buffer, so it may grow/free it. */
    b->yy_is_our_buffer = 1;

    yy_init_buffer( b, file );

    return b;
}
/** Delegate creation of buffers to the new version that takes an istream reference.
 * @param file A readable stream, or null to use the scanner's own @c yyin
 *             (the same fallback the other pointer overloads apply).
 * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
 *
 * @return the allocated buffer state.
 */
YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( std::istream* file, int size )
{
    /* Dereferencing a null stream pointer would be undefined behaviour. */
    return yy_create_buffer( file ? *file : yyin, size );
}
/** Destroy the buffer.
 * A null @a b is ignored.  The character storage is freed only when the
 * buffer state says the scanner owns it.
 * @param b a buffer created with yy_create_buffer()
 */
void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b )
{
    if ( b == NULL )
        return;

    /* Clear the stack slot so no dangling pointer to b remains. */
    if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
        YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;

    if ( b->yy_is_our_buffer )
        yyfree( (void *) b->yy_ch_buf );

    yyfree( (void *) b );
}
/* Initializes or reinitializes a buffer.
 * This function is sometimes called more than once on the same buffer,
 * such as during a yyrestart() or at EOF.
 * errno is saved on entry and restored on exit.
 */
void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, std::istream& file )
{
    const int saved_errno = errno;

    /* Flush first, then attach the new input: the order is significant
     * because flushing the current buffer reloads the scan state. */
    yy_flush_buffer( b );

    b->yy_input_file = file.rdbuf();
    b->yy_fill_buffer = 1;

    /* If b is the current buffer, then yy_init_buffer was _probably_
     * called from yyrestart() or through yy_get_next_buffer.
     * In that case, we don't want to reset the lineno or column.
     */
    if ( b != YY_CURRENT_BUFFER )
    {
        b->yy_bs_lineno = 1;
        b->yy_bs_column = 0;
    }

    b->yy_is_interactive = 0;

    errno = saved_errno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
 * A null @a b is ignored.
 * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
 */
void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b )
{
    if ( b == NULL )
        return;

    /* We always need two end-of-buffer characters.  The first causes
     * a transition to the end-of-buffer state.  The second causes
     * a jam in that state.
     */
    b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
    b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;

    b->yy_n_chars = 0;
    b->yy_buf_pos = &b->yy_ch_buf[0];

    b->yy_at_bol = 1;
    b->yy_buffer_status = YY_BUFFER_NEW;

    /* Keep the scanner's working copy in step with the flushed buffer. */
    if ( b == YY_CURRENT_BUFFER )
        yy_load_buffer_state( );
}
/** Pushes the new state onto the stack. The new state becomes
 * the current state. This function will allocate the stack
 * if necessary.  A null @a new_buffer is ignored.
 * @param new_buffer The new state.
 */
void yyFlexLexer::yypush_buffer_state (YY_BUFFER_STATE new_buffer)
{
    if (new_buffer == NULL)
        return;

    yyensure_buffer_stack();

    if ( YY_CURRENT_BUFFER )
    {
        /* This part is copied from yy_switch_to_buffer:
         * flush out information for old buffer. */
        *(yy_c_buf_p) = (yy_hold_char);
        YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);

        /* Only push if top exists. Otherwise, replace top. */
        (yy_buffer_stack_top)++;
    }

    YY_CURRENT_BUFFER_LVALUE = new_buffer;

    /* copied from yy_switch_to_buffer. */
    yy_load_buffer_state( );
    (yy_did_buffer_switch_on_eof) = 1;
}
/** Removes and deletes the top of the stack, if present.
 * The next element becomes the new top.
 */
void yyFlexLexer::yypop_buffer_state (void)
{
    if (!YY_CURRENT_BUFFER)
        return;

    yy_delete_buffer(YY_CURRENT_BUFFER );
    YY_CURRENT_BUFFER_LVALUE = NULL;
    if ((yy_buffer_stack_top) > 0)
        --(yy_buffer_stack_top);

    /* If a buffer is now exposed on top, make it the active one. */
    if (YY_CURRENT_BUFFER) {
        yy_load_buffer_state( );
        (yy_did_buffer_switch_on_eof) = 1;
    }
}
/* Allocates the stack if it does not exist.
 * Guarantees space for at least one push.
 */
void yyFlexLexer::yyensure_buffer_stack(void)
{
    yy_size_t num_to_alloc;

    if (!(yy_buffer_stack)) {

        /* First allocation is just for 2 elements, since we don't know if this
         * scanner will even need a stack. We use 2 instead of 1 to avoid an
         * immediate realloc on the next call.
         */
        num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
        (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
                                (num_to_alloc * sizeof(struct yy_buffer_state*)
                                );
        if ( ! (yy_buffer_stack) )
            YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );

        memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));

        (yy_buffer_stack_max) = num_to_alloc;
        (yy_buffer_stack_top) = 0;
        return;
    }

    if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){

        /* Increase the buffer to prepare for a possible push. */
        yy_size_t grow_size = 8 /* arbitrary grow size */;

        num_to_alloc = (yy_buffer_stack_max) + grow_size;
        (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
                                ((yy_buffer_stack),
                                num_to_alloc * sizeof(struct yy_buffer_state*)
                                );
        if ( ! (yy_buffer_stack) )
            YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );

        /* zero only the new slots.*/
        memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
        (yy_buffer_stack_max) = num_to_alloc;
    }
}
/* Save the current start condition on the start-condition stack and enter
 * _new_state.  The stack grows by YY_START_STACK_INCR entries when full.
 */
void yyFlexLexer::yy_push_state( int _new_state )
{
    if ( (yy_start_stack_ptr) >= (yy_start_stack_depth) )
    {
        yy_size_t new_size;

        (yy_start_stack_depth) += YY_START_STACK_INCR;
        new_size = (yy_size_t) (yy_start_stack_depth) * sizeof( int );

        if ( ! (yy_start_stack) )
            (yy_start_stack) = (int *) yyalloc( new_size );

        else
            (yy_start_stack) = (int *) yyrealloc(
                    (void *) (yy_start_stack), new_size );

        if ( ! (yy_start_stack) )
            YY_FATAL_ERROR( "out of memory expanding start-condition stack" );
    }

    (yy_start_stack)[(yy_start_stack_ptr)++] = YY_START;

    BEGIN(_new_state);
}
/* Leave the current start condition and return to the one most recently
 * saved by yy_push_state().  Popping an empty stack is a fatal error.
 */
void yyFlexLexer::yy_pop_state()
{
    if ( --(yy_start_stack_ptr) < 0 )
        YY_FATAL_ERROR( "start-condition stack underflow" );

    BEGIN((yy_start_stack)[(yy_start_stack_ptr)]);
}
/* Return the start condition on top of the start-condition stack without
 * popping it.  No emptiness check is made here.
 */
int yyFlexLexer::yy_top_state()
{
    const int top_index = (yy_start_stack_ptr) - 1;
    return (yy_start_stack)[top_index];
}
/* Process exit status used when the scanner hits a fatal error. */
#ifndef YY_EXIT_FAILURE
#define YY_EXIT_FAILURE 2
#endif

/* Report a fatal scanner error: print msg to std::cerr and terminate the
 * process with YY_EXIT_FAILURE.  Does not return.
 */
void yyFlexLexer::LexerError( const char* msg )
{
    std::cerr << msg << std::endl;
    exit( YY_EXIT_FAILURE );
}
/* Redefine yyless() so it works in section 3 code.
 * yyless(n) keeps the first n characters of the current token and returns
 * the rest to the input.
 */

#undef yyless
#define yyless(n) \
    do \
        { \
        /* Undo effects of setting up yytext. */ \
        int yyless_macro_arg = (n); \
        YY_LESS_LINENO(yyless_macro_arg);\
        yytext[yyleng] = (yy_hold_char); \
        (yy_c_buf_p) = yytext + yyless_macro_arg; \
        (yy_hold_char) = *(yy_c_buf_p); \
        *(yy_c_buf_p) = '\0'; \
        yyleng = yyless_macro_arg; \
        } \
    while ( 0 )
/* Accessor methods (get/set functions) to struct members. */
/* * Internal utility routines. */
#ifndef yytext_ptr
/* Copy exactly n characters from s2 to s1.  No terminator is appended and
 * NUL characters in s2 do not stop the copy.
 */
static void yy_flex_strncpy (char* s1, const char * s2, int n )
{
    int i;
    for ( i = 0; i < n; ++i )
        s1[i] = s2[i];
}
#endif
#ifdef YY_NEED_STRLEN
/* Return the number of characters in s before its terminating NUL. */
static int yy_flex_strlen (const char * s )
{
    int n;
    for ( n = 0; s[n]; ++n )
        ;

    return n;
}
#endif
/* Scanner allocation hook: hands the request straight to malloc(). */
void *yyalloc (yy_size_t size )
{
    void *block = malloc(size);
    return block;
}
/* Scanner reallocation hook: hands the request straight to realloc().
 *
 * Callers pass generic pointers; both ANSI C and C++ allow castless
 * assignment from any pointer type to void*, and argument conversions
 * behave like such an assignment.
 */
void *yyrealloc (void * ptr, yy_size_t size )
{
    void *resized = realloc(ptr, size);
    return resized;
}
/* Scanner deallocation hook: releases memory obtained from yyalloc() or
 * yyrealloc() by handing it to free().
 */
void yyfree (void * ptr )
{
    char *raw = (char *) ptr;	/* see yyrealloc() for (char *) cast */
    free( raw );
}
#define YYTABLES_NAME "yytables"
#line 62 "scanner.l"
makefile
# Build script for the xmlextract assignment.
#   make               builds $(TARGET) and the unit-test driver
#   make clean         removes build products and generated docs
#   make documentation runs doxygen into ./docs
MAINPROG=xmlextract
DIR=${PWD}
ASST=$(notdir ${DIR})
CC=gcc
CXX=g++
##
# Adjust settings for different compilers
#
ifeq ($(OS),Windows_NT)
#
# Flags for Windows compilers
CPPFLAGS=-g -std=c++17 -MMD -pthread -D_GLIBCXX_DEBUG -Wall
LFLAGS=
RM=del /q
EXE=.exe
else
#
# Flags for Linux & MacOS
CPPFLAGS=-g -std=c++17 -MMD -pthread -D_GLIBCXX_DEBUG -Wall
# NOTE(review): LFLAGSx is never referenced below, so these linker flags are
# effectively disabled. Presumably intentional; confirm before renaming.
LFLAGSx=-fuse-ld=gold -pthread
RM=/bin/rm -rf
EXE=
endif
#
########################################################################
# Macro definitions for "standard" C and C++ compilations
#
CFLAGS=-g
TARGET=$(MAINPROG)$(EXE)
CPPS=$(wildcard *.cpp)
MAINCPPS=$(filter-out unittest.cpp test%.cpp, $(CPPS))
TESTCPPS=$(filter-out $(MAINPROG).cpp, $(CPPS))
LINK=g++ $(CPPFLAGS)
#
#
# In most cases, you should not change anything below this line.
#
# The following is "boilerplate" to set up the standard compilation
# commands:
#
MAINOBJS=$(MAINCPPS:%.cpp=%.o)
TESTOBJS=$(TESTCPPS:%.cpp=%.o)
DEPENDENCIES = $(CPPS:%.cpp=%.d)

# These targets do not name files they produce.
.PHONY: all clean documentation

%.d: %.cpp
	touch $@

%.o: %.cpp
	$(CXX) $(CPPFLAGS) -o $@ -c $*.cpp
#
# Targets:
#
all: $(TARGET) unittest$(EXE)

$(TARGET): $(MAINOBJS)
	$(LINK) $(FLAGS) -o $(TARGET) $^ $(LFLAGS)

# Suppress this rule for student work -- they will not have flex++
#lex.yy.cpp: scanner.l
#	flex++ -o lex.yy.cpp scanner.l

clean:
	-/bin/rm -rf *.d *.o $(TARGET) unittest$(EXE) docs

documentation:
	-mkdir docs
	doxygen Doxyfile

unittest$(EXE): $(TESTOBJS)
	$(LINK) $(FLAGS) -o $@ $^ $(LFLAGS)

make.dep: $(DEPENDENCIES)
	-cat $(DEPENDENCIES) > $@

include make.dep
makefile~
# Editor backup copy of the xmlextract build script.
#   make               builds $(TARGET) and the unit-test driver
#   make clean         removes build products and generated docs
#   make documentation runs doxygen into ./docs
MAINPROG=xmlextract
DIR=${PWD}
ASST=$(notdir ${DIR})
CC=gcc
CXX=g++
##
# Adjust settings for different compilers
#
ifeq ($(OS),Windows_NT)
#
# Flags for Windows compilers
CPPFLAGS=-g -std=c++17 -MMD -pthread -D_GLIBCXX_DEBUG -Wall
LFLAGS=
RM=del /q
EXE=.exe
else
#
# Flags for Linux & MacOS
CPPFLAGS=-g -std=c++17 -MMD -pthread -D_GLIBCXX_DEBUG -Wall
# NOTE(review): LFLAGSx is never referenced below, so these linker flags are
# effectively disabled. Presumably intentional; confirm before renaming.
LFLAGSx=-fuse-ld=gold -pthread
RM=/bin/rm -rf
EXE=
endif
#
########################################################################
# Macro definitions for "standard" C and C++ compilations
#
CFLAGS=-g
TARGET=$(MAINPROG)$(EXE)
CPPS=$(wildcard *.cpp)
MAINCPPS=$(filter-out unittest.cpp test%.cpp, $(CPPS))
TESTCPPS=$(filter-out $(MAINPROG).cpp, $(CPPS))
LINK=g++ $(CPPFLAGS)
#
#
# In most cases, you should not change anything below this line.
#
# The following is "boilerplate" to set up the standard compilation
# commands:
#
MAINOBJS=$(MAINCPPS:%.cpp=%.o)
TESTOBJS=$(TESTCPPS:%.cpp=%.o)
DEPENDENCIES = $(CPPS:%.cpp=%.d)

# These targets do not name files they produce.
.PHONY: all clean documentation

%.d: %.cpp
	touch $@

%.o: %.cpp
	$(CXX) $(CPPFLAGS) -o $@ -c $*.cpp
#
# Targets:
#
all: $(TARGET) unittest$(EXE)

$(TARGET): $(MAINOBJS)
	$(LINK) $(FLAGS) -o $(TARGET) $^ $(LFLAGS)

# Suppress this rule for student work -- they will not have flex++
#lex.yy.cpp: scanner.l
#	flex++ -o lex.yy.cpp scanner.l

clean:
	-/bin/rm -rf *.d *.o $(TARGET) unittest$(EXE) docs

documentation:
	-mkdir docs
	doxygen Doxyfile

unittest$(EXE): $(TESTOBJS)
	$(LINK) $(FLAGS) -o $@ $^ $(LFLAGS)

make.dep: $(DEPENDENCIES)
	-cat $(DEPENDENCIES) > $@

include make.dep
node.cpp
#include “node.h”
using namespace std;
/**
 * Build a childless tree node.
 *
 * The node is either an element, in which case content is its tag name,
 * or a text leaf, in which case content is the text itself.
 *
 * @param elementq true iff this is to be an element
 * @param content the tag name or leaf content
 */
Node::Node(bool elementq, std::string content)
    : isAnElement(elementq),
      label(content)
{
    // Nothing else to do: the children list starts out empty.
}
/**
 * Destroy this node together with its whole subtree: every child is
 * deleted, which in turn deletes that child's own children.
 */
Node::~Node()
{
    for (auto it = children.begin(); it != children.end(); ++it)
        delete *it;
}
node.h
#ifndef NODE_H
#define NODE_H

#include <string>
#include <vector>

/**
 * One node of the parsed document tree: either an element (which may have
 * children) or a text leaf.
 *
 * A node deletes its children in its destructor, so copying a Node would
 * leave two owners of the same children; pass nodes around by pointer.
 */
struct Node {
  /**
   * true if this node represents an HTML element, false if
   * it is a text string (leaf)
   * */
  bool isAnElement;

  /**
   * If isAnElement, contains the tag name of the element.
   * If !isAnElement, contains the text string of a leaf.
   */
  std::string label;

  /**
   * If isAnElement, contains pointers to the children of this element.
   * If !isAnElement, this is empty and unused.
   */
  std::vector<Node*> children;

  /**
   * Create a node containing either an element (with no children)
   * or a leaf with the given tag name/leaf content.
   *
   * @param elementq true iff this is to be an element
   * @param content the tag name or leaf content
   */
  Node(bool elementq, std::string content);

  /** Delete every child node (and, through them, the whole subtree). */
  ~Node();
};

#endif
parseState.cpp
#include “parseState.h”
using namespace std;
/**
 * Create a parse state with no open elements and no error recorded.
 */
ParseState::ParseState()
    : error(false)
{
    // The stack of open elements starts out empty.
}
/**
* Present a text string to the ParseState. If the ParseState
* is nto inside an HTML element, this is ignored.
* Otherwise it is remembered as a child of the current element.
*
* @param textContent the lexeme of a text string within an HTML element.
*/
void ParseState::text(std::string textContent)
{
if (!ParseStateStack.empty())
{
Node* n = new Node(false, textContent);
ParseStateStack.top()->children.push_back(n);
}
}
/**
 * Extract a tag name from a tag lexeme.
 *
 * Skips blanks starting at index from, then takes characters up to (not
 * including) the first character found in terminators. Returns an empty
 * string when nothing but blanks follows from.
 */
static std::string extractTagName(const std::string& atag,
                                  std::string::size_type from,
                                  const char* terminators)
{
    const std::string::size_type start = atag.find_first_not_of(' ', from);
    if (start == std::string::npos)
        return std::string();
    const std::string::size_type stop = atag.find_first_of(terminators, start);
    return atag.substr(start, stop - start);
}

/**
 * Present a tag to the ParseState.
 *
 * - Opening tag: a new element is pushed as the innermost open element and
 *   nullptr is returned.
 * - Closing tag: if it matches the innermost open element, that element is
 *   popped, attached to its parent (if any) and returned; otherwise the
 *   error flag is set and nullptr is returned.
 * - Singleton tag (ends in "/>"): a childless element is attached to the
 *   innermost open element (if any) and returned.
 *
 * Once the error flag is set every call returns nullptr.
 *
 * @param atag the lexeme (full string text) of a tag, e.g. "<p>" or "</p>".
 * @return the completed element, or nullptr for opening tags and errors.
 */
Node *ParseState::tag(std::string atag)
{
    if (error)
        return nullptr;

    // The shortest possible tag is "<x>"; anything shorter cannot be
    // indexed safely below, so treat it as a malformed document.
    if (atag.size() < 3)
    {
        error = true;
        return nullptr;
    }

    if (atag[1] == '/')
    {
        // This is a closing tag.
        const std::string tagName = extractTagName(atag, 2, " >");
        if (ParseStateStack.empty() || tagName != ParseStateStack.top()->label)
        {
            error = true;
        }
        else
        {
            Node *n = ParseStateStack.top();
            ParseStateStack.pop();
            if (!ParseStateStack.empty())
            {
                ParseStateStack.top()->children.push_back(n);
            }
            return n;
        }
    }
    else if (atag[atag.size() - 2] != '/')
    {
        // This is an opening tag.
        const std::string tagName = extractTagName(atag, 1, " >");
        ParseStateStack.push(new Node(true, tagName));
    }
    else
    {
        // This is a singleton tag.
        const std::string tagName = extractTagName(atag, 1, " />");
        Node *n = new Node(true, tagName);
        if (!ParseStateStack.empty())
        {
            ParseStateStack.top()->children.push_back(n);
        }
        return n;
    }
    return nullptr;
}
/**
 * Report the balance status of the tags seen so far.
 *
 * @return -1 if a mismatched closing tag has been seen,
 *          1 if no element is currently open,
 *          0 if at least one element is still open.
 */
int ParseState::status() const
{
    if (error)
        return -1;

    return ParseStateStack.empty() ? 1 : 0;
}
parseState.h
#ifndef ParseState_H
#define ParseState_H
#include
#include
#include
#include “node.h”
/**
* XHTML ParseState class
*/
class ParseState {
public:
/**
* Create a new ParseState.
*/
ParseState();
/**
* Present a tag to the ParseState. If this is a closing tag,
* return a tree node representing the parsed HTML for the
* corresponding element
*
* @param atag the lexeme (full string text) of a tag.
* @return a tree node or null if this is not a closing tag
*/
Node* tag (std::string atag);
/**
* Present a text string to the ParseState. If the ParseState
* is nto inside an HTML element, this is ignored.
* Otherwise it is remembered as a child of the current element.
*
* @param textContent the lexeme of a text string within an HTML element.
*/
void text (std::string textContent);
/**
* Inquire as to the balance status ofthe tags seen so far.
*
* @return 1 if all opening tags seen so far have been properly matched by
* a closing tag
*
* 0 if no mismatches have been detected, but at least one opening tag
* seen so far has not been properly matched by a closing tag
*
* -1 if we have seen at least one instance of a closing tag that
* did not match the most recently added and unmatched opening tag.
*/
int status () const;
private:
bool error;
std::stack
};
#endif
parser.cpp
#include “parser.h”
#include “parseState.h”
#include “node.h”
#include “FlexLexer.h”
#include “scanner.h”
using namespace std;
/**
 * Scan input token by token and build the document tree.
 *
 * Tags and text are fed to a ParseState; the value returned is whatever
 * the most recent tag produced (null for an opening tag, the completed
 * element otherwise).
 *
 * @param input stream holding the XML/XHTML text.
 * @return the node produced by the last tag seen, or nullptr if the scanner
 *         reported an ERROR token or no tag produced a node.
 */
Node* Parser::parse(std::istream& input) const
{
    ParseState pstate;
    yyFlexLexer scanner (&input);
    Node* root = nullptr;

    for (int kind = scanner.yylex(); kind != ENDOFFILE; kind = scanner.yylex())
    {
        if (kind == ERROR)
            return nullptr; // the scanner could not tokenize the input

        if (kind == TAG)
            root = pstate.tag(lexeme);
        else if (kind == TEXT)
            pstate.text(lexeme);
        // any other token kind is ignored
    }

    return root;
}
parser.h
#ifndef PARSER_H
#define PARSER_H
#include
#include
#include “node.h”
class Parser {
public:
Node* parse(std::istream& input) const;
};
#endif
scanner.h
#ifndef SCANNER_H
#define SCANNER_H

#include <string>

/** Text of the token most recently returned by the scanner. */
extern std::string lexeme;

// token kinds
const int ENDOFFILE = 0;
const int TEXT = 1;
const int LINK = 2;
const int NAME = 3;
const int TAG = 4;
const int ERROR = -1;

#endif
scanner.l
%option noyywrap
%{
#include
#include “scanner.h”
using namespace std;
int savedState, hrefMode;
std::string lexeme;
#undef ECHO
//#define ECHO cerr << yytext << flush;
#define ECHO
#undef LEXEME
#define LEXEME lexeme = yytext;
int yy_more_offset, yy_prev_more_offset;
%}
%x INCOMMENT HTMLTAG
/* regular definitions */
delim [ \t\r\n]
tagdelim [ \t\r\n\>]
whitespace {delim}+
digit [0-9]
alpha [a-zA-Z]
nota [b-zB-Z]
alphanum [a-zA-Z0-9]
tagchars [a-zA-Z0-9:]
pathchars [-~a-zA-Z0-9_/\\.]
number {digit}+(\.{digit}+)?
quote \”
apostrophe [‘]
htmltext [^<>]|[ \t\r\n]
%%
{whitespace} {}