This commit was generated by cvs2svn to compensate for changes in r2258,

which included commits to RCS files with non-trunk default branches.
This commit is contained in:
Geoff Rehmet 1994-08-24 13:10:34 +00:00
commit 4738f75303
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=2259
28 changed files with 19354 additions and 0 deletions

38
usr.bin/lex/COPYING Normal file
View File

@ -0,0 +1,38 @@
Flex carries the copyright used for BSD software, slightly modified
because it originated at the Lawrence Berkeley (not Livermore!) Laboratory,
which operates under a contract with the Department of Energy:
Copyright (c) 1990 The Regents of the University of California.
All rights reserved.
This code is derived from software contributed to Berkeley by
Vern Paxson.
The United States Government has rights in this work pursuant
to contract no. DE-AC03-76SF00098 between the United States
Department of Energy and the University of California.
Redistribution and use in source and binary forms are permitted
provided that: (1) source distributions retain this entire
copyright notice and comment, and (2) distributions including
binaries display the following acknowledgement: ``This product
includes software developed by the University of California,
Berkeley and its contributors'' in the documentation or other
materials provided with the distribution and in all advertising
materials mentioning features or use of this software. Neither the
name of the University nor the names of its contributors may be
used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
This basically says "do whatever you please with this software except
remove this notice or take advantage of the University's (or the flex
authors') name".
Note that the "flex.skl" scanner skeleton carries no copyright notice.
You are free to do whatever you please with scanners generated using flex;
for them, you are not even bound by the above copyright.

175
usr.bin/lex/FlexLexer.h Normal file
View File

@ -0,0 +1,175 @@
// $Header: FlexLexer.h,v 1.2 94/01/04 14:57:26 vern Exp $
// FlexLexer.h -- define classes for lexical analyzers generated by flex
// Copyright (c) 1993 The Regents of the University of California.
// All rights reserved.
//
// This code is derived from software contributed to Berkeley by
// Kent Williams and Tom Epperly.
//
// Redistribution and use in source and binary forms are permitted provided
// that: (1) source distributions retain this entire copyright notice and
// comment, and (2) distributions including binaries display the following
// acknowledgement: ``This product includes software developed by the
// University of California, Berkeley and its contributors'' in the
// documentation or other materials provided with the distribution and in
// all advertising materials mentioning features or use of this software.
// Neither the name of the University nor the names of its contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#ifndef __FLEX_LEXER_H
#define __FLEX_LEXER_H
// This file defines two classes. The first, FlexLexer, is an abstract
// class which specifies the external interface provided to flex C++
// lexer objects. The second, yyFlexLexer, fills out most of the meat
// of the lexer class; its internals may vary from lexer to lexer
// depending on things like whether REJECT is used.
//
// If you want to create multiple lexer classes, you use the -P flag
// to rename each yyFlexLexer to some other xxFlexLexer.
#include <iostream.h>
extern "C++" {
struct yy_buffer_state;
typedef int yy_state_type;
// FlexLexer -- abstract base class giving the external interface of a
// flex C++ lexer object.
//
// It holds the yytext/yyleng pair, exposes them read-only through
// YYText() and YYLeng(), and declares the buffer-handling and scanning
// entry points as pure virtuals for a concrete lexer class to implement.
//
// Note: this class has no constructor, so yytext and yyleng hold
// indeterminate values until a derived class assigns them.
class FlexLexer {
public:
	virtual ~FlexLexer()	{ }

	// Read-only accessors.  They are const-qualified because they only
	// read members; this lets them be called through a const FlexLexer
	// reference or pointer as well as a non-const one.

	// Returns the current value of yytext.
	const char* YYText() const	{ return yytext; }

	// Returns the current value of yyleng.
	int YYLeng() const		{ return yyleng; }

	// Buffer management.  The semantics are defined by the implementing
	// class; presumably these mirror the C scanner routines of the same
	// names -- confirm against the generated scanner code.
	virtual void
		yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0;
	virtual struct yy_buffer_state*
		yy_create_buffer( istream* s, int size ) = 0;
	virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0;
	virtual void yyrestart( istream* s ) = 0;

	// The scanning routine itself.
	virtual int yylex() = 0;

protected:
	// Storage behind YYText() and YYLeng(); maintained by derived classes.
	char* yytext;
	int yyleng;
};
// yyFlexLexer -- concrete lexer class built on the FlexLexer interface.
//
// The constructor and destructor are defined inline here; every other
// member function is only declared and must be defined elsewhere
// (presumably in the flex-generated scanner source -- confirm).
class yyFlexLexer : public FlexLexer {
public:
	// arg_yyin and arg_yyout default to the cin and cout, but we
	// only make that assignment when initializing in yylex().
	//
	// The constructor only records the stream pointers and resets the
	// scanner bookkeeping.  When YY_USES_REJECT is defined it also
	// allocates the state buffer (YY_BUF_SIZE + 2 entries); otherwise
	// yy_state_buf is left null.
	yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 )
		{
		yyin = arg_yyin;
		yyout = arg_yyout;
		yy_c_buf_p = 0;
		yy_init = 1;
		yy_start = 0;
		yy_did_buffer_switch_on_eof = 0;

		yy_looking_for_trail_begin = 0;
		yy_more_flag = 0;
		yy_more_len = 0;

		yy_start_stack_ptr = yy_start_stack_depth = 0;
		yy_start_stack = 0;

		yy_current_buffer = 0;

#ifdef YY_USES_REJECT
		yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2];
#else
		yy_state_buf = 0;
#endif
		}

	// Releases the state buffer.  The constructor allocates it with the
	// array form of new, so it must be released with the array form of
	// delete; pairing new[] with scalar delete is undefined behaviour.
	// delete [] on a null pointer is a no-op, which covers the case
	// where YY_USES_REJECT is not defined.
	virtual ~yyFlexLexer()
		{
		delete [] yy_state_buf;
		}

	// Implementations of the FlexLexer pure virtuals.
	void yy_switch_to_buffer( struct yy_buffer_state* new_buffer );
	struct yy_buffer_state* yy_create_buffer( istream* s, int size );
	void yy_delete_buffer( struct yy_buffer_state* b );
	void yyrestart( istream* s );

	virtual int yylex();

protected:
	// I/O and error hooks; virtual so a subclass can replace them.
	virtual int LexerInput( char* buf, int max_size );
	virtual void LexerOutput( const char* buf, int size );
	virtual void LexerError( const char* msg );

	void yyunput( int c, char* buf_ptr );
	int yyinput();

	void yy_load_buffer_state();
	void yy_init_buffer( struct yy_buffer_state* b, istream* s );

	// Start-condition stack state and the routines that operate on it.
	int yy_start_stack_ptr;
	int yy_start_stack_depth;
	int* yy_start_stack;

	void yy_push_state( int new_state );
	void yy_pop_state();
	int yy_top_state();

	yy_state_type yy_get_previous_state();
	yy_state_type yy_try_NUL_trans( yy_state_type current_state );
	int yy_get_next_buffer();

	istream* yyin;	// input source for default LexerInput
	ostream* yyout;	// output sink for default LexerOutput

	struct yy_buffer_state* yy_current_buffer;

	// yy_hold_char holds the character lost when yytext is formed.
	char yy_hold_char;

	// Number of characters read into yy_ch_buf.
	int yy_n_chars;

	// Points to current character in buffer.
	char* yy_c_buf_p;

	int yy_init;		// whether we need to initialize
	int yy_start;		// start state number

	// Flag which is used to allow yywrap()'s to do buffer switches
	// instead of setting up a fresh yyin.  A bit of a hack ...
	int yy_did_buffer_switch_on_eof;

	// The following are not always needed, but may be depending
	// on use of certain flex features (like REJECT or yymore()).

	yy_state_type yy_last_accepting_state;
	char* yy_last_accepting_cpos;

	// Allocated in the constructor only when YY_USES_REJECT is defined;
	// owned by this object and freed in the destructor.
	yy_state_type* yy_state_buf;
	yy_state_type* yy_state_ptr;

	char* yy_full_match;
	int* yy_full_state;
	int yy_full_lp;

	int yy_lp;
	int yy_looking_for_trail_begin;

	int yy_more_flag;
	int yy_more_len;
};
}
#endif

57
usr.bin/lex/Makefile Normal file
View File

@ -0,0 +1,57 @@
# $Id$
#
# By default, flex will be configured to generate 8-bit scanners only if the
# -8 flag is given. If you want it to always generate 8-bit scanners, add
# "-DDEFAULT_CSIZE=256" to CFLAGS. Note that doing so will double the size
# of all uncompressed scanners.
#
# Bootstrapping of lex is handled automatically.
# Also note that flex.skel no longer gets installed.
#
# XXX Todo:
# Install as lex++, and install FlexLexer.h
# The program is built under the name "lex"; LINKS lists the lex/flex
# install-path pair (presumably so the installed binary is also reachable
# as "flex" -- confirm against bsd.prog.mk).
PROG= lex
LINKS= ${BINDIR}/lex ${BINDIR}/flex
# Disabled variant that would also cover the lex++/flex++ names (see Todo).
#LINKS+= ${BINDIR}/lex ${BINDIR}/lex++ ${BINDIR}/flex ${BINDIR}/flex++
# Note that the scanner source is not listed in SRCS; its object file is
# added to OBJS explicitly and produced by the scan.o rules further down.
SRCS= ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.y \
skel.c sym.c tblcmp.c yylex.c
OBJS+= scan.o
LFLAGS+= -is
# Search the build directory and the source directory for headers
# (parse.h is generated into the build directory by the rule below).
CFLAGS+= -I. -I${.CURDIR}
MAN1= flex.1 flexdoc.1
MLINKS= flex.1 lex.1 flexdoc.1 lexdoc.1
CLEANFILES+= parse.c parse.h scan.c y.tab.h
SUBDIR= lib
# parse.h must exist before dependencies are computed.
.depend: parse.h
# Generate the parser: yacc -d writes y.tab.c and y.tab.h, which are then
# renamed to parse.c and parse.h.
parse.c parse.h: parse.y
$(YACC) -d $(.CURDIR)/parse.y
mv y.tab.c parse.c
mv y.tab.h parse.h
# If a lex is already installed, only the ordering dependency on the
# generated parser is declared here (scan.o is then presumably built from
# scan.l by make's default rules -- confirm). Otherwise the scanner is
# bootstrapped by copying the pre-generated initscan.c to scan.c.
.if exists(/usr/bin/lex)
scan.o: parse.c
.else
# We must bootstrap
scan.o: scan.c parse.h
scan.c:
@echo "Bootstrapping flex"
@rm -f scan.c
@cp -pf ${.CURDIR}/initscan.c scan.c
.endif
# Self-test: run the freshly built program on scan.l, rewrite the
# source-directory path to scan.l in its output to plain "scan.l" with sed,
# and diff the result against the shipped initscan.c. COMPRESSION is not
# set in this file; presumably it is supplied on the make command line.
test: check
check: $(PROG)
./$(PROG) $(LFLAGS) -t $(COMPRESSION) $(.CURDIR)/scan.l \
| sed s,\"$(.CURDIR)/scan.l",\"scan.l", \
| diff $(.CURDIR)/initscan.c -
@echo "Check successful"
.include <bsd.prog.mk>

703
usr.bin/lex/NEWS Normal file
View File

@ -0,0 +1,703 @@
Changes between release 2.4.7 (03Aug94) and release 2.4.6:
- Fixed serious bug in reading multiple files.
- Fixed bug in scanning NUL's.
- Fixed bug in input() returning 8-bit characters.
- Fixed bug in matching text with embedded NUL's when
using %array or lex compatibility.
- Fixed multiple invocations of YY_USER_ACTION when using '|'
continuation action.
- Minor prototyping fixes.
Changes between release 2.4.6 (04Jan94) and release 2.4.5:
- Linking with -lfl no longer required if your program includes
its own yywrap() and main() functions. (This change will cause
problems if you have a non-ANSI compiler on a system for which
sizeof(int) != sizeof(void*) or sizeof(int) != sizeof(size_t).)
- The use of 'extern "C++"' in FlexLexer.h has been modified to
get around an incompatibility with g++'s header files.
Changes between release 2.4.5 (11Dec93) and release 2.4.4:
- Fixed bug breaking C++ scanners that use REJECT or variable
trailing context.
- Fixed serious input problem for interactive scanners on
systems for which char is unsigned.
- Fixed bug in incorrectly treating '$' operator as variable
trailing context.
- Fixed bug in -CF table representation that could lead to
corrupt tables.
- Fixed fairly benign memory leak.
- Added `extern "C++"' wrapper to FlexLexer.h header. This
should overcome the g++ 2.5.X problems mentioned in the
NEWS for release 2.4.3.
- Changed #include of FlexLexer.h to use <> instead of "".
- Added feature to control whether the scanner attempts to
refill the input buffer once it's exhausted. This feature
will be documented in the 2.5 release.
Changes between release 2.4.4 (07Dec93) and release 2.4.3:
- Fixed two serious bugs in scanning 8-bit characters.
- Fixed bug in YY_USER_ACTION that caused it to be executed
inappropriately (on the scanner's own internal actions, and
with incorrect yytext/yyleng values).
- Fixed bug in pointing yyin at a new file and resuming scanning.
- Portability fix regarding min/max/abs macros conflicting with
function definitions in standard header files.
- Added a virtual LexerError() method to the C++ yyFlexLexer class
for reporting error messages instead of always using cerr.
- Added warning in flexdoc that the C++ scanning class is presently
experimental and subject to considerable change between major
releases.
Changes between release 2.4.3 (03Dec93) and release 2.4.2:
- Fixed bug causing fatal scanner messages to fail to print.
- Fixed things so FlexLexer.h can be included in other C++
sources. One side-effect of this change is that -+ and -CF
are now incompatible.
- libfl.a now supplies private versions of the <string.h>/
<strings.h> string routines needed by flex and the scanners
it generates, to enhance portability to some BSD systems.
- More robust solution to 2.4.2's flexfatal() bug fix.
- Added ranlib of installed libfl.a.
- Some lint tweaks.
- NOTE: problems have been encountered attempting to build flex
C++ scanners using g++ version 2.5.X. The problem is due to an
unfortunate heuristic in g++ 2.5.X that attempts to discern between
C and C++ headers. Because FlexLexer.h is installed (by default)
in /usr/local/include and not /usr/local/lib/g++-include, g++ 2.5.X
decides that it's a C header :-(. So if you have problems, install
the header in /usr/local/lib/g++-include instead.
Changes between release 2.4.2 (01Dec93) and release 2.4.1:
- Fixed bug in libfl.a referring to non-existent "flexfatal" function.
- Modified to produce both compress'd and gzip'd tar files for
distributions (you probably don't care about this change!).
Changes between release 2.4.1 (30Nov93) and release 2.3.8:
- The new '-+' flag instructs flex to generate a C++ scanner class
(thanks to Kent Williams). flex writes an implementation of the
class defined in FlexLexer.h to lex.yy.cc. You may include
multiple scanner classes in your program using the -P flag. Note
that the scanner class also provides a mechanism for creating
reentrant scanners. The scanner class uses C++ streams for I/O
instead of FILE*'s (thanks to Tom Epperly). If the flex executable's
name ends in '+' then the '-+' flag is automatically on, so creating
a symlink or copy of "flex" to "flex++" results in a version of
flex that can be used exclusively for C++ scanners.
Note that without the '-+' flag, flex-generated scanners can still
be compiled using C++ compilers, though they use FILE*'s for I/O
instead of streams.
See the "GENERATING C++ SCANNERS" section of flexdoc for details.
- The new '-l' flag turns on maximum AT&T lex compatibility. In
particular, -l includes support for "yylineno" and makes yytext
be an array instead of a pointer. It does not, however, do away
with all incompatibilities. See the "INCOMPATIBILITIES WITH LEX
AND POSIX" section of flexdoc for details.
- The new '-P' option specifies a prefix to use other than "yy"
for the scanner's globally-visible variables, and for the
"lex.yy.c" filename. Using -P you can link together multiple
flex scanners in the same executable.
- The distribution includes a "texinfo" version of flexdoc.1,
contributed by Roland Pesch (thanks also to Marq Kole, who
contributed another version). It has not been brought up to
date, but reflects version 2.3. See MISC/flex.texinfo.
The flex distribution will soon include G.T. Nicol's flex
manual; he is presently bringing it up-to-date for version 2.4.
- yywrap() is now a function, and you now *must* link flex scanners
with libfl.a.
- Site-configuration is now done via an autoconf-generated
"configure" script contributed by Francois Pinard.
- Scanners now use fread() (or getc(), if interactive) and not
read() for input. A new "table compression" option, -Cr,
overrides this change and causes the scanner to use read()
(because read() is a bit faster than fread()). -f and -F
are now equivalent to -Cfr and -CFr; i.e., they imply the
-Cr option.
- In the blessed name of POSIX compliance, flex supports "%array"
and "%pointer" directives in the definitions (first) section of
the scanner specification. The former specifies that yytext
should be an array (of size YYLMAX), the latter, that it should
be a pointer. The array version of yytext is universally slower
than the pointer version, but has the advantage that its contents
remain unmodified across calls to input() and unput() (the pointer
version of yytext is, still, trashed by such calls).
"%array" cannot be used with the '-+' C++ scanner class option.
- The new '-Ca' option directs flex to trade off memory for
natural alignment when generating a scanner's tables. In
particular, table entries that would otherwise be "short"
become "long".
- The new '-h' option produces a summary of the flex flags.
- The new '-V' option reports the flex version number and exits.
- The new scanner macro YY_START returns an integer value
corresponding to the current start condition. You can return
to that start condition by passing the value to a subsequent
"BEGIN" action. You also can implement "start condition stacks"
by storing the values in an integer stack.
- You can now redefine macros such as YY_INPUT by just #define'ing
them to some other value in the first section of the flex input;
no need to first #undef them.
- flex now generates warnings for rules that can't be matched.
These warnings can be turned off using the new '-w' flag. If
your scanner uses REJECT then you will not get these warnings.
- If you specify the '-s' flag but the default rule can be matched,
flex now generates a warning.
- "yyleng" is now a global, and may be modified by the user (though
doing so and then using yymore() will yield weird results).
- Name definitions in the first section of a scanner specification
can now include a leading '^' or trailing '$' operator. In this
case, the definition is *not* pushed back inside of parentheses.
- Scanners with compressed tables are now "interactive" (-I option)
by default. You can suppress this attribute (which makes them
run slightly slower) using the new '-B' flag.
- Flex now generates 8-bit scanners by default, unless you use the
-Cf or -CF compression options (-Cfe and -CFe result in 8-bit
scanners). You can force it to generate a 7-bit scanner using
the new '-7' flag. You can build flex to generate 8-bit scanners
for -Cf and -CF, too, by adding -DDEFAULT_CSIZE=256 to CFLAGS
in the Makefile.
- You no longer need to call the scanner routine yyrestart() to
inform the scanner that you have switched to a new file after
having seen an EOF on the current input file. Instead, just
point yyin at the new file and continue scanning.
- You no longer need to invoke YY_NEW_FILE in an <<EOF>> action
to indicate you wish to continue scanning. Simply point yyin
at a new file.
- A leading '#' no longer introduces a comment in a flex input.
- flex no longer considers formfeed ('\f') a whitespace character.
- %t, I'm happy to report, has been nuked.
- The '-p' option may be given twice ('-pp') to instruct flex to
report minor performance problems as well as major ones.
- The '-v' verbose output no longer includes start/finish time
information.
- Newlines in flex inputs can optionally include leading or
trailing carriage-returns ('\r'), in support of several PC/Mac
run-time libraries that automatically include these.
- A start condition of the form "<*>" makes the following rule
active in every start condition, whether exclusive or inclusive.
- The following items have been corrected in the flex documentation:
- '-C' table compression options *are* cumulative.
- You may modify yytext but not lengthen it by appending
characters to the end. Modifying its final character
will affect '^' anchoring for the next rule matched
if the character is changed to or from a newline.
- The term "backtracking" has been renamed "backing up",
since it is a one-time repositioning and not a repeated
search. What used to be the "lex.backtrack" file is now
"lex.backup".
- Unindented "/* ... */" comments are allowed in the first
flex input section, but not in the second.
- yyless() can only be used in the flex input source, not
externally.
- You can use "yyrestart(yyin)" to throw away the
current contents of the input buffer.
- To write high-speed scanners, attempt to match as much
text as possible with each rule. See MISC/fastwc/README
for more information.
- Using the beginning-of-line operator ('^') is fairly
cheap. Using unput() is expensive. Using yyless() is
cheap.
- An example of scanning strings with embedded escape
sequences has been added.
- The example of backing-up in flexdoc was erroneous; it
has been corrected.
- A flex scanner's internal buffer now dynamically grows if needed
to match large tokens. Note that growing the buffer presently
requires rescanning the (large) token, so consuming a lot of
text this way is a slow process. Also note that presently the
buffer does *not* grow if you unput() more text than can fit
into the buffer.
- The MISC/ directory has been reorganized; see MISC/README for
details.
- yyless() can now be used in the third (user action) section
of a scanner specification, thanks to Ceriel Jacobs. yyless()
remains a macro and cannot be used outside of the scanner source.
- The skeleton file is no longer opened at run-time, but instead
compiled into a large string array (thanks to John Gilmore and
friends at Cygnus). You can still use the -S flag to point flex
at a different skeleton file, though if you use this option let
me know, as I plan to otherwise do away with -S in the near
future.
- flex no longer uses a temporary file to store the scanner's
actions.
- A number of changes have been made to decrease porting headaches.
In particular, flex no longer uses memset() or ctime(), and
provides a single simple mechanism for dealing with C compilers
that still define malloc() as returning char* instead of void*.
- Flex now detects if the scanner specification requires the -8 flag
but the flag was not given or on by default.
- A number of table-expansion fencepost bugs have been fixed,
making flex more robust for generating large scanners.
- flex more consistently identifies the location of errors in
its input.
- YY_USER_ACTION is now invoked only for "real" actions, not for
internal actions used by the scanner for things like filling
the buffer or handling EOF.
- The rule "[^]]" now matches any character other than a ']';
formerly it matched any character at all followed by a ']'.
This change was made for compatibility with AT&T lex.
- A large number of miscellaneous bugs have been found and fixed
thanks to Gerhard Wilhelms.
- The source code has been heavily reformatted, making patches
relative to previous flex releases no longer accurate.
Changes between 2.3 Patch #8 (21Feb93) and 2.3 Patch #7:
- Fixed bugs in dynamic memory allocation leading to grievous
fencepost problems when generating large scanners.
- Fixed bug causing infinite loops on character classes with 8-bit
characters in them.
- Fixed bug in matching repetitions with a lower bound of 0.
- Fixed bug in scanning NUL characters using an "interactive" scanner.
- Fixed bug in using yymore() at the end of a file.
- Fixed bug in misrecognizing rules with variable trailing context.
- Fixed bug compiling flex on Suns using gcc 2.
- Fixed bug in not recognizing that input files with the character
ASCII 128 in them require the -8 flag.
- Fixed bug that could cause an infinite loop writing out
error messages.
- Fixed bug in not recognizing old-style lex % declarations if
followed by a tab instead of a space.
- Fixed potential crash when flex terminated early (usually due
to a bad flag) and the -v flag had been given.
- Added some missing declarations of void functions.
- Changed to only use '\a' for __STDC__ compilers.
- Updated mailing addresses.
Changes between 2.3 Patch #7 (28Mar91) and 2.3 Patch #6:
- Fixed out-of-bounds array access that caused bad tables
to be produced on machines where the bad reference happened
to yield a 1. This caused problems installing or running
flex on some Suns, in particular.
Changes between 2.3 Patch #6 (29Aug90) and 2.3 Patch #5:
- Fixed a serious bug in yymore() which basically made it
completely broken. Thanks goes to Jean Christophe of
the Nethack development team for finding the problem
and passing along the fix.
Changes between 2.3 Patch #5 (16Aug90) and 2.3 Patch #4:
- An up-to-date version of initscan.c so "make test" will
work after applying the previous patches
Changes between 2.3 Patch #4 (14Aug90) and 2.3 Patch #3:
- Fixed bug in hexadecimal escapes which allowed only digits,
not letters, in escapes
- Fixed bug in previous "Changes" file!
Changes between 2.3 Patch #3 (03Aug90) and 2.3 Patch #2:
- Correction to patch #2 for gcc compilation; thanks goes to
Paul Eggert for catching this.
Changes between 2.3 Patch #2 (02Aug90) and original 2.3 release:
- Fixed (hopefully) headaches involving declaring malloc()
and free() for gcc, which defines __STDC__ but (often) doesn't
come with the standard include files such as <stdlib.h>.
Reordered #ifdef maze in the scanner skeleton in the hope of
getting the declarations right for cfront and g++, too.
- Note that this patch supersedes patch #1 for release 2.3,
which was never announced but was available briefly for
anonymous ftp.
Changes between 2.3 (full) release of 28Jun90 and 2.2 (alpha) release:
User-visible:
- A lone <<EOF>> rule (that is, one which is not qualified with
a list of start conditions) now specifies the EOF action for
*all* start conditions which haven't already had <<EOF>> actions
given. To specify an end-of-file action for just the initial
state, use <INITIAL><<EOF>>.
- -d debug output is now contingent on the global yy_flex_debug
being set to a non-zero value, which it is by default.
- A new macro, YY_USER_INIT, is provided for the user to specify
initialization action to be taken on the first call to the
scanner. This action is done before the scanner does its
own initialization.
- yy_new_buffer() has been added as an alias for yy_create_buffer()
- Comments beginning with '#' and extending to the end of the line
now work, but have been deprecated (in anticipation of making
flex recognize #line directives).
- The funky restrictions on when semi-colons could follow the
YY_NEW_FILE and yyless macros have been removed. They now
behave identically to functions.
- A bug in the sample redefinition of YY_INPUT in the documentation
has been corrected.
- A bug in the sample simple tokener in the documentation has
been corrected.
- The documentation on the incompatibilities between flex and
lex has been reordered so that the discussion of yylineno
and input() come first, as it's anticipated that these will
be the most common source of headaches.
Things which didn't used to be documented but now are:
- flex interprets "^foo|bar" differently from lex. flex interprets
it as "match either a 'foo' or a 'bar', providing it comes at the
beginning of a line", whereas lex interprets it as "match either
a 'foo' at the beginning of a line, or a 'bar' anywhere".
- flex initializes the global "yyin" on the first call to the
scanner, while lex initializes it at compile-time.
- yy_switch_to_buffer() can be used in the yywrap() macro/routine.
- flex scanners do not use stdio for their input, and hence when
writing an interactive scanner one must explicitly call fflush()
after writing out a prompt.
- flex scanners can be made reentrant (after a fashion) by using
"yyrestart( yyin );". This is useful for interactive scanners
which have interrupt handlers that long-jump out of the scanner.
- a defense of why yylineno is not supported is included, along
with a suggestion on how to convert scanners which rely on it.
Other changes:
- Prototypes and proper declarations of void routines have
been added to the flex source code, courtesy of Kevin B. Kenny.
- Routines dealing with memory allocation now use void* pointers
instead of char* - see Makefile for porting implications.
- Error-checking is now done when flex closes a file.
- Various lint tweaks were added to reduce the number of gripes.
- Makefile has been further parameterized to aid in porting.
- Support for SCO Unix added.
- Flex now sports the latest & greatest UC copyright notice
(which is only slightly different from the previous one).
- A note has been added to flexdoc.1 mentioning work in progress
on modifying flex to generate straight C code rather than a
table-driven automaton, with an email address of whom to contact
if you are working along similar lines.
Changes between 2.2 Patch #3 (30Mar90) and 2.2 Patch #2:
- fixed bug which caused -I scanners to bomb
Changes between 2.2 Patch #2 (27Mar90) and 2.2 Patch #1:
- fixed bug writing past end of input buffer in yyunput()
- fixed bug detecting NUL's at the end of a buffer
Changes between 2.2 Patch #1 (23Mar90) and 2.2 (alpha) release:
- Makefile fixes: definition of MAKE variable for systems
which don't have it; installation of flexdoc.1 along with
flex.1; fixed two bugs which could cause "bigtest" to fail.
- flex.skel fix for compiling with g++.
- README and flexdoc.1 no longer list an out-of-date BITNET address
for contacting me.
- minor typos and formatting changes to flex.1 and flexdoc.1.
Changes between 2.2 (alpha) release of March '90 and previous release:
User-visible:
- Full user documentation now available.
- Support for 8-bit scanners.
- Scanners now accept NUL's.
- A facility has been added for dealing with multiple
input buffers.
- Two manual entries now. One which fully describes flex
(rather than just its differences from lex), and the
other for quick(er) reference.
- A number of changes to bring flex closer into compliance
with the latest POSIX lex draft:
%t support
flex now accepts multiple input files and concatenates
them together to form its input
previous -c (compress) flag renamed -C
do-nothing -c and -n flags added
Any indented code or code within %{}'s in section 2 is
now copied to the output
- yyleng is now a bona fide global integer.
- -d debug information now gives the line number of the
matched rule instead of which number rule it was from
the beginning of the file.
- -v output now includes a summary of the flags used to generate
the scanner.
- unput() and yyrestart() are now globally callable.
- yyrestart() no longer closes the previous value of yyin.
- C++ support; generated scanners can be compiled with C++ compiler.
- Primitive -lfl library added, containing default main()
which calls yylex(). A number of routines currently living
in the scanner skeleton will probably migrate to here
in the future (in particular, yywrap() will probably cease
to be a macro and instead be a function in the -lfl library).
- Hexadecimal (\x) escape sequences added.
- Support for MS-DOS, VMS, and Turbo-C integrated.
- The %used/%unused operators have been deprecated. They
may go away soon.
Other changes:
- Makefile enhanced for easier testing and installation.
- The parser has been tweaked to detect some erroneous
constructions which previously were missed.
- Scanner input buffer overflow is now detected.
- Bugs with missing "const" declarations fixed.
- Out-of-date Minix/Atari patches provided.
- Scanners no longer require printf() unless FLEX_DEBUG is being used.
- A subtle input() bug has been fixed.
- Line numbers for "continued action" rules (those following
the special '|' action) are now correct.
- unput() bug fixed; had been causing problems porting flex to VMS.
- yymore() handling rewritten to fix bug with interaction
between yymore() and trailing context.
- EOF in actions now generates an error message.
- Bug involving -CFe and generating equivalence classes fixed.
- Bug which made -CF be treated as -Cf fixed.
- Support for SysV tmpnam() added.
- Unused #define's for scanner no longer generated.
- Error messages which are associated with a particular input
line are now all identified with their input line in standard
format.
- % directives which are valid to lex but not to flex are
now ignored instead of generating warnings.
- -DSYS_V flag can now also be specified -DUSG for System V
compilation.
Changes between 2.1 beta-test release of June '89 and previous release:
User-visible:
- -p flag generates a performance report to stderr. The report
consists of comments regarding features of the scanner rules
which result in slower scanners.
- -b flag generates backtracking information to lex.backtrack.
This is a list of scanner states which require backtracking
and the characters on which they do so. By adding rules
one can remove backtracking states. If all backtracking states
are eliminated, the generated scanner will run faster.
Backtracking is not yet documented in the manual entry.
- Variable trailing context now works, i.e., one can have
rules like "(foo)*/[ \t]*bletch". Some trailing context
patterns still cannot be properly matched and generate
error messages. These are patterns where the ending of the
first part of the rule matches the beginning of the second
part, such as "zx*/xy*", where the 'x*' matches the 'x' at
the beginning of the trailing context. Lex won't get these
patterns right either.
- Faster scanners.
- End-of-file rules. The special rule "<<EOF>>" indicates
actions which are to be taken when an end-of-file is
encountered and yywrap() returns non-zero (i.e., indicates
no further files to process). See manual entry for example.
- The -r (reject used) flag is gone. flex now scans the input
for occurrences of the string "REJECT" to determine if the
action is needed. It tries to be intelligent about this but
can be fooled. One can force the presence or absence of
REJECT by adding a line in the first section of the form
"%used REJECT" or "%unused REJECT".
- yymore() has been implemented. Similarly to REJECT, flex
detects the use of yymore(), which can be overridden using
"%used" or "%unused".
- Patterns like "x{0,3}" now work (i.e., with lower-limit == 0).
- Removed '\^x' for ctrl-x misfeature.
- Added '\a' and '\v' escape sequences.
- \<digits> now works for octal escape sequences; previously
\0<digits> was required.
- Better error reporting; line numbers are associated with rules.
- yyleng is a macro; it cannot be accessed outside of the
scanner source file.
- yytext and yyleng should not be modified within a flex action.
- Generated scanners #define the name FLEX_SCANNER.
- Rules are internally separated by YY_BREAK in lex.yy.c rather
than break, to allow redefinition.
- The macro YY_USER_ACTION can be redefined to provide an action
which is always executed prior to the matched rule's action.
- yyrestart() is a new action which can be used to restart
the scanner after it has seen an end-of-file (a "real" one,
that is, one for which yywrap() returned non-zero). It takes
a FILE* argument indicating a new file to scan and sets
things up so that a subsequent call to yylex() will start
scanning that file.
- Internal scanner names all preceded by "yy_"
- lex.yy.c is deleted if errors are encountered during processing.
- Comments may be put in the first section of the input by preceding
them with '#'.
Other changes:
- Some portability-related bugs fixed, in particular for machines
with unsigned characters or sizeof( int* ) != sizeof( int ).
Also, tweaks for VMS and Microsoft C (MS-DOS), and identifiers all
trimmed to be 31 or fewer characters. Shortened file names
for dinosaur OS's. Checks for allocating > 64K memory
on 16 bit'ers. Amiga tweaks. Compiles using gcc on a Sun-3.
- Compressed and fast scanner skeletons merged.
- Skeleton header files done away with.
- Generated scanner uses prototypes and "const" for __STDC__.
- -DSV flag is now -DSYS_V for System V compilation.
- Removed all references to FTL language.
- Software now covered by BSD Copyright.
- flex will replace lex in subsequent BSD releases.

67
usr.bin/lex/README Normal file
View File

@ -0,0 +1,67 @@
This is release 2.4 of flex. See "version.h" for the exact patch-level.
See the file "NEWS" to find out what is new in this Flex release.
Read the file "INSTALL" for general installation directives. Peek near
the beginning of the file "Makefile.in" for special DEFS values. On most
systems, you can just run the "configure" script and type "make" to build
flex; then "make check" to test whether it built correctly; and if it did,
then "make install" to install it.
If you're feeling adventurous, you can also issue "make bigcheck" (be
prepared to wait a while).
Note that flex is distributed under a copyright very similar to that of
BSD Unix, and not under the GNU General Public License (GPL), except for
the "configure" script, which is covered by the GPL.
Many thanks to the 2.4 pre-testers for finding a bunch of bugs and helping
increase/test portability: Francois Pinard, Nathan Zelle, Gavin Nicol,
Chris Thewalt, and Matthew Jacob.
Please send bug reports and feedback to:
Vern Paxson
ICSD, 46A/1123
Lawrence Berkeley Laboratory
1 Cyclotron Rd.
Berkeley, CA 94720
vern@ee.lbl.gov
The flex distribution consists of the following files:
README This message
NEWS Differences between the various releases
INSTALL General installation information
COPYING flex's copyright
configure.in, configure, Makefile.in, install.sh, mkinstalldirs
elements of the "autoconf" auto-configuration process
flexdef.h, parse.y, scan.l, ccl.c, dfa.c, ecs.c, gen.c, main.c,
misc.c, nfa.c, sym.c, tblcmp.c, yylex.c
source files
version.h version of this flex release
flex.skl flex scanner skeleton
mkskel.sh script for converting flex.skl to C source file skel.c
liballoc.c
libmain.c flex library (-lfl) sources
libyywrap.c
initscan.c pre-flex'd version of scan.l
FlexLexer.h header file for C++ lexer class
flexdoc.1 full user documentation
flex.1 reference documentation
MISC/ a directory containing miscellaneous contributions.
See MISC/README for details.

149
usr.bin/lex/ccl.c Normal file
View File

@ -0,0 +1,149 @@
/* ccl - routines for character classes */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: /home/daffy/u0/vern/flex/RCS/ccl.c,v 2.9 93/09/16 20:32:14 vern Exp $ */
#include "flexdef.h"
/* ccladd - append one character to a ccl unless it is already a member
 *
 * cclp is the index of the ccl and ch the character to add.  The ccl's
 * members live in ccltbl[] starting at cclmap[cclp]; the new member is
 * stored directly after the existing ones, and the table is enlarged
 * first if that slot lies beyond its current size.
 */
void ccladd( cclp, ch )
int cclp;
int ch;
	{
	int first, count, slot, k;

	check_char( ch );

	first = cclmap[cclp];
	count = ccllen[cclp];
	slot = first + count;

	/* Nothing to do if ch is already a member. */
	for ( k = first; k < slot; ++k )
		if ( ccltbl[k] == ch )
			return;

	/* Make room for the new member if the table is full. */
	if ( slot >= current_max_ccl_tbl_size )
		{
		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;

		++num_reallocs;

		ccltbl = reallocate_Character_array( ccltbl,
						current_max_ccl_tbl_size );
		}

	ccltbl[slot] = ch;
	ccllen[cclp] = count + 1;
	}
/* cclinit - create a new, empty, un-negated ccl and return its index
 *
 * Bumps lastccl, growing the per-ccl arrays (cclmap, ccllen, cclng) when
 * the new index no longer fits.
 */
int cclinit()
	{
	int fresh;

	++lastccl;
	fresh = lastccl;

	if ( fresh >= current_maxccls )
		{
		current_maxccls += MAX_CCLS_INCREMENT;

		++num_reallocs;

		cclmap = reallocate_integer_array( cclmap, current_maxccls );
		ccllen = reallocate_integer_array( ccllen, current_maxccls );
		cclng = reallocate_integer_array( cclng, current_maxccls );
		}

	/* The very first ccl starts at the beginning of the member table.
	 * Every later one starts right behind its predecessor: cclmap gives
	 * the predecessor's first member, so adding the predecessor's
	 * length yields the first free position.
	 */
	cclmap[fresh] = ( fresh == 1 ) ?
			0 : cclmap[fresh - 1] + ccllen[fresh - 1];

	ccllen[fresh] = 0;
	cclng[fresh] = 0;	/* a new ccl is not negated */

	return fresh;
	}
/* cclnegate - mark the ccl with index cclp as negated */
void cclnegate( cclp )
int cclp;
	{
	int *negated = &cclng[cclp];

	*negated = 1;
	}
/* list_character_set - write a set of characters to a file in CCL form
 *
 * cset is indexed by character value (0 .. csize - 1); a character is a
 * member of the set if its entry is non-zero.  The output is bracketed by
 * '[' and ']'.  Each maximal run of consecutive members is written as
 * " x " for a single character or " x-y " for a longer run, using
 * readable_form() to render the characters.
 */
void list_character_set( file, cset )
FILE *file;
int cset[];
	{
	int pos = 0;

	putc( '[', file );

	while ( pos < csize )
		{
		int run_first, run_last;

		if ( ! cset[pos] )
			{
			++pos;
			continue;
			}

		/* Find the last member of the run starting at pos. */
		run_first = pos;
		run_last = pos;

		while ( run_last + 1 < csize && cset[run_last + 1] )
			++run_last;

		putc( ' ', file );
		fputs( readable_form( run_first ), file );

		if ( run_last > run_first )
			/* more than one character: print the upper end */
			fprintf( file, "-%s", readable_form( run_last ) );

		putc( ' ', file );

		/* Position run_last + 1 is either past the end or known
		 * not to be a member, so resume one beyond it.
		 */
		pos = run_last + 2;
		}

	putc( ']', file );
	}

1085
usr.bin/lex/dfa.c Normal file

File diff suppressed because it is too large Load Diff

225
usr.bin/lex/ecs.c Normal file
View File

@ -0,0 +1,225 @@
/* ecs - equivalence class routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: /home/daffy/u0/vern/flex/RCS/ecs.c,v 2.9 93/12/07 10:18:20 vern Exp $ */
#include "flexdef.h"
/* ccl2ecl - convert character classes to set of equivalence classes */
void ccl2ecl()
{
int i, ich, newlen, cclp, ccls, cclmec;
for ( i = 1; i <= lastccl; ++i )
{
/* We loop through each character class, and for each character
* in the class, add the character's equivalence class to the
* new "character" class we are creating. Thus when we are all
* done, character classes will really consist of collections
* of equivalence classes
*/
newlen = 0;
cclp = cclmap[i];
for ( ccls = 0; ccls < ccllen[i]; ++ccls )
{
ich = ccltbl[cclp + ccls];
cclmec = ecgroup[ich];
if ( cclmec > 0 )
{
ccltbl[cclp + newlen] = cclmec;
++newlen;
}
}
ccllen[i] = newlen;
}
}
/* cre8ecs - number the equivalence classes and tag every member
 *
 * fwd is the forward linked-list of equivalence class members, bck the
 * backward linked-list, and num the number of objects (indexed 1 .. num).
 *
 * On return bck no longer holds links: ABS( bck[x] ) is the equivalence
 * class number of object x, and bck[x] is positive exactly for the
 * representative of the class (the object whose bck link was NIL).
 *
 * Returns the number of classes.
 */
int cre8ecs( fwd, bck, num )
int fwd[], bck[], num;
	{
	int rep, member;
	int classes = 0;

	for ( rep = 1; rep <= num; ++rep )
		{
		/* Only an object without a backward link starts a class;
		 * members already numbered below are non-NIL and skipped.
		 */
		if ( bck[rep] != NIL )
			continue;

		++classes;
		bck[rep] = classes;

		member = fwd[rep];
		while ( member != NIL )
			{
			bck[member] = -classes;
			member = fwd[member];
			}
		}

	return classes;
	}
/* mkeccl - update equivalence classes based on character class xtions
 *
 * synopsis
 *    Char ccls[];
 *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
 *    void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
 *			int llsiz, int NUL_mapping );
 *
 * ccls contains the elements of the character class, lenccl is the
 * number of elements in the ccl, fwd is the forward link-list of equivalent
 * characters, bck is the backward link-list, and llsiz is the size of the
 * link-list.
 *
 * NUL_mapping is the value which NUL (0) should be mapped to.
 *
 * Each ccl member that has not been handled yet is taken in turn.  The
 * members of its current equivalence class are walked through fwd and
 * split in two: those that also occur in the ccl are chained into a new
 * class headed by the ccl member, the others stay chained in the old one.
 *
 * NOTE(review): the inner scan gives up as soon as a ccl element is
 * larger than the class member being looked for, so ccls appears to be
 * expected in ascending order - confirm against the callers.
 */
void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
Char ccls[];
int lenccl, fwd[], bck[], llsiz, NUL_mapping;
	{
	int cclp, oldec, newec;
	int cclm, i, j;

	/* cclflags[j] is non-zero while ccls[j] has already been linked
	 * into a new class and must not be processed again.
	 */
	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */

	/* Note that it doesn't matter whether or not the character class is
	 * negated.  The same results will be obtained in either case.
	 */

	cclp = 0;

	while ( cclp < lenccl )
		{
		cclm = ccls[cclp];

		if ( NUL_mapping && cclm == 0 )
			cclm = NUL_mapping;

		oldec = bck[cclm];
		newec = cclm;

		j = cclp + 1;

		for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
			{ /* look for the symbol in the character class */
			for ( ; j < lenccl; ++j )
				{
				register int ccl_char;

				if ( NUL_mapping && ccls[j] == 0 )
					ccl_char = NUL_mapping;
				else
					ccl_char = ccls[j];

				if ( ccl_char > i )
					break;

				if ( ccl_char == i && ! cclflags[j] )
					{
					/* We found an old companion of cclm
					 * in the ccl.  Link it into the new
					 * equivalence class and flag it as
					 * having been processed.
					 */

					bck[i] = newec;
					fwd[newec] = i;
					newec = i;
					/* Set flag so we don't reprocess. */
					cclflags[j] = 1;

					/* Get next equivalence class member. */
					/* continue 2 */
					goto next_pt;
					}
				}

			/* Symbol isn't in character class.  Put it in the old
			 * equivalence class.
			 */

			bck[i] = oldec;

			if ( oldec != NIL )
				fwd[oldec] = i;

			oldec = i;

			next_pt: ;
			}

		if ( bck[cclm] != NIL || oldec != bck[cclm] )
			{
			bck[cclm] = NIL;
			fwd[oldec] = NIL;
			}

		fwd[newec] = NIL;

		/* Find next ccl member to process.  The bound is tested
		 * before the flag so that cclflags[] is never read at index
		 * lenccl, which lies one past the end of the array when the
		 * ccl holds CSIZE elements.
		 */

		for ( ++cclp; cclp < lenccl && cclflags[cclp]; ++cclp )
			{
			/* Reset "doesn't need processing" flag. */
			cclflags[cclp] = 0;
			}
		}
	}
/* mkechar - put a single character into an equivalence class of its own
 *
 * tch is unlinked from the doubly linked member list described by fwd
 * (forward links) and bck (backward links): its neighbours, if any, are
 * joined to each other, and both of tch's own links are set to NIL.
 */
void mkechar( tch, fwd, bck )
int tch, fwd[], bck[];
	{
	int succ, pred;

	/* Let the successor point back past tch. */
	succ = fwd[tch];

	if ( succ != NIL )
		bck[succ] = bck[tch];

	/* Let the predecessor point forward past tch. */
	pred = bck[tch];

	if ( pred != NIL )
		fwd[pred] = fwd[tch];

	fwd[tch] = NIL;
	bck[tch] = NIL;
	}

1001
usr.bin/lex/flex.1 Normal file

File diff suppressed because it is too large Load Diff

1225
usr.bin/lex/flex.skl Normal file

File diff suppressed because it is too large Load Diff

902
usr.bin/lex/flexdef.h Normal file
View File

@ -0,0 +1,902 @@
/* flexdef - definitions file for flex */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* @(#) $Header: flexdef.h,v 1.2 94/01/04 14:33:14 vern Exp $ (LBL) */
#include <stdio.h>
#include <ctype.h>
#if HAVE_STRING_H
#include <string.h>
#else
#include <strings.h>
#endif
#if __STDC__
#include <stdlib.h>
#endif
/* Always be prepared to generate an 8-bit scanner. */
#define CSIZE 256
#define Char unsigned char
/* Size of input alphabet - should be size of ASCII set. */
#ifndef DEFAULT_CSIZE
#define DEFAULT_CSIZE 128
#endif
#ifndef PROTO
#ifdef __STDC__
#define PROTO(proto) proto
#else
#define PROTO(proto) ()
#endif
#endif
#ifdef VMS
#define unlink delete
#define SHORT_FILE_NAMES
#endif
#ifdef MS_DOS
#define SHORT_FILE_NAMES
#endif
/* Maximum line length we'll have to deal with. */
#define MAXLINE 2048
/* Generic minimum, maximum and absolute value.  Each is defined only if
 * no macro of that name exists already.  The arguments are expanded more
 * than once, so do not pass expressions with side effects.
 */
#ifndef MIN
#define MIN(x,y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#endif
#ifndef ABS
#define ABS(x) ((x) < 0 ? -(x) : (x))
#endif
/* ANSI C does not guarantee that isascii() is defined.  This fallback is
 * true only for values in the range 0 .. 0177: any bit set above the low
 * seven - which includes every negative value, e.g. a sign-extended 8-bit
 * char - makes the result false.  The argument is expanded exactly once.
 */
#ifndef isascii
#define isascii(c) (((c) & ~0177) == 0)
#endif
#define true 1
#define false 0
/* Special chk[] values marking the slots taken by end-of-buffer and action
* numbers.
*/
#define EOB_POSITION -1
#define ACTION_POSITION -2
/* Number of data items per line for -f output. */
#define NUMDATAITEMS 10
/* Number of lines of data in -f output before inserting a blank line for
* readability.
*/
#define NUMDATALINES 10
/* Transition_struct_out() definitions. */
#define TRANS_STRUCT_PRINT_LENGTH 15
/* Returns true if an nfa state has an epsilon out-transition slot
* that can be used. This definition is currently not used.
*/
#define FREE_EPSILON(state) \
(transchar[state] == SYM_EPSILON && \
trans2[state] == NO_TRANSITION && \
finalst[state] != state)
/* Returns true if an nfa state has an epsilon out-transition character
 * and both slots are free.  Only trans1[] is examined.
 * NOTE(review): presumably trans2 is never in use while trans1 is free -
 * confirm against nfa.c.
 *
 * The definition must not end in a backslash: a trailing line
 * continuation would splice whatever source line follows into the macro
 * body.
 */
#define SUPER_FREE_EPSILON(state) \
	(transchar[state] == SYM_EPSILON && \
	 trans1[state] == NO_TRANSITION)
/* Maximum number of NFA states that can comprise a DFA state. It's real
* big because if there's a lot of rules, the initial state will have a
* huge epsilon closure.
*/
#define INITIAL_MAX_DFA_SIZE 750
#define MAX_DFA_SIZE_INCREMENT 750
/* A note on the following masks. They are used to mark accepting numbers
* as being special. As such, they implicitly limit the number of accepting
* numbers (i.e., rules) because if there are too many rules the rule numbers
* will overload the mask bits. Fortunately, this limit is \large/ (0x2000 ==
* 8192) so unlikely to actually cause any problems. A check is made in
* new_rule() to ensure that this limit is not reached.
*/
/* Mask to mark a trailing context accepting number. */
#define YY_TRAILING_MASK 0x2000
/* Mask to mark the accepting number of the "head" of a trailing context
* rule.
*/
#define YY_TRAILING_HEAD_MASK 0x4000
/* Maximum number of rules, as outlined in the above note. */
#define MAX_RULE (YY_TRAILING_MASK - 1)
/* NIL must be 0. If not, its special meaning when making equivalence classes
* (it marks the representative of a given e.c.) will be unidentifiable.
*/
#define NIL 0
#define JAM -1 /* to mark a missing DFA transition */
#define NO_TRANSITION NIL
#define UNIQUE -1 /* marks a symbol as an e.c. representative */
#define INFINITY -1 /* for x{5,} constructions */
#define INITIAL_MAX_CCLS 100 /* max number of unique character classes */
#define MAX_CCLS_INCREMENT 100
/* Size of table holding members of character classes. */
#define INITIAL_MAX_CCL_TBL_SIZE 500
#define MAX_CCL_TBL_SIZE_INCREMENT 250
#define INITIAL_MAX_RULES 100 /* default maximum number of rules */
#define MAX_RULES_INCREMENT 100
#define INITIAL_MNS 2000 /* default maximum number of nfa states */
#define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */
#define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */
#define MAX_DFAS_INCREMENT 1000
#define JAMSTATE -32766 /* marks a reference to the state that always jams */
/* Enough so that if it's subtracted from an NFA state number, the result
* is guaranteed to be negative.
*/
#define MARKER_DIFFERENCE 32000
#define MAXIMUM_MNS 31999
/* Maximum number of nxt/chk pairs for non-templates. */
#define INITIAL_MAX_XPAIRS 2000
#define MAX_XPAIRS_INCREMENT 2000
/* Maximum number of nxt/chk pairs needed for templates. */
#define INITIAL_MAX_TEMPLATE_XPAIRS 2500
#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500
#define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */
#define INITIAL_MAX_SCS 40 /* maximum number of start conditions */
#define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */
#define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */
#define SAME_TRANS -1 /* transition is the same as "default" entry for state */
/* The following percentages are used to tune table compression:
* The percentage the number of out-transitions a state must be of the
* number of equivalence classes in order to be considered for table
* compaction by using protos.
*/
#define PROTO_SIZE_PERCENTAGE 15
/* The percentage the number of homogeneous out-transitions of a state
* must be of the number of total out-transitions of the state in order
* that the state's transition table is first compared with a potential
* template of the most common out-transition instead of with the first
* proto in the proto queue.
*/
#define CHECK_COM_PERCENTAGE 50
/* The percentage the number of differences between a state's transition
* table and the proto it was first compared with must be of the total
* number of out-transitions of the state in order to keep the first
* proto as a good match and not search any further.
*/
#define FIRST_MATCH_DIFF_PERCENTAGE 10
/* The percentage the number of differences between a state's transition
* table and the most similar proto must be of the state's total number
* of out-transitions to use the proto as an acceptable close match.
*/
#define ACCEPTABLE_DIFF_PERCENTAGE 50
/* The percentage the number of homogeneous out-transitions of a state
* must be of the number of total out-transitions of the state in order
* to consider making a template from the state.
*/
#define TEMPLATE_SAME_PERCENTAGE 60
/* The percentage the number of differences between a state's transition
* table and the most similar proto must be of the state's total number
* of out-transitions to create a new proto from the state.
*/
#define NEW_PROTO_DIFF_PERCENTAGE 20
/* The percentage the total number of out-transitions of a state must be
* of the number of equivalence classes in order to consider trying to
* fit the transition table into "holes" inside the nxt/chk table.
*/
#define INTERIOR_FIT_PERCENTAGE 15
/* Size of region set aside to cache the complete transition table of
* protos on the proto queue to enable quick comparisons.
*/
#define PROT_SAVE_SIZE 2000
#define MSP 50 /* maximum number of saved protos (protos on the proto queue) */
/* Maximum number of out-transitions a state can have that we'll rummage
* around through the interior of the internal fast table looking for a
* spot for it.
*/
#define MAX_XTIONS_FULL_INTERIOR_FIT 4
/* Maximum number of rules which will be reported as being associated
* with a DFA state.
*/
#define MAX_ASSOC_RULES 100
/* Number that, if used to subscript an array, has a good chance of producing
* an error; should be small enough to fit into a short.
*/
#define BAD_SUBSCRIPT -32767
/* Absolute value of largest number that can be stored in a short, with a
* bit of slop thrown in for general paranoia.
*/
#define MAX_SHORT 32700
/* Declarations for global variables. */
/* Variables for symbol tables:
* sctbl - start-condition symbol table
* ndtbl - name-definition symbol table
* ccltab - character class text symbol table
*/
/* One entry of a symbol table (ndtbl, sctbl and ccltab are arrays of
 * pointers to these).  An entry carries links to a previous and a next
 * entry, a name, and one string and one integer value.
 * NOTE(review): presumably prev/next chain the entries that share a hash
 * bucket and name is the lookup key - confirm against sym.c.
 */
struct hash_entry
{
struct hash_entry *prev, *next;
char *name;
char *str_val;
int int_val;
} ;
typedef struct hash_entry **hash_table;
#define NAME_TABLE_HASH_SIZE 101
#define START_COND_HASH_SIZE 101
#define CCL_HASH_SIZE 101
extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
extern struct hash_entry *sctbl[START_COND_HASH_SIZE];
extern struct hash_entry *ccltab[CCL_HASH_SIZE];
/* Variables for flags:
* printstats - if true (-v), dump statistics
* syntaxerror - true if a syntax error has been found
* eofseen - true if we've seen an eof in the input file
* ddebug - if true (-d), make a "debug" scanner
* trace - if true (-T), trace processing
* nowarn - if true (-w), do not generate warnings
* spprdflt - if true (-s), suppress the default rule
* interactive - if true (-I), generate an interactive scanner
* caseins - if true (-i), generate a case-insensitive scanner
* lex_compat - if true (-l), maximize compatibility with AT&T lex
* useecs - if true (-Ce flag), use equivalence classes
* fulltbl - if true (-Cf flag), don't compress the DFA state table
* usemecs - if true (-Cm flag), use meta-equivalence classes
* fullspd - if true (-F flag), use Jacobson method of table representation
* gen_line_dirs - if true (i.e., no -L flag), generate #line directives
* performance_report - if > 0 (i.e., -p flag), generate a report relating
* to scanner performance; if > 1 (-p -p), report on minor performance
* problems, too
* backing_up_report - if true (i.e., -b flag), generate "lex.backup" file
* listing backing-up states
* C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class;
* otherwise, a standard C scanner
* long_align - if true (-Ca flag), favor long-word alignment.
* use_read - if true (-f, -F, or -Cr) then use read() for scanner input;
* otherwise, use fread().
* yytext_is_array - if true (i.e., %array directive), then declare
* yytext as an array instead of a character pointer. Nice and inefficient.
* csize - size of character set for the scanner we're generating;
* 128 for 7-bit chars and 256 for 8-bit
* yymore_used - if true, yymore() is used in input rules
* reject - if true, generate back-up tables for REJECT macro
* real_reject - if true, scanner really uses REJECT (as opposed to just
* having "reject" set for variable trailing context)
* continued_action - true if this rule's action is to "fall through" to
* the next rule's action (i.e., the '|' action)
* yymore_really_used - has a REALLY_xxx value indicating whether a
* %used or %unused was used with yymore()
* reject_really_used - same for REJECT
*/
extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
extern int interactive, caseins, lex_compat, useecs, fulltbl, usemecs;
extern int fullspd, gen_line_dirs, performance_report, backing_up_report;
extern int C_plus_plus, long_align, use_read, yytext_is_array, csize;
extern int yymore_used, reject, real_reject, continued_action;
#define REALLY_NOT_DETERMINED 0
#define REALLY_USED 1
#define REALLY_NOT_USED 2
extern int yymore_really_used, reject_really_used;
/* Variables used in the flex input routines:
* datapos - characters on current output line
* dataline - number of contiguous lines of data in current data
* statement. Used to generate readable -f output
* linenum - current input line number
* skelfile - the skeleton file
* skel - compiled-in skeleton array
* skel_ind - index into "skel" array, if skelfile is nil
* yyin - input file
* backing_up_file - file to summarize backing-up states to
* infilename - name of input file
* input_files - array holding names of input files
* num_input_files - size of input_files array
* program_name - name with which program was invoked
*
* action_array - array to hold the rule actions
* action_size - size of action_array
* defs1_offset - index where the user's section 1 definitions start
* in action_array
* prolog_offset - index where the prolog starts in action_array
* action_offset - index where the non-prolog starts in action_array
* action_index - index where the next action should go, with respect
* to "action_array"
*/
extern int datapos, dataline, linenum;
extern FILE *skelfile, *yyin, *backing_up_file;
extern char *skel[];
extern int skel_ind;
extern char *infilename;
extern char **input_files;
extern int num_input_files;
extern char *program_name;
extern char *action_array;
extern int action_size;
extern int defs1_offset, prolog_offset, action_offset, action_index;
/* Variables for stack of states having only one out-transition:
* onestate - state number
* onesym - transition symbol
* onenext - target state
* onedef - default base entry
* onesp - stack pointer
*/
extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
/* Variables for nfa machine data:
* current_mns - current maximum on number of NFA states
* num_rules - number of the last accepting state; also is number of
* rules created so far
* num_eof_rules - number of <<EOF>> rules
* default_rule - number of the default rule
* current_max_rules - current maximum number of rules
* lastnfa - last nfa state number created
* firstst - physically the first state of a fragment
* lastst - last physical state of fragment
* finalst - last logical state of fragment
* transchar - transition character
* trans1 - transition state
* trans2 - 2nd transition state for epsilons
* accptnum - accepting number
* assoc_rule - rule associated with this NFA state (or 0 if none)
* state_type - a STATE_xxx type identifying whether the state is part
* of a normal rule, the leading state in a trailing context
* rule (i.e., the state which marks the transition from
* recognizing the text-to-be-matched to the beginning of
* the trailing context), or a subsequent state in a trailing
* context rule
* rule_type - a RULE_xxx type identifying whether this is a ho-hum
* normal rule or one which has variable head & trailing
* context
* rule_linenum - line number associated with rule
* rule_useful - true if we've determined that the rule can be matched
*/
extern int current_mns, num_rules, num_eof_rules, default_rule;
extern int current_max_rules, lastnfa;
extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
extern int *accptnum, *assoc_rule, *state_type;
extern int *rule_type, *rule_linenum, *rule_useful;
/* Different types of states; values are useful as masks, as well, for
* routines like check_trailing_context().
*/
#define STATE_NORMAL 0x1
#define STATE_TRAILING_CONTEXT 0x2
/* Global holding current type of state we're making. */
extern int current_state_type;
/* Different types of rules. */
#define RULE_NORMAL 0
#define RULE_VARIABLE 1
/* True if the input rules include a rule with both variable-length head
* and trailing context, false otherwise.
*/
extern int variable_trailing_context_rules;
/* Variables for protos:
* numtemps - number of templates created
* numprots - number of protos created
* protprev - backlink to a more-recently used proto
* protnext - forward link to a less-recently used proto
* prottbl - base/def table entry for proto
* protcomst - common state of proto
* firstprot - number of the most recently used proto
* lastprot - number of the least recently used proto
* protsave contains the entire state array for protos
*/
extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
/* Variables for managing equivalence classes:
* numecs - number of equivalence classes
* nextecm - forward link of Equivalence Class members
* ecgroup - class number or backward link of EC members
* nummecs - number of meta-equivalence classes (used to compress
* templates)
* tecfwd - forward link of meta-equivalence classes members
* tecbck - backward link of MEC's
*/
/* Reserve enough room in the equivalence class arrays so that we
* can use the CSIZE'th element to hold equivalence class information
* for the NUL character. Later we'll move this information into
* the 0th element.
*/
extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs;
/* Meta-equivalence classes are indexed starting at 1, so it's possible
* that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1
* slots total (since the arrays are 0-based). nextecm[] and ecgroup[]
* don't require the extra position since they're indexed from 1 .. CSIZE - 1.
*/
extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1];
/* Variables for start conditions:
* lastsc - last start condition created
* current_max_scs - current limit on number of start conditions
* scset - set of rules active in start condition
* scbol - set of rules active only at the beginning of line in a s.c.
* scxclu - true if start condition is exclusive
* sceof - true if start condition has EOF rule
* scname - start condition name
* actvsc - stack of active start conditions for the current rule;
* a negative entry means that the start condition is *not*
* active for the current rule. Start conditions may appear
* multiple times on the stack; the entry for it closest
* to the top of the stack (i.e., actvsc[actvp]) is the
* one to use. Others are present from "<sc>{" scoping
* constructs.
*/
extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
extern char **scname;
/* Variables for dfa machine data:
* current_max_dfa_size - current maximum number of NFA states in DFA
* current_max_xpairs - current maximum number of non-template xtion pairs
* current_max_template_xpairs - current maximum number of template pairs
* current_max_dfas - current maximum number of DFA states
* lastdfa - last dfa state number created
* nxt - state to enter upon reading character
* chk - check value to see if "nxt" applies
* tnxt - internal nxt table for templates
* base - offset into "nxt" for given state
* def - where to go if "chk" disallows "nxt" entry
* nultrans - NUL transition for each state
* NUL_ec - equivalence class of the NUL character
* tblend - last "nxt/chk" table entry being used
* firstfree - first empty entry in "nxt/chk" table
* dss - nfa state set for each dfa
* dfasiz - size of nfa state set for each dfa
* dfaacc - accepting set for each dfa state (if using REJECT), or accepting
* number, if not
* accsiz - size of accepting set for each dfa state
* dhash - dfa state hash value
* numas - number of DFA accepting states created; note that this
* is not necessarily the same value as num_rules, which is the analogous
* value for the NFA
* numsnpairs - number of state/nextstate transition pairs
* jambase - position in base/def where the default jam table starts
* jamstate - state number corresponding to "jam" state
* end_of_buffer_state - end-of-buffer dfa state number
*/
extern int current_max_dfa_size, current_max_xpairs;
extern int current_max_template_xpairs, current_max_dfas;
extern int lastdfa, *nxt, *chk, *tnxt;
extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz;
/* Accepting information, one element per dfa state.
 * NOTE(review): presumably dfaacc_set is the member used when the scanner
 * uses REJECT (a set of accepting numbers) and dfaacc_state the member
 * used otherwise (a single accepting number) - confirm against dfa.c.
 */
extern union dfaacc_union
{
int *dfaacc_set;
int dfaacc_state;
} *dfaacc;
extern int *accsiz, *dhash, numas;
extern int numsnpairs, jambase, jamstate;
extern int end_of_buffer_state;
/* Variables for ccl information:
* lastccl - ccl index of the last created ccl
* current_maxccls - current limit on the maximum number of unique ccl's
* cclmap - maps a ccl index to its set pointer
* ccllen - gives the length of a ccl
* cclng - true for a given ccl if the ccl is negated
* cclreuse - counts how many times a ccl is re-used
* current_max_ccl_tbl_size - current limit on number of characters needed
* to represent the unique ccl's
* ccltbl - holds the characters in each ccl - indexed by cclmap
*/
extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
extern int current_max_ccl_tbl_size;
extern Char *ccltbl;
/* Variables for miscellaneous information:
* nmstr - last NAME scanned by the scanner
* sectnum - section number currently being parsed
* nummt - number of empty nxt/chk table entries
* hshcol - number of hash collisions detected by snstods
* dfaeql - number of times a newly created dfa was equal to an old one
* numeps - number of epsilon NFA states created
* eps2 - number of epsilon states which have 2 out-transitions
* num_reallocs - number of times it was necessary to realloc() a group
* of arrays
* tmpuses - number of DFA states that chain to templates
* totnst - total number of NFA states used to make DFA states
* peakpairs - peak number of transition pairs we had to store internally
* numuniq - number of unique transitions
* numdup - number of duplicate transitions
* hshsave - number of hash collisions saved by checking number of states
* num_backing_up - number of DFA states requiring backing up
* bol_needed - whether scanner needs beginning-of-line recognition
*/
extern char nmstr[MAXLINE];
extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
extern int num_backing_up, bol_needed;
/* Memory-management routines.
 * allocate_array() and reallocate_array() take an element count followed
 * by an element size; the typed wrapper macros that follow supply the
 * element size and cast the result.
 * NOTE(review): flex_alloc(), flex_realloc() and flex_free() look like the
 * underlying byte-level allocator hooks - confirm against their
 * definitions, which are not in this header.
 */
void *allocate_array PROTO((int, int));
void *reallocate_array PROTO((void*, int, int));
void *flex_alloc PROTO((unsigned int));
void *flex_realloc PROTO((void*, unsigned int));
void flex_free PROTO((void*));
/* Typed convenience wrappers around allocate_array()/reallocate_array().
 * Each macro supplies the element size for its type and casts the result
 * to the matching pointer type.
 *
 * Every macro argument and every expansion is fully parenthesized so that
 * expression arguments (e.g. "base + 1") are cast as a whole and postfix
 * uses of the result (e.g. indexing) apply to the cast pointer rather
 * than to the void * returned by the underlying routine.
 */
#define allocate_integer_array(size) \
	((int *) allocate_array( (size), sizeof( int ) ))

#define reallocate_integer_array(array,size) \
	((int *) reallocate_array( (void *) (array), (size), sizeof( int ) ))

#define allocate_int_ptr_array(size) \
	((int **) allocate_array( (size), sizeof( int * ) ))

#define allocate_char_ptr_array(size) \
	((char **) allocate_array( (size), sizeof( char * ) ))

#define allocate_dfaacc_union(size) \
	((union dfaacc_union *) \
		allocate_array( (size), sizeof( union dfaacc_union ) ))

#define reallocate_int_ptr_array(array,size) \
	((int **) reallocate_array( (void *) (array), (size), sizeof( int * ) ))

#define reallocate_char_ptr_array(array,size) \
	((char **) reallocate_array( (void *) (array), (size), \
		sizeof( char * ) ))

#define reallocate_dfaacc_union(array, size) \
	((union dfaacc_union *) \
		reallocate_array( (void *) (array), (size), \
			sizeof( union dfaacc_union ) ))

#define allocate_character_array(size) \
	((char *) allocate_array( (size), sizeof( char ) ))

#define reallocate_character_array(array,size) \
	((char *) reallocate_array( (void *) (array), (size), sizeof( char ) ))

#define allocate_Character_array(size) \
	((Char *) allocate_array( (size), sizeof( Char ) ))

#define reallocate_Character_array(array,size) \
	((Char *) reallocate_array( (void *) (array), (size), sizeof( Char ) ))
/* Used to communicate between scanner and parser. The type should really
* be YYSTYPE, but we can't easily get our hands on it.
*/
extern int yylval;
/* External functions that are cross-referenced among the flex source files. */
/* from file ccl.c */
extern void ccladd PROTO((int, int)); /* add a single character to a ccl */
extern int cclinit PROTO((void)); /* make an empty ccl */
extern void cclnegate PROTO((int)); /* negate a ccl */
/* List the members of a set of characters in CCL form. */
extern void list_character_set PROTO((FILE*, int[]));
/* from file dfa.c */
/* Increase the maximum number of dfas. */
extern void increase_max_dfas PROTO((void));
extern void ntod PROTO((void)); /* convert a ndfa to a dfa */
/* from file ecs.c */
/* Convert character classes to set of equivalence classes. */
extern void ccl2ecl PROTO((void));
/* Associate equivalence class numbers with class members. */
extern int cre8ecs PROTO((int[], int[], int));
/* Update equivalence classes based on character class transitions. */
extern void mkeccl PROTO((Char[], int, int[], int[], int, int));
/* Create equivalence class for single character. */
extern void mkechar PROTO((int, int[], int[]));
/* from file gen.c */
extern void make_tables PROTO((void)); /* generate transition tables */
/* from file main.c */
/* Terminate flex, exiting with the given status.  Does not return. */
extern void flexend PROTO((int));
/* Print a summary of the command-line options to stderr. */
extern void usage PROTO((void));
/* from file misc.c */
/* Add the given text to the stored actions. */
extern void add_action PROTO(( char *new_text ));
/* True if a string is all lower case. */
extern int all_lower PROTO((register char *));
/* True if a string is all upper case. */
extern int all_upper PROTO((register char *));
/* Bubble sort an integer array. */
extern void bubble PROTO((int [], int));
/* Check a character to make sure it's in the expected range. */
extern void check_char PROTO((int c));
/* Shell sort a character array. */
extern void cshell PROTO((Char [], int, int));
/* Finish up a block of data declarations. */
extern void dataend PROTO((void));
/* Report an error message and terminate. */
extern void flexerror PROTO((char[]));
/* Report a fatal error message and terminate. */
extern void flexfatal PROTO((char[]));
/* Report an error message formatted with one integer argument. */
extern void lerrif PROTO((char[], int));
/* Report an error message formatted with one string argument. */
extern void lerrsf PROTO((char[], char[]));
/* Spit out a "# line" statement. */
extern void line_directive_out PROTO((FILE*));
/* Mark the current position in the action array as the end of the section 1
* user defs.
*/
extern void mark_defs1 PROTO((void));
/* Mark the current position in the action array as the end of the prolog. */
extern void mark_prolog PROTO((void));
/* Generate a data statement for a two-dimensional array. */
extern void mk2data PROTO((int));
extern void mkdata PROTO((int)); /* generate a data statement */
/* Return the integer represented by a string of digits. */
extern int myctoi PROTO((char []));
/* Return a printable version of the given character, which might be
* 8-bit.
*/
extern char *readable_form PROTO((int));
/* Write out one section of the skeleton file. */
extern void skelout PROTO((void));
/* Output a yy_trans_info structure. */
extern void transition_struct_out PROTO((int, int));
/* Only needed when using certain broken versions of bison to build parse.c. */
extern void *yy_flex_xmalloc PROTO(( int ));
/* Set a region of memory to 0. */
extern void zero_out PROTO((char *, int));
/* from file nfa.c */
/* Add an accepting state to a machine. */
extern void add_accept PROTO((int, int));
/* Make a given number of copies of a singleton machine. */
extern int copysingl PROTO((int, int));
/* Debugging routine to write out an nfa. */
extern void dumpnfa PROTO((int));
/* Finish up the processing for a rule. */
extern void finish_rule PROTO((int, int, int, int));
/* Connect two machines together. */
extern int link_machines PROTO((int, int));
/* Mark each "beginning" state in a machine as being a "normal" (i.e.,
* not trailing context associated) state.
*/
extern void mark_beginning_as_normal PROTO((register int));
/* Make a machine that branches to two machines. */
extern int mkbranch PROTO((int, int));
extern int mkclos PROTO((int)); /* convert a machine into a closure */
extern int mkopt PROTO((int)); /* make a machine optional */
/* Make a machine that matches either one of two machines. */
extern int mkor PROTO((int, int));
/* Convert a machine into a positive closure. */
extern int mkposcl PROTO((int));
extern int mkrep PROTO((int, int, int)); /* make a replicated machine */
/* Create a state with a transition on a given symbol. */
extern int mkstate PROTO((int));
extern void new_rule PROTO((void)); /* initialize for a new rule */
/* from file parse.y */
/* Write out a message formatted with one string, pinpointing its location. */
extern void format_pinpoint_message PROTO((char[], char[]));
/* Write out a message, pinpointing its location. */
extern void pinpoint_message PROTO((char[]));
/* Write out a warning, pinpointing it at the given line. */
void line_warning PROTO(( char[], int ));
/* Write out a message, pinpointing it at the given line. */
void line_pinpoint PROTO(( char[], int ));
/* Report a formatted syntax error. */
extern void format_synerr PROTO((char [], char[]));
extern void synerr PROTO((char [])); /* report a syntax error */
extern void warn PROTO((char [])); /* report a warning */
extern int yyparse PROTO((void)); /* the YACC parser */
/* from file scan.l */
/* The Flex-generated scanner for flex. */
extern int flexscan PROTO((void));
/* Open the given file (if NULL, stdin) for scanning. */
extern void set_input_file PROTO((char*));
/* Wrapup a file in the lexical analyzer. */
extern int yywrap PROTO((void));
/* from file sym.c */
/* Save the text of a character class. */
extern void cclinstal PROTO ((Char [], int));
/* Lookup the number associated with character class. */
extern int ccllookup PROTO((Char []));
extern void ndinstal PROTO((char[], Char[])); /* install a name definition */
/* Increase maximum number of SC's. */
extern void scextend PROTO((void));
extern void scinstal PROTO((char[], int)); /* make a start condition */
/* Lookup the number associated with a start condition. */
extern int sclookup PROTO((char[]));
/* from file tblcmp.c */
/* Build table entries for dfa state. */
extern void bldtbl PROTO((int[], int, int, int, int));
extern void cmptmps PROTO((void)); /* compress template table entries */
extern void expand_nxt_chk PROTO((void)); /* increase nxt/chk arrays */
extern void inittbl PROTO((void)); /* initialize transition tables */
/* Make the default, "jam" table entries. */
extern void mkdeftbl PROTO((void));
/* Create table entries for a state (or state fragment) which has
* only one out-transition.
*/
extern void mk1tbl PROTO((int, int, int, int));
/* Place a state into full speed transition table. */
extern void place_state PROTO((int*, int, int));
/* Save states with only one out-transition to be processed later. */
extern void stack1 PROTO((int, int, int, int));
/* from file yylex.c */
extern int yylex PROTO((void));

3045
usr.bin/lex/flexdoc.1 Normal file

File diff suppressed because it is too large Load Diff

1461
usr.bin/lex/gen.c Normal file

File diff suppressed because it is too large Load Diff

2723
usr.bin/lex/initscan.c Normal file

File diff suppressed because it is too large Load Diff

20
usr.bin/lex/lib/Makefile Normal file
View File

@ -0,0 +1,20 @@
# $Id$
# Builds the flex run-time support library (libln) from libmain.c and
# libyywrap.c, and lists the extra names (libl.*) it is linked to at
# install time.
LIB= ln
SRCS= libmain.c libyywrap.c
# NOTE: this assignment must not end in a backslash; a trailing line
# continuation here would swallow the ".if" directive that follows.
LINKS= ${LIBDIR}/libln.a ${LIBDIR}/libl.a
.if !defined(NOPROFILE)
LINKS+= ${LIBDIR}/libln_p.a ${LIBDIR}/libl_p.a
.endif
.if !defined(NOSHARED)
LINKS+= ${LIBDIR}/libln.so.$(SHLIB_MAJOR).$(SHLIB_MINOR) \
${LIBDIR}/libl.so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
.endif
#This is where we get our SHLIB_MAJOR and SHLIB_MINOR
.include "${.CURDIR}/../../../lib/Makefile.inc"
.include <bsd.lib.mk>

12
usr.bin/lex/lib/libmain.c Normal file
View File

@ -0,0 +1,12 @@
/* libmain - flex run-time support library "main" function */
/* $Header: /home/daffy/u0/vern/flex/RCS/libmain.c,v 1.3 93/04/14 22:41:55 vern Exp $ */

extern int yylex();

/* Default program entry point for scanners linked against this library.
 * Calls yylex() exactly once and hands its result back as the return
 * value of main().  The command-line arguments are accepted but never
 * examined.
 */
int main( argc, argv )
int argc;
char *argv[];
	{
	int scan_result;

	scan_result = yylex();

	return scan_result;
	}

View File

@ -0,0 +1,8 @@
/* libyywrap - flex run-time support library "yywrap" function */
/* $Header: /home/daffy/u0/vern/flex/RCS/libyywrap.c,v 1.1 93/10/02 15:23:09 vern Exp $ */

/* Default yywrap(): takes no arguments and always returns 1.
 * NOTE(review): presumably 1 tells the generated scanner that no further
 * input is available - confirm against the scanner skeleton.
 */
int yywrap()
	{
	int wrap_status = 1;

	return wrap_status;
	}

989
usr.bin/lex/main.c Normal file
View File

@ -0,0 +1,989 @@
/* flex - tool to generate fast lexical analyzers */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1990 The Regents of the University of California.\n\
All rights reserved.\n";
#endif /* not lint */
/* $Header: main.c,v 1.2 94/01/04 14:33:11 vern Exp $ */
#include "flexdef.h"
#include "version.h"
/* Version string taken from FLEX_VERSION; printed by the -V flag and in
 * the heading of the -v statistics report.
 */
static char flex_version[] = FLEX_VERSION;
/* declare functions that have forward references */
void flexinit PROTO((int, char**));
void readin PROTO((void));
void set_up_initial_allocations PROTO((void));
/* these globals are all defined and commented in flexdef.h */
int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
int interactive, caseins, lex_compat, useecs, fulltbl, usemecs;
int fullspd, gen_line_dirs, performance_report, backing_up_report;
int C_plus_plus, long_align, use_read, yytext_is_array, csize;
int yymore_used, reject, real_reject, continued_action;
int yymore_really_used, reject_really_used;
int datapos, dataline, linenum;
FILE *skelfile = NULL;
int skel_ind = 0;
char *action_array;
int action_size, defs1_offset, prolog_offset, action_offset, action_index;
char *infilename = NULL;
int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
int current_mns, num_rules, num_eof_rules, default_rule;
int current_max_rules, lastnfa;
int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
int *accptnum, *assoc_rule, *state_type;
int *rule_type, *rule_linenum, *rule_useful;
int current_state_type;
int variable_trailing_context_rules;
int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
int tecbck[CSIZE + 1];
int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
char **scname;
int current_max_dfa_size, current_max_xpairs;
int current_max_template_xpairs, current_max_dfas;
int lastdfa, *nxt, *chk, *tnxt;
int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz;
union dfaacc_union *dfaacc;
int *accsiz, *dhash, numas;
int numsnpairs, jambase, jamstate;
int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
int current_max_ccl_tbl_size;
Char *ccltbl;
char nmstr[MAXLINE];
int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
int num_backing_up, bol_needed;
/* Stream for the -b backing-up report; opened by flexinit() and left NULL
 * when -b was not given.
 */
FILE *backing_up_file;
int end_of_buffer_state;
/* The command-line arguments left over after flag parsing (the input
 * files) and how many there are; both are set by flexinit().
 */
char **input_files;
int num_input_files;
/* argv[0]; used as the prefix of diagnostics and in the usage text. */
char *program_name;
/* printf-style template for the name of the generated scanner file.
 * flexinit() fills the first %s with the -P prefix and the second with
 * the "c" or "cc" suffix.
 */
#ifndef SHORT_FILE_NAMES
static char *outfile_template = "lex.%s.%s";
#else
static char *outfile_template = "lex%s.%s";
#endif
/* Receives the output file name formatted from outfile_template. */
static char outfile_path[64];
/* Set to 1 by flexinit() after stdout has been redirected to
 * outfile_path; flexend() only removes the file when this is set and the
 * exit status is nonzero.
 */
static int outfile_created = 0;
/* True if -t was given: the scanner is written to the original stdout
 * and no output file is created.
 */
static int use_stdout;
/* Skeleton file name given with -S, or NULL if the flag was not used. */
static char *skelname = NULL;
/* Prefix given with -P; "yy" unless overridden. */
static char *prefix = "yy";
/* main - drive one complete run of flex
 *
 * Initializes from the command line, reads and parses the input, converts
 * the NFA to a DFA, warns about rules that can never match (and about a
 * matchable default rule when -s was given), writes the tables and then
 * terminates through flexend().
 */
int main( argc, argv )
int argc;
char **argv;
	{
	int rule;

	flexinit( argc, argv );
	readin();
	ntod();

	/* The default rule is allowed to be useless; every other rule
	 * that never matched anything gets a warning.
	 */
	rule = 1;
	while ( rule <= num_rules )
		{
		if ( rule != default_rule )
			{
			if ( ! rule_useful[rule] )
				line_warning( "rule cannot be matched",
						rule_linenum[rule] );
			}

		++rule;
		}

	if ( spprdflt )
		{
		if ( ! reject && rule_useful[default_rule] )
			line_warning(
			"-s option given but default rule can be matched",
				rule_linenum[default_rule] );
		}

	/* Generate the C state transition tables from the DFA. */
	make_tables();

	/* flexend() exits with its argument as the process status and
	 * never comes back here.
	 */
	flexend( 0 );

	return 0;	/* keep compilers/lint happy */
	}
/* flexend - terminate flex
 *
 * synopsis
 *    flexend( exit_status );
 *
 * Closes the skeleton file if one is open, removes the output file when
 * the run failed, completes the backing-up report, prints the usage
 * statistics requested with -v, and exits with exit_status (exit_status
 * + 1 when built for VMS).
 *
 * note
 *    This routine does not return.
 */
void flexend( exit_status )
int exit_status;
	{
	int total_entries;
	FILE *err = stderr;
	int unlink();

	if ( skelfile != NULL )
		{
		if ( ferror( skelfile ) )
			flexfatal(
				"error occurred when reading skeleton file" );

		else if ( fclose( skelfile ) )
			flexfatal(
				"error occurred when closing skeleton file" );
		}

	/* A failed run must not leave a partial output file behind. */
	if ( exit_status != 0 && outfile_created )
		{
		if ( ferror( stdout ) )
			flexfatal( "error occurred when writing output file" );

		else if ( fclose( stdout ) )
			flexfatal( "error occurred when closing output file" );

		else if ( unlink( outfile_path ) )
			flexfatal( "error occurred when deleting output file" );
		}

	if ( backing_up_report && backing_up_file )
		{
		if ( num_backing_up == 0 )
			fputs( "No backing up.\n", backing_up_file );

		else if ( fullspd || fulltbl )
			fprintf( backing_up_file,
				"%d backing up (non-accepting) states.\n",
				num_backing_up );

		else
			fputs( "Compressed tables always back up.\n",
				backing_up_file );

		if ( ferror( backing_up_file ) )
			flexfatal( "error occurred when writing backup file" );

		else if ( fclose( backing_up_file ) )
			flexfatal( "error occurred when closing backup file" );
		}

	if ( printstats )
		{
		fprintf( err, "%s version %s usage statistics:\n",
			program_name, flex_version );

		/* Reconstruct the single-letter option string. */
		fputs( " scanner options: -", err );

		if ( C_plus_plus )
			putc( '+', err );
		if ( backing_up_report )
			putc( 'b', err );
		if ( ddebug )
			putc( 'd', err );
		if ( caseins )
			putc( 'i', err );
		if ( lex_compat )
			putc( 'l', err );
		if ( performance_report > 0 )
			putc( 'p', err );
		if ( performance_report > 1 )
			putc( 'p', err );
		if ( spprdflt )
			putc( 's', err );
		if ( use_stdout )
			putc( 't', err );
		if ( printstats )
			putc( 'v', err );	/* always true! */
		if ( nowarn )
			putc( 'w', err );
		if ( ! interactive )
			putc( 'B', err );
		if ( interactive )
			putc( 'I', err );
		if ( ! gen_line_dirs )
			putc( 'L', err );
		if ( trace )
			putc( 'T', err );

		putc( csize == 128 ? '7' : '8', err );

		/* Followed by the table-compression options. */
		fputs( " -C", err );

		if ( long_align )
			putc( 'a', err );
		if ( fulltbl )
			putc( 'f', err );
		if ( fullspd )
			putc( 'F', err );
		if ( useecs )
			putc( 'e', err );
		if ( usemecs )
			putc( 'm', err );
		if ( use_read )
			putc( 'r', err );

		if ( skelname )
			fprintf( err, " -S%s", skelname );

		if ( strcmp( prefix, "yy" ) )
			fprintf( err, " -P%s", prefix );

		putc( '\n', err );

		fprintf( err, " %d/%d NFA states\n", lastnfa, current_mns );
		fprintf( err, " %d/%d DFA states (%d words)\n", lastdfa,
			current_max_dfas, totnst );
		fprintf( err, " %d rules\n",
		num_rules + num_eof_rules - 1 /* - 1 for def. rule */ );

		if ( num_backing_up == 0 )
			fputs( " No backing up\n", err );
		else if ( fullspd || fulltbl )
			fprintf( err,
				" %d backing-up (non-accepting) states\n",
				num_backing_up );
		else
			fputs( " Compressed tables always back-up\n", err );

		if ( bol_needed )
			fputs( " Beginning-of-line patterns used\n", err );

		fprintf( err, " %d/%d start conditions\n", lastsc,
			current_max_scs );
		fprintf( err,
			" %d epsilon states, %d double epsilon states\n",
			numeps, eps2 );

		if ( lastccl == 0 )
			fputs( " no character classes\n", err );
		else
			fprintf( err,
" %d/%d character classes needed %d/%d words of storage, %d reused\n",
				lastccl, current_maxccls,
				cclmap[lastccl] + ccllen[lastccl],
				current_max_ccl_tbl_size, cclreuse );

		fprintf( err, " %d state/nextstate pairs created\n",
			numsnpairs );
		fprintf( err, " %d/%d unique/duplicate transitions\n",
			numuniq, numdup );

		if ( fulltbl )
			{
			total_entries = lastdfa * numecs;
			fprintf( err, " %d table entries\n", total_entries );
			}

		else
			{
			total_entries = 2 * (lastdfa + numtemps) + 2 * tblend;

			fprintf( err, " %d/%d base-def entries created\n",
				lastdfa + numtemps, current_max_dfas );
			fprintf( err,
				" %d/%d (peak %d) nxt-chk entries created\n",
				tblend, current_max_xpairs, peakpairs );
			fprintf( err,
			" %d/%d (peak %d) template nxt-chk entries created\n",
				numtemps * nummecs, current_max_template_xpairs,
				numtemps * numecs );
			fprintf( err, " %d empty table entries\n", nummt );
			fprintf( err, " %d protos created\n", numprots );
			fprintf( err, " %d templates created, %d uses\n",
				numtemps, tmpuses );
			}

		if ( useecs )
			{
			total_entries += csize;
			fprintf( err,
				" %d/%d equivalence classes created\n",
				numecs, csize );
			}

		if ( usemecs )
			{
			total_entries += numecs;
			fprintf( err,
				" %d/%d meta-equivalence classes created\n",
				nummecs, csize );
			}

		fprintf( err,
			" %d (%d saved) hash collisions, %d DFAs equal\n",
			hshcol, hshsave, dfaeql );
		fprintf( err, " %d sets of reallocations needed\n",
			num_reallocs );
		fprintf( err, " %d total table entries needed\n",
			total_entries );
		}

#ifdef VMS
	exit( exit_status + 1 );
#else
	exit( exit_status );
#endif
	}
/* flexinit - initialize flex
 *
 * synopsis
 *    flexinit( argc, argv );
 *
 * Resets the option globals to their defaults, parses the command-line
 * flags, rejects incompatible flag combinations, redirects stdout to the
 * output file unless -t was given, hands the first input file to the
 * scanner, opens the backing-up report and skeleton files when requested,
 * emits the #defines needed for a -P prefix, and finally initializes the
 * counters, the equivalence class lists and the dynamic tables.
 *
 * argc and argv are main()'s arguments.  On return, input_files and
 * num_input_files describe the arguments that follow the flags.
 */
void flexinit( argc, argv )
int argc;
char **argv;
	{
	int i, sawcmpflag;
	int csize_given, interactive_given;
	char *arg;

	printstats = syntaxerror = trace = spprdflt = caseins = false;
	lex_compat = false;
	C_plus_plus = backing_up_report = ddebug = fulltbl = fullspd = false;
	long_align = nowarn = yymore_used = continued_action = reject = false;
	yytext_is_array = yymore_really_used = reject_really_used = false;
	gen_line_dirs = usemecs = useecs = true;
	performance_report = 0;

	sawcmpflag = false;
	use_read = use_stdout = false;
	csize_given = false;
	interactive_given = false;

	/* Initialize dynamic array for holding the rule actions. */
	action_size = 2048;	/* default size of action array in bytes */
	action_array = allocate_character_array( action_size );
	defs1_offset = prolog_offset = action_offset = action_index = 0;
	action_array[0] = '\0';

	program_name = argv[0];

	/* A program name ending in '+' selects C++ output, as -+ does. */
	if ( program_name[0] != '\0' &&
	     program_name[strlen( program_name ) - 1] == '+' )
		C_plus_plus = true;

	/* read flags */
	for ( --argc, ++argv; argc ; --argc, ++argv )
		{
		/* Stop at the first argument that is not a flag; a lone
		 * "-" counts as a file name.
		 */
		if ( argv[0][0] != '-' || argv[0][1] == '\0' )
			break;

		arg = argv[0];

		for ( i = 1; arg[i] != '\0'; ++i )
			switch ( arg[i] )
				{
				case '+':
					C_plus_plus = true;
					break;

				case 'B':
					interactive = false;
					interactive_given = true;
					break;

				case 'b':
					backing_up_report = true;
					break;

				case 'c':
					fprintf( stderr,
	"%s: Assuming use of deprecated -c flag is really intended to be -C\n",
					program_name );

					/* fall through */

				case 'C':
					if ( i != 1 )
						flexerror(
					"-C flag must be given separately" );

					/* The first -C seen clears the
					 * default compression settings.
					 */
					if ( ! sawcmpflag )
						{
						useecs = false;
						usemecs = false;
						fulltbl = false;
						sawcmpflag = true;
						}

					for ( ++i; arg[i] != '\0'; ++i )
						switch ( arg[i] )
							{
							case 'a':
								long_align =
									true;
								break;

							case 'e':
								useecs = true;
								break;

							case 'F':
								fullspd = true;
								break;

							case 'f':
								fulltbl = true;
								break;

							case 'm':
								usemecs = true;
								break;

							case 'r':
								use_read = true;
								break;

							default:
								lerrif(
						"unknown -C option '%c'",
								(int) arg[i] );
								break;
							}

					goto get_next_arg;

				case 'd':
					ddebug = true;
					break;

				case 'f':
					useecs = usemecs = false;
					use_read = fulltbl = true;
					break;

				case 'F':
					useecs = usemecs = false;
					use_read = fullspd = true;
					break;

				case 'h':
					usage();
					exit( 0 );

				case 'I':
					interactive = true;
					interactive_given = true;
					break;

				case 'i':
					caseins = true;
					break;

				case 'l':
					lex_compat = true;
					break;

				case 'L':
					gen_line_dirs = false;
					break;

				case 'n':
					/* Stupid do-nothing deprecated
					 * option.
					 */
					break;

				case 'P':
					if ( i != 1 )
						flexerror(
					"-P flag must be given separately" );

					/* The rest of the argument is the
					 * prefix.
					 */
					prefix = arg + i + 1;
					goto get_next_arg;

				case 'p':
					++performance_report;
					break;

				case 'S':
					if ( i != 1 )
						flexerror(
					"-S flag must be given separately" );

					/* The rest of the argument is the
					 * skeleton file name.
					 */
					skelname = arg + i + 1;
					goto get_next_arg;

				case 's':
					spprdflt = true;
					break;

				case 't':
					use_stdout = true;
					break;

				case 'T':
					trace = true;
					break;

				case 'v':
					printstats = true;
					break;

				case 'V':
					fprintf( stderr, "%s version %s\n",
						program_name, flex_version );
					exit( 0 );

				case 'w':
					nowarn = true;
					break;

				case '7':
					csize = 128;
					csize_given = true;
					break;

				case '8':
					csize = CSIZE;
					csize_given = true;
					break;

				default:
					fprintf( stderr,
						"%s: unknown flag '%c'\n",
						program_name, (int) arg[i] );
					usage();
					exit( 1 );
				}

		/* Used by -C, -S and -P flags in lieu of a "continue 2"
		 * control.
		 */
		get_next_arg: ;
		}

	if ( ! csize_given )
		{
		if ( (fulltbl || fullspd) && ! useecs )
			csize = DEFAULT_CSIZE;
		else
			csize = CSIZE;
		}

	if ( ! interactive_given )
		{
		if ( fulltbl || fullspd )
			interactive = false;
		else
			interactive = true;
		}

	if ( lex_compat )
		{
		if ( C_plus_plus )
			flexerror( "Can't use -+ with -l option" );

		if ( fulltbl || fullspd )
			flexerror( "Can't use -f or -F with -l option" );

		/* Don't rely on detecting use of yymore() and REJECT,
		 * just assume they'll be used.
		 */
		yymore_really_used = reject_really_used = true;

		yytext_is_array = true;
		use_read = false;
		}

	if ( (fulltbl || fullspd) && usemecs )
		flexerror( "-Cf/-CF and -Cm don't make sense together" );

	if ( (fulltbl || fullspd) && interactive )
		flexerror( "-Cf/-CF and -I are incompatible" );

	if ( fulltbl && fullspd )
		flexerror( "-Cf and -CF are mutually exclusive" );

	if ( C_plus_plus && fullspd )
		flexerror( "Can't use -+ with -CF option" );

	if ( ! use_stdout )
		{
		FILE *prev_stdout;
		char *suffix;
		int path_len;

		if ( C_plus_plus )
			suffix = "cc";
		else
			suffix = "c";

		/* The prefix comes straight from the command line, so
		 * make sure the formatted name fits in outfile_path before
		 * writing it.  The two "%s" in the template (4 characters)
		 * are replaced and so do not count toward the result.
		 */
		path_len = (int) strlen( outfile_template ) - 4 +
			(int) strlen( prefix ) + (int) strlen( suffix );

		if ( path_len >= (int) sizeof( outfile_path ) )
			flexerror( "prefix given with -P is too long" );

		sprintf( outfile_path, outfile_template, prefix, suffix );

		prev_stdout = freopen( outfile_path, "w", stdout );

		if ( prev_stdout == NULL )
			lerrsf( "could not create %s", outfile_path );

		outfile_created = 1;
		}

	/* Whatever follows the flags is the list of input files. */
	num_input_files = argc;
	input_files = argv;
	set_input_file( num_input_files > 0 ? input_files[0] : NULL );

	if ( backing_up_report )
		{
		char *backup_name;

#ifndef SHORT_FILE_NAMES
		backup_name = "lex.backup";
#else
		backup_name = "lex.bck";
#endif
		backing_up_file = fopen( backup_name, "w" );

		/* Report the name that was actually used. */
		if ( backing_up_file == NULL )
			lerrsf( "could not create %s", backup_name );
		}

	else
		backing_up_file = NULL;

	lastccl = 0;
	lastsc = 0;

	if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL )
		lerrsf( "can't open skeleton file %s", skelname );

	/* With a non-default prefix, map each "yy" name to its prefixed
	 * form at the top of the generated file.
	 */
	if ( strcmp( prefix, "yy" ) )
		{
#define GEN_PREFIX(name) printf( "#define yy%s %s%s\n", name, prefix, name );
		GEN_PREFIX( "FlexLexer" );
		GEN_PREFIX( "_create_buffer" );
		GEN_PREFIX( "_delete_buffer" );
		GEN_PREFIX( "_flex_debug" );
		GEN_PREFIX( "_init_buffer" );
		GEN_PREFIX( "_load_buffer_state" );
		GEN_PREFIX( "_switch_to_buffer" );
		GEN_PREFIX( "in" );
		GEN_PREFIX( "leng" );
		GEN_PREFIX( "lex" );
		GEN_PREFIX( "out" );
		GEN_PREFIX( "restart" );
		GEN_PREFIX( "text" );
		GEN_PREFIX( "wrap" );
		printf( "\n" );
		}

	lastdfa = lastnfa = 0;
	num_rules = num_eof_rules = default_rule = 0;
	numas = numsnpairs = tmpuses = 0;
	numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
	numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
	num_backing_up = onesp = numprots = 0;
	variable_trailing_context_rules = bol_needed = false;

	linenum = sectnum = 1;
	firstprot = NIL;

	/* Used in mkprot() so that the first proto goes in slot 1
	 * of the proto queue.
	 */
	lastprot = 1;

	if ( useecs )
		{
		/* Set up doubly-linked equivalence classes. */

		/* We loop all the way up to csize, since ecgroup[csize] is
		 * the position used for NUL characters.
		 */
		ecgroup[1] = NIL;

		for ( i = 2; i <= csize; ++i )
			{
			ecgroup[i] = i - 1;
			nextecm[i - 1] = i;
			}

		nextecm[csize] = NIL;
		}

	else
		{
		/* Put everything in its own equivalence class. */
		for ( i = 1; i <= csize; ++i )
			{
			ecgroup[i] = i;
			nextecm[i] = BAD_SUBSCRIPT;	/* to catch errors */
			}
		}

	set_up_initial_allocations();
	}
/* readin - read in the rules section of the input file(s)
 *
 * Writes the first section of the skeleton and a #line directive, runs
 * the parser, settles whether yymore() and REJECT are in use, prints the
 * performance notes requested with -p, writes the typedefs and
 * declarations that depend on the chosen options to stdout, and finishes
 * setting up the equivalence classes.  Terminates through flexend( 1 ) if
 * parsing fails or a syntax error was recorded.
 */
void readin()
	{
	skelout();

	line_directive_out( (FILE *) 0 );

	if ( yyparse() )
		{
		pinpoint_message( "fatal parse error" );
		flexend( 1 );
		}

	if ( syntaxerror )
		flexend( 1 );

	/* An explicit "really used" / "really not used" setting overrides
	 * whatever was detected while scanning the actions.
	 */
	if ( yymore_really_used == REALLY_USED )
		yymore_used = true;
	else if ( yymore_really_used == REALLY_NOT_USED )
		yymore_used = false;

	if ( reject_really_used == REALLY_USED )
		reject = true;
	else if ( reject_really_used == REALLY_NOT_USED )
		reject = false;

	if ( performance_report > 0 )
		{
		if ( lex_compat )
			{
			fprintf( stderr,
"-l AT&T lex compatibility option entails a large performance penalty\n" );
			fprintf( stderr,
" and may be the actual source of other reported performance penalties\n" );
			}

		/* The minor penalties are only reported for -pp. */
		if ( performance_report > 1 )
			{
			if ( interactive )
				fprintf( stderr,
		"-I (interactive) entails a minor performance penalty\n" );

			if ( yymore_used )
				fprintf( stderr,
			"yymore() entails a minor performance penalty\n" );
			}

		if ( reject )
			fprintf( stderr,
			"REJECT entails a large performance penalty\n" );

		if ( variable_trailing_context_rules )
			fprintf( stderr,
"Variable trailing context rules entail a large performance penalty\n" );
		}

	/* Remember whether REJECT itself was used before variable
	 * trailing context forces the reject machinery on.
	 */
	if ( reject )
		real_reject = true;

	if ( variable_trailing_context_rules )
		reject = true;

	if ( reject && (fulltbl || fullspd) )
		flexerror( real_reject ?
			"REJECT cannot be used with -f or -F" :
	"variable trailing context rules cannot be used with -f or -F" );

	puts( csize == 256 ?
		"typedef unsigned char YY_CHAR;" :
		"typedef char YY_CHAR;" );

	if ( C_plus_plus )
		{
		puts( "#define yytext_ptr yytext" );

		if ( interactive )
			puts( "#define YY_INTERACTIVE" );
		}

	if ( fullspd )
		printf(
		"typedef const struct yy_trans_info *yy_state_type;\n" );
	else if ( ! C_plus_plus )
		printf( "typedef int yy_state_type;\n" );

	if ( reject )
		printf( "\n#define YY_USES_REJECT\n" );

	if ( ddebug )
		puts( "\n#define FLEX_DEBUG" );

	if ( lex_compat )
		{
		printf( "FILE *yyin = stdin, *yyout = stdout;\n" );
		printf( "extern int yylineno;\n" );
		printf( "int yylineno = 1;\n" );
		}
	else if ( ! C_plus_plus )
		printf( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;\n" );

	if ( C_plus_plus )
		printf( "\n#include <FlexLexer.h>\n" );

	else if ( yytext_is_array )
		puts( "extern char yytext[];\n" );

	else
		{
		puts( "extern char *yytext;" );
		puts( "#define yytext_ptr yytext" );
		}

	numecs = useecs ? cre8ecs( nextecm, ecgroup, csize ) : csize;

	/* Now map the equivalence class for NUL to its expected place. */
	ecgroup[0] = ecgroup[csize];
	NUL_ec = ABS( ecgroup[0] );

	if ( useecs )
		ccl2ecl();
	}
/* set_up_initial_allocations - allocate memory for internal tables
 *
 * Sets each "current maximum" global to its INITIAL_* value and allocates
 * the arrays whose length is governed by that maximum.  nultrans is left
 * as a null pointer.
 */
void set_up_initial_allocations()
	{
	int n;

	/* Arrays with one entry per NFA state. */
	n = current_mns = INITIAL_MNS;
	firstst = allocate_integer_array( n );
	lastst = allocate_integer_array( n );
	finalst = allocate_integer_array( n );
	transchar = allocate_integer_array( n );
	trans1 = allocate_integer_array( n );
	trans2 = allocate_integer_array( n );
	accptnum = allocate_integer_array( n );
	assoc_rule = allocate_integer_array( n );
	state_type = allocate_integer_array( n );

	/* Arrays with one entry per rule. */
	n = current_max_rules = INITIAL_MAX_RULES;
	rule_type = allocate_integer_array( n );
	rule_linenum = allocate_integer_array( n );
	rule_useful = allocate_integer_array( n );

	/* Arrays with one entry per start condition. */
	n = current_max_scs = INITIAL_MAX_SCS;
	scset = allocate_integer_array( n );
	scbol = allocate_integer_array( n );
	scxclu = allocate_integer_array( n );
	sceof = allocate_integer_array( n );
	scname = allocate_char_ptr_array( n );
	actvsc = allocate_integer_array( n );

	/* Arrays with one entry per character class. */
	n = current_maxccls = INITIAL_MAX_CCLS;
	cclmap = allocate_integer_array( n );
	ccllen = allocate_integer_array( n );
	cclng = allocate_integer_array( n );

	/* Storage for the characters of all character classes. */
	n = current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
	ccltbl = allocate_Character_array( n );

	/* Only the limit is recorded here; nothing is allocated for it. */
	current_max_dfa_size = INITIAL_MAX_DFA_SIZE;

	/* The nxt/chk transition pair tables. */
	n = current_max_xpairs = INITIAL_MAX_XPAIRS;
	nxt = allocate_integer_array( n );
	chk = allocate_integer_array( n );

	/* The template transition table. */
	n = current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
	tnxt = allocate_integer_array( n );

	/* Arrays with one entry per DFA state. */
	n = current_max_dfas = INITIAL_MAX_DFAS;
	base = allocate_integer_array( n );
	def = allocate_integer_array( n );
	dfasiz = allocate_integer_array( n );
	accsiz = allocate_integer_array( n );
	dhash = allocate_integer_array( n );
	dss = allocate_int_ptr_array( n );
	dfaacc = allocate_dfaacc_union( n );

	nultrans = (int *) 0;
	}
/* usage - print a summary of the command-line options to stderr
 *
 * The synopsis line and the -T and -V lines include program_name; every
 * other line is fixed text.
 */
void usage()
	{
	FILE *out = stderr;

	fprintf( out,
"%s [-bcdfhilnpstvwBFILTV78+ -C[aefFmr] -Pprefix -Sskeleton] [file ...]\n",
		program_name );

	fputs( "\t-b generate backing-up information to lex.backup\n", out );
	fputs( "\t-c do-nothing POSIX option\n", out );
	fputs( "\t-d turn on debug mode in generated scanner\n", out );
	fputs( "\t-f generate fast, large scanner\n", out );
	fputs( "\t-h produce this help message\n", out );
	fputs( "\t-i generate case-insensitive scanner\n", out );
	fputs( "\t-l maximal compatibility with original lex\n", out );
	fputs( "\t-n do-nothing POSIX option\n", out );
	fputs( "\t-p generate performance report to stderr\n", out );
	fputs( "\t-s suppress default rule to ECHO unmatched text\n", out );
	fputs(
	"\t-t write generated scanner on stdout instead of lex.yy.c\n",
		out );
	fputs( "\t-v write summary of scanner statistics to stderr\n", out );
	fputs( "\t-w do not generate warnings\n", out );
	fputs( "\t-B generate batch scanner (opposite of -I)\n", out );
	fputs( "\t-F use alternative fast scanner representation\n", out );
	fputs( "\t-I generate interactive scanner (opposite of -B)\n", out );
	fputs( "\t-L suppress #line directives in scanner\n", out );
	fprintf( out, "\t-T %s should run in trace mode\n", program_name );
	fprintf( out, "\t-V report %s version\n", program_name );
	fputs( "\t-7 generate 7-bit scanner\n", out );
	fputs( "\t-8 generate 8-bit scanner\n", out );
	fputs( "\t-+ generate C++ scanner class\n", out );
	fputs(
	"\t-C specify degree of table compression (default is -Cem):\n",
		out );
	fputs(
	"\t\t-Ca trade off larger tables for better memory alignment\n",
		out );
	fputs( "\t\t-Ce construct equivalence classes\n", out );
	fputs(
	"\t\t-Cf do not compress scanner tables; use -f representation\n",
		out );
	fputs(
	"\t\t-CF do not compress scanner tables; use -F representation\n",
		out );
	fputs( "\t\t-Cm construct meta-equivalence classes\n", out );
	fputs( "\t\t-Cr use read() instead of stdio for scanner input\n",
		out );
	fputs( "\t-P specify scanner prefix other than \"yy\"\n", out );
	fputs( "\t-S specify skeleton file\n", out );
	}

773
usr.bin/lex/misc.c Normal file
View File

@ -0,0 +1,773 @@
/* misc - miscellaneous flex routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: misc.c,v 1.2 94/01/04 14:33:10 vern Exp $ */
#include "flexdef.h"
/* declare functions that have forward references */
void dataflush PROTO((void));
int otoi PROTO((Char []));
/* add_action - append text to the accumulated action array
 *
 * Doubles action_size (reallocating action_array) until new_text fits with
 * a little slop to spare, then copies new_text, including its terminating
 * NUL, to position action_index and advances action_index by the length
 * of new_text.
 */
void add_action( new_text )
char *new_text;
	{
	int text_len = strlen( new_text );

	while ( text_len + action_index >= action_size - 10 /* slop */ )
		{
		action_size *= 2;
		action_array = reallocate_character_array( action_array,
							action_size );
		}

	strcpy( action_array + action_index, new_text );
	action_index += text_len;
	}
/* allocate_array - allocate memory for an array of the given size
 *
 * Returns a block of size * element_size bytes obtained from flex_alloc().
 * Never returns on failure: flexfatal() is called if the byte count is not
 * positive or if the allocation itself fails.
 */
void *allocate_array( size, element_size )
int size, element_size;
	{
	register void *block;
	int num_bytes = element_size * size;

	/* On machines with 16-bit ints (e.g., 80286) the request might be
	 * for more than a signed int can hold; a non-positive product is
	 * the cheap test for that.
	 */
	if ( num_bytes <= 0 )
		flexfatal( "request for < 1 byte in allocate_array()" );

	block = flex_alloc( num_bytes );

	if ( block == NULL )
		flexfatal( "memory allocation failed in allocate_array()" );

	return block;
	}
/* all_lower - true if a string is all lower-case
 *
 * Returns 1 if every character of str is an ASCII lower-case letter
 * (so also for the empty string), 0 otherwise.
 */
int all_lower( str )
register char *str;
	{
	for ( ; *str; ++str )
		if ( ! (isascii( (Char) *str ) && islower( *str )) )
			return 0;

	return 1;
	}
/* all_upper - true if a string is all upper-case
 *
 * Returns 1 if every character of str is an ASCII upper-case letter
 * (so also for the empty string), 0 otherwise.
 */
int all_upper( str )
register char *str;
	{
	for ( ; *str; ++str )
		if ( ! (isascii( (Char) *str ) && isupper( *str )) )
			return 0;

	return 1;
	}
/* bubble - bubble sort an integer array in increasing order
 *
 * synopsis
 *   int v[n + 1], n;
 *   void bubble( v, n );
 *
 * description
 *   Sorts elements v[1] through v[n] in place into increasing order.
 *   Indexing is 1-based: v[0] is never read or written.
 *
 * passed
 *   v - the array to be sorted
 *   n - the number of elements of 'v' to be sorted
 */
void bubble( v, n )
int v[], n;
	{
	register int last, pos, tmp;

	for ( last = n; last > 1; --last )
		{
		for ( pos = 1; pos < last; ++pos )
			{
			if ( v[pos] <= v[pos + 1] )
				continue;	/* pair already in order */

			/* exchange */
			tmp = v[pos + 1];
			v[pos + 1] = v[pos];
			v[pos] = tmp;
			}
		}
	}
/* check_char - verify that a character is within the expected range
 *
 * Reports an error through lerrsf() if c is not below CSIZE, and again
 * if c is not below the current character-set size csize.
 */
void check_char( c )
int c;
	{
	if ( c >= CSIZE )
		{
		char *shown = readable_form( c );

		lerrsf( "bad character '%s' detected in check_char()", shown );
		}

	if ( c >= csize )
		{
		char *shown = readable_form( c );

		lerrsf( "scanner requires -8 flag to use the character '%s'",
			shown );
		}
	}
/* clower - replace upper-case letter to lower-case */
Char clower( c )
register int c;
{
return (Char) ((isascii( c ) && isupper( c )) ? tolower( c ) : c);
}
/* copy_string - returns a dynamically allocated copy of a string
 *
 * The copy is obtained from flex_alloc(); flexfatal() is called if the
 * allocation fails.
 */
char *copy_string( str )
register char *str;
	{
	char *copy;

	/* Room for every character plus the terminating NUL. */
	copy = (char *) flex_alloc( (strlen( str ) + 1) * sizeof( char ) );

	if ( copy == NULL )
		flexfatal( "dynamic memory failure in copy_string()" );

	strcpy( copy, str );

	return copy;
	}
/* copy_unsigned_string -
 *    returns a dynamically allocated copy of a (potentially) unsigned string
 *
 * The copy, including the terminating zero element, is placed in storage
 * obtained from allocate_Character_array().
 */
Char *copy_unsigned_string( str )
register Char *str;
	{
	register Char *scan = str;
	Char *copy;
	int i, len;

	/* find length */
	while ( *scan )
		++scan;

	len = scan - str;

	copy = allocate_Character_array( len + 1 );

	/* i == len copies the terminator as well. */
	for ( i = 0; i <= len; ++i )
		copy[i] = str[i];

	return copy;
	}
/* cshell - shell sort a character array in increasing order
 *
 * synopsis
 *
 *   Char v[n];
 *   int n, special_case_0;
 *   cshell( v, n, special_case_0 );
 *
 * description
 *   Does a shell sort of elements v[0] through v[n - 1].
 *   If special_case_0 is true, then any element equal to 0
 *   is instead treated as having infinite weight, i.e. it sorts
 *   after every non-zero element.
 *
 * passed
 *   v - array to be sorted
 *   n - number of elements of v to be sorted
 */
void cshell( v, n, special_case_0 )
Char v[];
int n, special_case_0;
	{
	int gap, i, lo, hi, in_order;
	Char tmp;

	for ( gap = n / 2; gap > 0; gap /= 2 )
		for ( i = gap; i < n; ++i )
			for ( lo = i - gap; lo >= 0; lo -= gap )
				{
				hi = lo + gap;

				if ( special_case_0 )
					/* A zero on the high side is "infinite"
					 * and so never out of place; a zero on
					 * the low side always is.
					 */
					in_order = v[hi] == 0 ||
						(v[lo] != 0 && v[lo] <= v[hi]);
				else
					in_order = v[lo] <= v[hi];

				if ( in_order )
					break;

				tmp = v[lo];
				v[lo] = v[hi];
				v[hi] = tmp;
				}
	}
/* dataend - finish up a block of data declarations
 *
 * Flushes any partially written data line, writes the terminator of the
 * initializer to stdout and resets the line/position counters.
 */
void dataend()
	{
	if ( datapos > 0 )
		dataflush();

	/* add terminator for initialization; { for vi */
	puts( " } ;\n" );

	dataline = datapos = 0;
	}
/* dataflush - flush generated data statements
 *
 * Ends the current output line on stdout and, after every NUMDATALINES
 * lines, emits an additional blank line.
 */
void dataflush()
	{
	putchar( '\n' );

	++dataline;

	if ( dataline >= NUMDATALINES )
		{
		/* A blank line groups the table into large blocks so
		 * that the user can find elements easily.
		 */
		putchar( '\n' );
		dataline = 0;
		}

	/* Nothing has been written on the new line yet. */
	datapos = 0;
	}
/* flexerror - report an error message and terminate
 *
 * Writes "<program_name>: <msg>" to stderr, then calls flexend( 1 ).
 */
void flexerror( msg )
char *msg;
	{
	(void) fprintf( stderr, "%s: %s\n", program_name, msg );

	flexend( 1 );
	}
/* flexfatal - report a fatal error message and terminate
 *
 * Writes "<program_name>: fatal internal error, <msg>" to stderr and
 * exits immediately with status 1 (without going through flexend()).
 */
void flexfatal( msg )
char *msg;
	{
	(void) fprintf( stderr, "%s: fatal internal error, %s\n",
			program_name, msg );

	exit( 1 );
	}
/* lerrif - report an error message formatted with one integer argument
 *
 * msg is a printf-style format containing one integer conversion; the
 * formatted text is handed to flexerror().  Formatting is bounded by the
 * size of the local buffer, so an over-long message is truncated rather
 * than written past the end of the buffer.
 */
void lerrif( msg, arg )
char msg[];
int arg;
	{
	char errmsg[MAXLINE];

	(void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
	flexerror( errmsg );
	}
/* lerrsf - report an error message formatted with one string argument
 *
 * msg is a printf-style format containing one %s conversion; the
 * formatted text is handed to flexerror().  arg may be arbitrarily long
 * (e.g. a file name), so formatting is bounded by the size of the local
 * buffer: an over-long message is truncated rather than overflowing it.
 */
void lerrsf( msg, arg )
char msg[], arg[];
	{
	char errmsg[MAXLINE];

	(void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
	flexerror( errmsg );
	}
/* htoi - convert a hexadecimal digit string to an integer value
 *
 * Returns the value of the leading hexadecimal number in str, or 0 if
 * str does not begin with one (e.g. it is empty).
 */
int htoi( str )
Char str[];
	{
	/* Start from 0: sscanf() stores nothing when no conversion is
	 * made, and the result must not be left indeterminate.
	 */
	unsigned int result = 0;

	(void) sscanf( (char *) str, "%x", &result );

	return result;
	}
/* is_hex_digit - returns true if a character is a valid hex digit, false
 *		   otherwise
 *
 * Accepts the decimal digits and the letters a-f in either case.
 */
int is_hex_digit( ch )
int ch;
	{
	int lc;

	if ( isdigit( ch ) )
		return 1;

	lc = clower( ch );

	return lc >= 'a' && lc <= 'f';
	}
/* line_directive_out - spit out a "# line" statement
 *
 * Does nothing unless an input file name is known and gen_line_dirs is
 * set.  If output_file is non-nil the directive is written to it;
 * otherwise it is appended to the accumulated actions.
 *
 * The file name is never copied into a fixed-size buffer, so a name of
 * any length is handled safely.
 */
void line_directive_out( output_file )
FILE *output_file;
	{
	/* Big enough for "# line ", any int, the opening quote and a NUL. */
	char prefix[32];

	if ( ! infilename || ! gen_line_dirs )
		return;

	if ( output_file )
		{
		fprintf( output_file, "# line %d \"%s\"\n",
			linenum, infilename );
		return;
		}

	/* output_file is nil: put the directive in the accumulated
	 * actions.  Successive add_action() calls append, so emitting the
	 * directive in three pieces yields the same text as one call.
	 */
	sprintf( prefix, "# line %d \"", linenum );
	add_action( prefix );
	add_action( infilename );
	add_action( "\"\n" );
	}
/* mark_defs1 - mark the current position in the action array as
 *		representing where the user's section 1 definitions end
 *		and the prolog begins
 *
 * Terminates the text accumulated so far with a NUL, records the
 * following position as both prolog_offset and action_offset, and leaves
 * an empty string there.
 */
void mark_defs1()
	{
	defs1_offset = 0;

	action_array[action_index] = '\0';
	++action_index;

	prolog_offset = action_index;
	action_offset = action_index;

	action_array[action_index] = '\0';
	}
/* mark_prolog - mark the current position in the action array as
 *		 representing the end of the action prolog
 *
 * Terminates the text accumulated so far with a NUL, records the
 * following position as action_offset, and leaves an empty string there.
 */
void mark_prolog()
	{
	action_array[action_index] = '\0';
	++action_index;

	action_offset = action_index;

	action_array[action_index] = '\0';
	}
/* mk2data - generate a data statement for a two-dimensional array
 *
 * Writes "value" to stdout as the next initializer of the current 2-D
 * array, starting a new output line once NUMDATAITEMS items have been
 * written on the current one.
 */
void mk2data( value )
int value;
	{
	if ( datapos >= NUMDATAITEMS )
		{
		/* Line is full: close it with a comma and start another. */
		putchar( ',' );
		dataflush();
		}

	if ( datapos != 0 )
		putchar( ',' );
	else
		/* Indent. */
		fputs( " ", stdout );

	datapos++;

	printf( "%5d", value );
	}
/* mkdata - generate a data statement
 *
 * Writes "value" to stdout as the initializer of the current array
 * element, starting a new output line once NUMDATAITEMS items have been
 * written on the current one.
 */
void mkdata( value )
int value;
	{
	if ( datapos >= NUMDATAITEMS )
		{
		/* Line is full: close it with a comma and start another. */
		putchar( ',' );
		dataflush();
		}

	if ( datapos != 0 )
		putchar( ',' );
	else
		/* Indent. */
		fputs( " ", stdout );

	datapos++;

	printf( "%5d", value );
	}
/* myctoi - return the integer represented by a string of digits
 *
 * Returns 0 if array does not begin with a decimal integer.
 */
int myctoi( array )
char array[];
	{
	int number;

	if ( sscanf( array, "%d", &number ) != 1 )
		number = 0;	/* nothing was converted */

	return number;
	}
/* myesc - return character corresponding to escape sequence
 *
 * array[1] selects the escape; array[0] is never examined here
 * (presumably it is the introducing backslash -- confirm against the
 * scanner).  Handles the simple C escapes \b \f \n \r \t \a \v, numeric
 * escapes consisting of a run of digits (converted by otoi()), and \x
 * followed by a run of hex digits (converted by htoi()).  Any other
 * character stands for itself.
 *
 * For the numeric forms the element just past the digits is temporarily
 * overwritten with a NUL so the digits can be converted in place; it is
 * restored before returning.
 */
Char myesc( array )
Char array[];
{
Char c, esc_char;
switch ( array[1] )
{
case 'b': return '\b';
case 'f': return '\f';
case 'n': return '\n';
case 'r': return '\r';
case 't': return '\t';
/* Pre-ANSI compilers do not know \a and \v, so spell them numerically. */
#ifdef __STDC__
case 'a': return '\a';
case 'v': return '\v';
#else
case 'a': return '\007';
case 'v': return '\013';
#endif
/* NOTE(review): '8' and '9' take the octal path as well, although they
 * are not octal digits -- confirm the scanner never passes them here.
 */
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
{ /* \<octal> */
int sptr = 1;
while ( isascii( array[sptr] ) &&
isdigit( array[sptr] ) )
/* Don't increment inside loop control
 * because if isdigit() is a macro it might
 * expand into multiple increments ...
 */
++sptr;
/* Temporarily terminate the digit run, convert it, then restore. */
c = array[sptr];
array[sptr] = '\0';
esc_char = otoi( array + 1 );
array[sptr] = c;
return esc_char;
}
case 'x':
{ /* \x<hex> */
/* The hex digits start after the 'x', hence index 2. */
int sptr = 2;
while ( isascii( array[sptr] ) &&
is_hex_digit( (char) array[sptr] ) )
/* Don't increment inside loop control
 * because if isdigit() is a macro it might
 * expand into multiple increments ...
 */
++sptr;
/* Temporarily terminate the digit run, convert it, then restore. */
c = array[sptr];
array[sptr] = '\0';
esc_char = htoi( array + 2 );
array[sptr] = c;
return esc_char;
}
default:
/* Not a recognized escape: the character represents itself. */
return array[1];
}
}
/* otoi - convert an octal digit string to an integer value
 *
 * Returns the value of the leading octal number in str, or 0 if str
 * does not begin with an octal digit (e.g. it is empty or starts with
 * '8' or '9').
 */
int otoi( str )
Char str[];
	{
	/* Start from 0: sscanf() stores nothing when no conversion is
	 * made, and the result must not be left indeterminate.
	 */
	unsigned int result = 0;

	(void) sscanf( (char *) str, "%o", &result );

	return result;
	}
/* readable_form - return the human-readable form of a character
 *
 * Control characters and values of 127 and above are rendered as a C
 * escape (named where one exists, otherwise a backslash followed by at
 * least three octal digits), a blank as ' ', and anything else as
 * itself.
 *
 * The returned string is either a constant or held in static storage
 * that is overwritten by the next call.
 */
char *readable_form( c )
register int c;
	{
	static char rform[10];
	int needs_escape = (c >= 0 && c < 32) || c >= 127;

	if ( ! needs_escape )
		{
		if ( c == ' ' )
			return "' '";

		rform[0] = c;
		rform[1] = '\0';
		return rform;
		}

	switch ( c )
		{
		case '\b': return "\\b";
		case '\f': return "\\f";
		case '\n': return "\\n";
		case '\r': return "\\r";
		case '\t': return "\\t";

#ifdef __STDC__
		case '\a': return "\\a";
		case '\v': return "\\v";
#endif

		default:
			break;
		}

	/* No named escape: fall back to an octal one. */
	(void) sprintf( rform, "\\%.3o", (unsigned int) c );

	return rform;
	}
/* reallocate_array - increase the size of a dynamic array
 *
 * Resizes array to size * element_size bytes via flex_realloc() and
 * returns the resulting block.  Never returns on failure: flexfatal()
 * is called if the byte count is not positive or the reallocation fails.
 */
void *reallocate_array( array, size, element_size )
void *array;
int size, element_size;
	{
	register void *resized;
	int num_bytes = size * element_size;

	/* Same worry as in allocate_array(): */
	if ( num_bytes <= 0 )
		flexfatal(
			"attempt to increase array size by less than 1 byte" );

	resized = flex_realloc( array, num_bytes );

	if ( resized == NULL )
		flexfatal( "attempt to increase array size failed" );

	return resized;
	}
/* skelout - write out one section of the skeleton file
 *
 * Description
 * Copies skelfile or skel array to stdout until a line beginning with
 * "%%" or EOF is found.
 *
 * Lines beginning with '%' are control lines and are never copied:
 *   %%  ends the section (the function returns)
 *   %+  copy the following lines only if C_plus_plus is set
 *   %-  copy the following lines only if C_plus_plus is not set
 *   %*  copy the following lines unconditionally
 * Any other character after the '%' is a fatal error.  Copying starts
 * out enabled on every call.
 */
void skelout()
{
char buf_storage[MAXLINE];
char *buf = buf_storage;
int do_copy = 1;
/* Loop pulling lines either from the skelfile, if we're using
 * one, or from the skel[] array.  In the array case buf is re-pointed
 * at the next skel[] entry and a 0 entry ends the loop.
 */
while ( skelfile ?
(fgets( buf, MAXLINE, skelfile ) != NULL) :
((buf = skel[skel_ind++]) != 0) )
{ /* copy from skel array */
if ( buf[0] == '%' )
{ /* control line */
switch ( buf[1] )
{
case '%':
return;
case '+':
do_copy = C_plus_plus;
break;
case '-':
do_copy = ! C_plus_plus;
break;
case '*':
do_copy = 1;
break;
default:
flexfatal(
"bad line in skeleton file" );
}
}
else if ( do_copy )
{
if ( skelfile )
/* Skeleton file reads include final
 * newline, skel[] array does not.
 */
fputs( buf, stdout );
else
printf( "%s\n", buf );
}
}
}
/* transition_struct_out - output a yy_trans_info structure
 *
 * Writes the two elements, element_v and element_n, to stdout.  A
 * newline is emitted once the running line position reaches 75, and an
 * extra blank line after every tenth such line.
 */
void transition_struct_out( element_v, element_n )
int element_v, element_n;
	{
	printf( "%7d, %5d,", element_v, element_n );

	datapos += TRANS_STRUCT_PRINT_LENGTH;

	if ( datapos < 75 )
		return;		/* still room on this line */

	putchar( '\n' );

	++dataline;
	if ( dataline % 10 == 0 )
		putchar( '\n' );

	datapos = 0;
	}
/* yy_flex_xmalloc - allocate size bytes or die
 *
 * The following is only needed when building flex's parser using certain
 * broken versions of bison.  Returns memory from flex_alloc(); calls
 * flexfatal() if the allocation fails.
 */
void *yy_flex_xmalloc( size )
int size;
	{
	void *block;

	block = flex_alloc( size );

	if ( block == NULL )
		flexfatal( "memory allocation failed in yy_flex_xmalloc()" );

	return block;
	}
/* zero_out - set a region of memory to 0
 *
 * Sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero.
 * Nothing is written if size_in_bytes is not positive.
 */
void zero_out( region_ptr, size_in_bytes )
char *region_ptr;
int size_in_bytes;
	{
	register int i;

	for ( i = 0; i < size_in_bytes; ++i )
		region_ptr[i] = 0;
	}

16
usr.bin/lex/mkskel.sh Executable file
View File

@ -0,0 +1,16 @@
#! /bin/sh

# Emit, on stdout, C source defining skel[]: one string per input line of
# the skeleton file(s) named on the command line (or of standard input if
# none are named), followed by a terminating 0 entry.

cat <<!
/* File created from flex.skel via mkskel.sh */
#include "flexdef.h"
char *skel[] = {
!

# Turn each input line into a C string literal: double every backslash,
# escape every double quote, then wrap the line in quotes with a trailing
# comma.  "$@" (rather than an unquoted $*) keeps file names that contain
# whitespace intact.
sed -e 's/\\/&&/g' -e 's/"/\\"/g' -e 's/.*/ "&",/' "$@"

cat <<!
0
};
!

709
usr.bin/lex/nfa.c Normal file
View File

@ -0,0 +1,709 @@
/* nfa - NFA construction routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: /home/daffy/u0/vern/flex/flex-2.4.7/RCS/nfa.c,v 1.2 94/08/03 11:13:29 vern Exp $ */
#include "flexdef.h"
/* declare functions that have forward references */
int dupmachine PROTO((int));
void mkxtion PROTO((int, int));
/* add_accept - add an accepting state to a machine
 *
 * accepting_number becomes mach's accepting number.
 */
void add_accept( mach, accepting_number )
int mach, accepting_number;
	{
	int accept_state;

	/* The accepting number must hang off an epsilon state.  If it were
	 * attached to a state with a non-epsilon out-transition, that state
	 * would accept BEFORE making the transition, i.e., one character
	 * too soon.
	 */
	if ( transchar[finalst[mach]] == SYM_EPSILON )
		{
		accptnum[finalst[mach]] = accepting_number;
		return;
		}

	/* The final state is not an epsilon state: append one. */
	accept_state = mkstate( SYM_EPSILON );
	accptnum[accept_state] = accepting_number;
	(void) link_machines( mach, accept_state );
	}
/* copysingl - make a given number of copies of a singleton machine
 *
 * synopsis
 *
 *   newsng = copysingl( singl, num );
 *
 *     newsng - a new machine: a fresh epsilon state followed by num
 *              duplicates of singl linked one after another
 *     singl  - a singleton machine
 *     num    - the number of copies of singl to be present in newsng
 */
int copysingl( singl, num )
int singl, num;
	{
	int result = mkstate( SYM_EPSILON );
	int remaining;

	for ( remaining = num; remaining > 0; --remaining )
		result = link_machines( result, dupmachine( singl ) );

	return result;
	}
/* dumpnfa - debugging routine to write out an nfa
 *
 * Writes one line per NFA state to stderr: the state number, its
 * transition symbol, its two transition targets and, if it has one, its
 * accepting number in brackets.  state1 only appears in the banner.
 */
void dumpnfa( state1 )
int state1;
	{
	int ns;

	fprintf( stderr,
		"\n\n********** beginning dump of nfa with start state %d\n",
		state1 );

	/* Ideally the loop would run from firstst[state1] to
	 * lastst[state1], but those aren't maintained properly when we
	 * "or" all of the rules together.  So rely on the knowledge that
	 * the machine starts at state 1 and ends at lastnfa.
	 */
	for ( ns = 1; ns <= lastnfa; ++ns )
		{
		fprintf( stderr, "state # %4d\t", ns );

		fprintf( stderr, "%3d: %4d, %4d",
			transchar[ns], trans1[ns], trans2[ns] );

		if ( accptnum[ns] != NIL )
			fprintf( stderr, " [%d]", accptnum[ns] );

		fprintf( stderr, "\n" );
		}

	fprintf( stderr, "********** end of dump\n" );
	}
/* dupmachine - make a duplicate of a given machine
 *
 * synopsis
 *
 * copy = dupmachine( mach );
 *
 * copy - holds duplicate of mach
 * mach - machine to be duplicated
 *
 * note that the copy of mach is NOT an exact duplicate; rather, all the
 * transition states values are adjusted so that the copy is self-contained,
 * as the original should have been.
 *
 * also note that the original MUST be contiguous, with its low and high
 * states accessible by the arrays firstst and lastst
 *
 * It is a fatal error if the loop below creates no states.
 */
int dupmachine( mach )
int mach;
{
int i, init, state_offset;
int state = 0;
int last = lastst[mach];
/* Copy the states of mach in order.  Each mkstate() call yields the next
 * state number, so "state - i" (copy minus original) is the same for
 * every state; adding it to a transition target redirects the transition
 * to the corresponding copied state.
 */
for ( i = firstst[mach]; i <= last; ++i )
{
state = mkstate( transchar[i] );
if ( trans1[i] != NO_TRANSITION )
{
mkxtion( finalst[state], trans1[i] + state - i );
/* Only epsilon states can carry a second out-transition. */
if ( transchar[i] == SYM_EPSILON &&
trans2[i] != NO_TRANSITION )
mkxtion( finalst[state],
trans2[i] + state - i );
}
accptnum[state] = accptnum[i];
}
/* state is still 0 only if the loop body never ran. */
if ( state == 0 )
flexfatal( "empty machine in dupmachine()" );
/* i is now last + 1, so this is again the copy-minus-original offset. */
state_offset = state - i + 1;
init = mach + state_offset;
/* Give the copy the same first/final/last bookkeeping, shifted. */
firstst[init] = firstst[mach] + state_offset;
finalst[init] = finalst[mach] + state_offset;
lastst[init] = lastst[mach] + state_offset;
return init;
}
/* finish_rule - finish up the processing for a rule
 *
 * An accepting number is added to the given machine. If variable_trail_rule
 * is true then the rule has trailing context and both the head and trail
 * are variable size. Otherwise if headcnt or trailcnt is non-zero then
 * the machine recognizes a pattern with trailing context and headcnt is
 * the number of characters in the matched part of the pattern, or zero
 * if the matched part has variable length. trailcnt is the number of
 * trailing context characters in the pattern, or zero if the trailing
 * context has variable length.
 *
 * Besides updating the rule tables, this appends the start of the rule's
 * generated action ("case N:", any trailing-context fix-up,
 * YY_USER_ACTION and a #line directive) to the accumulated actions.
 */
void finish_rule( mach, variable_trail_rule, headcnt, trailcnt )
int mach, variable_trail_rule, headcnt, trailcnt;
{
char action_text[MAXLINE];
add_accept( mach, num_rules );
/* We did this in new_rule(), but it often gets the wrong
 * number because we do it before we start parsing the current rule.
 */
rule_linenum[num_rules] = linenum;
/* If this is a continued action, then the line-number has already
 * been updated, giving us the wrong number.
 */
if ( continued_action )
--rule_linenum[num_rules];
/* Open the case label for this rule in the generated action code. */
sprintf( action_text, "case %d:\n", num_rules );
add_action( action_text );
if ( variable_trail_rule )
{
rule_type[num_rules] = RULE_VARIABLE;
if ( performance_report > 0 )
fprintf( stderr,
"Variable trailing context rule at line %d\n",
rule_linenum[num_rules] );
variable_trailing_context_rules = true;
}
else
{
rule_type[num_rules] = RULE_NORMAL;
if ( headcnt > 0 || trailcnt > 0 )
{
/* Do trailing context magic to not match the trailing
 * characters.
 */
char *scanner_cp = "yy_c_buf_p = yy_cp";
char *scanner_bp = "yy_bp";
add_action(
"*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
/* With a fixed-length head, position the end of the match headcnt
 * characters after its start; otherwise back up over the
 * fixed-length trailing context.
 */
if ( headcnt > 0 )
{
sprintf( action_text, "%s = %s + %d;\n",
scanner_cp, scanner_bp, headcnt );
add_action( action_text );
}
else
{
sprintf( action_text, "%s -= %d;\n",
scanner_cp, trailcnt );
add_action( action_text );
}
add_action(
"YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
}
}
/* Okay, in the action code at this point yytext and yyleng have
 * their proper final values for this rule, so here's the point
 * to do any user action. But don't do it for continued actions,
 * as that'll result in multiple YY_USER_ACTION's.
 */
if ( ! continued_action )
add_action( "YY_USER_ACTION\n" );
/* A nil file makes line_directive_out() append to the actions. */
line_directive_out( (FILE *) 0 );
}
/* link_machines - connect two machines together
 *
 * synopsis
 *
 *   new = link_machines( first, last );
 *
 *     new    - a machine constructed by connecting first to last
 *     first  - the machine whose successor is to be last
 *     last   - the machine whose predecessor is to be first
 *
 * note: this routine concatenates the machine first with the machine
 * last to produce a machine new which will pattern-match first first
 * and then last, and will fail if either of the sub-patterns fails.
 * The result is built in place on first (whose bookkeeping is updated
 * and which is returned); last itself is left untouched.  If either
 * argument is NIL the other is returned unchanged.
 */
int link_machines( first, last )
int first, last;
	{
	if ( first == NIL )
		return last;

	if ( last == NIL )
		return first;

	mkxtion( finalst[first], last );

	/* The combined machine ends where last ends and spans the state
	 * ranges of both.
	 */
	finalst[first] = finalst[last];
	firstst[first] = MIN( firstst[first], firstst[last] );
	lastst[first] = MAX( lastst[first], lastst[last] );

	return first;
	}
/* mark_beginning_as_normal - mark each "beginning" state in a machine
 *                            as being a "normal" (i.e., not trailing
 *                            context-associated) state
 *
 * The "beginning" states are the epsilon closure of the first state.
 * States already marked STATE_NORMAL end the recursion; any state type
 * other than normal or trailing-context is reported via flexerror().
 */
void mark_beginning_as_normal( mach )
register int mach;
	{
	int type = state_type[mach];

	if ( type == STATE_NORMAL )
		/* Oh, we've already visited here. */
		return;

	if ( type != STATE_TRAILING_CONTEXT )
		{
		flexerror( "bad state type in mark_beginning_as_normal()" );
		return;
		}

	state_type[mach] = STATE_NORMAL;

	/* Only epsilon transitions stay within the closure. */
	if ( transchar[mach] != SYM_EPSILON )
		return;

	if ( trans1[mach] != NO_TRANSITION )
		mark_beginning_as_normal( trans1[mach] );

	if ( trans2[mach] != NO_TRANSITION )
		mark_beginning_as_normal( trans2[mach] );
	}
/* mkbranch - make a machine that branches to two machines
 *
 * synopsis
 *
 *   branch = mkbranch( first, second );
 *
 *     branch - a machine which matches either first's pattern or second's
 *     first, second - machines whose patterns are to be or'ed (the | operator)
 *
 * Note that NEITHER first nor second is destroyed by the operation.  Also,
 * the resulting machine CANNOT be used with any other "mk" operation except
 * more mkbranch's.  Compare with mkor().  If either argument is
 * NO_TRANSITION the other is returned as is.
 */
int mkbranch( first, second )
int first, second;
	{
	int fork;

	if ( first == NO_TRANSITION )
		return second;

	if ( second == NO_TRANSITION )
		return first;

	/* A fresh epsilon state with one out-transition to each machine. */
	fork = mkstate( SYM_EPSILON );
	mkxtion( fork, first );
	mkxtion( fork, second );

	return fork;
	}
/* mkclos - convert a machine into a closure
 *
 * synopsis
 *   new = mkclos( state );
 *
 *     new - a new state which matches the closure of "state"
 *
 * The closure is built as an optional positive closure.
 */
int mkclos( state )
int state;
	{
	int one_or_more = mkposcl( state );

	return mkopt( one_or_more );
	}
/* mkopt - make a machine optional
 *
 * synopsis
 *
 * new = mkopt( mach );
 *
 * new - a machine which optionally matches whatever mach matched
 * mach - the machine to make optional
 *
 * notes:
 * 1. mach must be the last machine created
 * 2. mach is destroyed by the call
 */
int mkopt( mach )
int mach;
{
int eps;
/* Make sure the machine ends in a state that passes the
 * SUPER_FREE_EPSILON test, appending a fresh epsilon state if it
 * does not.
 */
if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
{
eps = mkstate( SYM_EPSILON );
mach = link_machines( mach, eps );
}
/* Can't skimp on the following if FREE_EPSILON(mach) is true because
 * some state interior to "mach" might point back to the beginning
 * for a closure.
 */
eps = mkstate( SYM_EPSILON );
mach = link_machines( eps, mach );
/* Add a transition from the machine's start straight to its final
 * state, so the body can be skipped entirely.
 */
mkxtion( mach, finalst[mach] );
return mach;
}
/* mkor - make a machine that matches either one of two machines
 *
 * synopsis
 *
 * new = mkor( first, second );
 *
 * new - a machine which matches either first's pattern or second's
 * first, second - machines whose patterns are to be or'ed (the | operator)
 *
 * note that first and second are both destroyed by the operation
 * the code is rather convoluted because an attempt is made to minimize
 * the number of epsilon states needed
 *
 * If either argument is NIL the other is returned unchanged.
 */
int mkor( first, second )
int first, second;
{
int eps, orend;
if ( first == NIL )
return second;
else if ( second == NIL )
return first;
else
{
/* See comment in mkopt() about why we can't use the first
 * state of "first" or "second" if they satisfy "FREE_EPSILON".
 */
eps = mkstate( SYM_EPSILON );
first = link_machines( eps, first );
/* Second out-transition of the new start state: into "second". */
mkxtion( first, second );
/* Pick the common end state ("orend").  Reuse the final state of one
 * of the machines if it passes SUPER_FREE_EPSILON and carries no
 * accepting number; otherwise append a new epsilon state to "first".
 * In every case the other machine's final state is linked to orend.
 */
if ( SUPER_FREE_EPSILON(finalst[first]) &&
accptnum[finalst[first]] == NIL )
{
orend = finalst[first];
mkxtion( finalst[second], orend );
}
else if ( SUPER_FREE_EPSILON(finalst[second]) &&
accptnum[finalst[second]] == NIL )
{
orend = finalst[second];
mkxtion( finalst[first], orend );
}
else
{
eps = mkstate( SYM_EPSILON );
first = link_machines( first, eps );
orend = finalst[first];
mkxtion( finalst[second], orend );
}
}
/* The combined machine ends at the shared end state. */
finalst[first] = orend;
return first;
}
/* mkposcl - convert a machine into a positive closure
 *
 * synopsis
 *   new = mkposcl( state );
 *
 *     new - a machine matching the positive closure of "state"
 *
 * The closure is formed by adding a transition from the end of the
 * machine back to its start, via a new epsilon state when the existing
 * final state cannot take the transition itself.
 */
int mkposcl( state )
int state;
	{
	int loop_state;

	if ( ! SUPER_FREE_EPSILON(finalst[state]) )
		{
		loop_state = mkstate( SYM_EPSILON );
		mkxtion( loop_state, state );
		return link_machines( state, loop_state );
		}

	/* The final state can carry the back-transition directly. */
	mkxtion( finalst[state], state );

	return state;
	}
/* mkrep - make a replicated machine
 *
 * synopsis
 *   new = mkrep( mach, lb, ub );
 *
 *     new - a machine that matches whatever "mach" matched from "lb"
 *           number of times to "ub" number of times
 *
 * note
 *   if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
 *
 * The result is mach itself, followed by lb - 1 required copies, followed
 * by the optional part (a closure for INFINITY, otherwise ub - lb nested
 * optional copies).
 */
int mkrep( mach, lb, ub )
int mach, lb, ub;
	{
	int required, optional, dup, count;

	required = copysingl( mach, lb - 1 );

	if ( ub == INFINITY )
		{
		dup = dupmachine( mach );
		optional = mkclos( dup );
		}
	else
		{
		optional = mkstate( SYM_EPSILON );

		for ( count = lb; count < ub; ++count )
			{
			dup = dupmachine( mach );
			optional = mkopt( link_machines( dup, optional ) );
			}
		}

	return link_machines( mach, link_machines( required, optional ) );
	}
/* mkstate - create a state with a transition on a given symbol
 *
 * synopsis
 *
 * state = mkstate( sym );
 *
 * state - a new state matching sym
 * sym - the symbol the new state is to have an out-transition on
 *
 * note that this routine makes new states in ascending order through the
 * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
 * relies on machines being made in ascending order and that they are
 * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
 * that it admittedly is)
 *
 * The per-state arrays are grown by MNS_INCREMENT entries whenever the
 * new state number reaches current_mns; reaching MAXIMUM_MNS is reported
 * as an error.
 */
int mkstate( sym )
int sym;
{
if ( ++lastnfa >= current_mns )
{
if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
lerrif(
"input rules are too complicated (>= %d NFA states)",
current_mns );
++num_reallocs;
/* Every per-state array must grow in step. */
firstst = reallocate_integer_array( firstst, current_mns );
lastst = reallocate_integer_array( lastst, current_mns );
finalst = reallocate_integer_array( finalst, current_mns );
transchar = reallocate_integer_array( transchar, current_mns );
trans1 = reallocate_integer_array( trans1, current_mns );
trans2 = reallocate_integer_array( trans2, current_mns );
accptnum = reallocate_integer_array( accptnum, current_mns );
assoc_rule =
reallocate_integer_array( assoc_rule, current_mns );
state_type =
reallocate_integer_array( state_type, current_mns );
}
/* The new state starts out as a one-state machine with no
 * out-transitions and no accepting number.
 */
firstst[lastnfa] = lastnfa;
finalst[lastnfa] = lastnfa;
lastst[lastnfa] = lastnfa;
transchar[lastnfa] = sym;
trans1[lastnfa] = NO_TRANSITION;
trans2[lastnfa] = NO_TRANSITION;
accptnum[lastnfa] = NIL;
assoc_rule[lastnfa] = num_rules;
state_type[lastnfa] = current_state_type;
/* Fix up equivalence classes based on this transition. Note that any
 * character which has its own transition gets its own equivalence
 * class. Thus only characters which are only in character classes
 * have a chance at being in the same equivalence class. E.g. "a|b"
 * puts 'a' and 'b' into two different equivalence classes. "[ab]"
 * puts them in the same equivalence class (barring other differences
 * elsewhere in the input).
 */
if ( sym < 0 )
{
/* We don't have to update the equivalence classes since
 * that was already done when the ccl was created for the
 * first time.
 */
}
else if ( sym == SYM_EPSILON )
++numeps;
else
{
check_char( sym );
if ( useecs )
/* Map NUL's to csize. */
mkechar( sym ? sym : csize, nextecm, ecgroup );
}
return lastnfa;
}
/* mkxtion - make a transition from one state to another
 *
 * synopsis
 *
 *   mkxtion( statefrom, stateto );
 *
 *     statefrom - the state from which the transition is to be made
 *     stateto   - the state to which the transition is to be made
 *
 * A state has room for one out-transition, or two if it is an epsilon
 * state; asking for more is a fatal error.
 */
void mkxtion( statefrom, stateto )
int statefrom, stateto;
	{
	if ( trans1[statefrom] == NO_TRANSITION )
		{
		trans1[statefrom] = stateto;
		return;
		}

	if ( transchar[statefrom] == SYM_EPSILON &&
	     trans2[statefrom] == NO_TRANSITION )
		{ /* second out-transition for an epsilon state */
		++eps2;
		trans2[statefrom] = stateto;
		return;
		}

	flexfatal( "found too many transitions in mkxtion()" );
	}
/* new_rule - initialize for a new rule
 *
 * Advances num_rules, growing the per-rule arrays by MAX_RULES_INCREMENT
 * entries when needed, reports an error if there are more than MAX_RULE
 * rules, and records the current line number for the rule and marks it
 * as not (yet) useful.
 */
void new_rule()
	{
	++num_rules;

	if ( num_rules >= current_max_rules )
		{
		++num_reallocs;
		current_max_rules += MAX_RULES_INCREMENT;

		rule_type =
			reallocate_integer_array( rule_type, current_max_rules );
		rule_linenum =
			reallocate_integer_array( rule_linenum, current_max_rules );
		rule_useful =
			reallocate_integer_array( rule_useful, current_max_rules );
		}

	if ( num_rules > MAX_RULE )
		lerrif( "too many rules (> %d)!", MAX_RULE );

	rule_linenum[num_rules] = linenum;
	rule_useful[num_rules] = false;
	}

817
usr.bin/lex/parse.y Normal file
View File

@ -0,0 +1,817 @@
/* parse.y - parser for flex input */
%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
%{
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.15 93/12/09 13:57:23 vern Exp $ */
/* Some versions of bison are broken in that they use alloca() but don't
* declare it properly. The following is the patented (just kidding!)
* #ifdef chud to fix the problem, courtesy of Francois Pinard.
*/
#ifdef YYBISON
/* AIX requires this to be the first thing in the file. */
#ifdef __GNUC__
#define alloca __builtin_alloca
#else /* not __GNUC__ */
#if HAVE_ALLOCA_H
#include <alloca.h>
#else /* not HAVE_ALLOCA_H */
#ifdef _AIX
#pragma alloca
#else /* not _AIX */
char *alloca ();
#endif /* not _AIX */
#endif /* not HAVE_ALLOCA_H */
#endif /* not __GNUC__ */
#endif /* YYBISON */
/* Bletch, ^^^^ that was ugly! */
#include "flexdef.h"
/* Working state shared by the grammar actions:
 *   pat       - machine for the rule (or default rule) being finished
 *   scnum     - start condition number returned by sclookup()
 *   eps       - epsilon state used when expanding a trailing '$'
 *   headcnt, trailcnt, rulelen - length bookkeeping for trailing context
 *   anyccl    - number of the character class created for '.'
 *   lastchar  - last character added to the ccl currently being built
 *   i         - scratch loop index
 *   actvp     - number of entries currently stored in actvsc[]
 */
int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
/* Per-rule / per-ccl flags:
 *   trlcontxt           - trailing context has been seen in this rule
 *   xcluflg             - the start conditions being declared are exclusive
 *   cclsorted           - the ccl being built is still in ascending order
 *   varlength           - the pattern parsed so far has variable length
 *   variable_trail_rule - the rule needs variable trailing context handling
 */
int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
/* Allocated at the end of section 1 with lastsc + 1 zeroed entries. */
int *active_ss;
Char clower();
void build_eof_action();
void yyerror();
static int madeany = false; /* whether we've made the '.' character class */
int previous_continued_action; /* whether the previous rule's action was '|' */
/* On some over-ambitious machines, such as DEC Alpha's, the default
* token type is "long" instead of "int"; this leads to problems with
* declaring yylval in flexdef.h. But so far, all the yacc's I've seen
* wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
* following should ensure that the default token type is "int".
*/
#define YYSTYPE int
%}
%%
/* goal - the complete input: section 1, the section separator and section 2.
 * Once all rules have been parsed, the action appends the default rule,
 * built from a freshly created and then negated character class, to the
 * scset[] entry of every start condition.
 */
goal : initlex sect1 sect1end sect2 initforrule
{ /* add default rule */
int def_rule;
pat = cclinit();
cclnegate( pat );
def_rule = mkstate( -pat );
/* Remember the number of the default rule so we
* don't generate "can't match" warnings for it.
*/
default_rule = num_rules;
finish_rule( def_rule, false, 0, 0 );
for ( i = 1; i <= lastsc; ++i )
scset[i] = mkbranch( scset[i], def_rule );
/* The default action is ECHO unless spprdflt is set, in which
* case the generated scanner reports a fatal error instead.
*/
if ( spprdflt )
add_action(
"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
else
add_action( "ECHO" );
add_action( ";\n\tYY_BREAK\n" );
}
;
/* initlex - empty production reduced before anything else is parsed; it
 * installs the "INITIAL" start condition (non-exclusive) and clears the
 * active start condition list.
 */
initlex :
{ /* initialize for processing rules */
/* Create default DFA start condition. */
scinstal( "INITIAL", false );
/* Initially, the start condition scoping is
* "no start conditions active".
*/
actvp = 0;
}
;
/* sect1 - section 1 as seen by the parser: a possibly empty sequence of
 * start condition declaration lines.  A line that fails to parse is
 * skipped up to its newline and reported through synerr().
 */
sect1 : sect1 startconddecl WHITESPACE namelist1 '\n'
|
| error '\n'
{ synerr( "unknown error processing section 1" ); }
;
/* sect1end - the separator that ends section 1.  lastsc is final at this
 * point, so active_ss can be sized (lastsc + 1 entries) and cleared.
 */
sect1end : SECTEND
{
/* We now know how many start conditions there
* are, so create the "activity" map indicating
* which conditions are active.
*/
active_ss = allocate_integer_array( lastsc + 1 );
for ( i = 1; i <= lastsc; ++i )
active_ss[i] = 0;
}
;
/* startconddecl - the keyword opening a start condition declaration.  It
 * records in xcluflg whether the names that follow are exclusive (XSCDECL)
 * or not (SCDECL); namelist1 passes that flag on to scinstal().
 */
startconddecl : SCDECL
{ xcluflg = false; }
| XSCDECL
{ xcluflg = true; }
;
/* namelist1 - whitespace-separated start condition names in a declaration.
 * Each name arrives in nmstr and is installed with the current xcluflg.
 */
namelist1 : namelist1 WHITESPACE NAME
{ scinstal( nmstr, xcluflg ); }
| NAME
{ scinstal( nmstr, xcluflg ); }
| error
{ synerr( "bad start condition list" ); }
;
/* sect2 - section 2: zero or more rules, each preceded by the per-rule
 * initialization in initforrule and terminated by a '\n' token.
 */
sect2 : sect2 initforrule flexrule '\n'
|
;
/* initforrule - empty production reduced before each rule.  It resets the
 * trailing context flags and length counters, remembers whether the
 * previous rule's action was continued, and calls new_rule().
 */
initforrule :
{
/* Initialize for a parse of one rule. */
trlcontxt = variable_trail_rule = varlength = false;
trailcnt = headcnt = rulelen = 0;
current_state_type = STATE_NORMAL;
previous_continued_action = continued_action;
new_rule();
}
;
/* flexrule - one complete rule, in one of seven shapes:
 *   scon '^' rule - anchored rule for the start conditions in actvsc[]
 *   scon rule     - unanchored rule for the start conditions in actvsc[]
 *   '^' rule      - anchored rule for every non-exclusive start condition
 *   rule          - unanchored rule for every non-exclusive start condition
 *   scon EOF_OP   - <<EOF>> rule for the start conditions in actvsc[]
 *   EOF_OP        - <<EOF>> rule for every condition still lacking one
 *   error         - anything else; reported as "unrecognized rule"
 * Anchored ('^') rules are branched into scbol[], all others into scset[].
 */
flexrule : scon '^' rule
{
pat = $3;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
for ( i = 1; i <= actvp; ++i )
scbol[actvsc[i]] =
mkbranch( scbol[actvsc[i]], pat );
/* Note, once, that beginning-of-line tracking is required. */
if ( ! bol_needed )
{
bol_needed = true;
if ( performance_report > 1 )
pinpoint_message(
"'^' operator results in sub-optimal performance" );
}
}
| scon rule
{
pat = $2;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
for ( i = 1; i <= actvp; ++i )
scset[actvsc[i]] =
mkbranch( scset[actvsc[i]], pat );
}
| '^' rule
{
pat = $2;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
/* Add to all non-exclusive start conditions,
* including the default (0) start condition.
*/
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scbol[i] = mkbranch( scbol[i], pat );
if ( ! bol_needed )
{
bol_needed = true;
if ( performance_report > 1 )
pinpoint_message(
"'^' operator results in sub-optimal performance" );
}
}
| rule
{
pat = $1;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scset[i] = mkbranch( scset[i], pat );
}
| scon EOF_OP
{ build_eof_action(); }
| EOF_OP
{
/* This EOF applies to all start conditions
* which don't already have EOF actions.
*/
actvp = 0;
for ( i = 1; i <= lastsc; ++i )
if ( ! sceof[i] )
actvsc[++actvp] = i;
if ( actvp == 0 )
warn(
"all start conditions already have <<EOF>> rules" );
else
build_eof_action();
}
| error
{ synerr( "unrecognized rule" ); }
;
/* scon - a start condition prefix.  "<name,...>" leaves the named
 * conditions in actvsc[1..actvp] (filled in by sconname); "<*>" fills
 * actvsc[] with every start condition from 1 to lastsc.
 */
scon : '<' namelist2 '>'
| '<' '*' '>'
{
actvp = 0;
for ( i = 1; i <= lastsc; ++i )
actvsc[++actvp] = i;
}
;
/* namelist2 - comma-separated start condition names inside "<...>".  The
 * mid-rule action resets actvp before the first name is processed.
 */
namelist2 : namelist2 ',' sconname
| { actvp = 0; } sconname
| error
{ synerr( "bad start condition list" ); }
;
/* sconname - one start condition name inside "<...>".  The name (in nmstr)
 * is looked up; an unknown name is reported, a known one is appended to
 * actvsc[] unless that would reach current_max_scs entries.
 */
sconname : NAME
{
/* sclookup() yields 0 for a name that was never declared. */
if ( (scnum = sclookup( nmstr )) == 0 )
format_pinpoint_message(
"undeclared start condition %s",
nmstr );
else
{
if ( ++actvp >= current_max_scs )
/* Some bozo has included multiple
* instances of start condition names.
*/
pinpoint_message(
"too many start conditions in <> construct!" );
else
actvsc[actvp] = scnum;
}
}
;
/* rule - a pattern together with its optional trailing context:
 *   re2 re     - "head/trail"; $1 is the head machine, $2 the trail
 *   re2 re '$' - rejected, trailing context was given twice
 *   re '$'     - '$' is expanded into trailing context of a single '\n'
 *   re         - a pattern with no trailing context operator here
 * Whenever the trailing context cannot be handled with fixed counts
 * (lex_compat, a variable-length part, or a preceding '|' action), the
 * head is marked with YY_TRAILING_HEAD_MASK and variable_trail_rule is set.
 */
rule : re2 re
{
if ( transchar[lastst[$2]] != SYM_EPSILON )
/* Provide final transition \now/ so it
* will be marked as a trailing context
* state.
*/
$2 = link_machines( $2,
mkstate( SYM_EPSILON ) );
mark_beginning_as_normal( $2 );
current_state_type = STATE_NORMAL;
if ( previous_continued_action )
{
/* We need to treat this as variable trailing
* context so that the backup does not happen
* in the action but before the action switch
* statement. If the backup happens in the
* action, then the rules "falling into" this
* one's action will *also* do the backup,
* erroneously.
*/
if ( ! varlength || headcnt != 0 )
warn(
"trailing context made variable due to preceding '|' action" );
/* Mark as variable. */
varlength = true;
headcnt = 0;
}
if ( lex_compat || (varlength && headcnt == 0) )
{ /* variable trailing context rule */
/* Mark the first part of the rule as the
* accepting "head" part of a trailing
* context rule.
*
* By the way, we didn't do this at the
* beginning of this production because back
* then current_state_type was set up for a
* trail rule, and add_accept() can create
* a new state ...
*/
add_accept( $1,
num_rules | YY_TRAILING_HEAD_MASK );
variable_trail_rule = true;
}
else
/* rulelen was reset by re2, so it now holds the
* length of the trailing part only.
*/
trailcnt = rulelen;
$$ = link_machines( $1, $2 );
}
| re2 re '$'
{ synerr( "trailing context used twice" ); }
| re '$'
{
/* The trailing context is exactly one character, '\n'. */
headcnt = 0;
trailcnt = 1;
rulelen = 1;
varlength = false;
current_state_type = STATE_TRAILING_CONTEXT;
if ( trlcontxt )
{
synerr( "trailing context used twice" );
$$ = mkstate( SYM_EPSILON );
}
else if ( previous_continued_action )
{
/* See the comment in the rule for "re2 re"
* above.
*/
warn(
"trailing context made variable due to preceding '|' action" );
varlength = true;
}
if ( lex_compat || varlength )
{
/* Again, see the comment in the rule for
* "re2 re" above.
*/
add_accept( $1,
num_rules | YY_TRAILING_HEAD_MASK );
variable_trail_rule = true;
}
trlcontxt = true;
eps = mkstate( SYM_EPSILON );
$$ = link_machines( $1,
link_machines( eps, mkstate( '\n' ) ) );
}
| re
{
$$ = $1;
/* trlcontxt can only be set here if trailing context was
* recorded while parsing $1.
*/
if ( trlcontxt )
{
if ( lex_compat || (varlength && headcnt == 0) )
/* Both head and trail are
* variable-length.
*/
variable_trail_rule = true;
else
trailcnt = rulelen;
}
}
;
/* re - a regular expression: one or more series separated by '|'.  Any
 * alternation marks the pattern as variable-length.
 */
re : re '|' series
{
varlength = true;
$$ = mkor( $1, $3 );
}
| series
{ $$ = $1; }
;
/* re2 - the head of a trailing context rule, i.e. everything up to and
 * including the '/'.  It records the head length (when it is fixed),
 * restarts rulelen for the trailing part and switches the state type to
 * STATE_TRAILING_CONTEXT.
 */
re2 : re '/'
{
/* This rule is written separately so the
* reduction will occur before the trailing
* series is parsed.
*/
if ( trlcontxt )
synerr( "trailing context used twice" );
else
trlcontxt = true;
if ( varlength )
/* We hope the trailing context is
* fixed-length.
*/
varlength = false;
else
headcnt = rulelen;
rulelen = 0;
current_state_type = STATE_TRAILING_CONTEXT;
$$ = $1;
}
;
/* series - one or more singletons in sequence; their machines are linked
 * together in the order in which they appear.
 */
series : series singleton
{
/* This is where concatenation of adjacent patterns
* gets done.
*/
$$ = link_machines( $1, $2 );
}
| singleton
{ $$ = $1; }
;
/* singleton - a single pattern element, optionally followed by a
 * repetition operator:
 *   s*  s+  s?            - closure, positive closure, optional
 *   s{n,m}  s{n,}  s{n}   - bounded, open-ended and exact repetition
 *   .                     - the negated class containing only '\n'
 *   fullccl, PREVCCL      - a new or previously seen character class
 *   "string", ( re )      - quoted text and grouping
 *   CHAR                  - a single character
 * Every repetition operator sets varlength; every alternative that matches
 * exactly one character increments rulelen.
 */
singleton : singleton '*'
{
varlength = true;
$$ = mkclos( $1 );
}
| singleton '+'
{
varlength = true;
$$ = mkposcl( $1 );
}
| singleton '?'
{
varlength = true;
$$ = mkopt( $1 );
}
| singleton '{' NUMBER ',' NUMBER '}'
{
varlength = true;
if ( $3 > $5 || $3 < 0 )
{
synerr( "bad iteration values" );
$$ = $1;
}
else
{
if ( $3 == 0 )
{
if ( $5 <= 0 )
{
synerr(
"bad iteration values" );
$$ = $1;
}
else
/* {0,m} is built as an optional {1,m}. */
$$ = mkopt(
mkrep( $1, 1, $5 ) );
}
else
$$ = mkrep( $1, $3, $5 );
}
}
| singleton '{' NUMBER ',' '}'
{
varlength = true;
if ( $3 <= 0 )
{
synerr( "iteration value must be positive" );
$$ = $1;
}
else
$$ = mkrep( $1, $3, INFINITY );
}
| singleton '{' NUMBER '}'
{
/* The singleton could be something like "(foo)",
* in which case we have no idea what its length
* is, so we punt here.
*/
varlength = true;
if ( $3 <= 0 )
{
synerr( "iteration value must be positive" );
$$ = $1;
}
else
$$ = link_machines( $1,
copysingl( $1, $3 - 1 ) );
}
| '.'
{
/* The class for '.' is built on first use and then reused. */
if ( ! madeany )
{
/* Create the '.' character class. */
anyccl = cclinit();
ccladd( anyccl, '\n' );
cclnegate( anyccl );
if ( useecs )
mkeccl( ccltbl + cclmap[anyccl],
ccllen[anyccl], nextecm,
ecgroup, csize, csize );
madeany = true;
}
++rulelen;
$$ = mkstate( -anyccl );
}
| fullccl
{
if ( ! cclsorted )
/* Sort characters for fast searching. We
* use a shell sort since this list could
* be large.
*/
cshell( ccltbl + cclmap[$1], ccllen[$1], true );
if ( useecs )
mkeccl( ccltbl + cclmap[$1], ccllen[$1],
nextecm, ecgroup, csize, csize );
++rulelen;
$$ = mkstate( -$1 );
}
| PREVCCL
{
++rulelen;
$$ = mkstate( -$1 );
}
| '"' string '"'
{ $$ = $2; }
| '(' re ')'
{ $$ = $2; }
| CHAR
{
++rulelen;
/* With caseins set, upper-case letters are folded first. */
if ( caseins && $1 >= 'A' && $1 <= 'Z' )
$1 = clower( $1 );
$$ = mkstate( $1 );
}
;
/* fullccl - a bracketed character class; a leading '^' negates it.  The
 * value is the class number produced by ccl.
 */
fullccl : '[' ccl ']'
{ $$ = $2; }
| '[' '^' ccl ']'
{
cclnegate( $3 );
$$ = $3;
}
;
/* ccl - the contents of a character class: single characters and "a-z"
 * ranges appended to a class that the empty alternative creates.
 * cclsorted and lastchar track whether the characters were added in
 * ascending order, so singleton can skip the sort when they were.
 */
ccl : ccl CHAR '-' CHAR
{
if ( caseins )
{
if ( $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
if ( $4 >= 'A' && $4 <= 'Z' )
$4 = clower( $4 );
}
if ( $2 > $4 )
synerr( "negative range in character class" );
else
{
for ( i = $2; i <= $4; ++i )
ccladd( $1, i );
/* Keep track if this ccl is staying in
* alphabetical order.
*/
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $4;
}
$$ = $1;
}
| ccl CHAR
{
if ( caseins && $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
ccladd( $1, $2 );
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $2;
$$ = $1;
}
|
{
/* Start of a new class: nothing added yet, so it is sorted. */
cclsorted = true;
lastchar = 0;
$$ = cclinit();
}
;
/* string - the contents of a quoted string.  It starts as a single epsilon
 * state; each character appends one state and adds one to rulelen.
 */
string : string CHAR
{
if ( caseins && $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
++rulelen;
$$ = link_machines( $1, mkstate( $2 ) );
}
|
{ $$ = mkstate( SYM_EPSILON ); }
;
%%
/* build_eof_action - build the "<<EOF>>" action for the active start
 * conditions
 *
 * For each start condition in actvsc[1..actvp] that has no EOF rule yet,
 * the condition is marked in sceof[] and a "case YY_STATE_EOF(name):"
 * label is appended to the action text.  A condition that already has an
 * EOF rule gets a "multiple <<EOF>> rules" diagnostic instead.
 */
void build_eof_action()
	{
	register int i;

	for ( i = 1; i <= actvp; ++i )
		{
		if ( sceof[actvsc[i]] )
			format_pinpoint_message(
		"multiple <<EOF>> rules for start condition %s",
				scname[actvsc[i]] );

		else
			{
			sceof[actvsc[i]] = true;

			/* Append the label piece by piece instead of
			 * formatting it into a fixed MAXLINE buffer, so
			 * that a very long start condition name can
			 * neither overrun the stack nor be truncated.
			 */
			add_action( "case YY_STATE_EOF(" );
			add_action( scname[actvsc[i]] );
			add_action( "):\n" );
			}
		}

	line_directive_out( (FILE *) 0 );

	/* This isn't a normal rule after all - don't count it as
	 * such, so we don't have any holes in the rule numbering
	 * (which make generating "rule can never match" warnings
	 * more difficult).
	 */
	--num_rules;
	++num_eof_rules;
	}
/* format_synerr - write out formatted syntax error
 *
 * msg is a printf-style format and arg the string substituted into it; the
 * callers in the scanner pass formats containing a single "%s".  The
 * formatted text is handed to synerr().
 */
void format_synerr( msg, arg )
char msg[], arg[];
	{
	char errmsg[MAXLINE];

	/* msg and arg together may be longer than MAXLINE, so bound the
	 * write; an over-long message is truncated rather than overrunning
	 * the buffer.
	 */
	(void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
	synerr( errmsg );
	}
/* synerr - report a syntax error
 *
 * Sets the global syntaxerror flag and prints str, tagged with the
 * current input line, through pinpoint_message().
 */
void synerr( str )
char str[];
{
syntaxerror = true;
pinpoint_message( str );
}
/* warn - report a warning, unless -w was given
 *
 * Reports str at the current input line (linenum).  The suppression check
 * itself is made by line_warning().
 */
void warn( str )
char str[];
{
line_warning( str, linenum );
}
/* format_pinpoint_message - write out a message formatted with one string,
 * pinpointing its location
 *
 * msg is a printf-style format and arg the string substituted into it.
 * The formatted text is handed to pinpoint_message().
 */
void format_pinpoint_message( msg, arg )
char msg[], arg[];
	{
	char errmsg[MAXLINE];

	/* msg and arg together may be longer than MAXLINE, so bound the
	 * write; an over-long message is truncated rather than overrunning
	 * the buffer.
	 */
	(void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
	pinpoint_message( errmsg );
	}
/* pinpoint_message - write out a message, pinpointing its location
 *
 * The location used is the current input line, linenum.
 */
void pinpoint_message( str )
char str[];
{
line_pinpoint( str, linenum );
}
/* line_warning - report a warning at a given line, unless -w was given
 *
 * str is the warning text and line the input line it refers to.  Nothing
 * is printed when the global nowarn flag is set.
 */
void line_warning( str, line )
char str[];
int line;
	{
	char warning[MAXLINE];

	if ( ! nowarn )
		{
		/* The "warning, " prefix makes the result longer than str,
		 * which may itself fill a MAXLINE buffer, so bound the
		 * write.
		 */
		(void) snprintf( warning, sizeof( warning ),
				"warning, %s", str );
		line_pinpoint( warning, line );
		}
	}
/* line_pinpoint - write out a message, pinpointing it at the given line
 *
 * The message is written to stderr in the form
 *   "<infilename>", line <line>: <str>
 */
void line_pinpoint( str, line )
char str[];
int line;
{
fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
}
/* yyerror - eat up an error message from the parser;
 * currently, messages are ignored
 *
 * The body is intentionally empty; syntax errors are reported by the
 * grammar's own "error" productions through synerr().
 */
void yyerror( msg )
char msg[];
{
}

572
usr.bin/lex/scan.l Normal file
View File

@ -0,0 +1,572 @@
/* scan.l - scanner for flex input */
%{
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: scan.l,v 1.2 94/01/04 14:33:09 vern Exp $ */
#include "flexdef.h"
#include "parse.h"
#define ACTION_ECHO add_action( yytext )
#define MARK_END_OF_PROLOG mark_prolog();
#define YY_DECL \
int flexscan()
#define RETURNCHAR \
yylval = (unsigned char) yytext[0]; \
return CHAR;
#define RETURNNAME \
strcpy( nmstr, yytext ); \
return NAME;
#define PUT_BACK_STRING(str, start) \
for ( i = strlen( str ) - 1; i >= start; --i ) \
unput((str)[i])
#define CHECK_REJECT(str) \
if ( all_upper( str ) ) \
reject = true;
#define CHECK_YYMORE(str) \
if ( all_lower( str ) ) \
yymore_used = true;
%}
/* Exclusive start conditions, one per lexical context of a flex input
 * file (sections 2 and 3, code blocks, actions, quoted strings, character
 * classes, ...).
 */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2
/* Name definitions.  NL accepts "\n", "\r\n" and "\n\r" line endings.
 * NAME is written with lower-case letters only; the all_upper() and
 * all_lower() checks in the rules suggest that this scanner is generated
 * case-insensitively - TODO confirm against the build flags.
 * ESCSEQ is a backslash followed by any non-newline character, by one to
 * three digits, or by 'x' and one or two hexadecimal digits.
 */
WS [ \t]+
OPTWS [ \t]*
NOT_WS [^ \t\n]
NL (\n|\r\n|\n\r)
NAME ([a-z_][a-z_0-9-]*)
NOT_NAME [^a-z_*\n]+
SCNAME {NAME}
ESCSEQ (\\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}))
FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
CCL_CHAR ([^\\\n\]]|{ESCSEQ})
%%
static int bracelevel, didadef, indented_code, checking_used; /* state kept between calls to the scanner */
int doing_codeblock = false; /* set while a section 2 code block is being copied */
int i; /* scratch index, also used by PUT_BACK_STRING */
Char nmdef[MAXLINE], myesc(); /* nmdef holds the text of a name definition */
^{WS} indented_code = true; BEGIN(CODEBLOCK); /* indented line: copied as code */
^"/*" ACTION_ECHO; BEGIN(C_COMMENT); /* unindented comment: copied as well */
^"%s"{NAME}? return SCDECL; /* start condition declaration */
^"%x"{NAME}? return XSCDECL; /* exclusive start condition declaration */
^"%{".*{NL} {
/* Start of a %{ ... %} block, copied until the closing %}. */
++linenum;
line_directive_out( (FILE *) 0 );
indented_code = false;
BEGIN(CODEBLOCK);
}
{WS} return WHITESPACE;
^"%%".* {
/* End of section 1; the prolog of section 2 follows. */
sectnum = 2;
bracelevel = 0;
mark_defs1();
line_directive_out( (FILE *) 0 );
BEGIN(SECT2PROLOG);
return SECTEND;
}
^"%pointer".*{NL} {
/* Ignored (with a warning) when lex compatibility is on. */
if ( lex_compat )
warn( "%pointer incompatible with -l option" );
else
yytext_is_array = false;
++linenum;
}
^"%array".*{NL} {
/* Ignored (with a warning) when C++ output was requested. */
if ( C_plus_plus )
warn( "%array incompatible with -+ option" );
else
yytext_is_array = true;
++linenum;
}
^"%used" {
warn( "%used/%unused have been deprecated" );
checking_used = REALLY_USED; BEGIN(USED_LIST);
}
^"%unused" {
warn( "%used/%unused have been deprecated" );
checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
}
^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL} ++linenum; /* ignore */
^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" );
^{NAME} {
/* A name at the start of a line begins a name definition. */
strcpy( nmstr, yytext );
didadef = false;
BEGIN(PICKUPDEF);
}
{SCNAME} RETURNNAME;
^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
{OPTWS}{NL} ++linenum; return '\n';
<C_COMMENT>"*/" ACTION_ECHO; BEGIN(INITIAL); /* end of the comment */
<C_COMMENT>"*/".*{NL} ++linenum; ACTION_ECHO; BEGIN(INITIAL); /* end of the comment, with the rest of its line */
<C_COMMENT>[^*\n]+ ACTION_ECHO; /* comment text */
<C_COMMENT>"*" ACTION_ECHO; /* a '*' that does not close the comment */
<C_COMMENT>{NL} ++linenum; ACTION_ECHO;
<CODEBLOCK>^"%}".*{NL} ++linenum; BEGIN(INITIAL); /* end of a %{ ... %} block */
<CODEBLOCK>"reject" ACTION_ECHO; CHECK_REJECT(yytext);
<CODEBLOCK>"yymore" ACTION_ECHO; CHECK_YYMORE(yytext);
<CODEBLOCK>{NAME}|{NOT_NAME}|. ACTION_ECHO; /* everything else is copied unchanged */
<CODEBLOCK>{NL} {
++linenum;
ACTION_ECHO;
/* An indented code block is only a single line long. */
if ( indented_code )
BEGIN(INITIAL);
}
<PICKUPDEF>{WS} /* separates name and definition */
<PICKUPDEF>{NOT_WS}.* {
/* The rest of the line is the definition of the name that was
* saved in nmstr.
*/
strcpy( (char *) nmdef, yytext );
/* Skip trailing whitespace. */
for ( i = strlen( (char *) nmdef ) - 1;
i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
--i )
;
nmdef[i + 1] = '\0';
ndinstal( nmstr, nmdef );
didadef = true;
}
<PICKUPDEF>{NL} {
/* End of the definition line; complain if it held no definition. */
if ( ! didadef )
synerr( "incomplete name definition" );
BEGIN(INITIAL);
++linenum;
}
<RECOVER>.*{NL} ++linenum; BEGIN(INITIAL); RETURNNAME; /* consume the rest of the line and return it as a NAME */
<USED_LIST>{NL} ++linenum; BEGIN(INITIAL); /* end of a %used / %unused line */
<USED_LIST>{WS}
<USED_LIST>"reject" {
/* Only the all-upper-case spelling is accepted. */
if ( all_upper( yytext ) )
reject_really_used = checking_used;
else
synerr(
"unrecognized %used/%unused construct" );
}
<USED_LIST>"yymore" {
/* Only the all-lower-case spelling is accepted. */
if ( all_lower( yytext ) )
yymore_really_used = checking_used;
else
synerr(
"unrecognized %used/%unused construct" );
}
<USED_LIST>{NOT_WS}+ synerr( "unrecognized %used/%unused construct" );
<SECT2PROLOG>^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
<SECT2PROLOG>^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
<SECT2PROLOG>^{WS}.* ACTION_ECHO; /* indented code in prolog */
<SECT2PROLOG>^{NOT_WS}.* { /* non-indented code */
/* Outside of %{ ... %} an unindented line is the first rule, so
* the prolog ends here and the line is rescanned in SECT2.
*/
if ( bracelevel <= 0 )
{ /* not in %{ ... %} */
yyless( 0 ); /* put it all back */
mark_prolog();
BEGIN(SECT2);
}
else
ACTION_ECHO;
}
<SECT2PROLOG>.* ACTION_ECHO;
<SECT2PROLOG>{NL} ++linenum; ACTION_ECHO;
<SECT2PROLOG><<EOF>> {
/* The input ended before any rule was seen. */
mark_prolog();
sectnum = 0;
yyterminate(); /* to stop the parser */
}
<SECT2>^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
<SECT2>^({WS}|"%{") {
/* A line that starts with whitespace or %{ is code, not a rule. */
indented_code = (yytext[0] != '%');
doing_codeblock = true;
bracelevel = 1;
if ( indented_code )
ACTION_ECHO;
BEGIN(CODEBLOCK_2);
}
<SECT2>^"<" BEGIN(SC); return '<'; /* start condition prefix */
<SECT2>^"^" return '^';
<SECT2>\" BEGIN(QUOTE); return '"';
<SECT2>"{"/[0-9] BEGIN(NUM); return '{'; /* repetition count follows */
<SECT2>"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR);
<SECT2>"$"/([ \t]|{NL}) return '$'; /* '$' is an operator only at the end of the pattern */
<SECT2>{WS}"%{" {
/* The pattern is over and its action is a %{ ... %} block. */
bracelevel = 1;
BEGIN(PERCENT_BRACE_ACTION);
return '\n';
}
<SECT2>{WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; /* the action is '|' */
<SECT2>{WS} {
/* This rule is separate from the one below because
* otherwise we get variable trailing context, so
* we can't build the scanner using -{f,F}.
*/
bracelevel = 0;
continued_action = false;
BEGIN(ACTION);
return '\n';
}
<SECT2>{OPTWS}{NL} {
/* The pattern is followed directly by the end of the line. */
bracelevel = 0;
continued_action = false;
BEGIN(ACTION);
unput( '\n' ); /* so <ACTION> sees it */
return '\n';
}
<SECT2>"<<EOF>>" return EOF_OP;
<SECT2>^"%%".* {
/* End of section 2. */
sectnum = 3;
BEGIN(SECT3);
yyterminate(); /* to stop the parser */
}
<SECT2>"["{FIRST_CCL_CHAR}{CCL_CHAR}* {
/* A character class, matched up to but not including the ']'. */
int cclval;
strcpy( nmstr, yytext );
/* Check to see if we've already encountered this
* ccl.
*/
if ( (cclval = ccllookup( (Char *) nmstr )) )
{
/* Seen before: consume the ']' and reuse the class. */
if ( input() != ']' )
synerr( "bad character class" );
yylval = cclval;
++cclreuse;
return PREVCCL;
}
else
{
/* We fudge a bit. We know that this ccl will
* soon be numbered as lastccl + 1 by cclinit.
*/
cclinstal( (Char *) nmstr, lastccl + 1 );
/* Push back everything but the leading bracket
* so the ccl can be rescanned.
*/
yyless( 1 );
BEGIN(FIRSTCCL);
return '[';
}
}
<SECT2>"{"{NAME}"}" {
/* Reference to a name definition: push its text back onto the
* input so that it is scanned in place of the reference.
*/
register Char *nmdefptr;
Char *ndlookup();
strcpy( nmstr, yytext + 1 );
nmstr[yyleng - 2] = '\0'; /* chop trailing brace */
if ( ! (nmdefptr = ndlookup( nmstr )) )
format_synerr( "undefined definition {%s}",
nmstr );
else
{ /* push back name surrounded by ()'s */
int len = strlen( (char *) nmdefptr );
if ( lex_compat || nmdefptr[0] == '^' ||
(len > 0 && nmdefptr[len - 1] == '$') )
{ /* don't use ()'s after all */
PUT_BACK_STRING((char *) nmdefptr, 0);
if ( nmdefptr[0] == '^' )
BEGIN(CARETISBOL);
}
else
{
/* Pushed back in reverse, so ')' goes first. */
unput(')');
PUT_BACK_STRING((char *) nmdefptr, 0);
unput('(');
}
}
}
<SECT2>[/|*+?.()] return (unsigned char) yytext[0]; /* operators are their own tokens */
<SECT2>. RETURNCHAR;
<SC>[,*] return (unsigned char) yytext[0];
<SC>">" BEGIN(SECT2); return '>'; /* end of the start condition list */
<SC>">"/^ BEGIN(CARETISBOL); return '>'; /* a '^' right after the list */
<SC>{SCNAME} RETURNNAME;
<SC>. {
format_synerr( "bad <start condition>: %s", yytext );
}
<CARETISBOL>"^" BEGIN(SECT2); return '^';
<QUOTE>[^"\n] RETURNCHAR;
<QUOTE>\" BEGIN(SECT2); return '"';
<QUOTE>{NL} {
/* Recover by returning the closing quote the input lacks. */
synerr( "missing quote" );
BEGIN(SECT2);
++linenum;
return '"';
}
<FIRSTCCL>"^"/[^-\]\n] BEGIN(CCL); return '^'; /* leading '^' followed by an ordinary character */
<FIRSTCCL>"^"/("-"|"]") return '^'; /* stay in FIRSTCCL so the '-' or ']' that follows is scanned there */
<FIRSTCCL>. BEGIN(CCL); RETURNCHAR;
<CCL>-/[^\]\n] return '-'; /* '-' is a range operator unless it is last */
<CCL>[^\]\n] RETURNCHAR;
<CCL>"]" BEGIN(SECT2); return ']';
<CCL>.|{NL} {
/* Recover by returning the ']' the input lacks. */
synerr( "bad character class" );
BEGIN(SECT2);
return ']';
}
<NUM>[0-9]+ {
/* A repetition count inside {}'s. */
yylval = myctoi( yytext );
return NUMBER;
}
<NUM>"," return ',';
<NUM>"}" BEGIN(SECT2); return '}';
<NUM>. {
/* Recover by returning the '}' the parser is waiting for. */
synerr( "bad character inside {}'s" );
BEGIN(SECT2);
return '}';
}
<NUM>{NL} {
synerr( "missing }" );
BEGIN(SECT2);
++linenum;
return '}';
}
<BRACEERROR>"}" synerr( "bad name in {}'s" ); BEGIN(SECT2);
<BRACEERROR>{NL} synerr( "missing }" ); ++linenum; BEGIN(SECT2);
<CODEBLOCK_2>"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT);
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".* bracelevel = 0; /* the block ends at the next newline */
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject" {
ACTION_ECHO;
CHECK_REJECT(yytext);
}
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore" {
ACTION_ECHO;
CHECK_YYMORE(yytext);
}
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|. ACTION_ECHO;
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NL} {
++linenum;
ACTION_ECHO;
/* The block is finished once %} has been seen, or after a
* single line in the case of an indented code block.
*/
if ( bracelevel == 0 ||
(doing_codeblock && indented_code) )
{
/* Only a rule's action is closed with YY_BREAK. */
if ( ! doing_codeblock )
add_action( "\tYY_BREAK\n" );
doing_codeblock = false;
BEGIN(SECT2);
}
}
/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>"{" ACTION_ECHO; ++bracelevel;
<ACTION>"}" ACTION_ECHO; --bracelevel;
<ACTION>[^a-z_{}"'/\n]+ ACTION_ECHO;
<ACTION>{NAME} ACTION_ECHO;
<ACTION>"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT);
<ACTION>"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
<ACTION>\" ACTION_ECHO; BEGIN(ACTION_STRING);
<ACTION>{NL} {
++linenum;
ACTION_ECHO;
/* A newline outside of all braces ends the action. */
if ( bracelevel == 0 )
{
add_action( "\tYY_BREAK\n" );
BEGIN(SECT2);
}
}
<ACTION>. ACTION_ECHO;
<ACTION_COMMENT>"*/" {
ACTION_ECHO;
/* Return to whichever context the comment was found in. */
if ( doing_codeblock )
BEGIN(CODEBLOCK_2);
else
BEGIN(ACTION);
}
<ACTION_COMMENT>"*" ACTION_ECHO;
<ACTION_COMMENT>[^*\n]+ ACTION_ECHO;
<ACTION_COMMENT>[^*\n]*{NL} ++linenum; ACTION_ECHO;
<ACTION_STRING>[^"\\\n]+ ACTION_ECHO;
<ACTION_STRING>\\. ACTION_ECHO; /* escaped character, including an escaped quote */
<ACTION_STRING>{NL} ++linenum; ACTION_ECHO;
<ACTION_STRING>\" ACTION_ECHO; BEGIN(ACTION);
<ACTION_STRING>. ACTION_ECHO;
<ACTION,ACTION_COMMENT,ACTION_STRING><<EOF>> {
synerr( "EOF encountered inside an action" );
yyterminate();
}
<SECT2,QUOTE,CCL>{ESCSEQ} {
/* An escape sequence stands for the single character that
* myesc() returns for it.
*/
yylval = myesc( (Char *) yytext );
return CHAR;
}
<FIRSTCCL>{ESCSEQ} {
/* As above, and it also ends the "first character" state. */
yylval = myesc( (Char *) yytext );
BEGIN(CCL);
return CHAR;
}
<SECT3>.*(\n?) ECHO; /* section 3 is copied through unchanged */
<SECT3><<EOF>> sectnum = 0; yyterminate();
<*>.|\n format_synerr( "bad character: %s", yytext ); /* catch-all for every start condition */
%%
/* yywrap - called by the scanner when it reaches the end of an input file
 *
 * Moves on to the next entry of input_files and returns 0 if one is left;
 * returns 1 when the last input file has been read.
 */
int yywrap()
	{
	if ( --num_input_files <= 0 )
		return 1;

	set_input_file( *++input_files );
	return 0;
	}
/* set_input_file - make the given file the scanner's input
 *
 * A null "file" selects standard input, reported as "<stdin>".  Otherwise
 * the file is opened for reading, and lerrsf() is called if that fails.
 */
void set_input_file( file )
char *file;
	{
	if ( ! file )
		{
		yyin = stdin;
		infilename = "<stdin>";
		return;
		}

	infilename = file;
	yyin = fopen( infilename, "r" );

	if ( yyin == NULL )
		lerrsf( "can't open %s", file );
	}
/* Wrapper routines for accessing the scanner's malloc routines. */
/* flex_alloc - forward a request for "size" bytes to yy_flex_alloc() and
 * return its result unchanged.
 */
void *flex_alloc( size )
unsigned int size;
{
return yy_flex_alloc( size );
}
/* flex_realloc - forward a request to resize "ptr" to "size" bytes to
 * yy_flex_realloc() and return its result unchanged.
 */
void *flex_realloc( ptr, size )
void *ptr;
unsigned int size;
{
return yy_flex_realloc( ptr, size );
}
void flex_free( ptr )
void *ptr;
{
yy_flex_free( ptr );
}

1232
usr.bin/lex/skel.c Normal file

File diff suppressed because it is too large Load Diff

262
usr.bin/lex/sym.c Normal file
View File

@ -0,0 +1,262 @@
/* sym - symbol table routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: sym.c,v 1.2 94/01/04 14:33:06 vern Exp $ */
#include "flexdef.h"
/* declare functions that have forward references */
int hashfunct PROTO((register char[], int));
/* The three symbol tables.  Each is an array of hash chains:
 *   ndtbl  - name definitions (ndinstal / ndlookup)
 *   sctbl  - start conditions (scinstal / sclookup)
 *   ccltab - character class texts (cclinstal / ccllookup)
 */
struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
struct hash_entry *sctbl[START_COND_HASH_SIZE];
struct hash_entry *ccltab[CCL_HASH_SIZE];
struct hash_entry *findsym();
/* addsym - enter a symbol and its definitions into a hash table
 *
 * The new entry is placed at the head of its hash chain.  Returns 0 on
 * success, or -1, leaving the table untouched, if an entry named "sym"
 * already exists.  The sym and str_def pointers are stored as given; the
 * strings are not copied.
 */
int addsym( sym, str_def, int_def, table, table_size )
register char sym[];
char *str_def;
int int_def;
hash_table table;
int table_size;
	{
	int bucket = hashfunct( sym, table_size );
	register struct hash_entry *scan;
	register struct hash_entry *fresh;
	register struct hash_entry *old_head;

	/* Refuse to add a name that is already in the chain. */
	for ( scan = table[bucket]; scan; scan = scan->next )
		if ( strcmp( sym, scan->name ) == 0 )
			return -1;

	fresh = (struct hash_entry *)
		flex_alloc( sizeof( struct hash_entry ) );

	if ( fresh == NULL )
		flexfatal( "symbol table memory allocation failed" );

	/* Link the entry in ahead of the current chain head, if any. */
	old_head = table[bucket];

	if ( old_head )
		{
		fresh->next = old_head;
		old_head->prev = fresh;
		}
	else
		fresh->next = NULL;

	fresh->prev = NULL;
	fresh->name = sym;
	fresh->str_val = str_def;
	fresh->int_val = int_def;

	table[bucket] = fresh;

	return 0;
	}
/* cclinstal - remember the text of a character class
 *
 * A copy of ccltxt is entered into ccltab with cclnum as its value.
 */
void cclinstal( ccltxt, cclnum )
Char ccltxt[];
int cclnum;
	{
	Char *copy_unsigned_string();
	Char *saved_text = copy_unsigned_string( ccltxt );

	/* The status returned by addsym() is deliberately ignored: we are
	 * only called for a character class that is not in the table yet.
	 */
	(void) addsym( (char *) saved_text, (char *) 0, cclnum,
			ccltab, CCL_HASH_SIZE );
	}
/* ccllookup - lookup the number associated with character class text
*
* Returns 0 if there's no CCL associated with the text.
*/
int ccllookup( ccltxt )
Char ccltxt[];
{
return findsym( (char *) ccltxt, ccltab, CCL_HASH_SIZE )->int_val;
}
/* findsym - look a symbol up in a hash table
 *
 * Returns the entry named "sym".  If there is none, a pointer to a static
 * entry whose fields are all zero is returned, so the result can always
 * be dereferenced.
 */
struct hash_entry *findsym( sym, table, table_size )
register char sym[];
hash_table table;
int table_size;
	{
	static struct hash_entry empty_entry =
		{
		(struct hash_entry *) 0, (struct hash_entry *) 0,
		(char *) 0, (char *) 0, 0,
		} ;
	register struct hash_entry *cur;

	for ( cur = table[hashfunct( sym, table_size )]; cur;
	      cur = cur->next )
		if ( strcmp( sym, cur->name ) == 0 )
			return cur;

	return &empty_entry;
	}
/* hashfunct - hash the string "str" into the range 0 .. hash_size - 1
 *
 * For each character the running value is doubled, the character (taken
 * as an unsigned char) is added, and the sum is reduced modulo hash_size.
 */
int hashfunct( str, hash_size )
register char str[];
int hash_size;
	{
	register int hashval = 0;
	register char *p;

	for ( p = str; *p; ++p )
		hashval = ((hashval << 1) + (unsigned char) *p) % hash_size;

	return hashval;
	}
/* ndinstal - install a name definition
 *
 * Copies of name and definition are entered into ndtbl.  A name that is
 * already defined is reported as a syntax error.
 */
void ndinstal( name, definition )
char name[];
Char definition[];
	{
	char *copy_string();
	Char *copy_unsigned_string();
	int status;

	status = addsym( copy_string( name ),
			(char *) copy_unsigned_string( definition ), 0,
			ndtbl, NAME_TABLE_HASH_SIZE );

	if ( status != 0 )
		synerr( "name defined twice" );
	}
/* ndlookup - lookup a name definition
*
* Returns a nil pointer if the name definition does not exist.
*/
Char *ndlookup( nd )
char nd[];
{
return (Char *) findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val;
}
/* scextend - make room for more start conditions
 *
 * Raises current_max_scs by MAX_SCS_INCREMENT and grows every per start
 * condition array to the new size.
 */
void scextend()
	{
	int new_max = current_max_scs + MAX_SCS_INCREMENT;

	current_max_scs = new_max;
	++num_reallocs;

	scset = reallocate_integer_array( scset, new_max );
	scbol = reallocate_integer_array( scbol, new_max );
	scxclu = reallocate_integer_array( scxclu, new_max );
	sceof = reallocate_integer_array( sceof, new_max );
	scname = reallocate_char_ptr_array( scname, new_max );
	actvsc = reallocate_integer_array( actvsc, new_max );
	}
/* scinstal - create a new start condition
 *
 * str is the name of the condition; it is exclusive if xcluflg is true.
 * A "#define" giving the condition its number is written to stdout, and
 * the condition's entries in the per start condition arrays are
 * initialized.  A name that was declared before is reported, but the
 * condition is set up all the same.
 */
void scinstal( str, xcluflg )
char str[];
int xcluflg;
	{
	char *copy_string();
	int status;

	/* Emit the definition used by BEGIN et al.  Note that the number
	 * written is the value of lastsc before it is incremented.
	 */
	printf( "#define %s %d\n", str, lastsc );

	++lastsc;

	if ( lastsc >= current_max_scs )
		scextend();

	scname[lastsc] = copy_string( str );

	status = addsym( scname[lastsc], (char *) 0, lastsc,
			sctbl, START_COND_HASH_SIZE );

	if ( status != 0 )
		format_pinpoint_message( "start condition %s declared twice",
					str );

	scset[lastsc] = mkstate( SYM_EPSILON );
	scbol[lastsc] = mkstate( SYM_EPSILON );
	scxclu[lastsc] = xcluflg;
	sceof[lastsc] = false;
	}
/* sclookup - lookup the number associated with a start condition
*
* Returns 0 if no such start condition.
*/
int sclookup( str )
char str[];
{
return findsym( str, sctbl, START_COND_HASH_SIZE )->int_val;
}

888
usr.bin/lex/tblcmp.c Normal file
View File

@ -0,0 +1,888 @@
/* tblcmp - table compression routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: /home/daffy/u0/vern/flex/RCS/tblcmp.c,v 2.10 93/12/07 10:18:30 vern Exp $ */
#include "flexdef.h"
/* declarations for functions that have forward references */
/* NOTE(review): PROTO is not defined in this file; presumably it expands
 * to a full prototype on ANSI compilers and to "()" otherwise - confirm
 * in flexdef.h.
 */
void mkentry PROTO((register int*, int, int, int, int));
void mkprot PROTO((int[], int, int));
void mktemplate PROTO((int[], int, int));
void mv2front PROTO((int));
int tbldiff PROTO((int[], int, int[]));
/* bldtbl - build the compressed-table entries for one dfa state
 *
 * synopsis
 *   int state[numecs], statenum, totaltrans, comstate, comfreq;
 *   bldtbl( state, statenum, totaltrans, comstate, comfreq );
 *
 * "state" is the transition array of dfa state number "statenum"; it is
 * indexed by equivalence class and yields the destination state for that
 * class.  "totaltrans" is the number of out-transitions of the state,
 * "comstate" is the destination reached by the most out-transitions, and
 * "comfreq" is how many out-transitions lead to "comstate".
 *
 * Terminology:
 *   A "proto" is a saved transition table that later states are likely to
 *   duplicate exactly or nearly so.  Protos live on a most-recently-used
 *   queue; a state that is close enough to a proto is stored as just its
 *   differences from that proto, which compacts the output tables.
 *
 *   A "template" is a special proto made from a homogeneous (or nearly
 *   homogeneous) table, i.e., one where almost every transition goes to the
 *   same destination.  Such states are very common with large rule sets.
 *   Had the table simply been made a proto, a later similar state would
 *   typically differ from it in two places: the character on which the
 *   proto leaves the common destination, and the character on which the
 *   state itself does.  A template goes to the common destination on EVERY
 *   transition character, so the later state costs only one difference.
 */

void bldtbl( state, statenum, totaltrans, comstate, comfreq )
int state[], statenum, totaltrans, comstate, comfreq;
{
	/* Two scratch difference arrays.  diffs[best] always holds the
	 * difference against the best proto found so far, while the other
	 * array receives the difference against the candidate currently
	 * being examined.  When a candidate wins, "best" is flipped rather
	 * than copying the array.
	 */
	int diffs[2][CSIZE + 1];
	int best = 0;
	int best_diff, best_proto, p, cand_diff;
	int same_com_only;

	/* A state with too few out-transitions is not worth compacting. */
	if ( totaltrans * 100 < numecs * PROTO_SIZE_PERCENTAGE ) {
		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
		return;
	}

	/* True if the first proto to try must share our "comstate". */
	same_com_only = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;

	best_proto = firstprot;
	best_diff = totaltrans;

	if ( same_com_only ) {
		/* Locate the first proto with the same common destination. */
		p = firstprot;
		while ( p != NIL && protcomst[p] != comstate )
			p = protnext[p];

		if ( p != NIL ) {
			best_proto = p;
			best_diff = tbldiff( state, p, diffs[best] );
		}
	} else {
		/* The most common destination is not common enough; zero
		 * "comstate" so that, should this state become a proto, it
		 * will not be treated as a template.
		 */
		comstate = 0;

		if ( firstprot != NIL )
			best_diff = tbldiff( state, firstprot, diffs[best] );
	}

	/* "best_proto" is now the first interesting proto.  Only if it is
	 * outside the first-match tolerance do we walk the remainder of the
	 * queue (starting at that proto) looking for something closer.
	 */
	if ( best_diff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) {
		for ( p = best_proto; p != NIL; p = protnext[p] ) {
			cand_diff = tbldiff( state, p, diffs[best ^ 1] );

			if ( cand_diff < best_diff ) {
				best ^= 1;
				best_diff = cand_diff;
				best_proto = p;
			}
		}
	}

	if ( best_diff * 100 <= totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) {
		/* The chosen proto is close enough: store only the
		 * differences and default to the proto's table.
		 */
		mkentry( diffs[best], numecs, statenum,
			prottbl[best_proto], best_diff );

		/* A state that still differs noticeably from its proto
		 * becomes a proto in its own right.
		 */
		if ( best_diff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
			mkprot( state, statenum, comstate );

		/* mkprot may have pushed "best_proto" off the end of the
		 * queue.  In that case the new proto occupies its physical
		 * slot and is already at the front, so this call does
		 * nothing.
		 */
		mv2front( best_proto );
		return;
	}

	/* No usable proto.  A sufficiently homogeneous state becomes a
	 * template; anything else becomes an ordinary proto and is entered
	 * in full.
	 */
	if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE )
		mktemplate( state, statenum, comstate );
	else {
		mkprot( state, statenum, comstate );
		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
	}
}
/* cmptmps - compress template table entries
 *
 * Templates are compressed using "template equivalence classes"
 * (meta-equivalence classes): groups of transition-character equivalence
 * classes that always appear together in templates.  When "usemecs" is
 * off, the ordinary equivalence classes are used unchanged.
 *
 * Each template i (1..numtemps) is read from tnxt[] and entered through
 * mkentry() as state number lastdfa + i + 1, defaulting to JAMSTATE.
 */

void cmptmps()
{
	int packed[CSIZE + 1];
	register int t, ec;
	int live, dest, slot;

	peakpairs = numtemps * numecs + tblend;

	/* With meta-equivalence classes enabled, build them from the data
	 * collected on template transitions.
	 */
	nummecs = usemecs ? cre8ecs( tecfwd, tecbck, numecs ) : numecs;

	while ( lastdfa + numtemps + 1 >= current_max_dfas )
		increase_max_dfas();

	for ( t = 1; t <= numtemps; ++t ) {
		/* Count of non-jam transitions out of this template. */
		live = 0;

		for ( ec = 1; ec <= numecs; ++ec ) {
			dest = tnxt[numecs * t + ec];
			slot = ec;

			if ( usemecs ) {
				/* The absolute value of tecbck[] is the
				 * meta-equivalence class of an equivalence
				 * class, as set up by cre8ecs().  Only
				 * classes with a positive entry contribute.
				 */
				slot = tecbck[ec];
				if ( slot <= 0 )
					continue;
			}

			packed[slot] = dest;
			if ( dest > 0 )
				++live;
		}

		/* The skeleton assumes (rather subtly) that when
		 * meta-equivalence classes are in use, def[] for every
		 * template is the jam template; templates never default to
		 * another non-jam table entry such as another template.
		 *
		 * The "+ 1" leaves room for the jam state right after the
		 * last real state.
		 */
		mkentry( packed, nummecs, lastdfa + t + 1, JAMSTATE, live );
	}
}
/* expand_nxt_chk - grow the nxt[] and chk[] arrays
 *
 * Both arrays are enlarged by MAX_XPAIRS_INCREMENT entries and
 * num_reallocs is bumped.  Only the newly added tail of chk[] is zeroed
 * here.
 */

void expand_nxt_chk()
{
	register int prev_size = current_max_xpairs;

	++num_reallocs;
	current_max_xpairs = prev_size + MAX_XPAIRS_INCREMENT;

	nxt = reallocate_integer_array( nxt, current_max_xpairs );
	chk = reallocate_integer_array( chk, current_max_xpairs );

	/* Clear just the freshly added chk[] entries. */
	zero_out( (char *) &chk[prev_size],
		MAX_XPAIRS_INCREMENT * sizeof( int ) );
}
/* find_table_space - find room in the full-speed table for a state
 *
 * synopsis
 *   int *state, numtrans, block_start;
 *   int find_table_space();
 *
 *   block_start = find_table_space( state, numtrans );
 *
 * "state" is the transition array (indexed 1..numecs) to be placed and
 * "numtrans" is its number of out-transitions.
 *
 * Returns the start position of the first block in chk[] that can hold the
 * state.  A position i is usable only if chk[i - 1] (where the action
 * number will go) and chk[i] (where the end-of-buffer entry will go) are
 * both free, and every chk[i + ec] is free wherever state[ec] is non-zero.
 *
 * The arrays are grown with expand_nxt_chk() as needed.  For states with
 * few transitions the search begins at "firstfree", and "firstfree" is
 * advanced past each candidate examined.
 */

int find_table_space( state, numtrans )
int *state, numtrans;
{
	register int pos, ec;
	int interior = numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT;

	if ( interior )
		/* Search from the beginning, skipping only entries that
		 * can definitely not hold the new state.
		 */
		pos = firstfree;
	else {
		/* Too many out-transitions for an interior fit; place the
		 * state near the end of chk/nxt.  With an empty table the
		 * first available position, 1, is used directly.
		 */
		if ( tblend < 2 )
			return 1;

		pos = tblend - numecs;
	}

	for ( ; ; ++pos ) {
		/* Advance until both the action-number slot and the
		 * end-of-buffer slot are free, keeping the arrays large
		 * enough for a whole state at "pos".
		 */
		for ( ; ; ) {
			while ( pos + numecs >= current_max_xpairs )
				expand_nxt_chk();

			if ( chk[pos - 1] != 0 )
				++pos;
			else if ( chk[pos] != 0 )
				/* chk[pos] is taken, so pos + 1 cannot work
				 * either (its action-number slot would be
				 * chk[pos]); skip straight over it.
				 */
				pos += 2;
			else
				break;
		}

		/* Remember where the next interior search should start. */
		if ( interior )
			firstfree = pos + 1;

		/* Every entry the state actually needs must be unused. */
		for ( ec = 1; ec <= numecs; ++ec )
			if ( state[ec] != 0 && chk[pos + ec] != 0 )
				break;

		if ( ec > numecs )
			return pos;
	}
}
/* inittbl - initialize transition tables
 *
 * Zeroes every chk[] entry, resets "tblend" to 0, "firstfree" to one past
 * it, and "numtemps" to 0.  When meta-equivalence classes are in use, the
 * tecfwd[]/tecbck[] links are initialized as a single chain 1..numecs.
 */

void inittbl()
{
	register int ec;

	zero_out( (char *) chk, current_max_xpairs * sizeof( int ) );

	tblend = 0;
	firstfree = tblend + 1;
	numtemps = 0;

	if ( ! usemecs )
		return;

	/* Set up doubly-linked meta-equivalence classes; these are sets of
	 * equivalence classes which all have identical transitions out of
	 * TEMPLATES.  Initially every class is linked to its neighbors.
	 */
	tecbck[1] = NIL;

	for ( ec = 1; ec < numecs; ++ec ) {
		tecbck[ec + 1] = ec;
		tecfwd[ec] = ec + 1;
	}

	tecfwd[numecs] = NIL;
}
/* mkdeftbl - make the default, "jam" table entries
 *
 * The jam state is numbered lastdfa + 1.  Its block is appended after the
 * current end of the tables: one entry for the end-of-buffer transition
 * followed by numecs entries whose nxt[] value is 0.  All of the block's
 * chk[] entries are set to the jam state.  "jambase", base[], def[],
 * "tblend" and "numtemps" are updated to match.
 */

void mkdeftbl()
{
	int ec;

	jamstate = lastdfa + 1;

	/* One extra entry for the transition on end-of-buffer. */
	jambase = ++tblend;

	while ( jambase + numecs >= current_max_xpairs )
		expand_nxt_chk();

	/* Default end-of-buffer transition. */
	nxt[jambase] = end_of_buffer_state;
	chk[jambase] = jamstate;

	for ( ec = 1; ec <= numecs; ++ec ) {
		nxt[jambase + ec] = 0;
		chk[jambase + ec] = jamstate;
	}

	base[jamstate] = jambase;
	def[jamstate] = 0;

	tblend = jambase + numecs;
	++numtemps;
}
/* mkentry - create base/def and nxt/chk entries for transition array
*
* synopsis
* int state[numchars + 1], numchars, statenum, deflink, totaltrans;
* mkentry( state, numchars, statenum, deflink, totaltrans );
*
* "state" is a transition array "numchars" characters in size, "statenum"
* is the offset to be used into the base/def tables, and "deflink" is the
* entry to put in the "def" table entry. If "deflink" is equal to
* "JAMSTATE", then no attempt will be made to fit zero entries of "state"
* (i.e., jam entries) into the table. It is assumed that by linking to
* "JAMSTATE" they will be taken care of. In any case, entries in "state"
* marking transitions to "SAME_TRANS" are treated as though they will be
* taken care of by whereever "deflink" points. "totaltrans" is the total
* number of transitions out of the state. If it is below a certain threshold,
* the tables are searched for an interior spot that will accommodate the
* state array.
*/
void mkentry( state, numchars, statenum, deflink, totaltrans )
register int *state;
int numchars, statenum, deflink, totaltrans;
{
register int minec, maxec, i, baseaddr;
int tblbase, tbllast;
if ( totaltrans == 0 )
{ /* there are no out-transitions */
if ( deflink == JAMSTATE )
base[statenum] = JAMSTATE;
else
base[statenum] = 0;
def[statenum] = deflink;
return;
}
for ( minec = 1; minec <= numchars; ++minec )
{
if ( state[minec] != SAME_TRANS )
if ( state[minec] != 0 || deflink != JAMSTATE )
break;
}
if ( totaltrans == 1 )
{
/* There's only one out-transition. Save it for later to fill
* in holes in the tables.
*/
stack1( statenum, minec, state[minec], deflink );
return;
}
for ( maxec = numchars; maxec > 0; --maxec )
{
if ( state[maxec] != SAME_TRANS )
if ( state[maxec] != 0 || deflink != JAMSTATE )
break;
}
/* Whether we try to fit the state table in the middle of the table
* entries we have already generated, or if we just take the state
* table at the end of the nxt/chk tables, we must make sure that we
* have a valid base address (i.e., non-negative). Note that
* negative base addresses dangerous at run-time (because indexing
* the nxt array with one and a low-valued character will access
* memory before the start of the array.
*/
/* Find the first transition of state that we need to worry about. */
if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
{
/* Attempt to squeeze it into the middle of the tables. */
baseaddr = firstfree;
while ( baseaddr < minec )
{
/* Using baseaddr would result in a negative base
* address below; find the next free slot.
*/
for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
;
}
while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
expand_nxt_chk();
for ( i = minec; i <= maxec; ++i )
if ( state[i] != SAME_TRANS &&
(state[i] != 0 || deflink != JAMSTATE) &&
chk[baseaddr + i - minec] != 0 )
{ /* baseaddr unsuitable - find another */
for ( ++baseaddr;
baseaddr < current_max_xpairs &&
chk[baseaddr] != 0; ++baseaddr )
;
while ( baseaddr + maxec - minec + 1 >=
current_max_xpairs )
expand_nxt_chk();
/* Reset the loop counter so we'll start all
* over again next time it's incremented.
*/
i = minec - 1;
}
}
else
{
/* Ensure that the base address we eventually generate is
* non-negative.
*/
baseaddr = MAX( tblend + 1, minec );
}
tblbase = baseaddr - minec;
tbllast = tblbase + maxec;
while ( tbllast + 1 >= current_max_xpairs )
expand_nxt_chk();
base[statenum] = tblbase;
def[statenum] = deflink;
for ( i = minec; i <= maxec; ++i )
if ( state[i] != SAME_TRANS )
if ( state[i] != 0 || deflink != JAMSTATE )
{
nxt[tblbase + i] = state[i];
chk[tblbase + i] = statenum;
}
if ( baseaddr == firstfree )
/* Find next free slot in tables. */
for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
;
tblend = MAX( tblend, tbllast );
}
/* mk1tbl - create table entries for a state (or state fragment) which
 *          has only one out-transition
 *
 * "state" is the state number, "sym" the symbol of its single transition,
 * "onenxt" the destination of that transition and "onedef" the state's
 * default link.  The transition is stored in the first free nxt/chk slot
 * at or after max(firstfree, sym), so base[state] is never negative.
 */

void mk1tbl( state, sym, onenxt, onedef )
int state, sym, onenxt, onedef;
{
	if ( sym > firstfree )
		firstfree = sym;

	/* Advance to a free slot, growing the tables if we run off the end. */
	for ( ; chk[firstfree] != 0; ) {
		++firstfree;
		if ( firstfree >= current_max_xpairs )
			expand_nxt_chk();
	}

	chk[firstfree] = state;
	nxt[firstfree] = onenxt;
	base[state] = firstfree - sym;
	def[state] = onedef;

	if ( firstfree <= tblend )
		return;

	/* The entry went past the old end of the tables: record the new
	 * end and step firstfree beyond it.
	 */
	tblend = firstfree;
	++firstfree;

	if ( firstfree >= current_max_xpairs )
		expand_nxt_chk();
}
/* mkprot - create new proto entry
 *
 * Adds "state" (indexed 1..numecs) to the front of the proto queue as the
 * proto for state number "statenum" with common destination "comstate".
 * If the queue or the save area is full, the last proto in the queue is
 * dropped and its slot reused.  The transitions are copied into
 * protsave[].
 */

void mkprot( state, statenum, comstate )
int state[], statenum, comstate;
{
	int ec, slot;
	int *saved;
	int queue_full;

	++numprots;
	queue_full = numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE;

	if ( queue_full ) {
		/* Gotta make room for the new proto by dropping the last
		 * entry in the queue and taking over its slot.
		 */
		slot = lastprot;
		lastprot = protprev[lastprot];
		protnext[lastprot] = NIL;
	} else
		slot = numprots;

	/* Link the slot in at the head of the queue. */
	protnext[slot] = firstprot;
	if ( firstprot != NIL )
		protprev[firstprot] = slot;
	firstprot = slot;

	prottbl[slot] = statenum;
	protcomst[slot] = comstate;

	/* Copy state into save area so it can be compared with rapidly. */
	saved = &protsave[numecs * (slot - 1)];
	for ( ec = 1; ec <= numecs; ++ec )
		saved[ec] = state[ec];
}
/* mktemplate - create a template entry based on a state, and connect the
 *              state to it
 *
 * A new template is built in which every non-zero transition of "state"
 * goes to "comstate".  The template is registered as a proto numbered
 * -numtemps, and "state" (number "statenum") is then entered as its
 * differences from that template, with -numtemps as its default link.
 */

void mktemplate( state, statenum, comstate )
int state[], statenum, comstate;
{
	int ec, rowbase, ndiff;
	int residual[CSIZE + 1];
	Char live_ecs[CSIZE + 1];
	int nlive = 0;

	++numtemps;

	/* Where the template's transition table is kept temporarily in
	 * tnxt[]; the final transition table is produced by cmptmps().
	 */
	rowbase = numtemps * numecs;

	if ( rowbase + numecs >= current_max_template_xpairs ) {
		current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
		++num_reallocs;
		tnxt = reallocate_integer_array( tnxt,
			current_max_template_xpairs );
	}

	for ( ec = 1; ec <= numecs; ++ec ) {
		if ( state[ec] != 0 ) {
			/* Record the class; its transition becomes the
			 * common destination.
			 */
			live_ecs[nlive++] = ec;
			tnxt[rowbase + ec] = comstate;
		} else
			tnxt[rowbase + ec] = 0;
	}

	if ( usemecs )
		mkeccl( live_ecs, nlive, tecfwd, tecbck, numecs, 0 );

	mkprot( &tnxt[rowbase], -numtemps, comstate );

	/* We rely on the fact that mkprot adds things to the beginning
	 * of the proto queue, so "firstprot" is the template just made.
	 */
	ndiff = tbldiff( state, firstprot, residual );
	mkentry( residual, numecs, statenum, -numtemps, ndiff );
}
/* mv2front - move proto queue element to front of queue
 *
 * "qelm" is unlinked from its current position in the doubly-linked proto
 * queue and relinked as "firstprot".  Nothing is done if it is already
 * first.  "lastprot" is updated if "qelm" was the last element.
 */

void mv2front( qelm )
int qelm;
{
	int before, after;

	if ( qelm == firstprot )
		return;

	before = protprev[qelm];
	after = protnext[qelm];

	if ( qelm == lastprot )
		lastprot = before;

	/* Unlink qelm from between its neighbors. */
	protnext[before] = after;
	if ( after != NIL )
		protprev[after] = before;

	/* Relink it ahead of the current head. */
	protprev[qelm] = NIL;
	protnext[qelm] = firstprot;
	protprev[firstprot] = qelm;
	firstprot = qelm;
}
/* place_state - place a state into full speed transition table
 *
 * "state" is the transition array of state number "statenum"; it is
 * indexed by equivalence class and gives the state to enter for that
 * class.  "transnum" is the number of out-transitions of the state.
 */

void place_state( state, statenum, transnum )
int *state, statenum, transnum;
{
	register int ec;
	int position = find_table_space( state, transnum );
	int block_end = position + numecs;

	/* "base" is the table of start positions. */
	base[statenum] = position;

	/* Mark the action-number slot (position - 1) and the end-of-buffer
	 * slot (position) as taken.  The non-zero values let
	 * find_table_space() know these positions must not be handed out
	 * to another state.
	 */
	chk[position - 1] = 1;
	chk[position] = 1;

	/* Place the state into chk and nxt. */
	for ( ec = 1; ec <= numecs; ++ec ) {
		if ( state[ec] == 0 )
			continue;

		chk[position + ec] = ec;
		nxt[position + ec] = state[ec];
	}

	if ( block_end > tblend )
		tblend = block_end;
}
/* stack1 - save states with only one out-transition to be processed later
 *
 * While there is room on the "one-transition" stack, the state is pushed
 * onto it for later processing by mk1tbl.  Once the stack is full, the
 * state is handed to mk1tbl immediately instead.
 */

void stack1( statenum, sym, nextstate, deflink )
int statenum, sym, nextstate, deflink;
{
	if ( onesp < ONE_STACK_SIZE - 1 ) {
		++onesp;
		onestate[onesp] = statenum;
		onesym[onesp] = sym;
		onenext[onesp] = nextstate;
		onedef[onesp] = deflink;
		return;
	}

	/* No room left; deal with it right now. */
	mk1tbl( statenum, sym, nextstate, deflink );
}
/* tbldiff - compute differences between two state tables
 *
 * "state" is the state array to be compared against the pr'th proto.
 * "pr" is both the proto's number and the index used to locate its saved
 * transition table in protsave[].
 *
 * For each equivalence class 1..numecs, "ext" receives SAME_TRANS where
 * "state" and the proto agree, and the entry from "state" where they
 * differ.  The number of differing entries is returned; this equals
 * "numecs" minus the number of SAME_TRANS entries written to "ext".
 */

int tbldiff( state, pr, ext )
int state[], pr, ext[];
{
	register int ec;
	register int ndiff = 0;
	register int *proto = &protsave[numecs * (pr - 1)];

	for ( ec = 1; ec <= numecs; ++ec ) {
		if ( proto[ec] == state[ec] )
			ext[ec] = SAME_TRANS;
		else {
			ext[ec] = state[ec];
			++ndiff;
		}
	}

	return ndiff;
}

1
usr.bin/lex/version.h Normal file
View File

@ -0,0 +1 @@
#define FLEX_VERSION "2.4.7"

199
usr.bin/lex/yylex.c Normal file
View File

@ -0,0 +1,199 @@
/* yylex - scanner front-end for flex */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header: /home/daffy/u0/vern/flex/RCS/yylex.c,v 2.10 93/09/16 20:31:48 vern Exp $ */
#include <ctype.h>
#include "flexdef.h"
#include "parse.h"
/* is_regex_operator - is "c" one of the operator characters that the
 * trace output echoes literally as a token and backslash-escapes when it
 * appears as a CHAR value?
 */

static int is_regex_operator( c )
int c;
{
	switch ( c ) {
	case '<': case '>': case '^': case '$': case '"':
	case '[': case ']': case '{': case '}': case '|':
	case '(': case ')': case '-': case '/': case '\\':
	case '?': case '.': case '*': case '+': case ',':
		return 1;

	default:
		return 0;
	}
}

/* yylex - scan for a regular expression token
 *
 * Returns the next token from flexscan().  Once end of input has been
 * seen, flexscan() is no longer called.  At end of input the result is 0,
 * except in section 1, where a "premature EOF" error is reported, the
 * section number is forced to 2 and SECTEND is returned.
 *
 * When "trace" is set, a rendering of each token is written to stderr;
 * in section 2 each line is prefixed with the upcoming rule number.
 */

int yylex()
{
	/* Set when the next traced token starts a new line of rules. */
	static int beglin = false;
	int toktype = eofseen ? EOF : flexscan();

	if ( toktype == EOF || toktype == 0 ) {
		eofseen = 1;

		if ( sectnum != 1 )
			toktype = 0;
		else {
			synerr( "premature EOF" );
			sectnum = 2;
			toktype = SECTEND;
		}
	}

	if ( ! trace )
		return toktype;

	if ( beglin ) {
		fprintf( stderr, "%d\t", num_rules + 1 );
		beglin = 0;
	}

	if ( is_regex_operator( toktype ) ) {
		(void) putc( toktype, stderr );
		return toktype;
	}

	switch ( toktype ) {
	case '\n':
		(void) putc( '\n', stderr );
		if ( sectnum == 2 )
			beglin = 1;
		break;

	case SCDECL:
		fputs( "%s", stderr );
		break;

	case XSCDECL:
		fputs( "%x", stderr );
		break;

	case WHITESPACE:
		(void) putc( ' ', stderr );
		break;

	case SECTEND:
		fputs( "%%\n", stderr );

		/* Turn beglin on so that rule numbers are written as the
		 * rules are echoed.  flexscan() has already assigned
		 * sectnum.
		 */
		if ( sectnum == 2 )
			beglin = 1;
		break;

	case NAME:
		fprintf( stderr, "'%s'", nmstr );
		break;

	case CHAR:
		if ( is_regex_operator( yylval ) )
			/* Escape it so it isn't mistaken for an operator. */
			fprintf( stderr, "\\%c", yylval );
		else if ( ! isascii( yylval ) || ! isprint( yylval ) )
			/* Unprintable: show as a three-digit octal escape. */
			fprintf( stderr, "\\%.3o", (unsigned int) yylval );
		else
			(void) putc( yylval, stderr );
		break;

	case NUMBER:
		fprintf( stderr, "%d", yylval );
		break;

	case PREVCCL:
		fprintf( stderr, "[%d]", yylval );
		break;

	case EOF_OP:
		fprintf( stderr, "<<EOF>>" );
		break;

	case 0:
		fprintf( stderr, "End Marker" );
		break;

	default:
		fprintf( stderr, "*Something Weird* - tok: %d val: %d\n",
			toktype, yylval );
		break;
	}

	return toktype;
}