/* mml-iopf.c -- Simple I/O, parsing, formatting for "mml" package

	Copyright 2003,2004,2005,2006
		by Mark E. Mallett, MV Communications, Inc.

	See the "LICENSE" file for terms.

Contains a number of functions randomly related to input, output,
  formatting, converting, and parsing.

Note: various functions have a single letter prefix indicating the
  input source, e.g. 'f' for FILE, 'i' for MML_IN.  In general I
  like to order the functions alphabetically in the file; in these
  cases the primary ordering is done without regard to the first
  letter, and all related functions are grouped together with the
  same preamble.

*/

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <ctype.h>
#include <time.h>

#include <mml/mml.h>

#include <mml/mml-err.h>
#include <mml/mml-ll.h>
#include <mml/mml-str.h>
#include <mml/mml-alloc.h>
#include <mml/mml-in.h>
#include <mml/mml-iopf.h>


/* Local definitions */

    /* Flags for the Bytemask table entries.  The tokenizers below
       (find_token_delimited and gettoken) put these in the table
       slot of each separator byte and test them while scanning.
    */
#define	BYTE_VIS	0x01		/* A visible separator */
#define	BYTE_INVIS	0x02		/* An invisible separator */


/* External data referenced */



/* External routines used */



/* Local data publicly available */



/* Local routines and forward references */

    /* Mark bytemask entries from a NUL-terminated string: the value
       is ORed into each entry named by a byte of the string.
    */
static	void	bytemask_str_mark PROTO( (UBYTE *strP, UBYTE value) );

    /* Set bytemask entries from a NUL-terminated string: the value
       replaces each entry named by a byte of the string.
    */
static	void	bytemask_str_set PROTO( (UBYTE *strP, UBYTE value) );

    /* Get a cheap and temporary MML_IN handle from a FILE pointer.
       Must be paired with rel_i_from_f().
    */
static	MML_IN	*get_i_from_f PROTO( (FILE *fP) );

    /* Release a MML_IN handle acquired from get_i_from_f() */
static	void	rel_i_from_f PROTO( (MML_IN *inP, FILE *fP) );

/* Private data */

    /* A table of flags, one entry for every possible byte value.
	The BYTE_XXX flags are defined above; not all values are
	useful in all contexts.  The table is assumed to be all zero
	whenever it is not in use: a function that uses it sets only
	the entries it needs and clears those same entries again
	before returning.  The theory is that this may be slightly
	faster than clearing every entry before each use.
    */
static	UBYTE	Bytemask[256];

    /* Since the Bytemask array is static and shared, it must be
       protected while it is in use.  The tokenizers take this lock
       around their set/scan/clear sequence.
    */
#ifdef	MML_PTHREADS
static	pthread_mutex_t	Bytemask_lock = PTHREAD_MUTEX_INITIALIZER;
#endif	/* MML_PTHREADS */


    /* Static mml-in structures.  We use insider knowledge of these
       things, so be sure to keep this in sync with mml-in.h .
       The initializers are positional, so their order must match the
       member order of the structure.  Every member listed here is
       assigned again by get_i_from_f() each time the handle is
       handed out; the values below are only placeholders.
    */
static	MML_IN	Priv_mml_in_f = {	/* Input handle for get_i_from_f */
	FALSE,			/* in_lescF: initialized dynamically */
	FALSE,			/* in_eolF: initialized dynamically */
	FALSE,			/* in_bolF: initialized dynamically */
	NULL,			/* in_streamP: initialized dynamically */
	0,			/* in_level: initialized dynamically */
	0,			/* in_clineN: initialized dynamically */
	0,			/* in_result: initialized dynamically */
	0			/* in_errno: initialized dynamically */
};


/* The single stream that sits under Priv_mml_in_f.  The initializers
   are positional (keep in sync with mml-in.h).  get_i_from_f() fills
   in is_pbc, is_handleP and is_lineN on each use; the other members
   keep the values given here.
*/
static	MML_INS	Priv_mml_ins_f = {	/* file stream for get_i_from_f */
	&Priv_mml_in_f,		/* is_inP: the input stack */
	NULL,			/* is_prevP: no other stream is chained */
	0,			/* is_pbc: initialized dynamically */
	in_h_file_getc,		/* is_getcP: routine to call */
	NULL,			/* is_finishP: finish function for handle */
	NULL,			/* is_handleP: initialized dynamically */
	NULL,			/* is_nameP: name, none */
	0,			/* is_lineN: initialized dynamically */
	FALSE			/* is_poolF: always FALSE */
};

    /* The i_from_f access has to be protected since the handle it
       hands out is static.  The lock is taken in get_i_from_f() and
       released in rel_i_from_f().
    */
#ifdef	MML_PTHREADS
static	pthread_mutex_t I_from_f_lock = PTHREAD_MUTEX_INITIALIZER;
#endif	/* MML_PTHREADS */

/*

*//* <x>getbline( <input>, bsP )

	Input a line of text from an input source.
	<x> is 'f' for FILE, 'i' for MML_IN.

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for the data
	
Returns :

	<value>		Number of characters read (-1 if EOF)

Notes :

	This just calls <x>getbtrec with our default record control
	flags for a line of text.

*/

int
fgetbline ARGLIST( ( fP, bsP ) )
   NFARG( FILE		*fP )		/* Source FILE stream */
    FARG( BSTR		*bsP )		/* Buffer that receives the line */
{
	int		lineC;		/* Count reported by fgetbtrec */

    /* A "line" is a text record read with the TXR_LINE control
       flags; no extended record info is supplied.
    */
    lineC = fgetbtrec( fP, bsP, TXR_LINE, NULL );

    return ( lineC );
}

int
igetbline ARGLIST( ( inP, bsP ) )
   NFARG( MML_IN	*inP )		/* Source input handle */
    FARG( BSTR		*bsP )		/* Buffer that receives the line */
{
	int		lineC;		/* Count reported by igetbtrec */

    /* A "line" is a text record read with the TXR_LINE control
       flags; no extended record info is supplied.
    */
    lineC = igetbtrec( inP, bsP, TXR_LINE, NULL );

    return ( lineC );
}
/*

*//* <x>getblinea( <input>, bsP )

	Input a line of text from an input source, appending to buffer.
	<x> is 'f' for FILE, 'i' for MML_IN

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for the data
	
Returns :

	<value>		Number of characters read (-1 if EOF)

Notes :

	This just calls <x>getbtreca with our default record control
	flags for a line of text.

*/

int
fgetblinea ARGLIST( ( fP, bsP ) )
   NFARG( FILE		*fP )		/* Source FILE stream */
    FARG( BSTR		*bsP )		/* Buffer the line is appended to */
{
	int		lineC;		/* Count reported by fgetbtreca */

    /* Appending variant: existing buffer content is kept and the
       TXR_LINE record is added after it.
    */
    lineC = fgetbtreca( fP, bsP, TXR_LINE, NULL );

    return ( lineC );
}

int
igetblinea ARGLIST( ( inP, bsP ) )
   NFARG( MML_IN	*inP )		/* Source input handle */
    FARG( BSTR		*bsP )		/* Buffer the line is appended to */
{
	int		lineC;		/* Count reported by igetbtreca */

    /* Appending variant: existing buffer content is kept and the
       TXR_LINE record is added after it.
    */
    lineC = igetbtreca( inP, bsP, TXR_LINE, NULL );

    return ( lineC );
}
/*

*//* <x>getblinec( <input>, bsP )

	Input a line of text from an input source, plus any continuation lines
	<x> is 'f' for FILE, 'i' for MML_IN

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for the data
	
Returns :

	<value>		Number of characters read (-1 if EOF)

Notes :

	This just calls <x>getbtrecc with our default record control
	flags for a line of text.

*/

int
fgetblinec ARGLIST( ( fP, bsP ) )
   NFARG( FILE		*fP )		/* Source FILE stream */
    FARG( BSTR		*bsP )		/* Buffer that receives the line(s) */
{
	int		lineC;		/* Count reported by fgetbtrecc */

    /* Continuation variant: hand off to the record reader that
       also gathers continuation records, using TXR_LINE flags.
    */
    lineC = fgetbtrecc( fP, bsP, TXR_LINE, NULL );

    return ( lineC );
}

int
igetblinec ARGLIST( ( inP, bsP ) )
   NFARG( MML_IN	*inP )		/* Source input handle */
    FARG( BSTR		*bsP )		/* Buffer that receives the line(s) */
{
	int		lineC;		/* Count reported by igetbtrecc */

    /* Continuation variant: hand off to the record reader that
       also gathers continuation records, using TXR_LINE flags.
    */
    lineC = igetbtrecc( inP, bsP, TXR_LINE, NULL );

    return ( lineC );
}
/*

*//* <x>gettkbline( <input>, bsP, tknsP );

	Input a line of text from an input source and tokenize it.
	<x> is 'f' for FILE, 'i' for MML_IN

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for string
	tknsP		Ptr to token pool object 
	
Returns :

	<value>		Number of characters read (-1 if EOF)
	*tknsP		<filled in with token info>

Notes :

	Excess input (up to a newline) is ignored (byte string
	   buffer will indicate an overflow)

*/

int
fgettkbline ARGLIST( ( fP, bsP, tknsP ) )
    NFARG( FILE		*fP )		/* Source FILE stream */
    NFARG( BSTR		*bsP )		/* Buffer that receives the line */
     FARG( TKNS		*tknsP )	/* Token pool to fill in */
{
	MML_IN		*tmpinP;	/* Temporary handle wrapping fP */
	int		readC;		/* Result from igettkbline */

    /* Wrap the FILE in the shared temporary MML_IN handle, do the
       work through the MML_IN flavor, then give the handle back.
    */
    tmpinP = get_i_from_f( fP );
    readC = igettkbline( tmpinP, bsP, tknsP );
    rel_i_from_f( tmpinP, fP );

    return ( readC );
}

int
igettkbline ARGLIST( ( inP, bsP, tknsP ) )
    NFARG( MML_IN	*inP )		/* Source input handle */
    NFARG( BSTR		*bsP )		/* Buffer that receives the line */
     FARG( TKNS		*tknsP )	/* Token pool to fill in */
{
	int		readC;		/* Count from reading the line */
	BOOL		tokenizedF;	/* Whether tkline() succeeded */

    /* Step 1: read one line into the byte string. */
    readC = igetbline( inP, bsP );

    /* Step 2: tokenize whatever is in the byte string now.  This is
       done regardless of the count returned by the read.
    */
    tokenizedF = tkline( bsP, tknsP );
    if ( !tokenizedF )
	error( "tkline failure in igettkbline" );

    return( readC );
}
/*

*//* find_token_delimited( strP, strL, tknbP, vsepP, isepP, tflags)

	Find a token in a string via delimiters

Accepts :

	strP		The string to look for the token in.
	strL		The length of the string.
	tknbP		Token boundary passed/return info (see notes)
	vsepP		"visible" separators -- chars that will terminate
			 and will also be returned as single-character tokens;
	isepP		"invisible" separators -- chars that will terminate
			  a token and will be deleted from the source string.
	tflags		Any relevant token-related flags

Returns :

	<value>		0 : if no token was found,
			>0: the index where the next token search would start.
	*tknbP		updated


Notes :

	Relevant flags in tflags:

	  TKF_NOSKIPLEAD will suppress skipping any leading invisible
	  separator characters before looking for a token.  Note that any
	  single trailing invisible separator will always be skipped
	  regardless.  The effect of NOT skipping multiple leading invisible
	  separators is to allow for empty tokens between these single
	  separator characters.

	  TKF_NOQUOTSTR will disable looking for tokens that are quoted
	  strings (either single or double quotes).

*/

int
find_token_delimited ARGLIST( ( strP, strL, tknbP, vsepP, isepP, tflags ) )
    NFARG( UBYTE	*strP )		/* String to look in */
    NFARG( int		strL )		/* Length of string */
    NFARG( TKNB		*tknbP )	/* Token boundaries */
    NFARG( UBYTE	*vsepP )	/* Visible separators */
    NFARG( UBYTE	*isepP )	/* Invisible separators */
     FARG( UWORD	tflags )	/* Token flags */
{
	UBYTE		ch;		/* Character */
	UBYTE		quote;		/* Quoting character */
	int		bX;		/* Byte index */
	BOOL		emptyF;		/* Token is empty */

    /* Trap bad inputs and get start point.  The search starts at
       tknbP->tb_startX.  This is done before the lock is taken, so
       the early exit needs no cleanup.
    */
    if ( ( strP == NULL ) ||
         ( tknbP == NULL ) ||
	 ( ( bX = tknbP->tb_startX ) < 0 ) ||
	 ( bX >= strL ) )
	return( FALSE );

    /* Preprocess the separator arrays by marking their values in our
       Bytemask array.  Note that they must be unset when we're done.
       The visible set is stored first and the invisible flag is ORed
       in afterwards, so a byte listed in both sets carries both flags.
    */
    if ( mml_mutex_lock( &Bytemask_lock ) != 0 )
	warning( "find_token_delimited: can't get Bytemask_lock" );
    bytemask_str_set( vsepP, BYTE_VIS );
    bytemask_str_mark( isepP, BYTE_INVIS );

    emptyF = TRUE;			/* Token is empty so far */

    /* Skip past any leading "invisible" separators if we should */
    if ( ( tflags & TKF_NOSKIPLEAD ) == 0 ) {
	while ( bX < strL ) {
	    ch = strP[bX];
	    if ( ( Bytemask[ch] & BYTE_INVIS ) == 0 )
		break;
	     ++bX;
	}
    }

    /* Check for quoted character string */
    if ( ( ( tflags & TKF_NOQUOTSTR ) == 0 ) &&
           ( bX < strL ) &&
           ( ( ( ch = strP[bX] ) == '"' ) || ( ch == '\'' ) ) ) {
	quote = ch;
	++bX;

	/* An opening quote always yields a token, even a zero-length
	   one (gettoken() treats quotes the same way).  Without this,
	   an empty quoted string at the very end of the input would be
	   reported as "no token" by the final test below.
	*/
	emptyF = FALSE;
    }
    else
	quote = NUL;

    /* Loop to get token characters.  The token starts at the current
       index (just past any opening quote).
    */
    for( tknbP->tb_startX = bX; ; ++bX ) {
	/* If we ran off the end, mark the end point. */
	if ( bX >= strL ) {
	    tknbP->tb_endX = strL;
	    break;
	}

	ch = strP[bX];

	/* Check for terminating quote.  While inside quotes the
	   separator tests below are not applied.
	*/
	if ( quote != NUL ) {
	    if ( ch == quote ) {
		quote = NUL;
		tknbP->tb_endX = bX++;	/* Remember end, skip quote */
		break;
	    }
	}

	/* Check for delimiter */
	else if ( Bytemask[ ch ] != 0 ) {
	    /* If it's invisible, end the token here and step past
	       this one separator byte.
	    */
	    if ( ( Bytemask[ch] & BYTE_INVIS ) != 0 )
		tknbP->tb_endX = bX++;
	    else {
		/* If visible, take it if it's the only char;
		   otherwise leave it for the next search.
		*/
		if ( emptyF ) {
		    emptyF = FALSE;
		    tknbP->tb_endX = ++bX;
		}
		else
		    tknbP->tb_endX = bX;
	    }
	    break;
	}

	/* The token will have this character */
	emptyF = FALSE;
    }

    /* Undo the separator array marks for next time */

/* On some systems, this might be faster.  */
/*    memset( &Bytemask[0], 0, sizeof(Bytemask) );   */

    bytemask_str_set( vsepP, 0 );
    bytemask_str_set( isepP, 0 );

    if ( mml_mutex_unlock( &Bytemask_lock ) != 0 )
	warning( "find_token_delimited: can't release Bytemask_lock" );


    /* Return the next index if we either got a token or there is more
       remaining in the source string.  i.e., return 0 if there are no
       more tokens.
    */
    if ( emptyF && ( bX >= strL ) )
	return ( 0 );
    else
	return ( bX );
}
/*

*//* gettoken( bsP, tkbsP, vsepP, isepP, sisepF )

	Extract a token from a string

Accepts :

	bsP		Source string.  We'll look for the next token
			 starting at the "current index" in this byte
			 string (i.e. bs_bX).
	tkbsP		Buffer for token.  New token characters will be
			 appended to any string already in this buffer.
	vsepP		"visible" separators -- chars that will terminate
			 and will also be returned as single-character tokens;
	isepP		"invisible" separators -- chars that will terminate
			  a token and will be deleted from the source string.
	sisepF		Whether to take isep chars singly (see below)

Returns :

	<value>		TRUE if a token was extracted,
			FALSE if no more tokens.
	*tkbsP		token appended


Notes :

	set "sisepF" if any single character in "isepP" will be taken
	as a separator character.  Otherwise (sisepF FALSE) any sequence
	of adjacent characters will be taken as a separator.  e.g. set
	to TRUE if you expect to have null tokens separated by single
	separator characters.

	More explicitly:  each time gettoken() is called, it skips past all
	leading "isepP" characters before looking for a token.  Before
	returning, any single trailing "isepP" character is skipped.
	Setting "sisepF" to TRUE will inhibit the pre-skipping of isepP
	characters, thus only the trailing separator will be skipped.

	Be careful when remembering returned strings when reusing the token
	buffer.  The token buffer can be expanded and realloced, which
	invalidates any remembered buffer location.

*/

BOOL
gettoken ARGLIST( ( bsP, tkbsP, vsepP, isepP, sisepF ) )
    NFARG( BSTR		*bsP )	/* Source string */
    NFARG( BSTR		*tkbsP )	/* Where to put token */
    NFARG( UBYTE	*vsepP )	/* Visible separators */
    NFARG( UBYTE	*isepP )	/* Invisible separators */
     FARG( BOOL		sisepF )	/* "single invisible separator" flag */
{
	UBYTE		ch;		/* Character */
	UBYTE		quote;		/* Quoting character */
	BOOL		emptyF;		/* Token is empty */
	UBYTE		*bufP;		/* Ptr to input buffer */

    /* Trap NULL source string pointer (or a source with no data
       buffer).  This happens before the lock is taken, so the early
       exit needs no cleanup.
    */
    if ( ( bsP == NULL || ( (bufP = bsP->bs_bP) == NULL ) ) )
	return( FALSE );

    /* Preprocess the separator arrays by marking their values in our
       Bytemask array.  Note that they must be unset when we're done.
       The visible set is stored first and the invisible flag is ORed
       in afterwards, so a byte listed in both sets carries both flags.
    */
    if ( mml_mutex_lock( &Bytemask_lock ) != 0 )
	warning( "gettoken: can't get Bytemask_lock" );
    bytemask_str_set( vsepP, BYTE_VIS );
    bytemask_str_mark( isepP, BYTE_INVIS );

    emptyF = TRUE;			/* Token is empty so far */

    /* Strip leading "invisible" separators if we should.  The scan
       position is the source's own bs_bX, which is advanced in place
       throughout this function.
    */
    if ( ! sisepF ) {
	while ( bsP->bs_bX < bsP->bs_bC ) {
	    ch = bufP[bsP->bs_bX];
	    if ( ( Bytemask[ch] & BYTE_INVIS ) == 0 )
		break;
	     ++bsP->bs_bX;
	}
    }

    /* Check for quoted character string (single or double quote) */
    if ( ( bsP->bs_bX < bsP->bs_bC ) &&
         ( ( ( ch = bufP[bsP->bs_bX] )== '"' ) || ( ch == '\'' ) ) ) {
	quote = ch;
	++bsP->bs_bX;
	emptyF = FALSE;			/* Never empty, even if zero-length */
    }
    else
	quote = NUL;

    /* Loop to get token characters */
    for( ; bsP->bs_bX < bsP->bs_bC; ++bsP->bs_bX ) {
	ch = bufP[bsP->bs_bX];

	/* Check for escaped character.  A backslash that is not the
	   last byte makes the following byte literal: it skips the
	   quote and delimiter tests and is added to the token.  A
	   backslash that IS the last byte fails this test and is
	   handled by the branches below like any other byte.
	*/
	if ( ( ch == '\\' ) && ( bsP->bs_bX < (bsP->bs_bC - 1) ) )
	    ch = bufP[ ++bsP->bs_bX ];  /* Use the escaped char */

	/* Check for terminating quote.  Inside quotes the delimiter
	   test is not applied; a non-matching byte falls through to
	   be added to the token.
	*/
	else if ( quote != NUL ) {
	    if ( ch == quote ) {
		quote = NUL;
		++bsP->bs_bX;
		break;
	    }
	}

	/* Check for delimiter */
	else if ( Bytemask[ ch ] != 0 ) {
	    /* If it's invisible, just skip it */
	    if ( ( Bytemask[ch] & BYTE_INVIS ) != 0 )
		++bsP->bs_bX;
	    else {
		/* If visible, take it if it's the only char.
		   Otherwise it is left in the source for the
		   next call.
		*/
		if ( emptyF ) {
		    emptyF = FALSE;
		    ++bsP->bs_bX;
		    bstr_putb( tkbsP, ch );
		}
	    }
	    break;
	}

	/* Add the character to the token */
	bstr_putb( tkbsP, ch );
	emptyF = FALSE;
    }

    if ( !emptyF )
	bstr_nulterm( tkbsP );		/* NUL-terminate the token */


    /* Undo the separator array marks for next time */

/* On some systems, this might be faster.  */
/*    memset( &Bytemask[0], 0, sizeof(Bytemask) );   */

    bytemask_str_set( vsepP, 0 );
    bytemask_str_set( isepP, 0 );

    if ( mml_mutex_unlock( &Bytemask_lock ) != 0 )
	warning( "gettoken: can't release Bytemask_lock" );

    /* Return TRUE if we either got a token or there is more remaining
       in the source string, i.e., return FALSE if there are no more
       tokens.
    */
    return ( ! ( emptyF && ( bsP->bs_bX >= bsP->bs_bC ) ) );
}
/*

*//* <x>getbtrec( <input>, bsP, rctl, tiP )

	Input a "record" of text from an input source.
	<x> is 'f' for FILE, 'i' for MML_IN

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for the data
	rctl		Control flagword for reading the record
	tiP		Extended text record info
	
Returns :

	<value>		Number of characters read (-1 if EOF)

Notes :

	Calls <x>getbtreca (so see that), but first resets the output
	buffer so that the new line overwrites the buffer rather than
	being appended to any existing info in the buffer.

*/

int
fgetbtrec ARGLIST( ( fP, bsP, rctl, tiP ) )
   NFARG( FILE		*fP )		/* Source FILE stream */
   NFARG( BSTR		*bsP )		/* Buffer that receives the record */
   NFARG( UWORD		rctl )		/* Record control flags */
    FARG( TXRINFO	*tiP )		/* Extended text record info */
{
	int		recC;		/* Count reported by fgetbtreca */

    /* Throw away whatever the buffer held so that the appending
       reader effectively overwrites it.
    */
    bsP->bs_bC = 0;

    recC = fgetbtreca( fP, bsP, rctl, tiP );
    return( recC );
}

int
igetbtrec ARGLIST( ( inP, bsP, rctl, tiP ) )
   NFARG( MML_IN	*inP )		/* Source input handle */
   NFARG( BSTR		*bsP )		/* Buffer that receives the record */
   NFARG( UWORD		rctl )		/* Record control flags */
    FARG( TXRINFO	*tiP )		/* Extended text record info */
{
	int		recC;		/* Count reported by igetbtreca */

    /* Throw away whatever the buffer held so that the appending
       reader effectively overwrites it.
    */
    bsP->bs_bC = 0;

    recC = igetbtreca( inP, bsP, rctl, tiP );
    return( recC );
}
/*

*//* <x>getbtreca( <input>, bsP, rctl, tiP )

	Input a "record" of text from an input source, appending to buffer.
	<x> is 'f' for FILE, 'i' for MML_IN

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for the data
	rctl		Control flagword for reading the record
	tiP		Extended text record info
	
Returns :

	<value>		Number of characters read (-1 if EOF)

Notes :

	Bytes are stored into the byte string object, which may
	indicate an overflow condition if it fills up.  However,
	a trailing NUL character will always be stored into the
	byte string even if there is an overflow (remember there's
	always room for a termination byte).

	Input bytes past an overflow condition will be discarded
	unless TXR_END_FULL is set as a termination option.

	The notion of an input record is controlled by the flagwords
	passed.  (See the .h file for bit definitions.)

	Note that this routine is not extremely sophisticated: it just
	has a few ways of recognizing a line termination.  We also
	accept a pointer to an extended definition object, but that
	object currently has no implementation.  It's just a placeholder
	for any future use.

*/

int
fgetbtreca ARGLIST( ( fP, bsP, rctl, tiP ) )
   NFARG( FILE		*fP )		/* Source FILE stream */
   NFARG( BSTR		*bsP )		/* Buffer the record is appended to */
   NFARG( UWORD		rctl )		/* Record control flags */
    FARG( TXRINFO	*tiP )		/* Extended text record info */
{
	MML_IN		*tmpinP;	/* Temporary handle wrapping fP */
	int		recC;		/* Result from igetbtreca */

    /* Borrow the shared temporary MML_IN handle for this FILE, let
       the MML_IN flavor do the reading, then return the handle.
    */
    tmpinP = get_i_from_f( fP );
    recC = igetbtreca( tmpinP, bsP, rctl, tiP );
    rel_i_from_f( tmpinP, fP );

    return ( recC );
}

int
igetbtreca ARGLIST( ( inP, bsP, rctl, tiP ) )
   NFARG( MML_IN	*inP )		/* Input ptr */
   NFARG( BSTR		*bsP )		/* Byte string ptr */
   NFARG( UWORD		rctl )		/* Control flags for text record */
    FARG( TXRINFO	*tiP )		/* Extended text record info (unused) */
{
	int		ch;		/* Input character */
	int		cC;		/* Character count */

    /* Loop inputting characters.  cC counts the characters that were
       passed to the byte string; a record terminator that ends the
       loop is consumed but not counted.
    */
    for ( cC = 0; ( ch = in_char( inP ) ) != EOF; ++cC ) {
	/* Process depending on the char */
	if ( ch == '\r' ) {
	    /* Got a carriage return (CR).
	       If termination can be CRLF, we have to peek ahead
	       to check for the LF.
	    */
	    if ( ( rctl & TXR_END_CRLF ) != 0 ) {
		ch = in_char( inP );
		if ( ch == '\n' )
		    break;
		/* Not LF-- put it back */
		if ( ch != EOF )
		    in_charback( inP, ch );
		ch = '\r';		/* Back to handling the CR itself */
	    }

	    /* if naked CR is OK, be done. */
	    if ( ( rctl & TXR_END_CR ) != 0 )
		break;
	}

	else if ( ch == '\n' ) {
	    /* If naked LF is OK, we're done */
	    if ( ( rctl & TXR_END_LF ) != 0 )
		break;
	}

	/* If we're here, this is not the end of the record.
	   Store the character in the BSTR and continue on.
	*/
	bstr_putb( bsP, ch );

	/* If the buffer's full and that's an end condition, be done.
	   The byte just stored must still be counted: leaving via
	   "break" skips the loop's own increment, which used to make
	   the returned count one too small in this case.
	*/
	if ( ( bsP->bs_bC == bsP->bs_bM ) &&
	     ( ( rctl & TXR_END_FULL ) != 0 ) ) {
	    ++cC;
	    break;
	}
    }

    bstr_nulterm( bsP );		/* Make sure it's NUL-terminated */

    /* Report EOF only when nothing at all was read before it */
    if ( ( cC == 0 ) && ( ch == EOF ) )
	cC = -1;

    return( cC );
}
/*

*//* <x>getbtrecc( <input>, bsP, rctl, tiP )

	Input a "record" of text from an input source plus any continuation records
	<x> is 'f' for FILE, 'i' for MML_IN

Accepts :

	<input>		Ptr to input handle, per <x>
	bsP		Ptr to byte string object for the data
	rctl		Control flagword for reading the record
	tiP		Extended text record info
	
Returns :

	<value>		Number of characters read (-1 if EOF)

Notes :

	Inputs 'records' until one does not end with a continuation
	character (defined as backslash).

	Uses <x>getbtrec and <x>getbtreca-- see those.


*/

int
fgetbtrecc ARGLIST( ( fP, bsP, rctl, tiP ) )
   NFARG( FILE		*fP )		/* Source FILE stream */
   NFARG( BSTR		*bsP )		/* Buffer that receives the record(s) */
   NFARG( UWORD		rctl )		/* Record control flags */
    FARG( TXRINFO	*tiP )		/* Extended text record info */
{
	MML_IN		*tmpinP;	/* Temporary handle wrapping fP */
	int		recC;		/* Result from igetbtrecc */

    /* Borrow the shared temporary MML_IN handle for this FILE, let
       the MML_IN flavor do the reading, then return the handle.
    */
    tmpinP = get_i_from_f( fP );
    recC = igetbtrecc( tmpinP, bsP, rctl, tiP );
    rel_i_from_f( tmpinP, fP );

    return ( recC );
}

int
igetbtrecc ARGLIST( ( inP, bsP, rctl, tiP ) )
   NFARG( MML_IN	*inP )		/* Input ptr */
   NFARG( BSTR		*bsP )		/* Byte string ptr */
   NFARG( UWORD		rctl )		/* Control flags for text record */
    FARG( TXRINFO	*tiP )		/* Extended text record info */
{
	int		cC;		/* Count for one continuation record */
	int		totalcC;	/* Running total to return */
	int		keptC;		/* Buffer length before an append */

    /* Get the first line (this overwrites the buffer) */
    totalcC = igetbtrec( inP, bsP, rctl, tiP );
    if ( totalcC < 0 )
	return ( totalcC );

    /* Loop until we get a line that doesn't end with '\\' */
    for ( ; ; ) {
	if ( ( bsP->bs_bC <= 0 ) ||
	     ( bsP->bs_bP[ bsP->bs_bC -1 ] != '\\' ) )
	    break;

	/* Drop the continuation backslash from buffer and count */
	--bsP->bs_bC;
	--totalcC;
	keptC = bsP->bs_bC;

	/* Append the next record after what we have so far */
	cC = igetbtreca( inP, bsP, rctl, tiP );
	if ( cC < 0 )
	    break;

	totalcC += cC;

	/* If nothing was appended (e.g. an empty continuation line),
	   the last byte in the buffer belongs to the previous record.
	   Testing it again could strip a literal backslash and
	   swallow yet another record, so stop here.
	*/
	if ( bsP->bs_bC <= keptC )
	    break;
    }

    return ( totalcC );
}
/*

*//* tkline( bsP, tknsP )

	Tokenize a line of text

Accepts :

	bsP		Ptr to byte string object containing string
	tknsP		Ptr to token pool object
	
Returns :

	<value>		TRUE if OK
			FALSE if some fatal error

	*tknsP		<filled in with token info>

Notes :

    Starts from the beginning of a line and tokenizes it.

    Also see tkline_more()

*/

BOOL
tkline ARGLIST( ( bsP, tknsP ) )
    NFARG( BSTR		*bsP )		/* Line to tokenize */
     FARG( TKNS		*tknsP )	/* Token pool to fill in */
{
	BOOL		okF;		/* Result from tkline_more */

    /* Start over: rewind the source's fetch index, forget any
       tokens already in the pool, and empty the pool's byte buffer.
    */
    bsP->bs_bX = 0;
    tknsP->tp_tC = 0;
    tknsP->tp_bsP->bs_bC = 0;

    /* The actual tokenizing is shared with tkline_more() */
    okF = tkline_more( bsP, tknsP );
    return( okF );
}
/*

*//* tkline_more( bsP, tknsP )

	Tokenize more of a line of text

Accepts :

	bsP		Ptr to byte string object containing string
	tknsP		Ptr to token pool object
	
Returns :

	<value>		TRUE if OK
			FALSE if some fatal error

	*tknsP		<filled in with token info>

Notes :

    Tokenizes from where a previous operation may have left off.  e.g.
    a caller might set "token max" to 1 just to get the first token,
    and then want to tokenize the rest of the line after that.

    Tokens are stored into a byte buffer which may change (via reallocations
    and all).  That's one reason that the tokens are described via indexes
    and not via pointers.

*/

BOOL
tkline_more ARGLIST( ( bsP, tknsP ) )
    NFARG( BSTR		*bsP )		/* Line to tokenize */
     FARG( TKNS		*tknsP )	/* Token pool to fill in */
{
	BSTR		*poolbsP;	/* Byte buffer holding token text */
	int		tokstartX;	/* Where the next token will begin */

    poolbsP = tknsP->tp_bsP;

    /* Keep pulling tokens until the pool's maximum is reached or
       the source has no more to give.
    */
    while ( tknsP->tp_tC < tknsP->tp_max ) {
	/* The next token is appended at the buffer's current end */
	tokstartX = poolbsP->bs_bC;

	if ( !gettoken( bsP, poolbsP,
			tknsP->tp_vsepP, tknsP->tp_isepP,
			tknsP->tp_sisepF ) )
	    break;			/* No more tokens */

	/* Record the token's bounds as indexes into the buffer */
	if ( !tkns_reg( tknsP, tokstartX, poolbsP->bs_bC ) )
	    return ( FALSE );

	/* Store a real NUL byte after the token so that the next
	   token begins past it (bstr_nulterm would not advance the
	   store point).
	*/
	bstr_putb( poolbsP, NUL );

	++tknsP->tp_tC;
    }

    return ( TRUE );
}
/*

*//* tkns_new( bsP, max, vsepP, isepP, sisepF )

	Creates a new "tokens" object

Accepts :

	bsP		Byte string to use for token storage

	max		Initial token max value

	vsepP		Pointer to visible separator list initially
			 associated with this token pool (see gettoken)

	isepP		Pointer to invisible separator list initially
			 associated with this token pool (see gettoken)

	sisepF		Value of "single isep" flag initially associated
			 with this token pool (see gettoken)

Returns :

	<value>		Ptr to new token pool object
			NULL if failure.

Notes :


*/

TKNS *
tkns_new ARGLIST( (bsP, max, vsepP, isepP, sisepF ) )
   NFARG( BSTR		*bsP )		/* Byte string for storage */
   NFARG( int		max )		/* Initial "max tokens" value */
   NFARG( UBYTE		*vsepP )	/* Visible separators */
   NFARG( UBYTE		*isepP )	/* Invisible separators */
    FARG( BOOL		sisepF )	/* "single invisible separator" flag */
{
	TKNS		*poolP;		/* The pool being built */

    /* First the pool object itself */
    poolP = (TKNS *)emalloc( "tkns_new", "new pool object", sizeof( TKNS ) );
    if ( poolP == NULL )
	return ( NULL );

    /* Then the bounds array: 20 entries to start with, or fewer
       when a positive max says that many can never be needed.
    */
    poolP->tp_tL = 20;
    if ( max > 0 ) {
	if ( poolP->tp_tL > max )
	    poolP->tp_tL = max;
    }

    poolP->tp_tbP = (TKNB *)emalloc( "tkns_new", "initial bounds array",
				    poolP->tp_tL * sizeof(TKNB) );
    if ( poolP->tp_tbP == NULL ) {
	/* No array, so the pool object is of no use either */
	dealloc( poolP );
	return ( NULL );
    }

    /* Fill in the caller-supplied settings */
    poolP->tp_bsP = bsP;
    poolP->tp_vsepP = vsepP;
    poolP->tp_isepP = isepP;
    poolP->tp_sisepF = sisepF;

    /* No tokens yet; a negative max is stored as zero */
    poolP->tp_tC = 0;
    if ( max > 0 )
	poolP->tp_max = max;
    else
	poolP->tp_max = 0;

    return ( poolP );
}
/*

*//* tkns_reg( tknsP, startX, endX )

	Register a new token into the token pool

Accepts :

	tknsP		Ptr to token pool object
	startX		Buffer offset to beginning of token
	endX		Buffer offset to byte following token

Returns :

	<value>		TRUE if OK
			FALSE if failure.

Notes :

	The token has already been placed into the token buffer at this
	point, probably with a terminating NUL.  This routine adds the
	new information to the token bounds array in the token pool.

*/

BOOL
tkns_reg ARGLIST( (tknsP, startX, endX) )
   NFARG( TKNS		*tknsP )	/* Ptr to token pool */
   NFARG( int		startX )	/* Where the token starts */
    FARG( int		endX )		/* Offset of byte after end of token */
{
	TKNB		*grownP;	/* Result of the reallocation */

    /* The new entry goes into slot tp_tC; the caller advances tp_tC
       afterwards.  Grow the bounds array first if that slot does not
       exist yet.
    */
    if ( tknsP->tp_tC == tknsP->tp_tL ) {
	/* Need to grow the bounds array by 20 entries.  The result
	   goes into a temporary and the pool is updated only on
	   success, so a failed reallocation leaves the pool's array
	   pointer and length consistent.
	   NOTE(review): assumes erealloc() behaves like realloc() and
	   leaves the old block valid on failure -- confirm in mml-alloc.
	*/
	grownP = (TKNB *)erealloc( "tkline_more", "bounds array",
				     tknsP->tp_tbP,
				     ( tknsP->tp_tL + 20 ) * sizeof( TKNB ) );
	if ( grownP == NULL )
	    return ( FALSE );

	tknsP->tp_tbP = grownP;
	tknsP->tp_tL += 20;
    }

    tknsP->tp_tbP[ tknsP->tp_tC ].tb_startX = startX;
    tknsP->tp_tbP[ tknsP->tp_tC ].tb_endX = endX;

    return ( TRUE );
}


/***********************************************************************
 *                                                                     *
 *                     Internal support routines                       *
 *                                                                     *
 ***********************************************************************/

/*

*//* bytemask_str_mark( strP, value )

	Mark bytemask entries corresponding to bytes in a string

Accepts :

	strP		NUL-terminated string containing the bytes to set
	value		The value to add to corresponding byte values

Returns :

	<nothing>

Notes :

	Marks the Bytemask entry of every byte that appears in strP
	  with 'value'.  "Marking" means that the new value is ORed
	  in with the old value.

	Note that the Bytemask array is static and must be protected by the
	  caller as a critical resource if pthread support is enabled.

*/

void
bytemask_str_mark ARGLIST( ( strP, value ) )
    NFARG( UBYTE	*strP )		/* List of byte values to mark */
     FARG( UBYTE	value )		/* The flag bits to OR in */
{
	UBYTE		*curP;		/* Walks the list */

    /* A missing list simply marks nothing */
    if ( strP == NULL )
	return;

    /* Each byte up to the terminating NUL names one table entry */
    for ( curP = strP; *curP != NUL; ++curP )
	Bytemask[ *curP ] |= value;
}
/*

*//* bytemask_str_set( strP, value )

	Set bytemask entries corresponding to bytes in a string

Accepts :

	strP		NUL-terminated string containing the bytes to set
	value		The value to set for corresponding byte values

Returns :

	<nothing>

Notes :

	Sets the Bytemask entry of every byte that appears in strP
	  to 'value', replacing the old value.

	Note that the Bytemask array is static and must be protected by the
	  caller as a critical resource if pthread support is enabled.

*/

void
bytemask_str_set ARGLIST( ( strP, value ) )
    NFARG( UBYTE	*strP )		/* List of byte values to set */
     FARG( UBYTE	value )		/* The value to store */
{
	UBYTE		*curP;		/* Walks the list */

    /* A missing list simply sets nothing */
    if ( strP == NULL )
	return;

    /* Each byte up to the terminating NUL names one table entry */
    for ( curP = strP; *curP != NUL; ++curP )
	Bytemask[ *curP ] = value;
}
/*

*//* get_i_from_f( fP )

	Construct a quick&dirty temporary MML_IN handle from a FILE pointer

Accepts :

	fP		FILE handle

Returns :

	<value>		Pointer to a temporary MML_IN handle.

Notes :

	Uses a static MML_IN handle that is built with insider knowledge
	of how the handle should be made.   I suppose this is dangerous
	because it has to know about the MML_IN internals.  This
	warning is noted in mml-in.h .

*/

static MML_IN *
get_i_from_f ARGLIST( (fP) )
    FARG( FILE		*fP )		/* FILE to wrap */
{
	MML_IN		*inP;		/* The (static) input handle */
	MML_INS		*insP;		/* The (static) stream under it */

    /* Both structures are static, so take the lock before touching
       them.  It stays held until rel_i_from_f() is called.
    */
    if ( mml_mutex_lock( &I_from_f_lock ) != 0 )
	warning( "get_i_from_f: can't get I_from_f_lock" );

    inP = &Priv_mml_in_f;
    insP = &Priv_mml_ins_f;

    /* Per-use state of the stream */
    insP->is_handleP = fP;		/* Read from this FILE */
    insP->is_pbc = -1;			/* Nothing pushed back yet */
    insP->is_lineN = 0;			/* Line number */

    /* Per-use state of the input handle */
    inP->in_streamP = insP;		/* The one and only stream */
    inP->in_level = 1;			/* Stream level */
    inP->in_clineN = 1;			/* Line number */
    inP->in_lescF = FALSE;		/* Last char was not escaped */
    inP->in_eolF = FALSE;		/* Not end of line */
    inP->in_bolF = TRUE;		/* Probably beginning of line */
    inP->in_result = IOR_NONE;		/* Result */
    inP->in_errno = 0;			/* errno */

    return ( inP );
}
/*

*//* rel_i_from_f( inP, fP )

	Release MML_IN handle previously accessed via get_i_from_f

Accepts :

	inP		The input handle
	fP		FILE handle

Returns :

	<nothing>

Notes :

	See other warnings about insider knowledge of mml-in structures

*/

static void
rel_i_from_f ARGLIST( (inP, fP) )
   NFARG( MML_IN	*inP )		/* Handle from get_i_from_f */
    FARG( FILE		*fP )		/* The FILE it was made from */
{
	MML_INS		*insP;		/* Stream under the handle */

    /* A character still sitting in the stream's push-back slot was
       already taken from the FILE, so it has to be handed back to
       the FILE or it would be lost.
    */
    insP = inP->in_streamP;
    if ( insP != NULL ) {
	if ( insP->is_pbc >= 0 )
	    ungetc( insP->is_pbc, fP );
    }

    /* Done with the static handle: release exclusive access */
    if ( mml_mutex_unlock( &I_from_f_lock ) != 0 )
	warning( "rel_i_from_f: can't release I_from_f_lock" );
}
