/* mml-str.h -- definitions for the mml "str" module

	Copyright 2003,2004,2005,2006,2007
		by Mark E. Mallett, MV Communications, Inc.

	See the "LICENSE" file for terms.

Stuff strictly relating to character strings, including storage,
  copying, comparing, and even some other things that you might
  think should go in the "iopf" module.

*/


#ifndef	H_MML_STR		/* For multiple inclusions */
#define	H_MML_STR

#include <sys/types.h>
#include <stdarg.h>
#include <regex.h>


    /* Number of matches/submatches to save from a pattern match.
       NOTE(review): presumably this bounds the part number accepted
       by strmatched_part() -- confirm against the implementation.
    */
#define	MATCHKEEP	10

#define	BSTR_MAX	10000	/* The default maximum byte string length
				   (used if "0" is given; presumably as
				   the "max" argument when a BSTR is
				   created -- confirm)
				*/

#define	BSTR_CHUNK	20	/* Byte string chunk size -- the buffer is
				   expanded by this number of bytes at a time
				*/

#define	BSTR_SLOP	2	/* Amount of "slop" space added to the
				   byte string buffer.  The bs_bL member
				   comment in the BSTR structure explains
				   what the extra bytes are for.
				*/

   /* String match flagbits.  These are single-bit values meant to be
      OR-ed together (presumably into the "mflags" argument of the
      match functions declared below -- confirm).
   */
#define	MF_IGNCASE	0x0001	/* Ignore case */
#define	MF_SUBSTR	0x0002	/* Match substring */
#define	MF_GREEDY	0x0004	/* Left-greedy wildcard matching */


   /* String quoting flagbits (flags and styles).  The QF_ bits say
      which kind of special characters need quoting; the QS_ bit
      selects how the quoting is written.  Both kinds share one
      flagword, so the style bit is kept out of the low-order range.
   */
#define	QF_REGEX	0x0001	/* Needs quoting for regex */
#define	QF_WILD		0x0002	/* Needs quoting for wildcard */
#define	QF_DQUOTE	0x0004	/* Needs quoting for doublequote */

#define	QS_QOCTAL	0x8000	/* Style: backslash followed by 3-digit octal */



    /* Yield the address of the BSTR that backs a REFSTR: follow the
       REFSTR's base pointer and take the address of the BSTR embedded
       in that base structure.  rsP is evaluated exactly once.
    */
#define	refstr_bstr( rsP )	( &(rsP)->rs_baseP->bbs_bstr )

    /* Various ESTR access operations.

       NOTE: Do not invoke these with any argument that will
       produce a side effect, since the argument may be accessed
       more than once.  In particular, don't pass a function
       that returns an ESTR, as that function may be called
       multiple times.
    */

    /* Peek at the byte at offset esX of an ESTR without advancing.

       The inner conditional decides whether esX is at the end of the
       string: for a NUL-terminated ESTR (es_len == -1) that means the
       byte at esX is NUL, for a counted ESTR it means esX has reached
       es_len.  At the end the macro yields EOF, otherwise the byte
       itself.  Both arguments are evaluated more than once.
    */
#define	estr_bytex(esP, esX) ( \
    ( (esP)->es_len == -1 \
	? ( (esP)->es_strP[(esX)] == NUL ) \
	: ( (esX) >= (esP)->es_len ) ) \
      ? EOF \
      : (esP)->es_strP[(esX)] )


    /* Return the length of an ESTR, as an int.

       A non-negative es_len is the stored (counted) length and is
       returned as is; any negative es_len means the string is
       NUL-terminated, so its length is measured with strlen().

       The strlen() result is cast to int so that both arms of the
       conditional have the same type.  Without the cast the usual
       arithmetic conversions turn the whole expression into a size_t,
       which silently breaks signed comparisons (e.g. a difference of
       two lengths tested against 0) and "%d" formatting.

       The including file must have <string.h> in scope.  esP is
       evaluated more than once, so it must not have side effects.
    */
#define	estr_len( esP ) ( \
    (esP)->es_len >= 0 ? (esP)->es_len : \
	(int)strlen( (const char *)(esP)->es_strP ) )

    /* Fetch the byte at offset esX of an ESTR and step esX past it.

       The inner conditional decides whether esX is at the end of the
       string: for a NUL-terminated ESTR (es_len == -1) that means the
       byte at esX is NUL, for a counted ESTR it means esX has reached
       es_len.  At the end the macro yields EOF and leaves esX alone;
       otherwise it yields the byte and post-increments esX.

       esX must be a simple lvalue (i.e. a variable), since it is both
       read and incremented.  esP is evaluated more than once.
    */
#define	estr_nextx(esP, esX) ( \
    ( (esP)->es_len == -1 \
	? ( (esP)->es_strP[(esX)] == NUL ) \
	: ( (esX) >= (esP)->es_len ) ) \
      ? EOF \
      : (esP)->es_strP[(esX)++] )


    /* The kinds of string storage an ESTR can encapsulate.  The
       numeric values are spelled out; they are the same ones the
       compiler would assign by default.
    */
typedef enum {
	ES_NUL    = 0,		/* Plain nul-terminated string */
	ES_COUNT  = 1,		/* Counted string */
	ES_BSTR   = 2,		/* A BSTR object */
	ES_REFSTR = 3		/* A REFSTR object */
}  ESTRTYPE;

    /* The kinds of string matching that can be requested.  The
       numeric values are spelled out; they are the same ones the
       compiler would assign by default, so SM_NONE remains zero.
    */
typedef enum {
	SM_NONE   = 0,		/* Not a valid match type */
	SM_STRCMP = 1,		/* Standard strcmp */
	SM_MATCH  = 2,		/* Exact match */
	SM_WILD   = 3,		/* Wildcard match */
	SM_REGEX  = 4		/* Regex */
}  STRMATTYPE;


    /* An encapsulated string descriptor.  The mml-str module has
       routines to deal both with specific string storage types
       (such as BSTR and REFSTR), and generic routines that operate
       on any one of those types.  For the latter, an encapsulation
       of the type and the string information is used as a handle.
       There are functions to quickly construct those encapsulations
       out of each underlying type (the estr_bstr(), estr_refstr(),
       estr_nstr(), estr_str() family declared below), as well as a
       corresponding function to release an encapsulation
       (estr_finish()).

       Not all structure members may be used by each type.

    */
typedef struct estr {
    ESTRTYPE	es_type;	/* String type */
    void	*es_objP;	/* Pointer to the underlying object */
    UBYTE	*es_strP;	/* Ptr to the string, if needed by the type */
    int		es_len;		/* String length, or -1 for NUL-terminated.
				   Use exactly -1: estr_bytex() and
				   estr_nextx() test for == -1, while
				   estr_len() accepts any negative value. */
    int		es_strX;	/* Next byte index */

	/* The following are reserved for application use and are not
	   used by mml-str functions, other than to initialize them
	   when an ESTR is allocated.
	*/
    UWORD	es_xflags;	/* Flagword reserved for caller use */
    void	*es_xP;		/* Generic pointer reserved for caller use */
}  ESTR;


    /* A byte string "object."  This is used to contain a dynamically
       expandable byte string, up to a maximum specified by a max
       storage value.  This object may seem overly complex but then
       again it's not necessarily intended to be used for every
       little string -- merely for flexible expandable buffering.
    */
       
typedef struct {		/* A character string object */
    UBYTE	*bs_bP;		/* buffer */
    int		bs_bC;		/* # of bytes stored in the string */
    int		bs_bX;		/* Current index into buffer */
    int		bs_bL;		/* Length of the buffer, minus 2.
				   There is always a "slop" of 2 bytes
				     (see BSTR_SLOP), thus the allocated
				     length is 2 greater than this.
				   The slop is because
				     we can store one more than the max,
				     indicating an overflow,
				   and we always allow for an extra byte
				     in case somebody wants to add a
				     termination byte (e.g. NUL) */
    int		bs_bM;		/* Maximum storage length (can actually
				    store one greater than this, indicating
				    an overflow).
				*/
    int		bs_chunk;	/* How much to grow by when we expand */
}  BSTR;



    /* The base structure for REFSTRs (see the REFSTR definition that
       follows).  This base structure is created and used indirectly
       by REFSTR instances, and is not directly used by callers.  It
       holds the one BSTR that related REFSTRs share, plus a linked
       list header (presumably the list of REFSTRs attached to this
       base -- confirm against the implementation).
    */
typedef struct {		/* Base for REFSTR references */
    BSTR	bbs_bstr;	/* The base BSTR */
    LL_HEAD	bbs_ll;		/* Linked list header */
}  REFSTR_BASE;


    /* REFSTR: a view (aka reference) of a string.  Multiple related
       REFSTRs share the same BSTR, however they might represent
       different portions of the string.  When a portion of a string
       is updated (moved, grown, shrunk) all sibling REFSTRs are
       updated to reflect that change.  This allows for substring
       handles that dynamically follow their data where possible.

       The shared BSTR is reached through rs_baseP (see the
       refstr_bstr() macro).
    */

typedef struct {		/* The refstr */
    LL_NODE	*rs_llP;	/* Ptr to the linked list node */
    REFSTR_BASE	*rs_baseP;	/* Ptr to the base object */
    int		rs_refC;	/* Number of references */
    int		rs_bX;		/* Current index into buffer;
				   -1 means uninitialized.
				*/
    int		rs_startX;	/* Offset to start of reference,
				   -1 == beginning, which is
				    different from 0 (a substring starting
				    at 0 can move if something is inserted
				    prior to it).
				*/
    int		rs_endX;	/* Offset to the end of the reference.
				   -1 == end, which is also different
				     than any known end value that might
				     automatically be changed.
				*/
}  REFSTR;


/* Declarations for functions contained in this module: */

    /* BSTR functions.  NOTE(review): the BOOL results presumably
       report success/failure -- confirm against mml-str.c.
    */

    /* Append a nul-terminated character string to a bstr */
BOOL	bstr_cat PROTO( (BSTR *bsP, char *strP ) );

    /* Append the contents of another bstr to a bstr.
       NOTE(review): startX is presumably the offset in the source
       bstr at which copying starts -- confirm.
    */
BOOL	bstr_catb PROTO( (BSTR *bsP, BSTR *inbsP, int startX) );

    /* Append the contents of an ESTR to a bstr.
       NOTE(review): startX is presumably the offset in the source
       ESTR at which copying starts -- confirm.
    */
BOOL	bstr_cate PROTO( (BSTR *bsP, ESTR *esP, int startX) );

    /* Append a counted byte string (cC bytes at strP) to a bstr */
BOOL	bstr_catn PROTO( (BSTR *bsP, UBYTE *strP, int cC ) );

    /* Duplicate a bstr.
       NOTE(review): "extra" looks like additional capacity for the
       copy -- confirm.
    */
BSTR	*bstr_dup PROTO( (BSTR *bsP, int extra) );

    /* Delete a bstr */
BOOL	bstr_free PROTO( ( BSTR *bsP) );

    /* Fetch the next byte from a BSTR fetch point */
int	bstr_getb PROTO( (BSTR *bsP) );

    /* Grow a buffer by a new chunk */
BOOL	bstr_grow PROTO( (BSTR *bsP, int chunksize) );

    /* Allocate a new byte string object */
BSTR	*bstr_new PROTO( (char *noteP, int size, int max) );

    /* NUL-terminate a byte string */
void	bstr_nulterm PROTO( (BSTR *bsP) );

    /* Perform printf into a bstr */
BOOL	bstr_printf PROTO( (BSTR *bsP, char *fmtP, ...) );

    /* Add a byte to a string.  The prototype is skipped when
       bstr_putb has already been defined as a macro.
    */
#ifndef bstr_putb
BOOL	bstr_putb PROTO( (BSTR *bsP, UBYTE bval) );
#endif

    /* Copy a nul-terminated string to a bstr, quoting special chars.
       NOTE(review): qmask is presumably built from the QF_ and QS_
       bits defined earlier in this file -- confirm.
    */
BOOL	bstr_quote_str PROTO( (BSTR *bsP, UBYTE *strP, UWORD qmask) );

    /* Copy a counted string to a bstr, quoting special chars */
BOOL	bstr_quote_str_n PROTO( (BSTR *bsP, UBYTE *strP, int len,
				 UWORD qmask) );

    /* Replace part of a BSTR content with a nul-terminated string */
BOOL	bstr_replace PROTO( (BSTR *bsP, int startX, int endX,
			     UBYTE *newP) );

    /* Replace part of a BSTR content with a counted string */
BOOL	bstr_replacen PROTO( (BSTR *bsP, int startX, int endX,
			     UBYTE *newP, int newlen) );

    /* Add a terminating byte to a string */
void	bstr_term PROTO( (BSTR *bsP, UBYTE bval) );

    /* Perform printf into a bstr using a varargs handle */
BOOL	bstr_vprintf PROTO( (BSTR *bsP, char *fmtP, va_list ap) );

    /* ESTR functions.  The estr_xxx() constructors return an ESTR
       that is released with estr_finish(); the estr_tmp_xxx()
       variants "yield a temporary" ESTR (NOTE(review): the lifetime
       of a temporary is not visible in this header -- check the
       implementation before holding on to one).
    */

    /* Create an ESTR from a BSTR */
ESTR	*estr_bstr PROTO( (BSTR *bstrP) );

    /* ESTR: compare */
int	estr_cmp PROTO( (ESTR *es1P, ESTR *es2P) );

    /* ESTR: compare with counted str */
int	estr_cmp_nstr PROTO( (ESTR *es1P, UBYTE *strP, int len) );

    /* ESTR: compare with nul-terminated str */
int	estr_cmp_str PROTO( (ESTR *es1P, UBYTE *strP) );

    /* Create an ESTR from an ESTR */
ESTR	*estr_estr PROTO( (ESTR *estrP) );

    /* Release an ESTR */
void	estr_finish PROTO( (ESTR *esP) );

    /* ESTR: case-insensitive compare */
int	estr_icmp PROTO( (ESTR *es1P, ESTR *es2P) );

    /* ESTR: case-insensitive compare with counted string */
int	estr_icmp_nstr PROTO( (ESTR *es1P, UBYTE *strP, int len) );

    /* ESTR: case-insensitive compare with nul-terminated str */
int	estr_icmp_str PROTO( (ESTR *es1P, UBYTE *strP) );

    /* Create an empty ESTR (string encapsulation) */
ESTR	*estr_init PROTO( (ESTRTYPE estype) );

    /* ESTR: general string match supporting various match types */
int	estr_match PROTO( (ESTR *patP, ESTR *esP, STRMATTYPE mtype,
			   UWORD mflags) );

    /* ESTR: match pattern: exact pattern.
       NOTE(review): cpatP in this and the next two functions looks
       like a handle for a precompiled pattern -- confirm.
    */
BOOL	estr_match_exact PROTO( (void *cpatP,
				 ESTR *patP, ESTR *esP,
				 UWORD mflags) );

    /* ESTR: match pattern: regex pattern */
BOOL	estr_match_regex PROTO( (void *cpatP,
				 ESTR *patP, ESTR *esP,
				 UWORD mflags) );

    /* ESTR: match pattern: wildcard pattern */
BOOL	estr_match_wild PROTO( (void *cpatP,
				ESTR *patP, ESTR *esP,
				UWORD mflags) );

    /* Create an ESTR from a counted string */
ESTR	*estr_nstr PROTO( (UBYTE *strP, int len) );

    /* Create an ESTR from a REFSTR */
ESTR	*estr_refstr PROTO( (REFSTR *rsP) );

    /* ESTR: skip over leading blanks and tabs */
void	estr_solbat PROTO( (ESTR *esP, BOOL eolF) );

    /* Create an ESTR from a nul-terminated string */
ESTR	*estr_str PROTO( (UBYTE *strP) );

    /* Yield a temporary ESTR from a BSTR */
ESTR	*estr_tmp_bstr PROTO( (BSTR *bsP) );

    /* Yield a temporary ESTR from source ESTR, making a new string */
ESTR	*estr_tmp_estr_new PROTO( (ESTR *estrP) );

    /* Yield a temporary ESTR from source ESTR.
       NOTE(review): the original comment here was a verbatim copy of
       the one for estr_tmp_estr_new(); the name suggests the string
       copy lives in a tmpstr() buffer instead -- confirm.
    */
ESTR	*estr_tmp_estr_tmpstr PROTO( (ESTR *estrP) );

    /* Yield a temporary ESTR from a counted string */
ESTR	*estr_tmp_nstr PROTO( (UBYTE *strP, int len) );

    /* Yield a temporary ESTR from a counted string, making a new string */
ESTR	*estr_tmp_nstr_new PROTO( (UBYTE *strP, int len) );

    /* Yield a temporary ESTR from a refstr */
ESTR	*estr_tmp_refstr PROTO( (REFSTR *rsP) );

    /* Yield a temporary ESTR from a nul-terminated string */
ESTR	*estr_tmp_str PROTO( (UBYTE *strP) );

    /* Interpret ESTR as floating point (a la atof()) */
double	estr_to_f PROTO( (ESTR *esP) );

    /* Interpret ESTR as long int (a la atol()) */
long	estr_to_l PROTO( (ESTR *esP) );

    /* Extract a whitespace-delimited token from ESTR into the
       caller's buffer (bufP, of size bufL)
    */
int	estr_token_ws PROTO( (ESTR *esP, UBYTE *bufP, int bufL, BOOL eolF) );

    /* See if a nul-terminated string is all blank */
BOOL	isblankstr PROTO( ( char *strP ) );

    /* Create a new nul-terminated string from an ESTR.
       NOTE(review): facP and nameP look like labels handed to the
       allocator for tracking -- confirm.
    */
char	*newstr_estr PROTO( (char *facP, char *nameP, ESTR *esP) );

    /* REFSTR functions.  startX/endX arguments are offsets that
       delimit a portion of the string; see the rs_startX and rs_endX
       members of REFSTR for the meaning of -1.
    */

    /* Access a REFSTR.
       NOTE(review): presumably this takes a reference (rs_refC) that
       is later dropped with refstr_free() -- confirm.
    */
REFSTR	*refstr_access PROTO( (REFSTR *rsP) );

    /* Translate refstr end to bstr offset */
int	refstr_endx PROTO( (REFSTR *rsP) );

    /* Release access to a refstr */
BOOL	refstr_free PROTO( (REFSTR *rsP) );

    /* Fetch the next byte from a REFSTR fetch point */
int	refstr_getb PROTO( (REFSTR *rsP) );

    /* The length of a string mapped by a refstr */
int	refstr_len PROTO( (REFSTR *rsP) );

    /* Move a string reference */
BOOL	refstr_move PROTO( (REFSTR *rsP, int n) );

    /* Allocate a new, lone refstr */
REFSTR	*refstr_new PROTO( (char *noteP, int size, int max) );

    /* Create a REFSTR, copying from an existing BSTR */
REFSTR	*refstr_new_bstr_copy PROTO( (char *noteP, BSTR *bsP) );

    /* Create a REFSTR, stealing from an existing BSTR */
REFSTR	*refstr_new_bstr_steal PROTO( (char *noteP, BSTR *bsP) );

    /* Allocate a reference refstr: a new view, delimited by startX
       and endX, that refers to the string of an existing refstr
    */
REFSTR	*refstr_ref PROTO( (REFSTR *rrsP, int startX, int endX) );

    /* Replace part of a REFSTR content from a nul-terminated string */
BOOL	refstr_replace PROTO( (REFSTR *rsP, int startX, int endX,
			     UBYTE *newP) );

    /* Replace part of a REFSTR content with ESTR string */
BOOL	refstr_replacee PROTO( (REFSTR *rsP, int startX, int endX,
			     ESTR *newP) );

    /* Replace part of a REFSTR content with a counted string */
BOOL	refstr_replacen PROTO( (REFSTR *rsP, int startX, int endX,
			     UBYTE *newP, int newlen) );

    /* Translate refstr start to bstr offset */
int	refstr_startx PROTO( (REFSTR *rsP) );


    /* Plain (non-ESTR) string utilities. */

    /* Skip over leading blanks and tabs (i.e. whitespace) */
char	*solbat PROTO( (char *strP, BOOL eolF) );

    /* Find byte in nul-terminated string, return index */
int	strbyte PROTO( (UBYTE *strP, UBYTE bval) );

    /* Find byte in counted string (strL bytes), return index */
int	strbyte_n PROTO( (UBYTE *strP, int strL, UBYTE bval) );

#ifndef HAVE_STRICMP	/* Not set in this file; expected from the
			   build configuration when the platform
			   already supplies stricmp() */
    /* Case-insensitive strcmp() */
int	stricmp PROTO( ( char *s1P, char *s2P ) );
#endif	/* HAVE_STRICMP */

    /* Convert a string to lowercase, in place */
void	strlc PROTO( (char *strP) );

    /* General string comparison supporting various match types */
int	strmatch PROTO( (UBYTE *patP, UBYTE *strP, STRMATTYPE mtype,
			 UWORD mflags) );

    /* Match pattern: exact pattern.
       NOTE(review): cpatP in this and the next two functions looks
       like a handle for a precompiled pattern -- confirm.
    */
BOOL	strmatch_exact PROTO( (void *cpatP,
			       UBYTE *patP, UBYTE *strP,
			       UWORD mflags) );

    /* Match pattern: regex pattern */
BOOL	strmatch_regex PROTO( (void *cpatP,
			       UBYTE *patP, UBYTE *strP,
			       UWORD mflags) );

    /* Match pattern: wildcard pattern */
BOOL	strmatch_wild PROTO( (void *cpatP,
			      UBYTE *patP, UBYTE *strP,
			      UWORD mflags) );

    /* Yield nth matched subpart; its length is presumably stored
       through lenP (confirm).  See MATCHKEEP for how many
       [sub]matches are saved.
    */
UBYTE	*strmatched_part PROTO( (int partN, int *lenP) );


#ifndef HAVE_STRICMP	/* Note: strnicmp() is guarded by the same
			   HAVE_STRICMP symbol as stricmp() */
    /* Case-insensitive strncmp() */
int	strnicmp PROTO( ( char *s1P, char *s2P, int max ) );
#endif	/* HAVE_STRICMP */


#ifndef	HAVE_STRSTR		/* Not set in this file; expected from the
				   build configuration when the platform
				   already supplies strstr() */

    /* Find one string in another */
char	*strstr PROTO( ( char *s1P, char *s2P ) );

#endif	/* HAVE_STRSTR */


    /* Extract a whitespace-delimited token from NUL-terminated byte string
       into the caller's buffer (bufP, of size bufL).  The string is
       passed by reference (strPP); presumably the pointer is advanced
       past the token -- confirm.
    */
int	strtoken_ws PROTO( (UBYTE **strPP, UBYTE *bufP, int bufL, BOOL eolF) );

    /* Get a temporary string buffer */
UBYTE	*tmpstr PROTO( (int len) );

    /* Make a nul-terminated tmpstr from an ESTR, with limits */
UBYTE	*tmpstr_estr PROTO( (ESTR *esP) );



#endif	/* H_MML_STR */
