/*
 *  HP 9000 Series 800 Linker, Copyright Hewlett-Packard Co. 1985-1999  
 *  Locale/Read Routines
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>
#include <errno.h>
#include <stdlib.h>
#include <locale.h>
#include <limits.h>
#include <wchar.h>
#include <wctype.h>

#include "errors.h"
#include "std.h"
#include "util.h"

/* this is probably not right --pschwan@thepuffingroup.com */
/* NOTE: with this definition every getwc() call below is really a getc()
   call, so mb_fgetword receives whatever getc() returns (one byte, or EOF)
   rather than a decoded wide character. */
#define getwc getc

/* Wide-character forms of the two characters mb_fgetword treats specially:
   the escape / line-continuation character and the comment introducer. */
#define WBACKSLASH	L'\\'
#define WCOMMENT_CHAR	L'#'

/* Word delimiter test used by mb_fgetword: any wide white-space character. */
# define IS_MBSPACE(wc)  (iswspace (wc))

/* Disabled alternative that would also treat ',' and ';' as delimiters. */
#if 0
# define IS_MBSPACE(wc)  (iswspace (wc) || wc == L',' || wc == L';')
#endif /* 0 */

/*
 * mb_fgetword is the multi-byte equivalent of fgetword.
 *
 * Reads the next white-space delimited word from f and stores it in buf as
 * a NUL-terminated multibyte string; every character is converted with
 * wctomb().  No bound is checked on buf: the caller must supply a buffer
 * large enough for the longest word.
 *
 * Parameters:
 *   f              stream to read from
 *   buf            destination for the word
 *   end_condition  EOF or '\n'; reading stops when this is seen
 *
 * Syntax recognised while reading:
 *   - A '#' that is the first character of a word starts a comment that
 *     runs to the end of the line; "##" in that position is a single '#'.
 *   - A backslash makes the following character literal (so "\\" is one
 *     backslash and "\#" is a plain '#'); backslash-newline continues the
 *     line.
 *
 * Returns TRUE when a word was stored in buf.  Returns FALSE when the end
 * condition is reached before a word starts, or when end of file is hit
 * (a word cut short by end of file is discarded); buf is then unspecified.
 *
 * The character that ended the word is kept in a static variable so that
 * the next call can return FALSE at once if it was the end condition.
 * Because of that static state the routine keeps context between calls.
 *
 * A character wctomb() cannot convert is reported through
 * user_error(BAD_MB_DATA_CMD_FILE).
 */

int mb_fgetword(FILE *f, char *buf, int end_condition)
{
    int			new_word = TRUE;
    int			escaped = FALSE;   /* first char came after a '\' */
    int			wc_byte_len;
    wint_t		wc;                /* wint_t so WEOF is representable */
    wint_t		stop;

    static wint_t	lastwc = 0;

    /* Wide form of the end condition. */
    stop = (end_condition == EOF) ? WEOF : (wint_t) end_condition;

    /* The previous call stopped on the end condition: report it now. */
    if (lastwc == stop) {
	lastwc = 0;
	return (FALSE);
    }

    /* skip leading white space */
    while ((wc = getwc(f)) != stop) {
	/* Don't try to read past WEOF if last line has no EOL */
	if (wc == WEOF)
	    break;
	if (!IS_MBSPACE (wc))
	    break;
    }

    /* jump over a continuation char and ignore whitespace on next line */
    while (wc == WBACKSLASH) {
	wc = getwc(f);
	if (iswspace(wc)) {
	    while ((wc = getwc(f)) != stop) {
		/* Don't try to read past WEOF if last line has no EOL */
		if (wc == WEOF)
		    break;
		if (!IS_MBSPACE (wc))
		    break;
	    }
	}
	else {
	    /* get out we have a good char; it was escaped, so it must be
	       taken literally */
	    escaped = TRUE;
	    break;
	}
    }

    /* WEOF is not a character: never hand it to wctomb(), which would
       report it as bad multibyte data. */
    if (wc == stop || wc == WEOF) {
	lastwc = 0;
	return (FALSE);
    }

    /* An escaped first character must not be seen as '#' or '\' by the
       loop below, so it is not remembered in lastwc. */
    lastwc = escaped ? 0 : wc;
    wc_byte_len = wctomb(buf, (wchar_t) wc);
    if (wc_byte_len == -1)
	user_error(BAD_MB_DATA_CMD_FILE);
    else
	buf += wc_byte_len;
    new_word = TRUE;

    while ((wc = getwc(f)) != stop) {
	/* Set when wc is to be stored literally and must not act as a
	   comment or escape prefix on the next iteration. */
	int literal = FALSE;

	/* Don't try to read past WEOF if last line has no newline char.
	   If missing final newline char, and end_condition was set to EOL,
	   break out of loop, and we will set an error below */
	if (wc == WEOF)
	    break;

	if ((lastwc == WCOMMENT_CHAR) && new_word) {

	    /* get rid of the last character which is a #
	       (assumes '#' occupies one byte in buf) */
	    buf = buf - 1;

	    if (wc == WCOMMENT_CHAR) {       /* ## is taken as a single # */
		new_word = FALSE;
		literal = TRUE;
	    }
	    else {                /* we have a comment */
		while ((wc != stop) && (wc != L'\n') && (wc != WEOF))
		    wc = getwc(f);
		if (wc == stop) {
		    lastwc = 0;
		    return (FALSE);
		}

		/* no longer a comment - skip white space */
		while ((wc = getwc(f)) != stop) {
		    if (!IS_MBSPACE (wc))
			break;
		}

		if (wc == stop) {
		    lastwc = 0;
		    return (FALSE);
		}

		lastwc = wc;
		new_word = TRUE;
	    }
	}
	else if (wc == WBACKSLASH || lastwc == WBACKSLASH) {

	    /* get rid of this character which is a \ */
	    if (lastwc != WBACKSLASH)
		wc = getwc(f);
	    else
		buf = buf - 1;    /* take slash out of buffer */

	    if (wc != L'\n') {
		/* any char following a backslash is used */
		new_word = FALSE;   /* so \\ will become \ */
		literal = TRUE;
	    }
	    else {
		/* wc must be a newline */
		if (!new_word) {
		    /* Just eat newline */
		    wc = getwc(f);           /* get 1 after newline */
		    lastwc = wc;
		    if (IS_MBSPACE (wc))
			/* word ended just before continuation */
			break;  /* out of big while loop */
		}
		else {
		    /* new word and newline - skip any whitespace */
		    while ((wc = getwc(f)) != stop) {
			if (!IS_MBSPACE (wc)) {
			    lastwc = wc;
			    break;
			}
		    }

		    if (wc == stop) {
			lastwc = 0;
			return (FALSE);
		    }

		    lastwc = wc;
		    new_word = TRUE;
		}
	    }
	}   /* backslash code */

	else { /* still traversing a single token */
	    new_word = FALSE;
	    if (IS_MBSPACE (wc))
		break;
	}

	/* End of file reached inside a comment, escape or continuation:
	   leave the loop so FALSE is returned below. */
	if (wc == WEOF)
	    break;

	wc_byte_len = wctomb(buf, (wchar_t) wc);
	if (wc_byte_len == -1)
	    user_error(BAD_MB_DATA_CMD_FILE);
	else
	    buf += wc_byte_len;
	lastwc = literal ? 0 : wc;
    } /* while */

    /* Lastwc is set so we can check for an end condition when we enter */
    /* the routine again - If so, we can return false so an outer loop */
    /* can terminate. */
    lastwc = wc;

    if (wc == WEOF) {
	lastwc = 0;
	return (FALSE);
    }

    *buf++ = 0;
    return (TRUE);
}

/* Buffer-size constants. */
#define ERR_BUFLEN         256
/* Parenthesized so the value stays 3 inside larger expressions such as
   n * CUR_MAX_BYTES_IN_CHAR. */
#define CUR_MAX_BYTES_IN_CHAR      (2+1)     /* multibytes are 2 chars long */


/* Special Characters */
#define BACKSLASH       '\\'                /* a single backslash char */
#define COMMENT_CHAR    '#'                 /* a single pound sign */

/* Word delimiter test used by fgetword: any white-space character. */
# define IS_SPACE(c)  (isspace (c))

/* Disabled alternative that would also treat ',' and ';' as delimiters. */
#if 0
# define IS_SPACE(c)  (isspace (c) || c == ',' || c == ';')
#endif /* 0 */


/*
 * Fgetword gets the next word from the file f and places it in buf.
 *
 * When MB_CUR_MAX is not 1 the work is delegated to mb_fgetword();
 * otherwise the stream is read one byte at a time with getc().  The word is
 * stored NUL-terminated.  No bound is checked on buf: the caller must
 * supply a buffer large enough for the longest word.
 *
 * Parameters:
 *   f              stream to read from
 *   buf            destination for the word
 *   end_condition  EOF or '\n'; reading stops when this is seen
 *
 * Syntax recognised while reading:
 *   - A '#' that is the first character of a word starts a comment that
 *     runs to the end of the line; "##" in that position is a single '#'.
 *   - A backslash makes the following character literal (so "\\" is one
 *     backslash and "\#" is a plain '#'); backslash-newline continues the
 *     line.
 *
 * Returns TRUE when a word was stored in buf.  Returns FALSE when the end
 * condition is reached before a word starts, or when end of file is hit
 * (a word cut short by end of file is discarded); buf is then unspecified.
 *
 * The character that ended the word is kept in a static variable so that
 * the next call can return FALSE at once if it was the end condition.
 * Because of that static state the routine keeps context between calls.
 */

Boolean fgetword(FILE *f, char *buf, int end_condition)
{
    int new_word = TRUE;
    int escaped = FALSE;     /* first char of the word came after a '\' */
    int c;
    static int lastc = 0;

    if (MB_CUR_MAX != 1)
	return (mb_fgetword(f, buf, end_condition));

    /* The previous call stopped on the end condition: report it now. */
    if (lastc == end_condition) {
	lastc = 0;
	return (FALSE);
    }

    /* skip leading white space */
    while ((c = getc(f)) != end_condition) {
	/* Don't try to read past EOF if last line has no EOL */
	if (c == EOF)
	    break;
	if (!IS_SPACE (c))
	    break;
    }

    /* jump over a continuation char and ignore whitespace on next line */
    while (c == BACKSLASH) {
	c = getc(f);
	if (isspace(c)) {
	    while ((c = getc(f)) != end_condition) {
		/* Don't try to read past EOF if last line has no EOL */
		if (c == EOF)
		    break;
		if (!IS_SPACE (c))
		    break;
	    }
	}
	else {
	    /* get out we have a good char; it was escaped, so it must be
	       taken literally */
	    escaped = TRUE;
	    break;
	}
    }

    if (c == end_condition) {
	lastc = 0;
	return (FALSE);
    }

    /* An escaped first character must not be seen as '#' or '\' by the
       loop below, so it is not remembered in lastc. */
    lastc = escaped ? 0 : c;
    *buf++ = c;
    new_word = TRUE;

    while ((c = getc(f)) != end_condition) {
	/* Set when c is to be stored literally and must not act as a
	   comment or escape prefix on the next iteration. */
	int literal = FALSE;

	/* Don't try to read past EOF if last line has no newline char.
	   If missing final newline char, and end_condition was set to EOL,
	   break out of loop, and we will set an error below */
	if (c == EOF)
	    break;

	if ((lastc == COMMENT_CHAR) && new_word) {

	    /* get rid of the last character which is a # */
	    buf = buf - 1;

	    if (c == COMMENT_CHAR) {       /* ## is taken as a single # */
		new_word = FALSE;
		literal = TRUE;
	    }
	    else {                /* we have a comment */
		while ((c != end_condition) && (c != '\n') && (c != EOF))
		    c = getc(f);
		if (c == end_condition) {
		    lastc = 0;
		    return (FALSE);
		}

		/* no longer a comment - skip white space */
		while ((c = getc(f)) != end_condition) {
		    if (!IS_SPACE (c))
			break;
		}

		if (c == end_condition) {
		    lastc = 0;
		    return (FALSE);
		}

		lastc = c;
		new_word = TRUE;
	    }
	}
	else if (c == BACKSLASH || lastc == BACKSLASH) {

	    /* get rid of this character which is a \ */
	    if (lastc != BACKSLASH)
		c = getc(f);
	    else
		buf = buf - 1;    /* take slash out of buffer */

	    if (c != '\n') {
		/* any char following a backslash is used */
		new_word = FALSE;   /* so \\ will become \ */
		literal = TRUE;
	    }
	    else {
		/* c must be a newline */
		if (!new_word) {
		    /* Just eat newline */
		    c = getc(f);           /* get 1 after newline */
		    lastc = c;
		    if (IS_SPACE (c))
			/* word ended just before continuation */
			break;  /* out of big while loop */
		}
		else {
		    /* new word and newline - skip any whitespace */
		    while ((c = getc(f)) != end_condition) {
			if (!IS_SPACE (c)) {
			    lastc = c;
			    break;
			}
		    }

		    if (c == end_condition) {
			lastc = 0;
			return (FALSE);
		    }

		    lastc = c;
		    new_word = TRUE;
		}
	    }
	}   /* backslash code */

	else { /* still traversing a single token */
	    new_word = FALSE;
	    if (IS_SPACE (c))
		break;
	}

	*buf++ = c;
	/* Forget a literal character so that an escaped backslash is not
	   mistaken for a fresh escape prefix on the next pass. */
	lastc = literal ? 0 : c;

    } /* while */

    /* Lastc is set so we can check for an end condition when we enter */
    /* the routine again - If so, we can return false so an outer loop */
    /* can terminate. */
    lastc = c;

    if (c == EOF) {
	lastc = 0;
	return (FALSE);
    }

    *buf++ = 0;
    return (TRUE);
}


/*
 * init_locale selects the program's locale from the environment by calling
 * setlocale(LC_ALL, "").
 *
 * If the requested locale cannot be installed, a warning is written to
 * stderr and the locale is left as it was.
 *
 * Returns 0 when the locale was set, -1 when it was unavailable.  (The
 * routine used to be declared with an implicit int return type and never
 * returned a value, so existing callers that ignore the result are
 * unaffected.)
 */
int init_locale(void)
{
    /* set the locale based on the environment variables */
    if (!setlocale(LC_ALL, "")) {
	/* bad locale, output HP standard "unavailable language warning" */
#if 0
	(void) fputs(_errlocale("ld"), stderr);
#else
	/* we don't have _errlocale --pschwan@thepuffingroup.com */
	fputs("preferred language unavailable.\n", stderr);
#endif
	return (-1);
    }
    return (0);
}
