nexmon – Rev 1

Subversion Repositories:
Rev:
/* source: nestlex.c */
/* Copyright Gerhard Rieger 2006-2010 */
/* Published under the GNU General Public License V.2, see file COPYING */

/* a function for lexical scanning of nested character patterns */

#include "config.h"
#include "mytypes.h"

#include "sysincludes.h"


/* sub: scan a string and copy its value to output string
   end scanning when an unescaped, unnested string from ends array is found
   does not copy the end pattern
   does not write a trailing \0 to token
   allows escaping with \ and quoting (\ and quotes are removed)
   allows nesting with div. parens
   returns -1 if out string was too small
   returns 1 if addr ended unexpectedly
   returns 0 if token could be extracted successfully
*/
int nestlex(const char **addr,  /* input string; aft points to end token */
            char **token,       /* output token; aft points to first unwritten
                                   char (caller might want to set it to \0) */
            size_t *len,        /* remaining bytes in token space (incl. \0) */
            const char *ends[], /* list of end strings */
            const char *hquotes[],/* list of strings that quote (hard qu.) */
            const char *squotes[],/* list of strings that quote softly */
            const char *nests[],/* list of strings that start nesting;
                                   every second one is matching end */
            bool dropspace,     /* drop trailing space before end token */
            bool dropquotes,    /* drop the outermost quotes */
            bool c_esc,         /* solve C char escapes: \n \t \0 etc */
            bool html_esc       /* solve HTML char escapes: %0d %08 etc */
            ) {
   const char *in = *addr;      /* pointer into input string */
   const char **endx;   /* loops over end patterns */
   const char **quotx;  /* loops over quote patterns */
   const char **nestx;  /* loops over nest patterns */
   char *out = *token;  /* pointer into output token */
   char *lastnonspace = out;
   char c;
   int i;
   int result;

   while (true) {

      /* is this end of input string? */
      if (*in == 0)  {
         if (dropspace) {
            out = lastnonspace;
         }
         break; /* end of string */
      }

      /* first check the end patterns (e.g. for ']') */
      endx = ends;  i = 0;
      while (*endx) {
         if (!strncmp(in, *endx, strlen(*endx))) {
            /* this end pattern matches */
            if (dropspace) {
               out = lastnonspace;
            }
            *addr = in;
            *token = out;
            return 0;
         }
         ++endx;
      }

      /* check for hard quoting pattern */
      quotx = hquotes;
      while (hquotes && *quotx) {
         if (!strncmp(in, *quotx, strlen(*quotx))) {
            /* this quote pattern matches */
            const char *endnest[2];
            if (dropquotes) {
               /* we strip this quote */
               in += strlen(*quotx);
            } else {
               for (i = strlen(*quotx); i > 0; --i) {
                  *out++ = *in++;
                  if (--*len <= 0) { *addr = in; *token = out; return -1; }
               }
            }
            /* we call nestlex recursively */
            endnest[0] = *quotx;
            endnest[1] = NULL;
            result =
               nestlex(&in, &out, len, endnest, NULL/*hquotes*/,
                       NULL/*squotes*/, NULL/*nests*/,
                       false, false, c_esc, html_esc);
            if (result == 0 && dropquotes) {
               /* we strip this quote */
               in += strlen(*quotx);
            } else {
               /* we copy the trailing quote */
               for (i = strlen(*quotx); i > 0; --i) {
                  *out++ = *in++;
                  if (--*len <= 0) { *addr = in; *token = out; return -1; }
               }
            }
               
            break;
         }
         ++quotx;
      }
      if (hquotes && *quotx != NULL) {
         /* there was a quote; string might continue with hard quote */
         lastnonspace = out;
         continue;
      }

      /* check for soft quoting pattern */
      quotx = squotes;
      while (squotes && *quotx) {
         if (!strncmp(in, *quotx, strlen(*quotx))) {
            /* this quote pattern matches */
            /* we strip this quote */
            /* we call nestlex recursively */
            const char *endnest[2];
            if (dropquotes) {
               /* we strip this quote */
               in += strlen(*quotx);
            } else {
               for (i = strlen(*quotx); i > 0; --i) {
                  *out++ = *in++;
                  if (--*len <= 0) { *addr = in; *token = out; return -1; }
               }
            }
            endnest[0] = *quotx;
            endnest[1] = NULL;
            result =
               nestlex(&in, &out, len, endnest, hquotes,
                       squotes, nests,
                       false, false, c_esc, html_esc);

            if (result == 0 && dropquotes) {
               /* we strip the trailing quote */
               in += strlen(*quotx);
            } else {
               /* we copy the trailing quote */
               for (i = strlen(*quotx); i > 0; --i) {
                  *out++ = *in++;
                  if (--*len <= 0) { *addr = in; *token = out; return -1; }
               }
            }
            break;
         }
         ++quotx;
      }
      if (squotes && *quotx != NULL) {
         /* there was a soft quote; string might continue with any quote */
         lastnonspace = out;
         continue;
      }

      /* check patterns that start a nested clause */
      nestx = nests;  i = 0;
      while (nests && *nestx) {
         if (!strncmp(in, *nestx, strlen(*nestx))) {
            /* this nest pattern matches */
            const char *endnest[2];
            endnest[0] = nestx[1];
            endnest[1] = NULL;

            for (i = strlen(nestx[1]); i > 0; --i) {
               *out++ = *in++;
               if (--*len <= 0)  { *addr = in; *token = out; return -1; }
            }

            result =
               nestlex(&in, &out, len, endnest, hquotes, squotes, nests,
                       false, false, c_esc, html_esc);
            if (result == 0) {
               /* copy endnest */
               i = strlen(nestx[1]); while (i > 0) {
                  *out++ = *in++;
                  if (--*len <= 0) {
                     *addr = in;
                     *token = out;
                     return -1;
                  }
                  --i;
               }
            }
            break;
         }
         nestx += 2;    /* skip matching end pattern in table */
      }
      if (nests && *nestx) {
         /* we handled a nested expression, continue loop */
         lastnonspace = out;
         continue;
      }

      /* "normal" data, possibly escaped */
      c = *in++;
      if (c == '\\') {
         /* found a plain \ escaped part */
         c = *in++;
         if (c == 0)  { /* Warn("trailing '\\'");*/ break; }
         if (c_esc) { /* solve C char escapes: \n \t \0 etc */
            switch (c) {
            case '0': c = '\0'; break;
            case 'a': c = '\a'; break;
            case 'b': c = '\b'; break;
            case 'f': c = '\f'; break;
            case 'n': c = '\n'; break;
            case 'r': c = '\r'; break;
            case 't': c = '\t'; break;
            case 'v': c = '\v'; break;
#if LATER
            case 'x': !!! 1 to 2 hex digits; break;
            case 'u': !!! 4 hex digits?; break;
            case 'U': !!! 8 hex digits?; break;
#endif
            default: break;
            }
         }
         *out++ = c;
         --*len;
         if (*len == 0) {
            *addr = in;
            *token = out;
            return -1;  /* output overflow */
         }
         lastnonspace = out;
         continue;
      }

      /* just a simple char */
      *out++ = c;
      --*len;
      if (*len == 0) {
         *addr = in;
         *token = out;
         return -1;     /* output overflow */
      }
      if (!isspace(c)) {
         lastnonspace = out;
      }   

   }
   /* never come here? */

   *addr = in;
   *token = out;
   return 0;    /* OK */
}

int skipsp(const char **text) {
    while (isspace(**text)) {
        ++(*text);
    }
    return 0;
}