cmdopts.diff
changeset 64 d328b18462bd
child 65 7e44adeed9a7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cmdopts.diff	Sun Feb 24 04:47:32 2013 +0200
@@ -0,0 +1,416 @@
+# HG changeset patch
+# Parent 92fa48ef53c909928706ab4c51518953339a38e4
+Unified command option parsing
+
+diff -r 92fa48ef53c9 mcabber/mcabber/utils.c
+--- a/mcabber/mcabber/utils.c	Sun Jan 27 00:40:37 2013 +0200
++++ b/mcabber/mcabber/utils.c	Sun Feb 24 04:24:14 2013 +0200
+@@ -555,6 +555,311 @@
+     *str = tolower(*str);
+ }
+ 
++// FURTHER TODO:
++// Allow specifying a catchall argument in the middle of the string (requires some kind of reverse parser)
++// Better error messages (allocated, so that the caller frees them)
++// --help generates an error with short usage, based on the info in the options struct
++
++// in_space        -> in_space, in_optstart, in_argstart
++// in_optstart     -> in_shortoptend, in_longoptstart, in_argstart ('-')
++// in_shortoptend  -> in_space, error
++// in_longoptstart -> in_longopt, in_space, in_argstart ('---')
++// in_longopt      -> in_longopt, in_space, error
++// in_argstart     -> in_arg, success
++// in_arg          -> in_arg, in_space, error
++
++// Parses the writable utf8 buffer [arg, e] in place (e points at its final
++// '\0') and stores the results into the opts/args lists of 'options'.
++// Returns NULL on success or a constant error message (not to be freed).
++static const char *cmdopts_parse_internal(gchar *arg, gchar *e, cmdopts_t *options)
++{
++  // parser state
++  enum {
++    in_space,
++    in_optstart,
++    in_shortoptend,
++    in_longoptstart,
++    in_longopt,
++    in_argstart,
++    in_arg,
++  } state = in_space;
++  // current pointer, start of object pointer
++  gchar *p, *s;
++  // inside of double quotes / no more options are accepted
++  gboolean quotes = FALSE;
++  gboolean opts_ended = FALSE;
++  // option, for which argument is currently parsed
++  cmdopt_t *option = NULL;
++  // argument, that we currently parse
++  cmdarg_t *argument = NULL;
++  // flags of option/argument
++  guint flags = 0;
++  // error message to return
++  const char *error = NULL;
++
++  p = arg;
++  // we allow parser to do one extra run on final '\0'
++  while (p <= e && error == NULL) {
++    if (state == in_space) { // space between args/options
++      if (*p == ' ' || *p == '\0') { // still space
++        p ++;
++      } else if (*p == '-' && !opts_ended) { // option
++        state = in_optstart;
++        p ++;
++      } else { // argument
++        if (!option) {
++          opts_ended = TRUE;
++        }
++        s = p;
++        state = in_argstart;
++      }
++    } else if (state == in_optstart) { // long/short option
++      if (*p == ' ' || *p == '\0') { // argument '-'
++        opts_ended = TRUE;
++        s = p - 1;
++        state = in_argstart;
++      } else if (*p == '-') { // long option
++        state = in_longoptstart;
++        p ++;
++      } else { // short option
++        s = p;
++        state = in_shortoptend;
++        p ++;
++      }
++    } else if (state == in_shortoptend) { // short option
++      if (*p == ' ' || *p == '\0') { // option really ended
++        gboolean found = FALSE;
++        option = options -> opts;
++        if (option) {
++          do {
++            if (option -> shortopt == *s) {
++              found = TRUE;
++              break;
++            }
++          } while ((!(option++ -> flags & CMDOPT_LAST)) && !found);
++        }
++        if (found) { // option is known
++          if (option -> flags & CMDOPT_SWITCH) { // it is switch
++            if (option -> flags & CMDOPT_CATCHALL) {
++              option -> value.swc ++;
++            } else {
++              option -> value.swc = !option -> value.swc;
++            }
++            option = NULL;
++          } else { // it is option
++            if (*p == '\0') {
++              error = "Short option argument not specified";
++            }
++          }
++          state = in_space;
++          p ++;
++        } else { // option is unknown
++          error = "Unknown short option";
++        }
++      } else { // short option not ended
++        error = "Extra characters at short option end";
++      }
++    } else if (state == in_longoptstart) { // long option initialization
++      if (*p == ' ' || *p == '\0') { // end of options '--'
++        opts_ended = TRUE;
++        state = in_space;
++        p ++;
++      } else if (*p == '-') { // argument, starting with '---'
++        opts_ended = TRUE;
++        s = p - 2;
++        state = in_argstart;
++      } else { // it is long option
++        s = p;
++        state = in_longopt;
++        p ++;
++      }
++    } else if (state == in_longopt) { // long option name
++      if (*p == ' ' || *p == '\0') { // long option ended
++        gboolean found = FALSE;
++        gboolean eof = *p == '\0';
++        *p = '\0';
++        option = options -> opts;
++        if (option) {
++          do {
++            if (!g_strcmp0 (option -> longopt, s)) {
++              found = TRUE;
++              break;
++            }
++          } while ((!(option++ -> flags & CMDOPT_LAST)) && !found);
++        }
++        if (found) { // option is known
++          if (option -> flags & CMDOPT_SWITCH) { // it is switch
++            if (option -> flags & CMDOPT_CATCHALL) {
++              option -> value.swc ++;
++            } else {
++              option -> value.swc = !option -> value.swc;
++            }
++            option = NULL;
++          } else { // it is option
++            if (eof) {
++              error = "Long option argument not specified";
++            }
++          }
++          state = in_space;
++          p ++;
++        } else { // option is unknown
++          error = "Unknown long option";
++        }
++      } else { // still long option
++        p ++;
++      }
++    } else if (state == in_argstart) { // option/command argument initialization
++      if (option) {
++        flags = option -> flags & ~CMDOPT_CATCHALL; // catchall in options indicates multi-options
++      } else {
++        if (!argument) {
++          argument = options -> args;
++        }
++        if (!argument) { // no need to parse arguments at all
++          break;
++        }
++        flags = argument -> flags;
++        if ((flags & CMDOPT_CATCHALL) && (flags & CMDOPT_PLAIN)) { // can finish right away
++          argument -> value.arg = s;
++          break;
++        }
++      }
++      quotes = FALSE;
++      state = in_arg;
++    } else if (state == in_arg) { // option/command argument value
++      if (*p == '\0' && quotes) { // end of line in quotes
++        error = "Unfinished quoted argument";
++      } else if ((*p == ' ' && (!quotes) && !(flags & CMDOPT_CATCHALL)) || *p == '\0') { // argument ended
++        if (*p != '\0') { // terminate the value in place; p is advanced only once, below
++          *p = '\0';
++        }
++        if (option) { // option argument
++          if (option -> flags & CMDOPT_CATCHALL) { // multi-value option
++            option -> value.multiopt = g_slist_append (option -> value.multiopt, s);
++          } else { // single-value option
++            option -> value.opt = s;
++          }
++          option = NULL;
++        } else { // command argument
++          if (argument -> flags & CMDOPT_SUBCOMMAND) {
++            gboolean found = FALSE;
++            subcmd_t *subcommand = options -> cmds;
++            if (subcommand) {
++              do {
++                if (!g_strcmp0(s, subcommand -> name)) {
++                  found = TRUE;
++                  break;
++                }
++              } while (!(subcommand++ -> flags & CMDOPT_LAST));
++            }
++            if (found) {
++              argument -> value.cmd = subcommand;
++              error = cmdopts_parse_internal(p, e, subcommand -> options); // p is on a '\0', skipped as space
++              break;
++            } else {
++              error = "Unknown subcommand";
++            }
++          } else {
++            argument -> value.arg = s;
++            if (argument -> flags & CMDOPT_LAST) { // last argument
++              break;
++            }
++            argument ++;
++          }
++        }
++        state = in_space;
++        p ++;
++      } else if (*p == '\\' && !(flags & CMDOPT_PLAIN)) { // next char escape
++        memmove(p, p+1, e-p); // drop the backslash; the count includes the final '\0'
++        e --;
++        if (*p == '\0') {
++          error = "Escape at the end of line";
++        }
++        p ++;
++      } else if (*p == '"' && !(flags & CMDOPT_PLAIN)) { // quotation start/end
++        memmove(p, p+1, e-p); // drop the quote; the count includes the final '\0'
++        e --;
++        quotes = !quotes;
++      } else { // still argument
++        p ++;
++      }
++    }
++  }
++
++  // check required flags on options
++  if (error == NULL && options -> opts) {
++    option = options -> opts;
++    do {
++      if (option -> flags & CMDOPT_REQUIRED) {
++        if (option -> flags & CMDOPT_SWITCH) {
++          // no way to check trigger switches, but no point in it as well
++          if (option -> flags & CMDOPT_CATCHALL && option -> value.swc == 0) {
++            error = "Required switch is not specified";
++            break;
++          }
++        } else {
++          if ((option -> flags & CMDOPT_CATCHALL && option -> value.multiopt == NULL) ||
++              ((!(option -> flags & CMDOPT_CATCHALL)) && option -> value.opt == NULL)) {
++            error = "Required option is not specified";
++            break;
++          }
++        }
++      }
++    } while (!(option++ -> flags & CMDOPT_LAST));
++  }
++
++  // check required flags on arguments
++  if (error == NULL && options -> args) {
++    argument = options -> args;
++    do {
++      if (argument -> flags & CMDOPT_REQUIRED) { // optional arguments may stay unset
++        if (argument -> flags & CMDOPT_SUBCOMMAND) {
++          if (argument -> value.cmd == NULL) {
++            error = "Subcommand is not specified";
++            break;
++          }
++        } else if (argument -> value.arg == NULL) {
++          error = "Required argument is not specified";
++          break;
++        }
++      }
++    } while (!(argument++ -> flags & CMDOPT_LAST));
++  }
++
++  return error;
++}
++
++const char *cmdopts_parse(const char *arg, cmdopts_t *options)
++{
++  // Convert arg with to_utf8(), remember the buffer in freeme, then parse it.
++  gchar *buffer = to_utf8(arg);
++  gchar *end = buffer;
++  while (*end != '\0') { end ++; } // the parser wants a pointer to the final '\0'
++  options -> freeme = buffer;
++  return cmdopts_parse_internal(buffer, end, options);
++}
++
++void cmdopts_free(cmdopts_t *options)
++{
++  cmdopt_t *opt;
++  subcmd_t *cmd;
++  // Free the GSLists of multi-value options (only the lists, not the strings in them)
++  for (opt = options -> opts; opt != NULL; opt ++) {
++    if ((opt -> flags & (CMDOPT_CATCHALL|CMDOPT_SWITCH)) == CMDOPT_CATCHALL) {
++      g_slist_free(opt -> value.multiopt);
++      opt -> value.multiopt = NULL;
++    }
++    if (opt -> flags & CMDOPT_LAST) { break; } // entry list ends here
++  }
++  // then clean up the option set of every subcommand the same way
++  for (cmd = options -> cmds; cmd != NULL; cmd ++) {
++    cmdopts_free(cmd -> options);
++    if (cmd -> flags & CMDOPT_LAST) { break; }
++  }
++  g_free(options -> freeme); // the buffer stored by cmdopts_parse()
++  options -> freeme = NULL;
++}
++
+ //  strip_arg_special_chars(string)
+ // Remove quotes and backslashes before an escaped quote
+ // Only quotes need a backslash
+diff -r 92fa48ef53c9 mcabber/mcabber/utils.h
+--- a/mcabber/mcabber/utils.h	Sun Jan 27 00:40:37 2013 +0200
++++ b/mcabber/mcabber/utils.h	Sun Feb 24 04:24:14 2013 +0200
+@@ -43,6 +43,93 @@
+ char **split_arg(const char *arg, unsigned int n, int dontstriplast);
+ void free_arg_lst(char **arglst);
+ 
++//  error cmdopts_parse (argstring, optionlist)
++// Parses the command argument string according to the provided list of
++// options and arguments. If it encounters an error in the process, it
++// returns an error string (that should be displayed to the user).
++// Note: For now the returned error is a constant string that must NOT be
++// freed; the plan is to return g_free'able messages later.
++// After processing you should free freeme and any GSList values of catchall
++// options (only the lists themselves, not the values). For your convenience,
++// there is cmdopts_free(), which does exactly that.
++// The function recognizes four kinds of expressions:
++//  - Options with arguments in a form '-f bar' or '--foo bar'
++//  - Switches without arguments in a form '-f' or '--foo'
++//  - End-of-options marker '--'
++//  - Individual arguments ('-' and '---' are considered arguments too)
++// To define command line syntax, you pass cmdopts_t struct, that contains
++// two contiguous lists of cmdopt_t and cmdarg_t structs accordingly. The
++// last struct in list must have CMDOPT_LAST flag set.
++// You can specify your own default values, they will be replaced/appended
++// if needed.
++// You can omit specifying longopt or shortopt (put NULL or '\0' there).
++// Note: returned values and arguments are already converted to utf8.
++
++// Flags (each one is a distinct bit, so they can be or'ed and tested separately):
++// Only applies to options, defined if option does not have argument.
++#define CMDOPT_SWITCH     ( 1<<0 )
++// Don't process quotes and escapes in argument (applies to option arguments too).
++#define CMDOPT_PLAIN      ( 1<<1 )
++// For options   - put all encountered values into GSList value.multiopt
++//                 instead of overwriting value.opt.
++// For switches  - increment value.swc instead of logical flipping.
++// For arguments - grab the rest of the line without splitting on spaces.
++// Implicitly last argument.
++#define CMDOPT_CATCHALL   ( 1<<2 )
++// Option/argument must have value.
++#define CMDOPT_REQUIRED   ( 1<<3 )
++// Last entry in struct sequence.
++#define CMDOPT_LAST       ( 1<<4 )
++// Argument only, argument is the name for subcommand.
++// Implicitly last argument.
++#define CMDOPT_SUBCOMMAND ( 1<<5 )
++
++// thoughts about future:
++// command struct contains cmdopts
++// cmdopt/cmdarg struct contains argument type, that implies completion id and argument correctness checks
++// cmdopt/cmdarg struct contains default value
++// when building completion for command, we allow options (if not before --)
++// would be good to have 'subcommands' mcabber commands
++//
++// so, the process of command execution looks like:
++// - we walk through the options, set default values
++// - we parse argument string, populating options
++// - we check for required options availability
++// - we call callback
++// - we free resources
++typedef struct cmdopts_struct cmdopts_t;
++typedef struct { // one entry of a subcommand list
++  guint      flags;    // CMDOPT_LAST marks the final entry of the list
++  const char *name;    // compared against the subcommand argument
++  cmdopts_t  *options; // used to parse the rest of the line after the name
++} subcmd_t;
++typedef struct { // one entry of an option/switch list
++  guint      flags;    // combination of CMDOPT_* flags
++  char       shortopt; // character matched after a single '-'
++  const char *longopt; // name matched after '--'
++  union {
++    GSList *multiopt;  // all values, for options with CMDOPT_CATCHALL
++    gchar  *opt;       // the value, for options without CMDOPT_CATCHALL
++    guint  swc;        // switch state (toggled) or count (with CMDOPT_CATCHALL)
++  } value;
++} cmdopt_t;
++typedef struct { // one entry of a positional argument list
++  guint flags;         // combination of CMDOPT_* flags
++  union {
++    gchar    *arg;     // argument text (points into the parsed buffer)
++    subcmd_t *cmd;     // matched entry, for CMDOPT_SUBCOMMAND arguments
++  } value;
++} cmdarg_t;
++struct cmdopts_struct { // complete syntax description of one command line
++  cmdopt_t *opts;      // option list or NULL, last entry has CMDOPT_LAST
++  cmdarg_t *args;      // argument list or NULL, last entry has CMDOPT_LAST
++  subcmd_t *cmds;      // subcommand list or NULL, last entry has CMDOPT_LAST
++  gchar    *freeme;    // buffer set by cmdopts_parse(), released by cmdopts_free()
++};
++
++const char *cmdopts_parse (const char *arg, cmdopts_t *options); // NULL on success, else error message
++void cmdopts_free(cmdopts_t *options); // frees multi-value lists and freeme, including subcommands
++
+ void replace_nl_with_dots(char *bufstr);
+ char *ut_expand_tabs(const char *text);
+ char *ut_unescape_tabs_cr(const char *text);