--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cmdopts.diff Sun Feb 24 04:47:32 2013 +0200
@@ -0,0 +1,416 @@
+# HG changeset patch
+# Parent 92fa48ef53c909928706ab4c51518953339a38e4
+Unified command option parsing
+
+diff -r 92fa48ef53c9 mcabber/mcabber/utils.c
+--- a/mcabber/mcabber/utils.c Sun Jan 27 00:40:37 2013 +0200
++++ b/mcabber/mcabber/utils.c Sun Feb 24 04:24:14 2013 +0200
+@@ -555,6 +555,311 @@
+ *str = tolower(*str);
+ }
+
++// FURTHER TODO:
++// Allow to specify catchall argument in the middle of string (requires some reverse parser)
++// Better error messages (caller frees them)
++// --help generates error with short usage, based on info in options struct
++
++// cmdopts_parse_internal(arg, e, options)
++// Parses writable utf8 buffer arg in place (e points to its terminating '\0') and
++// stores found values into options. Returns NULL on success or constant error string.
++// Parser states: in_space -> in_space, in_optstart, in_argstart
++// in_optstart -> in_shortoptend, in_longoptstart, in_argstart ('-')
++// in_shortoptend -> in_space, error
++// in_longoptstart -> in_longopt, in_space, in_argstart ('---')
++// in_longopt -> in_longopt, in_space, error
++// in_argstart -> in_arg, success; in_arg -> in_arg, in_space, success, error
++static const char *cmdopts_parse_internal(gchar *arg, gchar *e, cmdopts_t *options)
++{
++  // parser state
++  enum {
++    in_space,
++    in_optstart,
++    in_shortoptstart, // never entered by the code below
++    in_shortoptend,
++    in_longoptstart,
++    in_longopt,
++    in_argstart,
++    in_arg,
++  } state = in_space;
++  // current pointer, start of object pointer
++  gchar *p, *s = NULL;
++  // inside of double quotes / no more options are recognized
++  gboolean quotes = FALSE;
++  gboolean opts_ended = FALSE;
++  // option, for which argument is currently parsed
++  cmdopt_t *option = NULL;
++  // argument, that we currently parse
++  cmdarg_t *argument = NULL;
++  // flags of option/argument
++  guint flags = 0;
++  // error message to return
++  const char *error = NULL;
++
++  p = arg;
++  // we allow parser to do one extra run on final '\0'
++  while (p <= e && error == NULL) {
++    if (state == in_space) { // space between args/options
++      if (*p == ' ' || *p == '\0') { // still space
++        p ++;
++      } else if (*p == '-' && !opts_ended) { // option
++        state = in_optstart;
++        p ++;
++      } else { // argument
++        if (!option) {
++          opts_ended = TRUE;
++        }
++        s = p;
++        state = in_argstart;
++      }
++    } else if (state == in_optstart) { // long/short option
++      if (*p == ' ' || *p == '\0') { // argument '-'
++        opts_ended = TRUE;
++        s = p - 1;
++        state = in_argstart;
++      } else if (*p == '-') { // long option
++        state = in_longoptstart;
++        p ++;
++      } else { // short option
++        s = p;
++        state = in_shortoptend;
++        p ++;
++      }
++    } else if (state == in_shortoptend) { // short option
++      if (*p == ' ' || *p == '\0') { // option really ended
++        gboolean found = FALSE;
++        option = options -> opts;
++        if (option) {
++          do {
++            if (option -> shortopt == *s) {
++              found = TRUE;
++              break;
++            }
++          } while ((!(option++ -> flags & CMDOPT_LAST)) && !found);
++        }
++        if (found) { // option is known
++          if (option -> flags & CMDOPT_SWITCH) { // it is switch
++            if (option -> flags & CMDOPT_CATCHALL) {
++              option -> value.swc ++;
++            } else {
++              option -> value.swc = !option -> value.swc;
++            }
++            option = NULL;
++          } else { // it is option
++            if (*p == '\0') {
++              error = "Short option argument not specified";
++            }
++          }
++          state = in_space;
++          p ++;
++        } else { // option is unknown
++          error = "Unknown short option";
++        }
++      } else { // short option not ended
++        error = "Extra characters at short option end";
++      }
++    } else if (state == in_longoptstart) { // long option initialization
++      if (*p == ' ' || *p == '\0') { // end of options '--'
++        opts_ended = TRUE;
++        state = in_space;
++        p ++;
++      } else if (*p == '-') { // argument, starting with '---'
++        opts_ended = TRUE;
++        s = p - 2;
++        state = in_argstart;
++      } else { // it is long option
++        s = p;
++        state = in_longopt;
++        p ++;
++      }
++    } else if (state == in_longopt) { // long option name
++      if (*p == ' ' || *p == '\0') { // long option ended
++        gboolean found = FALSE;
++        gboolean eof = *p == '\0';
++        *p = '\0'; // terminate the name, so it can be compared as a string
++        option = options -> opts;
++        if (option) {
++          do {
++            if (!g_strcmp0 (option -> longopt, s)) {
++              found = TRUE;
++              break;
++            }
++          } while ((!(option++ -> flags & CMDOPT_LAST)) && !found);
++        }
++        if (found) { // option is known
++          if (option -> flags & CMDOPT_SWITCH) { // it is switch
++            if (option -> flags & CMDOPT_CATCHALL) {
++              option -> value.swc ++;
++            } else {
++              option -> value.swc = !option -> value.swc;
++            }
++            option = NULL;
++          } else { // it is option
++            if (eof) {
++              error = "Long option argument not specified";
++            }
++          }
++          state = in_space;
++          p ++;
++        } else { // option is unknown
++          error = "Unknown long option";
++        }
++      } else { // still long option
++        p ++;
++      }
++    } else if (state == in_argstart) { // option/command argument initialization
++      if (option) {
++        flags = option -> flags & ~CMDOPT_CATCHALL; // catchall in options indicates multi-options
++      } else {
++        if (!argument) {
++          argument = options -> args;
++        }
++        if (!argument) { // no need to parse arguments at all
++          break;
++        }
++        flags = argument -> flags;
++        if ((flags & CMDOPT_CATCHALL) && (flags & CMDOPT_PLAIN)) { // can finish right away
++          argument -> value.arg = s;
++          break;
++        }
++      }
++      quotes = FALSE;
++      state = in_arg;
++    } else if (state == in_arg) { // option/command argument value
++      if (*p == '\0' && quotes) { // end of line in quotes
++        error = "Unfinished quoted argument";
++      } else if ((*p == ' ' && (!quotes) && !(flags & CMDOPT_CATCHALL)) || *p == '\0') { // argument ended
++        if (*p != '\0') { // terminate the value and step over the separating space
++          *p = '\0';
++          p ++;
++        }
++        if (option) { // option argument
++          if (option -> flags & CMDOPT_CATCHALL) { // multi-value option
++            option -> value.multiopt = g_slist_append (option -> value.multiopt, s);
++          } else { // single-value option
++            option -> value.opt = s;
++          }
++          option = NULL;
++        } else { // command argument
++          if (argument -> flags & CMDOPT_SUBCOMMAND) {
++            gboolean found = FALSE;
++            subcmd_t *subcommand = options -> cmds;
++            if (subcommand) {
++              do {
++                if (!g_strcmp0(s, subcommand -> name)) {
++                  found = TRUE;
++                  break;
++                }
++              } while (!(subcommand++ -> flags & CMDOPT_LAST));
++            }
++            if (found) { // the rest of the line is parsed by subcommand rules
++              argument -> value.cmd = subcommand;
++              error = cmdopts_parse_internal(p, e, subcommand -> options);
++              break;
++            } else {
++              error = "Unknown subcommand";
++            }
++          } else {
++            argument -> value.arg = s;
++            if (argument -> flags & CMDOPT_LAST) { // last argument
++              break;
++            }
++            argument ++;
++          }
++        }
++        state = in_space;
++        // no p++ here: p is past the separator already (or on final '\0', that in_space consumes)
++      } else if (*p == '\\' && !(flags & CMDOPT_PLAIN)) { // next char escape
++        memmove(p, p+1, e-p); // drop the backslash, moving terminating '\0' too
++        e --;
++        if (*p == '\0') {
++          error = "Escape at the end of line";
++        }
++        p ++;
++      } else if (*p == '"' && !(flags & CMDOPT_PLAIN)) { // quotation start/end
++        memmove(p, p+1, e-p); // drop the quote, moving terminating '\0' too
++        e --;
++        quotes = !quotes;
++      } else { // still argument
++        p ++;
++      }
++    }
++  }
++
++  // check required flags on options
++  if (error == NULL && options -> opts) {
++    option = options -> opts;
++    do {
++      if (option -> flags & CMDOPT_REQUIRED) {
++        if (option -> flags & CMDOPT_SWITCH) {
++          // no way to check trigger switches, but no point in it as well
++          if (option -> flags & CMDOPT_CATCHALL && option -> value.swc == 0) {
++            error = "Required switch is not specified";
++            break;
++          }
++        } else {
++          if ((option -> flags & CMDOPT_CATCHALL && option -> value.multiopt == NULL) ||
++              ((!(option -> flags & CMDOPT_CATCHALL)) && option -> value.opt == NULL)) {
++            error = "Required option is not specified";
++            break;
++          }
++        }
++      }
++    } while (!(option++ -> flags & CMDOPT_LAST));
++  }
++
++  // check required flags on arguments
++  if (error == NULL && options -> args) {
++    argument = options -> args;
++    do {
++      if (argument -> flags & CMDOPT_REQUIRED) { // optional arguments may stay unset
++        if (argument -> flags & CMDOPT_SUBCOMMAND) {
++          if (argument -> value.cmd == NULL) {
++            error = "Subcommand is not specified";
++            break;
++          }
++        } else if (argument -> value.arg == NULL) {
++          error = "Required argument is not specified";
++          break;
++        }
++      }
++    } while (!(argument++ -> flags & CMDOPT_LAST));
++  }
++
++  return error;
++}
++
++const char *cmdopts_parse(const char *arg, cmdopts_t *options)
++{
++  // The parser modifies its buffer in place, so it gets the utf8 conversion of arg.
++  gchar *buffer = to_utf8(arg);
++  gchar *end = buffer;
++  while (*end != '\0') end ++; // advance to the terminating '\0'
++  options -> freeme = buffer; // g_free'd by cmdopts_free()
++  return cmdopts_parse_internal(buffer, end, options);
++}
++
++void cmdopts_free(cmdopts_t *options)
++{
++  cmdopt_t *opt;
++  subcmd_t *sub;
++  // Release value lists of multi-value options (CATCHALL set, SWITCH not set).
++  // Only the list cells are freed, not the values they reference.
++  for (opt = options -> opts; opt != NULL; opt = (opt -> flags & CMDOPT_LAST) ? NULL : opt + 1) {
++    if ((opt -> flags & (CMDOPT_CATCHALL|CMDOPT_SWITCH)) != CMDOPT_CATCHALL)
++      continue;
++    g_slist_free(opt -> value.multiopt);
++    opt -> value.multiopt = NULL;
++  }
++  // Do the same for the option sets of every subcommand.
++  for (sub = options -> cmds; sub != NULL; sub = (sub -> flags & CMDOPT_LAST) ? NULL : sub + 1) {
++    cmdopts_free(sub -> options);
++  }
++  // Finally drop the buffer, remembered in freeme.
++  g_free(options -> freeme);
++  options -> freeme = NULL;
++}
++
+ // strip_arg_special_chars(string)
+ // Remove quotes and backslashes before an escaped quote
+ // Only quotes need a backslash
+diff -r 92fa48ef53c9 mcabber/mcabber/utils.h
+--- a/mcabber/mcabber/utils.h Sun Jan 27 00:40:37 2013 +0200
++++ b/mcabber/mcabber/utils.h Sun Feb 24 04:24:14 2013 +0200
+@@ -43,6 +43,93 @@
+ char **split_arg(const char *arg, unsigned int n, int dontstriplast);
+ void free_arg_lst(char **arglst);
+
++// error cmdopts_parse (argstring, optionlist)
++// Function parses command argument string according to provided list of
++// options and arguments. If in this process it encounters an error, it
++// returns an error string (that should be displayed), otherwise it returns NULL.
++// Note: For now the returned error is a constant string, that must not be
++// freed. In the future the caller will have to g_free() it.
++// After processing you should free freeme and any GSList values of catchall
++// options (only the lists themselves, not the values). For your convenience,
++// there is cmdopts_free(), that does exactly that.
++// The function recognizes four kinds of expressions:
++// - Options with arguments in a form '-f bar' or '--foo bar'
++// - Switches without arguments in a form '-f' or '--foo'
++// - End-of-options marker '--'
++// - Individual arguments ('-' and '---' are considered arguments too)
++// To define command line syntax, you pass cmdopts_t struct, that contains
++// contiguous lists of cmdopt_t, cmdarg_t and (for subcommands) subcmd_t structs.
++// The last struct in each list must have CMDOPT_LAST flag set.
++// You can specify your own default values, they will be replaced/appended
++// if needed.
++// You can omit specifying longopt or shortopt (put NULL or '\0' there).
++// Note: returned values and arguments are already converted to utf8.
++
++// Flags (every flag is a separate bit, so they can be combined with '|'):
++// Only applies to options, defined if option does not have argument.
++#define CMDOPT_SWITCH ( 1<<0 )
++// Don't process quotes and escapes in argument (applies to option arguments too).
++#define CMDOPT_PLAIN ( 1<<1 )
++// For options - put all encountered values into GSList value.multiopt
++// instead of overwriting value.opt.
++// For switches - increment value.swc instead of logical flipping.
++// For arguments - grab the rest of the line without splitting on spaces.
++// Implicitly last argument.
++#define CMDOPT_CATCHALL ( 1<<2 )
++// Option/argument must have value.
++#define CMDOPT_REQUIRED ( 1<<3 )
++// Last entry in struct sequence.
++#define CMDOPT_LAST ( 1<<4 )
++// Argument only, argument is the name for subcommand.
++// Implicitly last argument.
++#define CMDOPT_SUBCOMMAND ( 1<<5 )
++
++// thoughts about future:
++// command struct contains cmdopts
++// cmdopt/cmdarg struct contains argument type, that implies completion id and argument correctness checks
++// cmdopt/cmdarg struct contains default value
++// when building completion for command, we allow options (if not before --)
++// would be good to have 'subcommands' mcabber commands
++//
++// so, the process of command execution looks like:
++// - we walk through the options, set default values
++// - we parse argument string, populating options
++// - we check for required options availability
++// - we call callback
++// - we free resources
++typedef struct cmdopts_struct cmdopts_t;
++typedef struct { // subcommand description
++ guint flags; // CMDOPT_LAST marks the last entry of the list
++ const char *name; // name, that the subcommand argument is compared to
++ cmdopts_t *options; // syntax, used to parse the rest of the line after the name
++} subcmd_t;
++typedef struct { // option/switch description
++ guint flags; // CMDOPT_* flags
++ char shortopt; // one-character name, matched after '-'
++ const char *longopt; // long name, matched after '--'
++ union {
++ GSList *multiopt; // option with CMDOPT_CATCHALL: list of encountered values
++ gchar *opt; // option without CMDOPT_CATCHALL: last encountered value
++ guint swc; // switch: flipped, or incremented with CMDOPT_CATCHALL
++ } value;
++} cmdopt_t;
++typedef struct { // command argument description
++ guint flags; // CMDOPT_* flags
++ union {
++ gchar *arg; // value of the argument
++ subcmd_t *cmd; // matched subcommand, for CMDOPT_SUBCOMMAND arguments
++ } value;
++} cmdarg_t;
++struct cmdopts_struct { // complete command syntax description
++ cmdopt_t *opts; // list of options, may be NULL
++ cmdarg_t *args; // list of arguments, may be NULL
++ subcmd_t *cmds; // list of subcommands, may be NULL
++ gchar *freeme; // buffer set by cmdopts_parse() and g_free'd by cmdopts_free()
++};
++
++const char *cmdopts_parse (const char *arg, cmdopts_t *options); // returns NULL on success, error message otherwise
++void cmdopts_free(cmdopts_t *options); // frees multi-value option lists (also of subcommands) and freeme
++
+ void replace_nl_with_dots(char *bufstr);
+ char *ut_expand_tabs(const char *text);
+ char *ut_unescape_tabs_cr(const char *text);