From mboxrd@z Thu Jan 1 00:00:00 1970 From: Joshua Roys Subject: Re: Backwards-compatible string encoding Date: Thu, 9 Apr 2009 15:55:48 -0400 Message-ID: <49DE52C4.5060202@gtri.gatech.edu> References: <49CCEE58.5010800@gtri.gatech.edu> <49CD01A4.5060408@redhat.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------020108010609050205000200" Return-path: In-Reply-To: <49CD01A4.5060408@redhat.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: linux-audit-bounces@redhat.com Errors-To: linux-audit-bounces@redhat.com To: John Dennis Cc: "linux-audit@redhat.com" List-Id: linux-audit@redhat.com --------------020108010609050205000200 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit On 03/27/2009 12:41 PM, John Dennis wrote: > Strings should be formatted as strings which means enclosed in double > quotes with standard C99 escaping. > > As it stands now the audit libraries have hard coded lists of every > field the kernel can emit in an audit message. The test for decoding hex > strings is based on whether the field is known to be a string. Field > values *never* currently begin with a quote. If the kernel audit code > was modified to format strings such that they are always enclosed in > quotes the following positive things would occur: > > * No need for hard coded list of which fields are string values. > > * As long as the audit libraries are used for parsing it's fully > backwards compatible (because during parsing you first look for a quote, > if it's there you know its a string value, otherwise fall back to the > legacy logic). > > * Strings are always human readable and it's obvious what is a string. > > * C99 string encoding is trivial and extremely efficient so there is no > burden on the kernel. > > All in all a win/win situation. > > > Hello all, The following is to provoke discussion; this is an issue I would like to see fixed, and I have the time to work on it at present. Attached is a C program that has 2 output formats depending on a global flag (you can change the flag by incrementing argc). I use it to demonstrate 2 options, but if one were picked, it could be used to preserve backwards compatibility via a proc file. The binary format and API are inspired from both an email by Paul Moore: https://www.redhat.com/archives/linux-audit/2008-January/msg00087.html extprot, a self-describing binary encoding: http://eigenclass.org/R2/writings/extprot-extensible-protocols-intro and some SSH buffer management code I wrote. Oh, and it should look suspiciously similar to some current audit code. The text output is basically what John Dennis was looking for, I think. Although I don't quote numbers or do any radix prefixing right now. Anyway. Comments? Anything I should change? Is this worth pursuing at all (does something like this have any hope of being integrated)? $ gcc testformat.c $ ./a.out (binary display via xxd) $ ./a.out q (text output) One last thing: as you can see, the 3rd key/value pair msg='...' has key/value pairs inside it, kind of like the current setup where audit gets stuff from other code. It has a k/v user_said=\'...\'...\' with a quote in the middle. Basically, I would also like to being some sanity to users of audit, even though that may be insanely difficult, as has been mentioned. Perhaps only exposing the k/v interface to them. Something. Anything. Thanks, Joshua Roys --------------020108010609050205000200 Content-Type: text/x-csrc; name="testformat.c" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="testformat.c" #include #include #include #include #include #include int BINARY_FORMAT = 1; // .h enum encoded_type_t { TYPE_KV, TYPE_STRING, TYPE_UINT32, TYPE_UINT16, MAX_TYPE }; struct buffer { size_t siz; uint16_t pos; void *buf; }; #define DEFAULT_BUF_SIZ 1024 struct buffer *log_start(); void log_end(struct buffer *buf); int buf_uint32(struct buffer *buf, uint32_t ui); int buf_uint16(struct buffer *buf, uint16_t ui); int buf_string(struct buffer *buf, const char *str); int buf_unsafestring(struct buffer *buf, const char *str); int buf_char(struct buffer *buf, const char c); void log_kv_uint32(struct buffer *buf, const char *key, uint32_t val); void log_kv_string(struct buffer *buf, const char *key, const char *val); void log_kv_unsafestring(struct buffer *buf, const char *key, const char *val); // end .h struct buffer *log_start() { struct buffer *b; b = malloc(sizeof(struct buffer)); if (b) { b->buf = malloc(DEFAULT_BUF_SIZ); b->siz = DEFAULT_BUF_SIZ; b->pos = (BINARY_FORMAT ? 2 : 0); /* binary: prefix length using uint16_t */ memset(b->buf, 0, b->siz); } return (b); } void log_end(struct buffer *buf) { FILE *fp; uint16_t tmppos; if (BINARY_FORMAT) { /* tmppos = buf->pos; buf->pos = 0; buf_uint16(buf, tmppos); buf->pos = tmppos; */ tmppos = htons(buf->pos); memcpy(buf->buf, &tmppos, 2); fp = popen("xxd", "w"); if (!fp) { perror("popen"); } else { fwrite(buf->buf, buf->pos, 1, fp); fclose(fp); } } else { printf("%s\n", (char *) buf->buf); } free(buf->buf); free(buf); } int buf_resize(struct buffer *buf, size_t siz) { size_t newsiz = siz > 2 * buf->siz ? siz : 2 * buf->siz; void *newbuf; newbuf = realloc(buf->buf, newsiz); if (!newbuf) return (-1); buf->buf = newbuf; buf->siz = newsiz; return (0); } int buf_vsprintf(struct buffer *buf, const char *format, va_list ap) { int rc; va_list bak; va_copy(bak, ap); rc = vsnprintf(buf->buf + buf->pos, buf->siz - buf->pos, format, ap); if (rc > buf->siz - buf->pos) { rc = buf_resize(buf, buf->siz + rc + 1); if (rc >= 0) { rc = vsnprintf(buf->buf + buf->pos, buf->siz - buf->pos, format, bak); } } if (rc > 0) buf->pos += rc; va_end(bak); return (rc); } int buf_sprintf(struct buffer *buf, const char *format, ...) { int rc; va_list ap; va_start(ap, format); rc = buf_vsprintf(buf, format, ap); va_end(ap); return (rc); } int buf_type(struct buffer *buf, enum encoded_type_t type) { if (buf->pos == buf->siz && buf_resize(buf, buf->siz + 1)) return (-1); /* assert type < uint8_max (256) */ memcpy(buf->buf + buf->pos++, &type, 1); return (0); } int buf_uint32(struct buffer *buf, uint32_t ui) { int rc = 0; ui = htonl(ui); if (BINARY_FORMAT) { buf_type(buf, TYPE_UINT32); if (buf->siz - buf->pos < sizeof(ui) && buf_resize(buf, buf->siz + sizeof(ui))) return (-1); memcpy(buf->buf + buf->pos, &ui, sizeof(ui)); buf->pos += sizeof(ui); } else { rc = buf_sprintf(buf, "%u", ui); } return (rc); } int buf_uint16(struct buffer *buf, uint16_t ui) { int rc = 0; ui = htons(ui); if (BINARY_FORMAT) { buf_type(buf, TYPE_UINT16); if (buf->siz - buf->pos < sizeof(ui) && buf_resize(buf, buf->siz + sizeof(ui))) return (-1); memcpy(buf->buf + buf->pos, &ui, sizeof(ui)); buf->pos += sizeof(ui); } else { rc = buf_sprintf(buf, "%hu", ui); } return (rc); } int buf_string(struct buffer *buf, const char *str) { int rc = 0; size_t len = strlen(str); uint16_t len16; if (BINARY_FORMAT) { buf_type(buf, TYPE_STRING); if (len > 0x0ffff) return (-1); len16 = len & 0x0ffff; /* +2 for the encoded length of the string */ if (len + 2 > buf->siz - buf->pos && buf_resize(buf, buf->siz + len + 2)) return (-1); len16 = htons(len16); memcpy(buf->buf + buf->pos, &len16, sizeof(len16)); memcpy(buf->buf + buf->pos + 2, str, len); buf->pos += (len + 2); } else { rc = buf_sprintf(buf, "%s", str); } return (rc); } int buf_unsafestring(struct buffer *buf, const char *str) { char ch = 0; char *temp; size_t i, pos, len = strlen(str); static char hex[16] = "0123456789ABCDEF"; static char encode[14] = { '0', // \0 0, 0, 0, 0, 0, 0, // 1-6 'a', 'b', 't', 'n', 'v', 'f', 'r' // 7-13 // BEL, BS, HT, LF, VT, FF, CR }; temp = malloc(3 * len + 1); if (!temp) return (-1); for(i = pos = 0; i < len; i++, pos++) { if (isprint(str[i]) && str[i] != '\'' && str[i] != '\\') { temp[pos] = str[i]; } else { temp[pos++] = '\\'; if (str[i] < 14) ch = encode[str[i]]; else if (str[i] == '\'') ch = '\''; else if (str[i] == '\\') ch = '\\'; if (ch) { temp[pos] = ch; } else { temp[pos++] = hex[(str[i] & 0xf0) >> 4]; temp[pos] = hex[str[i] & 0x0f]; } } } temp[pos] = '\0'; buf_string(buf, temp); free(temp); return (0); } int buf_char(struct buffer *buf, const char c) { if (buf->pos == buf->siz && buf_resize(buf, buf->siz + 1)) return (-1); memcpy(buf->buf + buf->pos++, &c, 1); return (0); } void log_kv_uint32(struct buffer *buf, const char *key, uint32_t val) { if (BINARY_FORMAT) { buf_type(buf, TYPE_KV); buf_string(buf, key); buf_uint32(buf, val); } else { buf_char(buf, ' '); buf_string(buf, key); buf_char(buf, '='); buf_uint32(buf, val); } } void log_kv_string(struct buffer *buf, const char *key, const char *val) { if (BINARY_FORMAT) { buf_type(buf, TYPE_KV); buf_string(buf, key); buf_string(buf, val); } else { /* check val for illegal characters? */ buf_char(buf, ' '); buf_string(buf, key); buf_char(buf, '='); buf_char(buf, '\''); buf_string(buf, val); buf_char(buf, '\''); } } void log_kv_unsafestring(struct buffer *buf, const char *key, const char *val) { if (BINARY_FORMAT) { buf_type(buf, TYPE_KV); buf_string(buf, key); buf_string(buf, val); } else { buf_char(buf, ' '); buf_string(buf, key); buf_char(buf, '='); buf_char(buf, '\''); buf_unsafestring(buf, val); buf_char(buf, '\''); } } int main(int argc, char *argv[]) { BINARY_FORMAT = argc % 2; struct buffer *buf = log_start(); log_kv_string(buf, "comm", "/usr/bin/foo"); log_kv_unsafestring(buf, "args", "--do-this how? --right-now --or-else 'the end of the world'"); log_kv_unsafestring(buf, "msg", "foo=bar cmd=deltree C:\\ user_said='where's my \x99\f\a\a stuff gone?\n'"); log_end(buf); return (0); } --------------020108010609050205000200 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline --------------020108010609050205000200--