Import my code to do reasonably fast tdbpack/unpack from Python

author Martin Pool <mbp@samba.org>

Mon, 9 Sep 2002 06:30:48 +0000 (06:30 +0000)

committer Martin Pool <mbp@samba.org>

Mon, 9 Sep 2002 06:30:48 +0000 (06:30 +0000)
author Martin Pool <mbp@samba.org>
Mon, 9 Sep 2002 06:30:48 +0000 (06:30 +0000)
committer Martin Pool <mbp@samba.org>
Mon, 9 Sep 2002 06:30:48 +0000 (06:30 +0000)
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c

new file mode 100644 (file)

index 0000000..e504494
--- /dev/null
+++ b/source3/python/py_tdbpack.c
@@ -0,0 +1,662 @@
+/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
+        
+   Python wrapper for Samba tdb pack/unpack functions
+   Copyright (C) Martin Pool 2002
+
+
+   NOTE PYTHON STYLE GUIDE
+   http://www.python.org/peps/pep-0007.html
+   
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+
+#include "Python.h"
+
+static int pytdbpack_calc_reqd_len(char *format_str,
+                                  PyObject *val_seq);
+
+static PyObject *pytdbpack_unpack_item(char,
+                                     char **pbuf,
+                                     int *plen);
+static int
+pytdbpack_calc_item_len(char format_ch,
+                       PyObject *val_obj);
+
+static PyObject *pytdbpack_pack_data(const char *format_str,
+                                    PyObject *val_seq,
+                                    unsigned char *buf);
+
+
+       
+static const char * pytdbpack_docstring =
+"Convert between Python values and Samba binary encodings.
+
+This module is conceptually similar to the standard 'struct' module, but it
+uses both a different binary format and a different description string.
+
+Samba's encoding is based on that used inside DCE-RPC and SMB: a
+little-endian, unpadded, non-self-describing binary format.  It is intended
+that these functions be as similar as possible to the routines in Samba's
+tdb/tdbutil module, with appropriate adjustments for Python datatypes.
+
+Python strings are used to specify the format of data to be packed or
+unpacked.
+
+Strings in TDBs are typically stored in DOS codepages.  The caller of this
+module must make appropriate translations if necessary, typically to and from
+Unicode objects.
+
+tdbpack format strings:
+
+    'f':  NULL-terminated string in DOS codepage
+
+    'P':  same as 'f'
+
+    'd':  4 byte little-endian number
+
+    'w':  2 byte little-endian number
+
+    'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
+          really just an \"exists\" or \"does not exist\" flag.  The boolean
+          value of the Python object is used.
+    
+    'B': 4-byte LE length, followed by that many bytes of binary data.
+         Corresponds to a Python byte string of the appropriate length.
+
+    '$': Special flag indicating that the preceding format code should be
+         repeated while data remains.  This is only supported for unpacking.
+
+    Every code corresponds to a single Python object, except 'B' which
+    corresponds to two values (length and contents), and '$', which produces
+    however many make sense.
+";
+
+
+static char const pytdbpack_pack_doc[] = 
+"pack(format, values) -> buffer
+Pack Python objects into Samba binary format according to format string.
+
+arguments:
+    format -- string of tdbpack format characters
+    values -- sequence of value objects corresponding 1:1 to format characters
+
+returns:
+    buffer -- string containing packed data
+
+raises:
+    IndexError -- if there are not the same number of format codes as of
+        values
+    ValueError -- if any of the format characters is illegal
+    TypeError  -- if the format is not a string, or values is not a sequence,
+        or any of the values is of the wrong type for the corresponding
+        format character
+";
+
+
+static char const pytdbpack_unpack_doc[] =
+"unpack(format, buffer) -> (values, rest)
+Unpack Samba binary data according to format string.
+
+arguments:
+    format -- string of tdbpack characters
+    buffer -- string of packed binary data
+
+returns:
+    2-tuple of:
+        values -- sequence of values corresponding 1:1 to format characters
+        rest -- string containing data that was not decoded, or '' if the
+            whole string was consumed
+
+raises:
+    IndexError -- if there is insufficient data in the buffer for the
+        format (or if the data is corrupt and contains a variable-length
+        field extending past the end)
+    ValueError -- if any of the format characters is illegal
+
+notes:
+    Because unconsumed data is returned, you can feed it back in to the
+    unpacker to extract further fields.  Alternatively, if you wish to modify
+    some fields near the start of the data, you may be able to save time by
+    only unpacking and repacking the necessary part.
+";
+
+
+
+/*
+  Game plan is to first of all walk through the arguments and calculate the
+  total length that will be required.  We allocate a Python string of that
+  size, then walk through again and fill it in.
+
+  We just borrow references to all the passed arguments, since none of them
+  need to be permanently stored.  We transfer ownership to the returned
+  object.
+ */    
+static PyObject *
+pytdbpack_pack(PyObject *self,
+              PyObject *args)
+{
+       char *format_str;
+       PyObject *val_seq, *fast_seq, *buf_str;
+       int reqd_len;
+       char *packed_buf;
+
+       /* TODO: Test passing wrong types or too many arguments */
+       if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
+               return NULL;
+
+       /* Convert into a list or tuple (if not already one), so that we can
+        * index more easily. */
+       fast_seq = PySequence_Fast(val_seq,
+                                  __FUNCTION__ ": argument 2 must be sequence");
+       if (!fast_seq)
+               return NULL;
+                       
+       reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
+       if (reqd_len == -1)     /* exception was thrown */
+               return NULL;
+
+       /* Allocate space.
+        
+          This design causes an unnecessary copying of the data when Python
+          constructs an object, and that might possibly be avoided by using a
+          Buffer object of some kind instead.  I'm not doing that for now
+          though.  */
+       packed_buf = malloc(reqd_len);
+       if (!packed_buf) {
+               PyErr_Format(PyExc_MemoryError,
+                            "%s: couldn't allocate %d bytes for packed buffer",
+                            __FUNCTION__, reqd_len);
+               return NULL;
+       }       
+       
+       if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
+               free(packed_buf);
+               return NULL;
+       }
+
+       buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
+       free(packed_buf);       /* get rid of tmp buf */
+       
+       return buf_str;
+}
+
+
+
+static PyObject *
+pytdbpack_unpack(PyObject *self,
+                PyObject *args)
+{
+       char *format_str, *packed_str, *ppacked;
+       PyObject *val_list = NULL, *ret_tuple = NULL;
+       PyObject *rest_string = NULL;
+       int format_len, packed_len;
+       int i;
+       char last_format = '#';
+       
+       /* get arguments */
+       if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
+               return NULL;
+
+       format_len = strlen(format_str);
+       
+       /* allocate list to hold results */
+       val_list = PyList_New(format_len);
+       if (!val_list)
+               goto failed;
+       ret_tuple = PyTuple_New(2);
+       if (!ret_tuple)
+               goto failed;
+       
+       /* For every object, unpack.  */
+       for (ppacked = packed_str, i = 0; i < format_len; i++) {
+               PyObject *val_obj;
+               char format;
+
+               format = format_str[i];
+               if (format == '$') {
+                       if (i == 0) {
+                               PyErr_Format(PyExc_ValueError,
+                                            "%s: '$' may not be first character in format",
+                                            __FUNCTION__);
+                               goto failed;
+                       }
+                       else {
+                               format = last_format; /* repeat */
+                       }
+               }
+
+               val_obj = pytdbpack_unpack_item(format,
+                                               &ppacked,
+                                               &packed_len);
+               if (!val_obj)
+                       goto failed;
+
+               PyList_SET_ITEM(val_list, i, val_obj);
+               last_format = format;
+       }
+
+       /* put leftovers in box for lunch tomorrow */
+       rest_string = PyString_FromStringAndSize(ppacked, packed_len);
+       if (!rest_string)
+               goto failed;
+
+       /* return (values, rest) tuple; give up references to them */
+       PyTuple_SET_ITEM(ret_tuple, 0, val_list);
+       val_list = NULL;
+       PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
+       val_list = NULL;
+       return ret_tuple;
+
+  failed:
+       /* handle failure: deallocate anything */
+       Py_XDECREF(val_list);
+       Py_XDECREF(ret_tuple);
+       Py_XDECREF(rest_string);
+       return NULL;
+}
+
+
+/*
+  Internal routine that calculates how many bytes will be required to
+  encode the values in the format.
+
+  Also checks that the value list is the right size for the format list.
+
+  Returns number of bytes (may be 0), or -1 if there's something wrong, in
+  which case a Python exception has been raised.
+
+  Arguments:
+
+    val_seq: a Fast Sequence (list or tuple), being all the values
+*/
+static int
+pytdbpack_calc_reqd_len(char *format_str,
+                       PyObject *val_seq)
+{
+       int len = 0;
+       char *p;
+       int val_i;
+       int val_len;
+
+       val_len = PySequence_Fast_GET_SIZE(val_seq);
+
+       for (p = format_str, val_i = 0; *p; p++, val_i++) {
+               char ch = *p;
+               PyObject *val_obj;
+               int item_len;
+
+               if (val_i >= val_len) {
+                       PyErr_Format(PyExc_IndexError,
+                                    "samba.tdbpack.pack: value list is too short for format string");
+                       return -1;
+               }
+
+               /* borrow a reference to the item */
+               val_obj = PySequence_Fast_GET_ITEM(val_seq, val_i);
+               if (!val_obj)
+                       return -1;
+
+               item_len = pytdbpack_calc_item_len(ch, val_obj);
+               if (item_len == -1)
+                       return -1;
+               else
+                       len += item_len;
+       }
+
+       if (val_i != val_len) {
+               PyErr_Format(PyExc_IndexError,
+                            "%s: value list is wrong length for format string",
+                            __FUNCTION__);
+               return -1;
+       }
+
+       return len;
+}
+
+
+/*
+  Calculate the number of bytes required to pack a single value.
+*/
+static int
+pytdbpack_calc_item_len(char ch,
+                       PyObject *val_obj)
+{
+       if (ch == 'd' || ch == 'w') {
+               if (!PyInt_Check(val_obj)) {
+                       PyErr_Format(PyExc_TypeError,
+                                    "tdbpack: format '%c' requires an Int",
+                                    ch);
+                       return -1;
+               }
+               if (ch == 'w')
+                       return 2;
+               else 
+                       return 4;
+       } else if (ch == 'p') {
+               return 4;
+       }
+       else if (ch == 'f' || ch == 'P' || ch == 'B') {
+               /* nul-terminated 8-bit string */
+               if (!PyString_Check(val_obj)) {
+                       PyErr_Format(PyExc_TypeError,
+                                    "tdbpack: format '%c' requires a String",
+                                    ch);
+                       return -1;
+               }
+               
+               if (ch == 'B') {
+                       /* byte buffer; just use Python string's length, plus
+                          a preceding word */
+                       return 4 + PyString_GET_SIZE(val_obj);
+               }
+               else {
+                       /* one nul character */
+                       return 1 + PyString_GET_SIZE(val_obj);
+               }               
+       }
+       else {  
+               PyErr_Format(PyExc_ValueError,
+                            __FUNCTION__ ": format character '%c' is not supported",
+                            ch);
+               
+               return -1;
+       }
+}
+
+
+/*
+  XXX: glib and Samba have quicker macro for doing the endianness conversions,
+  but I don't know of one in plain libc, and it's probably not a big deal.  I
+  realize this is kind of dumb because we'll almost always be on x86, but
+  being safe is important.
+*/
+static void pack_int32(unsigned long val_long, unsigned char **pbuf)
+{
+       (*pbuf)[0] =         val_long & 0xff;
+       (*pbuf)[1] = (val_long >> 8)  & 0xff;
+       (*pbuf)[2] = (val_long >> 16) & 0xff;
+       (*pbuf)[3] = (val_long >> 24) & 0xff;
+       (*pbuf) += 4;
+}
+
+
+static void pack_bytes(long len, const char *from,
+                      unsigned char **pbuf)
+{
+       memcpy(*pbuf, from, len);
+       (*pbuf) += len;
+}
+
+
+static void
+unpack_err_too_short(void)
+{
+       PyErr_Format(PyExc_IndexError,
+                    __FUNCTION__ ": data too short for unpack format");
+}
+
+
+static PyObject *
+unpack_int32(char **pbuf, int *plen)
+{
+       long v;
+       unsigned char *b;
+       
+       if (*plen < 4) {
+               unpack_err_too_short();
+               return NULL;
+       }
+
+       b = *pbuf;
+       v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
+       
+       (*pbuf) += 4;
+       (*plen) -= 4;
+
+       return PyInt_FromLong(v);
+}
+
+
+static PyObject *unpack_int16(char **pbuf, int *plen)
+{
+       long v;
+       unsigned char *b;
+       
+       if (*plen < 2) {
+               unpack_err_too_short();
+               return NULL;
+       }
+
+       b = *pbuf;
+       v = b[0] | b[1]<<8;
+       
+       (*pbuf) += 2;
+       (*plen) -= 2;
+
+       return PyInt_FromLong(v);
+}
+
+
+static PyObject *
+unpack_string(char **pbuf, int *plen)
+{
+       int len;
+       char *nul_ptr, *start;
+
+       start = *pbuf;
+       
+       nul_ptr = memchr(start, '\0', *plen);
+       if (!nul_ptr) {
+               unpack_err_too_short();
+               return NULL;
+       }
+
+       len = nul_ptr - start;
+
+       *pbuf += len + 1;       /* skip \0 */
+       *plen -= len + 1;
+
+       return PyString_FromStringAndSize(start, len);
+}
+
+
+static PyObject *
+unpack_buffer(char **pbuf, int *plen)
+{
+       /* first get 32-bit len */
+       long slen;
+       unsigned char *b;
+       unsigned char *start;
+       
+       if (*plen < 4) {
+               unpack_err_too_short();
+               return NULL;
+       }
+       
+       b = *pbuf;
+       slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
+
+       if (slen < 0) { /* surely you jest */
+               PyErr_Format(PyExc_ValueError,
+                            __FUNCTION__ ": buffer seems to have negative length");
+               return NULL;
+       }
+
+       (*pbuf) += 4;
+       (*plen) -= 4;
+       start = *pbuf;
+
+       if (*plen < slen) {
+               PyErr_Format(PyExc_IndexError,
+                            __FUNCTION__ ": not enough data to unpack buffer: "
+                            "need %d bytes, have %d",
+                            (int) slen, *plen);
+               return NULL;
+       }
+
+       (*pbuf) += slen;
+       (*plen) -= slen;
+
+       return PyString_FromStringAndSize(start, slen);
+}
+
+
+/* Unpack a single field from packed data, according to format character CH.
+   Remaining data is at *PBUF, of *PLEN.
+
+   *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
+   been consumed.
+
+   Returns a reference to the unpacked Python object, or NULL for failure.
+*/
+static PyObject *pytdbpack_unpack_item(char ch,
+                                      char **pbuf,
+                                      int *plen)
+{
+       if (ch == 'w') {        /* 16-bit int */
+               return unpack_int16(pbuf, plen);
+       }
+       else if (ch == 'd' || ch == 'p') { /* 32-bit int */
+               /* pointers can just come through as integers */
+               return unpack_int32(pbuf, plen);
+       }
+       else if (ch == 'f' || ch == 'P') { /* nul-term string  */
+               return unpack_string(pbuf, plen);
+       }
+       else if (ch == 'B') { /* length, buffer */
+               return unpack_buffer(pbuf, plen);
+       }
+       else {
+               PyErr_Format(PyExc_ValueError,
+                            __FUNCTION__ ": format character '%c' is not supported",
+                            ch);
+               
+               return NULL;
+       }
+}
+
+
+
+/*
+  Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
+  and advance the pointer.  Buffer length has been pre-calculated so we are
+  sure that there is enough space.
+
+*/
+static PyObject *
+pytdbpack_pack_item(char ch,
+                   PyObject *val_obj,
+                   unsigned char **pbuf)
+{
+       if (ch == 'w') {
+               unsigned long val_long = PyInt_AsLong(val_obj);
+               (*pbuf)[0] = val_long & 0xff;
+               (*pbuf)[1] = (val_long >> 8) & 0xff;
+               (*pbuf) += 2;
+       }
+       else if (ch == 'd') {
+               /* 4-byte LE number */
+               pack_int32(PyInt_AsLong(val_obj), pbuf);
+       }
+       else if (ch == 'p') {
+               /* "Pointer" value -- in the subset of DCERPC used by Samba,
+                  this is really just an "exists" or "does not exist"
+                  flag. */
+               pack_int32(PyObject_IsTrue(val_obj), pbuf);
+       }
+       else if (ch == 'f' || ch == 'P') {
+               int size;
+               char *sval;
+
+               size = PyString_GET_SIZE(val_obj);
+               sval = PyString_AS_STRING(val_obj);
+               pack_bytes(size+1, sval, pbuf); /* include nul */
+       }
+       else if (ch == 'B') {
+               int size;
+               char *sval;
+
+               size = PyString_GET_SIZE(val_obj);
+               pack_int32(size, pbuf);
+               sval = PyString_AS_STRING(val_obj);
+               pack_bytes(size, sval, pbuf); /* do not include nul */
+       }
+       else {
+               /* this ought to be caught while calculating the length, but
+                  just in case. */
+               PyErr_Format(PyExc_ValueError,
+                            "%s: format character '%c' is not supported",
+                            __FUNCTION__, ch);
+               
+               return NULL;
+       }
+               
+       return Py_None;
+}
+
+
+/*
+  Pack data according to FORMAT_STR from the elements of VAL_SEQ into
+  PACKED_BUF.
+
+  The string has already been checked out, so we know that VAL_SEQ is large
+  enough to hold the packed data, and that there are enough value items.
+  (However, their types may not have been thoroughly checked yet.)
+
+  In addition, val_seq is a Python Fast sequence.
+
+  Returns NULL for error (with exception set), or None.
+*/
+PyObject *
+pytdbpack_pack_data(const char *format_str,
+                   PyObject *val_seq,
+                   unsigned char *packed_buf)
+{
+       int i;
+
+       for (i = 0; format_str[i]; i++) {
+               char ch = format_str[i];
+               PyObject *val_obj;
+
+               /* borrow a reference to the item */
+               val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
+               if (!val_obj)
+                       return NULL;
+
+               if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
+                       return NULL;
+       }
+
+       return Py_None;
+}
+
+
+
+
+
+static PyMethodDef pytdbpack_methods[] = {
+       { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
+       { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
+};
+
+DL_EXPORT(void)
+inittdbpack(void)
+{
+       Py_InitModule3("tdbpack", pytdbpack_methods,
+                      (char *) pytdbpack_docstring);
+}
author	Martin Pool <mbp@samba.org>
	Mon, 9 Sep 2002 06:30:48 +0000 (06:30 +0000)
committer	Martin Pool <mbp@samba.org>
	Mon, 9 Sep 2002 06:30:48 +0000 (06:30 +0000)