r23784: use the GPLv3 boilerplate as recommended by the FSF and the license text

[tprouty/samba.git] / source / python / py_tdbpack.c
diff --git a/source/python/py_tdbpack.c b/source/python/py_tdbpack.c

index 87cd804ed4ef14c1b457501f8cd85be063ae1cef..e504f30b863605a571fc8bccdf53bfbf89be003f 100644 (file)
--- a/source/python/py_tdbpack.c
+++ b/source/python/py_tdbpack.c
@@ -1,7 +1,7 @@
  /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
          
     Python wrapper for Samba tdb pack/unpack functions
-   Copyright (C) Martin Pool 2002
+   Copyright (C) Martin Pool 2002, 2003
  
  
     NOTE PYTHON STYLE GUIDE
@@ -10,7 +10,7 @@
     
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
     (at your option) any later version.
     
     This program is distributed in the hope that it will be useful,
@@ -19,24 +19,45 @@
     GNU General Public License for more details.
     
     You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
  
+#include "Python.h"
  
+/* This symbol is used in both config.h and Python.h which causes an
+   annoying compiler warning. */
  
-#include "Python.h"
+#ifdef HAVE_FSTAT
+#undef HAVE_FSTAT
+#endif
+
+/* This module is supposed to be standalone, however for portability
+   it would be good to use the FUNCTION_MACRO preprocessor define. */
+
+#include "include/config.h"
  
-static int pytdbpack_calc_reqd_len(char *format_str,
-                                  PyObject *val_seq);
+#ifdef HAVE_FUNCTION_MACRO
+#define FUNCTION_MACRO  (__FUNCTION__)
+#else
+#define FUNCTION_MACRO  (__FILE__)
+#endif
  
-static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
+static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_str(char ch,
+                               PyObject *val_iter, PyObject *packed_list,
+                               const char *encoding);
+static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
  
-static PyObject *pytdbpack_pack_data(const char *format_str,
+static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
+
+static PyObject *pytdbpack_data(const char *format_str,
                                      PyObject *val_seq,
-                                    unsigned char *buf);
+                                    PyObject *val_list);
  
+static PyObject *
+pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
  
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
  
  
  static PyObject *pytdbpack_bad_type(char ch,
@@ -44,165 +65,393 @@ static PyObject *pytdbpack_bad_type(char ch,
                                     PyObject *val_obj);
  
  static const char * pytdbpack_docstring =
-"Convert between Python values and Samba binary encodings.
+"Convert between Python values and Samba binary encodings.\n"
+"\n"
+"This module is conceptually similar to the standard 'struct' module, but it\n"
+"uses both a different binary format and a different description string.\n"
+"\n"
+"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
+"little-endian, unpadded, non-self-describing binary format.  It is intended\n"
+"that these functions be as similar as possible to the routines in Samba's\n"
+"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
+"\n"
+"Python strings are used to specify the format of data to be packed or\n"
+"unpacked.\n"
+"\n"
+"String encodings are implied by the database format: they may be either DOS\n"
+"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
+"to be the same as the default Python encoding).\n"
+"\n"
+"tdbpack format strings:\n"
+"\n"
+"    'f': NUL-terminated string in codepage iso8859-1\n"
+"   \n"
+"    'P': same as 'f'\n"
+"\n"
+"    'F': NUL-terminated string in iso-8859-1\n"
+"\n"
+"    'd':  4 byte little-endian unsigned number\n"
+"\n"
+"    'w':  2 byte little-endian unsigned number\n"
+"\n"
+"    'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
+"          really just an \"exists\" or \"does not exist\" flag.  The boolean\n"
+"          value of the Python object is used.\n"
+"    \n"
+"    'B': 4-byte LE length, followed by that many bytes of binary data.\n"
+"         Corresponds to a Python integer giving the length, followed by a byte\n"
+"         string of the appropriate length.\n"
+"\n"
+"    '$': Special flag indicating that the preceding format code should be\n"
+"         repeated while data remains.  This is only supported for unpacking.\n"
+"\n"
+"    Every code corresponds to a single Python object, except 'B' which\n"
+"    corresponds to two values (length and contents), and '$', which produces\n"
+"    however many make sense.\n";
+
+static char const pytdbpack_doc[] = 
+"pack(format, values) -> buffer\n"
+"Pack Python objects into Samba binary format according to format string.\n"
+"\n"
+"arguments:\n"
+"    format -- string of tdbpack format characters\n"
+"    values -- sequence of value objects corresponding 1:1 to format characters\n"
+"\n"
+"returns:\n"
+"    buffer -- string containing packed data\n"
+"\n"
+"raises:\n"
+"    IndexError -- if there are too few values for the format\n"
+"    ValueError -- if any of the format characters is illegal\n"
+"    TypeError  -- if the format is not a string, or values is not a sequence,\n"
+"        or any of the values is of the wrong type for the corresponding\n"
+"        format character\n"
+"\n"
+"notes:\n"
+"    For historical reasons, it is not an error to pass more values than are consumed\n"
+"    by the format.\n";
+
+
+static char const pytdbunpack_doc[] =
+"unpack(format, buffer) -> (values, rest)\n"
+"Unpack Samba binary data according to format string.\n"
+"\n"
+"arguments:\n"
+"    format -- string of tdbpack characters\n"
+"    buffer -- string of packed binary data\n"
+"\n"
+"returns:\n"
+"    2-tuple of:\n"
+"        values -- sequence of values corresponding 1:1 to format characters\n"
+"        rest -- string containing data that was not decoded, or '' if the\n"
+"            whole string was consumed\n"
+"\n"
+"raises:\n"
+"    IndexError -- if there is insufficient data in the buffer for the\n"
+"        format (or if the data is corrupt and contains a variable-length\n"
+"        field extending past the end)\n"
+"    ValueError -- if any of the format characters is illegal\n"
+"\n"
+"notes:\n"
+"    Because unconsumed data is returned, you can feed it back in to the\n"
+"    unpacker to extract further fields.  Alternatively, if you wish to modify\n"
+"    some fields near the start of the data, you may be able to save time by\n"
+"    only unpacking and repacking the necessary part.\n";
+
+
+const char *pytdb_dos_encoding = "cp850";
+
+/* NULL, meaning that the Samba default encoding *must* be the same as the
+   Python default encoding. */
+const char *pytdb_unix_encoding = NULL;
  
-This module is conceptually similar to the standard 'struct' module, but it
-uses both a different binary format and a different description string.
  
-Samba's encoding is based on that used inside DCE-RPC and SMB: a
-little-endian, unpadded, non-self-describing binary format.  It is intended
-that these functions be as similar as possible to the routines in Samba's
-tdb/tdbutil module, with appropriate adjustments for Python datatypes.
+/*
+  * Pack objects to bytes.
+  *
+  * All objects are first individually encoded onto a list, and then the list
+  * of strings is joined in a single step.  This is faster than concatenating
+  * the strings one at a time, and reasonably simple to code.
+  */
+static PyObject *
+pytdbpack(PyObject *self,
+              PyObject *args)
+{
+       char *format_str;
+       PyObject *val_seq, *val_iter = NULL,
+               *packed_list = NULL, *packed_str = NULL,
+               *empty_str = NULL;
  
-Python strings are used to specify the format of data to be packed or
-unpacked.
+       /* TODO: Test passing wrong types or too many arguments */
+       if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
+               return NULL;
  
-Strings in TDBs are typically stored in DOS codepages.  The caller of this
-module must make appropriate translations if necessary, typically to and from
-Unicode objects.
+       if (!(val_iter = PyObject_GetIter(val_seq)))
+               goto out;
  
-tdbpack format strings:
+       /* Create list to hold strings until we're done, then join them all. */
+       if (!(packed_list = PyList_New(0)))
+               goto out;
  
-    'f':  NULL-terminated string in DOS codepage
+       if (!pytdbpack_data(format_str, val_iter, packed_list))
+               goto out;
  
-    'P':  same as 'f'
+       /* this function is not officially documented but it works */
+       if (!(empty_str = PyString_InternFromString("")))
+               goto out;
+       
+       packed_str = _PyString_Join(empty_str, packed_list);
  
-    'd':  4 byte little-endian unsigned number
+  out:
+       Py_XDECREF(empty_str);
+       Py_XDECREF(val_iter);
+       Py_XDECREF(packed_list);
  
-    'w':  2 byte little-endian unsigned number
+       return packed_str;
+}
  
-    'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
-          really just an \"exists\" or \"does not exist\" flag.  The boolean
-          value of the Python object is used.
-    
-    'B': 4-byte LE length, followed by that many bytes of binary data.
-         Corresponds to a Python integer giving the length, followed by a byte
-         string of the appropriate length.
  
-    '$': Special flag indicating that the preceding format code should be
-         repeated while data remains.  This is only supported for unpacking.
+/*
+  Pack data according to FORMAT_STR, taking values from VAL_ITER and
+  appending the packed pieces to PACKED_LIST.
  
-    Every code corresponds to a single Python object, except 'B' which
-    corresponds to two values (length and contents), and '$', which produces
-    however many make sense.
-";
+  Neither the format string nor the values have been checked in advance;
+  each per-type packer pulls its value(s) from VAL_ITER, checks the type,
+  and returns NULL on failure.
  
+  VAL_ITER is the iterator obtained from the caller's value sequence.
  
-static char const pytdbpack_pack_doc[] = 
-"pack(format, values) -> buffer
-Pack Python objects into Samba binary format according to format string.
+  Returns NULL for error (normally with exception set), or PACKED_LIST.
+*/
+PyObject *
+pytdbpack_data(const char *format_str,
+                   PyObject *val_iter,
+                   PyObject *packed_list)
+{
+       int format_i, val_i = 0;
+
+       for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
+               char ch = format_str[format_i];
  
-arguments:
-    format -- string of tdbpack format characters
-    values -- sequence of value objects corresponding 1:1 to format characters
+               switch (ch) {
+                       /* dispatch to the appropriate packer for this type,
+                          which should pull things off the iterator, and
+                          append them to the packed_list */
+               case 'w':
+               case 'd':
+               case 'p':
+                       if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
+                               return NULL;
+                       break;
+
+               case 'f':
+               case 'P':
+                       if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
+                               return NULL;
+                       break;
+
+               case 'B':
+                       if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
+                               return NULL;
+                       break;
+
+               default:
+                       PyErr_Format(PyExc_ValueError,
+                                    "%s: format character '%c' is not supported",
+                                    FUNCTION_MACRO, ch);
+                       return NULL;
+               }
+       }
  
-returns:
-    buffer -- string containing packed data
+       return packed_list;
+}
  
-raises:
-    IndexError -- if there are too few values for the format
-    ValueError -- if any of the format characters is illegal
-    TypeError  -- if the format is not a string, or values is not a sequence,
-        or any of the values is of the wrong type for the corresponding
-        format character
  
-notes:
-    For historical reasons, it is not an error to pass more values than are consumed
-    by the format.
-";
+static PyObject *
+pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
+{
+       unsigned long val_long;
+       PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
+       PyObject *new_list = NULL;
+       unsigned char pack_buf[4];
  
+       if (!(val_obj = PyIter_Next(val_iter)))
+               goto out;
  
-static char const pytdbpack_unpack_doc[] =
-"unpack(format, buffer) -> (values, rest)
-Unpack Samba binary data according to format string.
+       if (!(long_obj = PyNumber_Long(val_obj))) {
+               pytdbpack_bad_type(ch, "Number", val_obj);
+               goto out;
+       }
  
-arguments:
-    format -- string of tdbpack characters
-    buffer -- string of packed binary data
+       val_long = PyLong_AsUnsignedLong(long_obj);
+       pack_le_uint32(val_long, pack_buf);
  
-returns:
-    2-tuple of:
-        values -- sequence of values corresponding 1:1 to format characters
-        rest -- string containing data that was not decoded, or '' if the
-            whole string was consumed
+       /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
+          the first two bytes. */
+       
+       if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
+               goto out;
  
-raises:
-    IndexError -- if there is insufficient data in the buffer for the
-        format (or if the data is corrupt and contains a variable-length
-        field extending past the end)
-    ValueError -- if any of the format characters is illegal
+       if (PyList_Append(packed_list, result_obj) != -1)
+               new_list = packed_list;
  
-notes:
-    Because unconsumed data is returned, you can feed it back in to the
-    unpacker to extract further fields.  Alternatively, if you wish to modify
-    some fields near the start of the data, you may be able to save time by
-    only unpacking and repacking the necessary part.
-";
+  out:
+       Py_XDECREF(val_obj);
+       Py_XDECREF(long_obj);
+       Py_XDECREF(result_obj);
  
+       return new_list;
+}
  
  
  /*
-  Game plan is to first of all walk through the arguments and calculate the
-  total length that will be required.  We allocate a Python string of that
-  size, then walk through again and fill it in.
-
-  We just borrow references to all the passed arguments, since none of them
-  need to be permanently stored.  We transfer ownership to the returned
-  object.
- */    
+ * Take one string from the iterator val_iter, convert it to 8-bit, and append
+ * it to packed_list followed by a terminating NUL byte.
+ *
+ * If the input is neither a string nor Unicode, an exception is raised.
+ *
+ * If the input is Unicode, then it is converted to the appropriate encoding.
+ *
+ * If the input is a String, and encoding is not null, then it is converted to
+ * Unicode using the default decoding method, and then converted to the
+ * encoding.  If the encoding is NULL, then the string is written out as-is --
+ * this is used when the default Python encoding is the same as the Samba
+ * encoding.
+ *
+ * I hope this approach avoids being too fragile w.r.t. being passed either
+ * Unicode or String objects.
+ */
  static PyObject *
-pytdbpack_pack(PyObject *self,
-              PyObject *args)
+pytdbpack_str(char ch,
+             PyObject *val_iter, PyObject *packed_list, const char *encoding)
  {
-       char *format_str;
-       PyObject *val_seq, *fast_seq, *buf_str;
-       int reqd_len;
-       char *packed_buf;
+       PyObject *val_obj = NULL;
+       PyObject *unicode_obj = NULL;
+       PyObject *coded_str = NULL;
+       PyObject *nul_str = NULL;
+       PyObject *new_list = NULL;
+
+       if (!(val_obj = PyIter_Next(val_iter)))
+               goto out;
+
+       if (PyUnicode_Check(val_obj)) {
+               if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
+                       goto out;
+       }
+       else if (PyString_Check(val_obj) && !encoding) {
+               /* For efficiency, we assume that the Python interpreter has
+                  the same default string encoding as Samba's native string
+                  encoding.  On the PSA, both are always 8859-1. */
+               coded_str = val_obj;
+               Py_INCREF(coded_str);
+       }
+       else if (PyString_Check(val_obj)) {
+               /* String, but needs to be converted */
+               if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
+                       goto out;
+               if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
+                       goto out;
+       }
+       else {
+               pytdbpack_bad_type(ch, "String or Unicode", val_obj);
+               goto out;
+       }
  
-       /* TODO: Test passing wrong types or too many arguments */
-       if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
-               return NULL;
+       if (!nul_str)
+               /* NOTE(review): nul_str is not static, so it is rebuilt (and never DECREF'd) on every call */
+               if (!(nul_str = PyString_FromStringAndSize("", 1)))
+                       goto out;
  
-       /* Convert into a list or tuple (if not already one), so that we can
-        * index more easily. */
-       fast_seq = PySequence_Fast(val_seq,
-                                  __FUNCTION__ ": argument 2 must be sequence");
-       if (!fast_seq)
-               return NULL;
-                       
-       reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
-       if (reqd_len == -1)     /* exception was thrown */
+       if ((PyList_Append(packed_list, coded_str) != -1)
+           && (PyList_Append(packed_list, nul_str) != -1))
+               new_list = packed_list;
+
+  out:
+       Py_XDECREF(val_obj);
+       Py_XDECREF(unicode_obj);
+       Py_XDECREF(coded_str);
+
+       return new_list;
+}
+
+
+/*
+ * Pack (LENGTH, BUFFER) pair onto the list.
+ *
+ * The buffer must already be a String, not Unicode, because it contains 8-bit
+ * untranslated data.  In some cases it will actually be UTF_16_LE data.
+ */
+static PyObject *
+pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
+{
+       PyObject *val_obj;
+       PyObject *new_list = NULL;
+       
+       /* pull off integer and stick onto list */
+       if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
                 return NULL;
  
-       /* Allocate space.
-        
-          This design causes an unnecessary copying of the data when Python
-          constructs an object, and that might possibly be avoided by using a
-          Buffer object of some kind instead.  I'm not doing that for now
-          though.  */
-       packed_buf = malloc(reqd_len);
-       if (!packed_buf) {
-               PyErr_Format(PyExc_MemoryError,
-                            "%s: couldn't allocate %d bytes for packed buffer",
-                            __FUNCTION__, reqd_len);
+       /* this assumes that the string is the right length; the old code did
+          the same. */
+       if (!(val_obj = PyIter_Next(val_iter)))
                 return NULL;
-       }       
+
+       if (!PyString_Check(val_obj)) {
+               pytdbpack_bad_type('B', "String", val_obj);
+               goto out;
+       }
         
-       if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
-               free(packed_buf);
+       if (PyList_Append(packed_list, val_obj) != -1)
+               new_list = packed_list;
+
+  out:
+       Py_XDECREF(val_obj);
+       return new_list;
+}
+
+
+static PyObject *pytdbpack_bad_type(char ch,
+                                   const char *expected,
+                                   PyObject *val_obj)
+{
+       PyObject *r = PyObject_Repr(val_obj);
+       if (!r)
                 return NULL;
-       }
+       PyErr_Format(PyExc_TypeError,
+                    "tdbpack: format '%c' requires %s, not %s",
+                    ch, expected, PyString_AS_STRING(r));
+       Py_DECREF(r);
+       return val_obj;
+}
  
-       buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
-       free(packed_buf);       /* get rid of tmp buf */
-       
-       return buf_str;
+
+/*
+  XXX: glib and Samba have quicker macro for doing the endianness conversions,
+  but I don't know of one in plain libc, and it's probably not a big deal.  I
+  realize this is kind of dumb because we'll almost always be on x86, but
+  being safe is important.
+*/
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
+{
+       pbuf[0] =         val_long & 0xff;
+       pbuf[1] = (val_long >> 8)  & 0xff;
+       pbuf[2] = (val_long >> 16) & 0xff;
+       pbuf[3] = (val_long >> 24) & 0xff;
  }
  
  
+#if 0  /* not used */
+static void pack_bytes(long len, const char *from,
+                      unsigned char **pbuf)
+{
+       memcpy(*pbuf, from, len);
+       (*pbuf) += len;
+}
+#endif
+
  
  static PyObject *
-pytdbpack_unpack(PyObject *self,
+pytdbunpack(PyObject *self,
                  PyObject *args)
  {
         char *format_str, *packed_str, *ppacked;
@@ -231,7 +480,7 @@ pytdbpack_unpack(PyObject *self,
         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
                 last_format = format_str[i];
                 /* packed_len is reduced in place */
-               if (!pytdbpack_unpack_item(format_str[i], &ppacked, &packed_len, val_list))
+               if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
                         goto failed;
         }
  
@@ -240,11 +489,11 @@ pytdbpack_unpack(PyObject *self,
                 if (i == 0) {
                         PyErr_Format(PyExc_ValueError,
                                      "%s: '$' may not be first character in format",
-                                    __FUNCTION__);
+                                    FUNCTION_MACRO);
                         return NULL;
                 } 
                 while (packed_len > 0)
-                       if (!pytdbpack_unpack_item(last_format, &ppacked, &packed_len, val_list))
+                       if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
                                 goto failed;
         }
         
@@ -270,154 +519,22 @@ pytdbpack_unpack(PyObject *self,
  }
  
  
-/*
-  Internal routine that calculates how many bytes will be required to
-  encode the values in the format.
-
-  Also checks that the value list is the right size for the format list.
-
-  Returns number of bytes (may be 0), or -1 if there's something wrong, in
-  which case a Python exception has been raised.
-
-  Arguments:
-
-    val_seq: a Fast Sequence (list or tuple), being all the values
-*/
-static int
-pytdbpack_calc_reqd_len(char *format_str,
-                       PyObject *val_seq)
-{
-       int len = 0;
-       char *p;
-       int val_i;
-       int val_len;
-
-       val_len = PySequence_Length(val_seq);
-       if (val_len == -1)
-               return -1;
-
-       for (p = format_str, val_i = 0; *p; p++, val_i++) {
-               char ch = *p;
-
-               if (val_i >= val_len) {
-                       PyErr_Format(PyExc_IndexError,
-                                    "%s: value list is too short for format string",
-                                    __FUNCTION__);
-                       return -1;
-               }
-
-               /* borrow a reference to the item */
-               if (ch == 'd' || ch == 'p') 
-                       len += 4;
-               else if (ch == 'w')
-                       len += 2;
-               else if (ch == 'f' || ch == 'P') {
-                       /* nul-terminated 8-bit string */
-                       int item_len;
-                       PyObject *str_obj;
-
-                       str_obj = PySequence_GetItem(val_seq, val_i);
-                       if (!str_obj)
-                               return -1;
-
-                       if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
-                               pytdbpack_bad_type(ch, "String", str_obj);
-                               return -1;
-                       }
-                       
-                       len += 1 + item_len;
-               }
-               else if (ch == 'B') {
-                       /* length-preceded byte buffer: n bytes, plus a preceding
-                        * word */
-                       PyObject *len_obj;
-                       long len_val;
-
-                       len_obj = PySequence_GetItem(val_seq, val_i);
-                       val_i++; /* skip over buffer */
-
-                       if (!PyNumber_Check(len_obj)) {
-                               pytdbpack_bad_type(ch, "Number", len_obj);
-                               return -1;
-                       }
-
-                       len_val = PyInt_AsLong(len_obj);
-                       if (len_val < 0) {
-                               PyErr_Format(PyExc_ValueError,
-                                            "%s: format 'B' requires positive integer", __FUNCTION__);
-                               return -1;
-                       }
-
-                       len += 4 + len_val;
-               }
-               else {  
-                       PyErr_Format(PyExc_ValueError,
-                                    "%s: format character '%c' is not supported",
-                                    __FUNCTION__, ch);
-               
-                       return -1;
-               }
-       }
-
-       return len;
-}
-
-
-static PyObject *pytdbpack_bad_type(char ch,
-                                   const char *expected,
-                                   PyObject *val_obj)
-{
-       PyObject *r = PyObject_Repr(val_obj);
-       if (!r)
-               return NULL;
-       PyErr_Format(PyExc_TypeError,
-                    "tdbpack: format '%c' requires %s, not %s",
-                    ch, expected, PyString_AS_STRING(r));
-       Py_DECREF(r);
-       return val_obj;
-}
-
-
-/*
-  XXX: glib and Samba have quicker macro for doing the endianness conversions,
-  but I don't know of one in plain libc, and it's probably not a big deal.  I
-  realize this is kind of dumb because we'll almost always be on x86, but
-  being safe is important.
-*/
-static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
-{
-       (*pbuf)[0] =         val_long & 0xff;
-       (*pbuf)[1] = (val_long >> 8)  & 0xff;
-       (*pbuf)[2] = (val_long >> 16) & 0xff;
-       (*pbuf)[3] = (val_long >> 24) & 0xff;
-       (*pbuf) += 4;
-}
-
-
-static void pack_bytes(long len, const char *from,
-                      unsigned char **pbuf)
-{
-       memcpy(*pbuf, from, len);
-       (*pbuf) += len;
-}
-
-
  static void
-unpack_err_too_short(void)
+pytdbunpack_err_too_short(void)
  {
         PyErr_Format(PyExc_IndexError,
-                    __FUNCTION__ ": data too short for unpack format");
+                    "%s: data too short for unpack format", FUNCTION_MACRO);
  }
  
  
  static PyObject *
-unpack_uint32(char **pbuf, int *plen)
+pytdbunpack_uint32(char **pbuf, int *plen)
  {
         unsigned long v;
         unsigned char *b;
         
         if (*plen < 4) {
-               unpack_err_too_short();
+               pytdbunpack_err_too_short();
                 return NULL;
         }
  
@@ -431,13 +548,13 @@ unpack_uint32(char **pbuf, int *plen)
  }
  
  
-static PyObject *unpack_int16(char **pbuf, int *plen)
+static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
  {
         long v;
         unsigned char *b;
         
         if (*plen < 2) {
-               unpack_err_too_short();
+               pytdbunpack_err_too_short();
                 return NULL;
         }
  
@@ -452,7 +569,7 @@ static PyObject *unpack_int16(char **pbuf, int *plen)
  
  
  static PyObject *
-unpack_string(char **pbuf, int *plen)
+pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
  {
         int len;
         char *nul_ptr, *start;
@@ -461,7 +578,7 @@ unpack_string(char **pbuf, int *plen)
         
         nul_ptr = memchr(start, '\0', *plen);
         if (!nul_ptr) {
-               unpack_err_too_short();
+               pytdbunpack_err_too_short();
                 return NULL;
         }
  
@@ -470,12 +587,12 @@ unpack_string(char **pbuf, int *plen)
         *pbuf += len + 1;       /* skip \0 */
         *plen -= len + 1;
  
-       return PyString_FromStringAndSize(start, len);
+       return PyString_Decode(start, len, encoding, NULL);
  }
  
  
  static PyObject *
-unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
+pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
  {
         /* first get 32-bit len */
         long slen;
@@ -484,7 +601,7 @@ unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
         PyObject *str_obj = NULL, *len_obj = NULL;
         
         if (*plen < 4) {
-               unpack_err_too_short();
+               pytdbunpack_err_too_short();
                 return NULL;
         }
         
@@ -493,7 +610,7 @@ unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
  
         if (slen < 0) { /* surely you jest */
                 PyErr_Format(PyExc_ValueError,
-                            __FUNCTION__ ": buffer seems to have negative length");
+                            "%s: buffer seems to have negative length", FUNCTION_MACRO);
                 return NULL;
         }
  
@@ -503,8 +620,8 @@ unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
  
         if (*plen < slen) {
                 PyErr_Format(PyExc_IndexError,
-                            __FUNCTION__ ": not enough data to unpack buffer: "
-                            "need %d bytes, have %d",
+                            "%s: not enough data to unpack buffer: "
+                            "need %d bytes, have %d", FUNCTION_MACRO,
                              (int) slen, *plen);
                 return NULL;
         }
@@ -524,6 +641,9 @@ unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
         if (PyList_Append(val_list, str_obj) == -1)
                 goto failed;
         
+       Py_DECREF(len_obj);
+       Py_DECREF(str_obj);
+       
         return val_list;
  
    failed:
@@ -541,39 +661,44 @@ unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
  
     Returns a reference to None, or NULL for failure.
  */
-static PyObject *pytdbpack_unpack_item(char ch,
-                                      char **pbuf,
-                                      int *plen,
-                                      PyObject *val_list)
+static PyObject *pytdbunpack_item(char ch,
+                                 char **pbuf,
+                                 int *plen,
+                                 PyObject *val_list)
  {
-       PyObject *result;
+       PyObject *unpacked;
         
         if (ch == 'w') {        /* 16-bit int */
-               result = unpack_int16(pbuf, plen);
+               unpacked = pytdbunpack_int16(pbuf, plen);
         }
         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
                 /* pointers can just come through as integers */
-               result = unpack_uint32(pbuf, plen);
+               unpacked = pytdbunpack_uint32(pbuf, plen);
         }
         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
-               result = unpack_string(pbuf, plen);
+               unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
         }
         else if (ch == 'B') { /* length, buffer */
-               return unpack_buffer(pbuf, plen, val_list);
+               return pytdbunpack_buffer(pbuf, plen, val_list);
         }
         else {
                 PyErr_Format(PyExc_ValueError,
-                            __FUNCTION__ ": format character '%c' is not supported",
-                            ch);
+                            "%s: format character '%c' is not supported", 
+                             FUNCTION_MACRO, ch);
                 
                 return NULL;
         }
  
         /* otherwise OK */
-       if (!result)
-               return NULL;
-       if (PyList_Append(val_list, result) == -1)
+       if (!unpacked)
                 return NULL;
+
+       if (PyList_Append(val_list, unpacked) == -1)
+               val_list = NULL;
+
+       /* PyList_Append takes a new reference to the inserted object.
+          Therefore, we no longer need the original reference. */
+       Py_DECREF(unpacked);
         
         return val_list;
  }
@@ -581,127 +706,11 @@ static PyObject *pytdbpack_unpack_item(char ch,
  
  
  
-/*
-  Pack data according to FORMAT_STR from the elements of VAL_SEQ into
-  PACKED_BUF.
-
-  The string has already been checked out, so we know that VAL_SEQ is large
-  enough to hold the packed data, and that there are enough value items.
-  (However, their types may not have been thoroughly checked yet.)
-
-  In addition, val_seq is a Python Fast sequence.
-
-  Returns NULL for error (with exception set), or None.
-*/
-PyObject *
-pytdbpack_pack_data(const char *format_str,
-                   PyObject *val_seq,
-                   unsigned char *packed)
-{
-       int format_i, val_i = 0;
-
-       for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
-               char ch = format_str[format_i];
-               PyObject *val_obj;
-
-               /* borrow a reference to the item */
-               val_obj = PySequence_GetItem(val_seq, val_i++);
-               if (!val_obj)
-                       return NULL;
-
-               if (ch == 'w') {
-                       unsigned long val_long;
-                       PyObject *long_obj;
-                       
-                       if (!(long_obj = PyNumber_Long(val_obj))) {
-                               pytdbpack_bad_type(ch, "Long", val_obj);
-                               return NULL;
-                       }
-                       
-                       val_long = PyLong_AsUnsignedLong(long_obj);
-                       (packed)[0] = val_long & 0xff;
-                       (packed)[1] = (val_long >> 8) & 0xff;
-                       (packed) += 2;
-                       Py_DECREF(long_obj);
-               }
-               else if (ch == 'd') {
-                       /* 4-byte LE number */
-                       PyObject *long_obj;
-                       
-                       if (!(long_obj = PyNumber_Long(val_obj))) {
-                               pytdbpack_bad_type(ch, "Long", val_obj);
-                               return NULL;
-                       }
-                       
-                       pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
-
-                       Py_DECREF(long_obj);
-               }
-               else if (ch == 'p') {
-                       /* "Pointer" value -- in the subset of DCERPC used by Samba,
-                          this is really just an "exists" or "does not exist"
-                          flag. */
-                       pack_uint32(PyObject_IsTrue(val_obj), &packed);
-               }
-               else if (ch == 'f' || ch == 'P') {
-                       int size;
-                       char *sval;
-
-                       size = PySequence_Length(val_obj);
-                       if (size < 0)
-                               return NULL;
-                       sval = PyString_AsString(val_obj);
-                       if (!sval)
-                               return NULL;
-                       pack_bytes(size+1, sval, &packed); /* include nul */
-               }
-               else if (ch == 'B') {
-                       long size;
-                       char *sval;
-
-                       if (!PyNumber_Check(val_obj)) {
-                               pytdbpack_bad_type(ch, "Number", val_obj);
-                               return NULL;
-                       }
-
-                       if (!(val_obj = PyNumber_Long(val_obj)))
-                               return NULL;
-
-                       size = PyLong_AsLong(val_obj);
-                       pack_uint32(size, &packed);
-
-                       /* Release the new reference created by the cast */
-                       Py_DECREF(val_obj);
-
-                       val_obj = PySequence_GetItem(val_seq, val_i++);
-                       if (!val_obj)
-                               return NULL;
-                       
-                       sval = PyString_AsString(val_obj);
-                       if (!sval)
-                               return NULL;
-                       
-                       pack_bytes(size, sval, &packed); /* do not include nul */
-               }
-               else {
-                       /* this ought to be caught while calculating the length, but
-                          just in case. */
-                       PyErr_Format(PyExc_ValueError,
-                                    "%s: format character '%c' is not supported",
-                                    __FUNCTION__, ch);
-               
-                       return NULL;
-               }
-       }
-               
-       return Py_None;
-}
-
  
  
  static PyMethodDef pytdbpack_methods[] = {
-       { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
-       { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
+       { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
+       { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
  };
  
  DL_EXPORT(void)