Sync with HEAD.
author Tim Potter <tpot@samba.org>
Mon, 11 Nov 2002 05:48:12 +0000 (05:48 +0000)
committer Tim Potter <tpot@samba.org>
Mon, 11 Nov 2002 05:48:12 +0000 (05:48 +0000)
(This used to be commit 130c1d0765502be839f87a8d0f4e47f5a3461c97)

source3/python/py_tdbpack.c

index 87cd804ed4ef14c1b457501f8cd85be063ae1cef..7180c3e12cf8804b4e196d65176da443e35be09d 100644 (file)
 
 #include "Python.h"
 
-static int pytdbpack_calc_reqd_len(char *format_str,
-                                  PyObject *val_seq);
+static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
 
 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
 
-static PyObject *pytdbpack_pack_data(const char *format_str,
+static PyObject *pytdbpack_data(const char *format_str,
                                     PyObject *val_seq,
-                                    unsigned char *buf);
-
+                                    PyObject *val_list);
 
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
 
 
 static PyObject *pytdbpack_bad_type(char ch,
@@ -57,13 +58,17 @@ tdb/tdbutil module, with appropriate adjustments for Python datatypes.
 Python strings are used to specify the format of data to be packed or
 unpacked.
 
-Strings in TDBs are typically stored in DOS codepages.  The caller of this
-module must make appropriate translations if necessary, typically to and from
-Unicode objects.
+Strings are always stored in codepage 850.  Unicode objects are translated
+to cp850; plain strings are assumed to be in latin-1 and are also
+translated.
+
+This may be a problem in the future if it is different to the Samba codepage.
+It might be better to have the caller do the conversion, but that would conflict
+with existing CMI code.
 
 tdbpack format strings:
 
-    'f':  NULL-terminated string in DOS codepage
+    'f':  NULL-terminated string in codepage 850
 
     'P':  same as 'f'
 
@@ -88,7 +93,7 @@ tdbpack format strings:
 ";
 
 
-static char const pytdbpack_pack_doc[] = 
+static char const pytdbpack_doc[] = 
 "pack(format, values) -> buffer
 Pack Python objects into Samba binary format according to format string.
 
@@ -141,65 +146,274 @@ notes:
 
 
 
+
 /*
-  Game plan is to first of all walk through the arguments and calculate the
-  total length that will be required.  We allocate a Python string of that
-  size, then walk through again and fill it in.
-
-  We just borrow references to all the passed arguments, since none of them
-  need to be permanently stored.  We transfer ownership to the returned
-  object.
- */    
+  * Pack objects to bytes.
+  *
+  * All objects are first individually encoded onto a list, and then the list
+  * of strings is concatenated.  This is faster than concatenating strings,
+  * and reasonably simple to code.
+  */
 static PyObject *
-pytdbpack_pack(PyObject *self,
+pytdbpack(PyObject *self,
               PyObject *args)
 {
        char *format_str;
-       PyObject *val_seq, *fast_seq, *buf_str;
-       int reqd_len;
-       char *packed_buf;
+       PyObject *val_seq, *val_iter = NULL,
+               *packed_list = NULL, *packed_str = NULL,
+               *empty_str = NULL;
 
        /* TODO: Test passing wrong types or too many arguments */
        if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
                return NULL;
 
-       /* Convert into a list or tuple (if not already one), so that we can
-        * index more easily. */
-       fast_seq = PySequence_Fast(val_seq,
-                                  __FUNCTION__ ": argument 2 must be sequence");
-       if (!fast_seq)
-               return NULL;
-                       
-       reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
-       if (reqd_len == -1)     /* exception was thrown */
-               return NULL;
+       if (!(val_iter = PyObject_GetIter(val_seq))) /* released at "out" */
+               goto out;
 
-       /* Allocate space.
-        
-          This design causes an unnecessary copying of the data when Python
-          constructs an object, and that might possibly be avoided by using a
-          Buffer object of some kind instead.  I'm not doing that for now
-          though.  */
-       packed_buf = malloc(reqd_len);
-       if (!packed_buf) {
-               PyErr_Format(PyExc_MemoryError,
-                            "%s: couldn't allocate %d bytes for packed buffer",
-                            __FUNCTION__, reqd_len);
-               return NULL;
-       }       
+       /* Create list to hold strings until we're done, then join them all. */
+       if (!(packed_list = PyList_New(0)))
+               goto out;
+
+       if (!pytdbpack_data(format_str, val_iter, packed_list)) /* appends the encoded pieces to packed_list */
+               goto out;
+
+       /* The empty string is the separator for the join below.
+          NOTE(review): the remark "this function is not officially
+          documented but it works" presumably refers to the private
+          _PyString_Join rather than PyString_InternFromString -- confirm. */
+       if (!(empty_str = PyString_InternFromString("")))
+               goto out;
        
-       if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
-               free(packed_buf);
-               return NULL;
+       packed_str = _PyString_Join(empty_str, packed_list); /* result is returned as-is below */
+
+  out: /* reached on success and from every failure above */
+       Py_XDECREF(empty_str);
+       Py_XDECREF(val_iter);
+       Py_XDECREF(packed_list);
+
+       return packed_str; /* still NULL if any step above jumped to "out" */
+}
+
+
+/*
+  Pack values according to FORMAT_STR, appending the encoded pieces to
+  PACKED_LIST.
+
+  Each format character is dispatched to a helper that pulls the value(s) it
+  needs from the iterator VAL_ITER and appends the resulting byte strings to
+  PACKED_LIST.  Value types are checked by the helpers as they go.
+
+  Returns PACKED_LIST itself (not a new reference) on success, or NULL on
+  error.  An unsupported format character raises ValueError; a failure in a
+  helper is passed straight through to the caller.
+*/
+static PyObject *
+pytdbpack_data(const char *format_str,
+                   PyObject *val_iter,
+                   PyObject *packed_list)
+{
+       int format_i;
+
+       for (format_i = 0; format_str[format_i]; format_i++) {
+               char ch = format_str[format_i];
+
+               switch (ch) {
+                       /* dispatch to the appropriate packer for this type,
+                          which should pull things off the iterator, and
+                          append them to the packed_list */
+               case 'w':
+               case 'd':
+               case 'p':
+                       if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
+                               return NULL;
+                       break;
+
+               case 'f':
+               case 'P':
+                       if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
+                               return NULL;
+                       break;
+
+               case 'B':
+                       if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
+                               return NULL;
+                       break;
+
+               default:
+                       PyErr_Format(PyExc_ValueError,
+                                    "%s: format character '%c' is not supported",
+                                    __FUNCTION__, ch);
+                       return NULL;
+               }
+       }
+
+       return packed_list;
+}
+
+
+/*
+ * Take one value from the iterator VAL_ITER, convert it to a number, and
+ * append its little-endian encoding to PACKED_LIST.
+ *
+ * CH is the format character being packed: 'w' emits only the low two
+ * bytes, any other character emits all four bytes.
+ *
+ * Returns PACKED_LIST itself (not a new reference) on success, or NULL with
+ * an exception set: IndexError if VAL_ITER has run out of values, the error
+ * from pytdbpack_bad_type() if the value cannot be converted to a number,
+ * or the conversion error if it does not fit an unsigned long.
+ */
+static PyObject *
+pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
+{
+       unsigned long val_long;
+       PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
+       PyObject *new_list = NULL;
+       unsigned char pack_buf[4];
+
+       if (!(val_obj = PyIter_Next(val_iter))) {
+               /* PyIter_Next returns NULL *without* setting an exception
+                  when the iterator is merely exhausted.  Returning NULL to
+                  the interpreter with no exception set is itself an error,
+                  so report the shortage explicitly. */
+               if (!PyErr_Occurred())
+                       PyErr_Format(PyExc_IndexError,
+                                    "%s: not enough values for format character '%c'",
+                                    __FUNCTION__, ch);
+               goto out;
+       }
+
+       if (!(long_obj = PyNumber_Long(val_obj))) {
+               pytdbpack_bad_type(ch, "Number", val_obj);
+               goto out;
+       }
+
+       /* (unsigned long) -1 is also a legitimate value, so the error case
+          is only recognisable by the pending exception. */
+       val_long = PyLong_AsUnsignedLong(long_obj);
+       if (val_long == (unsigned long) -1 && PyErr_Occurred())
+               goto out;
+
+       pack_le_uint32(val_long, pack_buf);
+
+       /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
+          the first two bytes. */
+       if (!(result_obj = PyString_FromStringAndSize((char *) pack_buf,
+                                                     ch == 'w' ? 2 : 4)))
+               goto out;
+
+       if (PyList_Append(packed_list, result_obj) != -1)
+               new_list = packed_list;
+
+  out:
+       Py_XDECREF(val_obj);
+       Py_XDECREF(long_obj);
+       Py_XDECREF(result_obj);
+
+       return new_list;
+}
+
+
+/*
+ * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
+ * append it to packed_list, followed by a one-byte nul terminator.
+ *
+ * If the input is neither a string nor Unicode, an exception is raised.
+ *
+ * If the input is Unicode, then it is converted to CP850.
+ *
+ * If the input is a String, then it is converted to Unicode using the default
+ * decoding method, and then converted to CP850.  This in effect gives
+ * conversion from latin-1 (currently the PSA's default) to CP850, without
+ * needing a custom translation table.
+ *
+ * I hope this approach avoids being too fragile w.r.t. being passed either
+ * Unicode or String objects.
+ *
+ * Returns packed_list itself (not a new reference) on success, or NULL with
+ * an exception set; IndexError is raised if val_iter has run out of values.
+ */
+static PyObject *
+pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
+{
+       /* The terminator is constant and often-used: it is created on first
+          use and deliberately held forever, hence the static storage. */
+       static PyObject *nul_str = NULL;
+       PyObject *val_obj = NULL;
+       PyObject *unicode_obj = NULL;
+       PyObject *cp850_str = NULL;
+       PyObject *new_list = NULL;
+
+       if (!(val_obj = PyIter_Next(val_iter))) {
+               /* Exhaustion is reported as NULL with no exception set, so
+                  raise one rather than return a bare NULL. */
+               if (!PyErr_Occurred())
+                       PyErr_Format(PyExc_IndexError,
+                                    "%s: not enough values for string format",
+                                    __FUNCTION__);
+               goto out;
+       }
+
+       if (PyUnicode_Check(val_obj)) {
+               /* take a second reference so that val_obj and unicode_obj
+                  can each be released unconditionally at "out" */
+               unicode_obj = val_obj;
+               Py_INCREF(unicode_obj);
+       }
+       else {
+               /* string */
+               if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
+                       goto out;
+       }
+
+       if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL)))
+               goto out;
+
+       if (!nul_str) {
+               /* length 1 on purpose: the string holds the single byte \0 */
+               if (!(nul_str = PyString_FromStringAndSize("", 1)))
+                       goto out;
+       }
+
+       if ((PyList_Append(packed_list, cp850_str) != -1)
+           && (PyList_Append(packed_list, nul_str) != -1))
+               new_list = packed_list;
+
+  out:
+       Py_XDECREF(val_obj);
+       Py_XDECREF(unicode_obj);
+       Py_XDECREF(cp850_str);
+
+       return new_list;
+}
 
 
+/*
+ * Pack (LENGTH, BUFFER) pair onto the list.
+ *
+ * Two values are consumed from val_iter: first a number, packed as a 'd'
+ * (four bytes), then the buffer itself, which is appended unchanged.
+ *
+ * The buffer must already be a String, not Unicode, because it contains 8-bit
+ * untranslated data.  In some cases it will actually be UTF_16_LE data.
+ *
+ * Returns packed_list itself (not a new reference) on success, or NULL with
+ * an exception set; IndexError is raised if val_iter runs out before the
+ * buffer has been read.
+ */
+static PyObject *
+pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
+{
+       PyObject *val_obj;
+       PyObject *new_list = NULL;
+
+       /* pull off integer and stick onto list */
+       if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
+               return NULL;
+
+       /* this assumes that the string is the right length; the old code did the same. */
+       if (!(val_obj = PyIter_Next(val_iter))) {
+               /* Exhaustion is reported as NULL with no exception set, so
+                  raise one rather than return a bare NULL. */
+               if (!PyErr_Occurred())
+                       PyErr_Format(PyExc_IndexError,
+                                    "%s: format 'B' needs a length and a string, but the string is missing",
+                                    __FUNCTION__);
+               return NULL;
+       }
+
+       if (!PyString_Check(val_obj)) {
+               pytdbpack_bad_type('B', "String", val_obj);
+               goto out;
+       }
+
+       if (PyList_Append(packed_list, val_obj) != -1)
+               new_list = packed_list;
+
+  out:
+       Py_XDECREF(val_obj);
+       return new_list;
+}
+
+
+#if 0
+else if (ch == 'B') {
+                       long size;
+                       char *sval;
+
+                       if (!PyNumber_Check(val_obj)) {
+                               pytdbpack_bad_type(ch, "Number", val_obj);
+                               return NULL;
+                       }
+
+                       if (!(val_obj = PyNumber_Long(val_obj)))
+                               return NULL;
+
+                       size = PyLong_AsLong(val_obj);
+                       pack_le_uint32(size, &packed);
+
+                       /* Release the new reference created by the cast */
+                       Py_DECREF(val_obj);
+
+                       val_obj = PySequence_GetItem(val_seq, val_i++);
+                       if (!val_obj)
+                               return NULL;
+                       
+                       sval = PyString_AsString(val_obj);
+                       if (!sval)
+                               return NULL;
+                       
+                       pack_bytes(size, sval, &packed); /* do not include nul */
+               }
+               else {
+               
+       }
+               
+       return Py_None;
+}
+#endif
 
 static PyObject *
 pytdbpack_unpack(PyObject *self,
@@ -270,6 +484,8 @@ pytdbpack_unpack(PyObject *self,
 }
 
 
+
+#if 0
 /*
   Internal routine that calculates how many bytes will be required to
   encode the values in the format.
@@ -361,6 +577,7 @@ pytdbpack_calc_reqd_len(char *format_str,
 
        return len;
 }
+#endif
 
 
 static PyObject *pytdbpack_bad_type(char ch,
@@ -384,13 +601,12 @@ static PyObject *pytdbpack_bad_type(char ch,
   realize this is kind of dumb because we'll almost always be on x86, but
   being safe is important.
 */
-static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf) /* writes pbuf[0..3]; unlike the old version, the caller's pointer is not advanced */
 {
-       (*pbuf)[0] =         val_long & 0xff;
-       (*pbuf)[1] = (val_long >> 8)  & 0xff;
-       (*pbuf)[2] = (val_long >> 16) & 0xff;
-       (*pbuf)[3] = (val_long >> 24) & 0xff;
-       (*pbuf) += 4;
+       pbuf[0] =         val_long & 0xff; /* least-significant byte goes first */
+       pbuf[1] = (val_long >> 8)  & 0xff;
+       pbuf[2] = (val_long >> 16) & 0xff;
+       pbuf[3] = (val_long >> 24) & 0xff; /* bits above 31 of val_long are never written */
 }
 
 
@@ -581,126 +797,10 @@ static PyObject *pytdbpack_unpack_item(char ch,
 
 
 
-/*
-  Pack data according to FORMAT_STR from the elements of VAL_SEQ into
-  PACKED_BUF.
-
-  The string has already been checked out, so we know that VAL_SEQ is large
-  enough to hold the packed data, and that there are enough value items.
-  (However, their types may not have been thoroughly checked yet.)
-
-  In addition, val_seq is a Python Fast sequence.
-
-  Returns NULL for error (with exception set), or None.
-*/
-PyObject *
-pytdbpack_pack_data(const char *format_str,
-                   PyObject *val_seq,
-                   unsigned char *packed)
-{
-       int format_i, val_i = 0;
-
-       for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
-               char ch = format_str[format_i];
-               PyObject *val_obj;
-
-               /* borrow a reference to the item */
-               val_obj = PySequence_GetItem(val_seq, val_i++);
-               if (!val_obj)
-                       return NULL;
-
-               if (ch == 'w') {
-                       unsigned long val_long;
-                       PyObject *long_obj;
-                       
-                       if (!(long_obj = PyNumber_Long(val_obj))) {
-                               pytdbpack_bad_type(ch, "Long", val_obj);
-                               return NULL;
-                       }
-                       
-                       val_long = PyLong_AsUnsignedLong(long_obj);
-                       (packed)[0] = val_long & 0xff;
-                       (packed)[1] = (val_long >> 8) & 0xff;
-                       (packed) += 2;
-                       Py_DECREF(long_obj);
-               }
-               else if (ch == 'd') {
-                       /* 4-byte LE number */
-                       PyObject *long_obj;
-                       
-                       if (!(long_obj = PyNumber_Long(val_obj))) {
-                               pytdbpack_bad_type(ch, "Long", val_obj);
-                               return NULL;
-                       }
-                       
-                       pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
-
-                       Py_DECREF(long_obj);
-               }
-               else if (ch == 'p') {
-                       /* "Pointer" value -- in the subset of DCERPC used by Samba,
-                          this is really just an "exists" or "does not exist"
-                          flag. */
-                       pack_uint32(PyObject_IsTrue(val_obj), &packed);
-               }
-               else if (ch == 'f' || ch == 'P') {
-                       int size;
-                       char *sval;
-
-                       size = PySequence_Length(val_obj);
-                       if (size < 0)
-                               return NULL;
-                       sval = PyString_AsString(val_obj);
-                       if (!sval)
-                               return NULL;
-                       pack_bytes(size+1, sval, &packed); /* include nul */
-               }
-               else if (ch == 'B') {
-                       long size;
-                       char *sval;
-
-                       if (!PyNumber_Check(val_obj)) {
-                               pytdbpack_bad_type(ch, "Number", val_obj);
-                               return NULL;
-                       }
-
-                       if (!(val_obj = PyNumber_Long(val_obj)))
-                               return NULL;
-
-                       size = PyLong_AsLong(val_obj);
-                       pack_uint32(size, &packed);
-
-                       /* Release the new reference created by the cast */
-                       Py_DECREF(val_obj);
-
-                       val_obj = PySequence_GetItem(val_seq, val_i++);
-                       if (!val_obj)
-                               return NULL;
-                       
-                       sval = PyString_AsString(val_obj);
-                       if (!sval)
-                               return NULL;
-                       
-                       pack_bytes(size, sval, &packed); /* do not include nul */
-               }
-               else {
-                       /* this ought to be caught while calculating the length, but
-                          just in case. */
-                       PyErr_Format(PyExc_ValueError,
-                                    "%s: format character '%c' is not supported",
-                                    __FUNCTION__, ch);
-               
-                       return NULL;
-               }
-       }
-               
-       return Py_None;
-}
-
 
 
 static PyMethodDef pytdbpack_methods[] = {
-       { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
+       { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
        { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 };