#include "Python.h"
-static int pytdbpack_calc_reqd_len(char *format_str,
- PyObject *val_seq);
+static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
-static PyObject *pytdbpack_unpack_item(char,
- char **pbuf,
- int *plen);
-static int
-pytdbpack_calc_item_len(char format_ch,
- PyObject *val_obj);
+static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
-static PyObject *pytdbpack_pack_data(const char *format_str,
+static PyObject *pytdbpack_data(const char *format_str,
PyObject *val_seq,
- unsigned char *buf);
+ PyObject *val_list);
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
+
+
+static PyObject *pytdbpack_bad_type(char ch,
+ const char *expected,
+ PyObject *val_obj);
-
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.
Python strings are used to specify the format of data to be packed or
unpacked.
-Strings in TDBs are typically stored in DOS codepages. The caller of this
-module must make appropriate translations if necessary, typically to and from
-Unicode objects.
+Strings are always stored in codepage 850. Unicode objects are translated
+to cp850; plain strings are assumed to be in latin-1 and are also
+translated.
+
+This may be a problem in the future if it is different to the Samba codepage.
+It might be better to have the caller do the conversion, but that would conflict
+with existing CMI code.
tdbpack format strings:
- 'f': NULL-terminated string in DOS codepage
+ 'f': NULL-terminated string in codepage 850
'P': same as 'f'
- 'd': 4 byte little-endian number
+ 'd': 4 byte little-endian unsigned number
- 'w': 2 byte little-endian number
+ 'w': 2 byte little-endian unsigned number
'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
really just an \"exists\" or \"does not exist\" flag. The boolean
value of the Python object is used.
'B': 4-byte LE length, followed by that many bytes of binary data.
- Corresponds to a Python byte string of the appropriate length.
+ Corresponds to a Python integer giving the length, followed by a byte
+ string of the appropriate length.
'$': Special flag indicating that the preceding format code should be
repeated while data remains. This is only supported for unpacking.
";
-static char const pytdbpack_pack_doc[] =
+static char const pytdbpack_doc[] =
"pack(format, values) -> buffer
Pack Python objects into Samba binary format according to format string.
buffer -- string containing packed data
raises:
- IndexError -- if there are not the same number of format codes as of
- values
+ IndexError -- if there are too few values for the format
ValueError -- if any of the format characters is illegal
TypeError -- if the format is not a string, or values is not a sequence,
or any of the values is of the wrong type for the corresponding
format character
+
+notes:
+ For historical reasons, it is not an error to pass more values than are consumed
+ by the format.
";
+
/*
- Game plan is to first of all walk through the arguments and calculate the
- total length that will be required. We allocate a Python string of that
- size, then walk through again and fill it in.
-
- We just borrow references to all the passed arguments, since none of them
- need to be permanently stored. We transfer ownership to the returned
- object.
- */
+ * Pack objects to bytes.
+ *
+ * All objects are first individually encoded onto a list, and then the list
+ * of strings is concatenated. This is faster than concatenating strings,
+ * and reasonably simple to code.
+ */
static PyObject *
-pytdbpack_pack(PyObject *self,
+pytdbpack(PyObject *self,
PyObject *args)
{
char *format_str;
- PyObject *val_seq, *fast_seq, *buf_str;
- int reqd_len;
- char *packed_buf;
+ PyObject *val_seq, *val_iter = NULL,
+ *packed_list = NULL, *packed_str = NULL,
+ *empty_str = NULL;
/* TODO: Test passing wrong types or too many arguments */
if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
return NULL;
- /* Convert into a list or tuple (if not already one), so that we can
- * index more easily. */
- fast_seq = PySequence_Fast(val_seq,
- __FUNCTION__ ": argument 2 must be sequence");
- if (!fast_seq)
- return NULL;
-
- reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
- if (reqd_len == -1) /* exception was thrown */
- return NULL;
+ if (!(val_iter = PyObject_GetIter(val_seq))) /* values are consumed through an iterator, not by index */
+ goto out;
- /* Allocate space.
-
- This design causes an unnecessary copying of the data when Python
- constructs an object, and that might possibly be avoided by using a
- Buffer object of some kind instead. I'm not doing that for now
- though. */
- packed_buf = malloc(reqd_len);
- if (!packed_buf) {
- PyErr_Format(PyExc_MemoryError,
- "%s: couldn't allocate %d bytes for packed buffer",
- __FUNCTION__, reqd_len);
- return NULL;
- }
+ /* Create list to hold strings until we're done, then join them all. */
+ if (!(packed_list = PyList_New(0)))
+ goto out;
+
+ if (!pytdbpack_data(format_str, val_iter, packed_list)) /* appends the encoded pieces to packed_list */
+ goto out;
+
+ /* this function is not officially documented but it works */
+ if (!(empty_str = PyString_InternFromString("")))
+ goto out;
- if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
- free(packed_buf);
- return NULL;
+ packed_str = _PyString_Join(empty_str, packed_list); /* empty separator: plain concatenation of the pieces */
+
+ out: /* shared exit for success and failure; packed_str is still NULL if any step above failed */
+ Py_XDECREF(empty_str);
+ Py_XDECREF(val_iter);
+ Py_XDECREF(packed_list);
+
+ return packed_str;
+}
+
+
+/*
+ * Pack values pulled from VAL_ITER according to FORMAT_STR, appending the
+ * encoded pieces to PACKED_LIST.
+ *
+ * Each format character is dispatched to a packer, which takes as many
+ * items from the iterator as it needs and appends its output to the list:
+ *
+ *   'w', 'd', 'p'  ->  pytdbpack_number
+ *   'f', 'P'       ->  pytdbpack_str_850
+ *   'B'            ->  pytdbpack_buffer
+ *
+ * Values still left in the iterator once the format string is exhausted
+ * are not touched.
+ *
+ * Returns PACKED_LIST itself (no new reference is created) on success.
+ * Returns NULL if a packer fails, or with ValueError set if the format
+ * contains an unsupported character.
+ */
+static PyObject *
+pytdbpack_data(const char *format_str,
+               PyObject *val_iter,
+               PyObject *packed_list)
+{
+    const char *fmt;
+
+    for (fmt = format_str; *fmt; fmt++) {
+        char ch = *fmt;
+
+        switch (ch) {
+        case 'w':
+        case 'd':
+        case 'p':
+            packed_list = pytdbpack_number(ch, val_iter, packed_list);
+            break;
+
+        case 'f':
+        case 'P':
+            packed_list = pytdbpack_str_850(val_iter, packed_list);
+            break;
+
+        case 'B':
+            packed_list = pytdbpack_buffer(val_iter, packed_list);
+            break;
+
+        default:
+            PyErr_Format(PyExc_ValueError,
+                         "%s: format character '%c' is not supported",
+                         __FUNCTION__, ch);
+            return NULL;
+        }
+
+        /* every packer hands the list back on success and NULL on failure */
+        if (!packed_list)
+            return NULL;
+    }
+
+    return packed_list;
+}
+
+
+/*
+ * Take one value from VAL_ITER, convert it to an unsigned long and append
+ * its little-endian encoding to PACKED_LIST: two bytes for 'w', four bytes
+ * for any other format character.
+ *
+ * Returns PACKED_LIST itself (no new reference) on success, or NULL with an
+ * exception set:
+ *   IndexError -- the iterator has no more values
+ *   TypeError  -- the value cannot be converted by PyNumber_Long
+ *   otherwise  -- whatever PyLong_AsUnsignedLong raises for a value it
+ *                 cannot represent (for example a negative number)
+ */
+static PyObject *
+pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
+{
+    unsigned long val_long;
+    PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
+    PyObject *new_list = NULL;
+    unsigned char pack_buf[4];
+
+    if (!(val_obj = PyIter_Next(val_iter))) {
+        /* PyIter_Next returns NULL both on error and on plain exhaustion,
+           but only sets an exception in the former case. */
+        if (!PyErr_Occurred())
+            PyErr_Format(PyExc_IndexError,
+                         "%s: value list is too short for format string",
+                         __FUNCTION__);
+        goto out;
+    }
+
+    if (!(long_obj = PyNumber_Long(val_obj))) {
+        pytdbpack_bad_type(ch, "Number", val_obj);
+        goto out;
+    }
+
+    val_long = PyLong_AsUnsignedLong(long_obj);
+    /* (unsigned long)-1 is also a legal value, so consult the error state */
+    if (val_long == (unsigned long) -1 && PyErr_Occurred())
+        goto out;
+
+    /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
+       the first two bytes. */
+    pack_le_uint32(val_long, pack_buf);
+
+    if (!(result_obj = PyString_FromStringAndSize((const char *) pack_buf,
+                                                  ch == 'w' ? 2 : 4)))
+        goto out;
+
+    if (PyList_Append(packed_list, result_obj) != -1)
+        new_list = packed_list;
+
+ out:
+    /* the list holds its own reference to result_obj after the append */
+    Py_XDECREF(val_obj);
+    Py_XDECREF(long_obj);
+    Py_XDECREF(result_obj);
+
+    return new_list;
+}
+
+
+/*
+ * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
+ * return it.
+ *
+ * If the input is neither a string nor Unicode, an exception is raised.
+ *
+ * If the input is Unicode, then it is converted to CP850.
+ *
+ * If the input is a String, then it is converted to Unicode using the default
+ * decoding method, and then converted to CP850. This in effect gives
+ * conversion from latin-1 (currently the PSA's default) to CP850, without
+ * needing a custom translation table.
+ *
+ * I hope this approach avoids being too fragile w.r.t. being passed either
+ * Unicode or String objects.
+ */
+static PyObject *
+pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
+{
+    /* One-byte "\0" terminator appended after every string.  It is created
+       on first use and intentionally never released, so it has to be static
+       to survive between calls. */
+    static PyObject *nul_str = NULL;
+    PyObject *val_obj = NULL;
+    PyObject *unicode_obj = NULL;
+    PyObject *cp850_str = NULL;
+    PyObject *new_list = NULL;
+
+    if (!(val_obj = PyIter_Next(val_iter))) {
+        /* Exhaustion is reported as NULL without an exception; raise the
+           IndexError that the pack() docstring promises for too few values. */
+        if (!PyErr_Occurred())
+            PyErr_Format(PyExc_IndexError,
+                         "%s: value list is too short for format string",
+                         __FUNCTION__);
+        goto out;
+    }
+
+    if (PyUnicode_Check(val_obj)) {
+        /* hand the reference over so it is released exactly once at out */
+        unicode_obj = val_obj;
+        val_obj = NULL;
+    }
+    else {
+        /* string: decode with the default codec.  val_obj is released at
+           out, on the failure path as well as on success. */
+        if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
+            goto out;
}
- buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
- free(packed_buf); /* get rid of tmp buf */
+    if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL)))
+        goto out;
+
+    if (!nul_str) {
+        /* this is constant and often-used; hold it forever */
+        if (!(nul_str = PyString_FromStringAndSize("", 1)))
+            goto out;
+    }
+
+    /* Returns packed_list itself (no new reference) once both the encoded
+       string and its terminator are on the list; NULL otherwise. */
+    if ((PyList_Append(packed_list, cp850_str) != -1)
+        && (PyList_Append(packed_list, nul_str) != -1))
+        new_list = packed_list;
+
+ out:
+    Py_XDECREF(val_obj);
+    Py_XDECREF(unicode_obj);
+    Py_XDECREF(cp850_str);
+
+    return new_list;
+}
+
+
+/*
+ * Pack (LENGTH, BUFFER) pair onto the list.
+ *
+ * The buffer must already be a String, not Unicode, because it contains 8-bit
+ * untranslated data. In some cases it will actually be UTF_16_LE data.
+ */
+static PyObject *
+pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
+{
+    PyObject *val_obj;
+    PyObject *new_list = NULL;
+
+    /* pull off integer and stick onto list */
+    if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
+        return NULL;
+
+    /* this assumes that the string is the right length; the old code did the same. */
+    if (!(val_obj = PyIter_Next(val_iter))) {
+        /* PyIter_Next sets no exception when the iterator is merely
+           exhausted, so report the missing buffer value explicitly. */
+        if (!PyErr_Occurred())
+            PyErr_Format(PyExc_IndexError,
+                         "%s: value list is too short for format string",
+                         __FUNCTION__);
+        return NULL;
+    }
+
+    if (!PyString_Check(val_obj)) {
+        pytdbpack_bad_type('B', "String", val_obj);
+        goto out;
+    }
- return buf_str;
+    /* the bytes go onto the list untranslated; returns packed_list itself
+       (no new reference) on success, NULL with an exception set otherwise */
+    if (PyList_Append(packed_list, val_obj) != -1)
+        new_list = packed_list;
+
+ out:
+    Py_XDECREF(val_obj);
+    return new_list;
}
PyObject *val_list = NULL, *ret_tuple = NULL;
PyObject *rest_string = NULL;
int format_len, packed_len;
+ char last_format = '#'; /* invalid */
int i;
- char last_format = '#';
/* get arguments */
if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
format_len = strlen(format_str);
- /* allocate list to hold results */
- val_list = PyList_New(format_len);
+ /* Allocate list to hold results. Initially empty, and we append
+ results as we go along. */
+ val_list = PyList_New(0);
if (!val_list)
goto failed;
ret_tuple = PyTuple_New(2);
goto failed;
/* For every object, unpack. */
- for (ppacked = packed_str, i = 0; i < format_len; i++) {
- PyObject *val_obj;
- char format;
-
- format = format_str[i];
- if (format == '$') {
- if (i == 0) {
- PyErr_Format(PyExc_ValueError,
- "%s: '$' may not be first character in format",
- __FUNCTION__);
- goto failed;
- }
- else {
- format = last_format; /* repeat */
- }
- }
-
- val_obj = pytdbpack_unpack_item(format,
- &ppacked,
- &packed_len);
- if (!val_obj)
+ for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
+ last_format = format_str[i];
+ /* packed_len is reduced in place */
+ if (!pytdbpack_unpack_item(format_str[i], &ppacked, &packed_len, val_list))
goto failed;
-
- PyList_SET_ITEM(val_list, i, val_obj);
- last_format = format;
}
- /* put leftovers in box for lunch tomorrow */
+ /* If the last character was '$', keep going until out of space */
+ if (format_str[i] == '$') {
+ if (i == 0) {
+ PyErr_Format(PyExc_ValueError,
+ "%s: '$' may not be first character in format",
+ __FUNCTION__);
+ return NULL;
+ }
+ while (packed_len > 0)
+ if (!pytdbpack_unpack_item(last_format, &ppacked, &packed_len, val_list))
+ goto failed;
+ }
+
+ /* save leftovers for next time */
rest_string = PyString_FromStringAndSize(ppacked, packed_len);
if (!rest_string)
goto failed;
return ret_tuple;
failed:
- /* handle failure: deallocate anything */
+ /* handle failure: deallocate anything. XDECREF forms handle NULL
+ pointers for objects that haven't been allocated yet. */
Py_XDECREF(val_list);
Py_XDECREF(ret_tuple);
Py_XDECREF(rest_string);
}
+
+#if 0
/*
Internal routine that calculates how many bytes will be required to
encode the values in the format.
int val_i;
int val_len;
- val_len = PySequence_Fast_GET_SIZE(val_seq);
+ val_len = PySequence_Length(val_seq);
+ if (val_len == -1)
+ return -1;
for (p = format_str, val_i = 0; *p; p++, val_i++) {
char ch = *p;
- PyObject *val_obj;
- int item_len;
if (val_i >= val_len) {
PyErr_Format(PyExc_IndexError,
- "samba.tdbpack.pack: value list is too short for format string");
+ "%s: value list is too short for format string",
+ __FUNCTION__);
return -1;
}
/* borrow a reference to the item */
- val_obj = PySequence_Fast_GET_ITEM(val_seq, val_i);
- if (!val_obj)
- return -1;
+ if (ch == 'd' || ch == 'p')
+ len += 4;
+ else if (ch == 'w')
+ len += 2;
+ else if (ch == 'f' || ch == 'P') {
+ /* nul-terminated 8-bit string */
+ int item_len;
+ PyObject *str_obj;
+
+ str_obj = PySequence_GetItem(val_seq, val_i);
+ if (!str_obj)
+ return -1;
+
+ if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
+ pytdbpack_bad_type(ch, "String", str_obj);
+ return -1;
+ }
+
+ len += 1 + item_len;
+ }
+ else if (ch == 'B') {
+ /* length-preceded byte buffer: n bytes, plus a preceding
+ * word */
+ PyObject *len_obj;
+ long len_val;
+
+ len_obj = PySequence_GetItem(val_seq, val_i);
+ val_i++; /* skip over buffer */
+
+ if (!PyNumber_Check(len_obj)) {
+ pytdbpack_bad_type(ch, "Number", len_obj);
+ return -1;
+ }
- item_len = pytdbpack_calc_item_len(ch, val_obj);
- if (item_len == -1)
- return -1;
- else
- len += item_len;
- }
+ len_val = PyInt_AsLong(len_obj);
+ if (len_val < 0) {
+ PyErr_Format(PyExc_ValueError,
+ "%s: format 'B' requires positive integer", __FUNCTION__);
+ return -1;
+ }
- if (val_i != val_len) {
- PyErr_Format(PyExc_IndexError,
- "%s: value list is wrong length for format string",
- __FUNCTION__);
- return -1;
+ len += 4 + len_val;
+ }
+ else {
+ PyErr_Format(PyExc_ValueError,
+ "%s: format character '%c' is not supported",
+ __FUNCTION__, ch);
+
+ return -1;
+ }
}
return len;
}
+#endif
-/*
- Calculate the number of bytes required to pack a single value.
-*/
-static int
-pytdbpack_calc_item_len(char ch,
- PyObject *val_obj)
+static PyObject *pytdbpack_bad_type(char ch, /* format character being packed */
+ const char *expected, /* human-readable name of the required type */
+ PyObject *val_obj) /* offending value; its repr() goes into the message */
{
- if (ch == 'd' || ch == 'w') {
- if (!PyInt_Check(val_obj)) {
- PyErr_Format(PyExc_TypeError,
- "tdbpack: format '%c' requires an Int",
- ch);
- return -1;
- }
- if (ch == 'w')
- return 2;
- else
- return 4;
- } else if (ch == 'p') {
- return 4;
- }
- else if (ch == 'f' || ch == 'P' || ch == 'B') {
- /* nul-terminated 8-bit string */
- if (!PyString_Check(val_obj)) {
- PyErr_Format(PyExc_TypeError,
- "tdbpack: format '%c' requires a String",
- ch);
- return -1;
- }
-
- if (ch == 'B') {
- /* byte buffer; just use Python string's length, plus
- a preceding word */
- return 4 + PyString_GET_SIZE(val_obj);
- }
- else {
- /* one nul character */
- return 1 + PyString_GET_SIZE(val_obj);
- }
- }
- else {
- PyErr_Format(PyExc_ValueError,
- __FUNCTION__ ": format character '%c' is not supported",
- ch);
-
- return -1;
- }
+ PyObject *r = PyObject_Repr(val_obj); /* new reference, released below */
+ if (!r)
+ return NULL; /* repr() itself failed; no TypeError is raised here */
+ PyErr_Format(PyExc_TypeError,
+ "tdbpack: format '%c' requires %s, not %s",
+ ch, expected, PyString_AS_STRING(r));
+ Py_DECREF(r);
+ return val_obj; /* same pointer the caller passed in, no new reference; callers visible in this file ignore it */
}
realize this is kind of dumb because we'll almost always be on x86, but
being safe is important.
*/
-static void pack_int32(unsigned long val_long, unsigned char **pbuf)
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
- (*pbuf)[0] = val_long & 0xff;
- (*pbuf)[1] = (val_long >> 8) & 0xff;
- (*pbuf)[2] = (val_long >> 16) & 0xff;
- (*pbuf)[3] = (val_long >> 24) & 0xff;
- (*pbuf) += 4;
+    /* Store the low 32 bits of val_long into pbuf[0..3], least significant
+       byte first.  The caller's pointer is not advanced. */
+    int byte_i;
+
+    for (byte_i = 0; byte_i < 4; byte_i++)
+        pbuf[byte_i] = (val_long >> (8 * byte_i)) & 0xff;
}
static PyObject *
-unpack_int32(char **pbuf, int *plen)
+unpack_uint32(char **pbuf, int *plen)
{
- long v;
+ unsigned long v;
unsigned char *b;
if (*plen < 4) {
(*pbuf) += 4;
(*plen) -= 4;
- return PyInt_FromLong(v);
+ return PyLong_FromUnsignedLong(v);
}
static PyObject *
-unpack_buffer(char **pbuf, int *plen)
+unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
{
/* first get 32-bit len */
long slen;
unsigned char *b;
unsigned char *start;
+ PyObject *str_obj = NULL, *len_obj = NULL;
if (*plen < 4) {
unpack_err_too_short();
(*pbuf) += slen;
(*plen) -= slen;
- return PyString_FromStringAndSize(start, slen);
+ if (!(len_obj = PyInt_FromLong(slen)))
+ goto failed;
+
+ if (PyList_Append(val_list, len_obj) == -1)
+ goto failed;
+
+ if (!(str_obj = PyString_FromStringAndSize(start, slen)))
+ goto failed;
+
+ if (PyList_Append(val_list, str_obj) == -1)
+ goto failed;
+
+ return val_list;
+
+ failed:
+ Py_XDECREF(len_obj); /* handles NULL */
+ Py_XDECREF(str_obj);
+ return NULL;
}
*PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
been consumed.
- Returns a reference to the unpacked Python object, or NULL for failure.
+ Returns a reference to None, or NULL for failure.
*/
static PyObject *pytdbpack_unpack_item(char ch,
char **pbuf,
- int *plen)
+ int *plen,
+ PyObject *val_list)
{
+ PyObject *result;
+
if (ch == 'w') { /* 16-bit int */
- return unpack_int16(pbuf, plen);
+ result = unpack_int16(pbuf, plen);
}
else if (ch == 'd' || ch == 'p') { /* 32-bit int */
/* pointers can just come through as integers */
- return unpack_int32(pbuf, plen);
+ result = unpack_uint32(pbuf, plen);
}
else if (ch == 'f' || ch == 'P') { /* nul-term string */
- return unpack_string(pbuf, plen);
+ result = unpack_string(pbuf, plen);
}
else if (ch == 'B') { /* length, buffer */
- return unpack_buffer(pbuf, plen);
+ return unpack_buffer(pbuf, plen, val_list);
}
else {
PyErr_Format(PyExc_ValueError,
return NULL;
}
-}
-
-
-
-/*
- Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
- and advance the pointer. Buffer length has been pre-calculated so we are
- sure that there is enough space.
-
-*/
-static PyObject *
-pytdbpack_pack_item(char ch,
- PyObject *val_obj,
- unsigned char **pbuf)
-{
- if (ch == 'w') {
- unsigned long val_long = PyInt_AsLong(val_obj);
- (*pbuf)[0] = val_long & 0xff;
- (*pbuf)[1] = (val_long >> 8) & 0xff;
- (*pbuf) += 2;
- }
- else if (ch == 'd') {
- /* 4-byte LE number */
- pack_int32(PyInt_AsLong(val_obj), pbuf);
- }
- else if (ch == 'p') {
- /* "Pointer" value -- in the subset of DCERPC used by Samba,
- this is really just an "exists" or "does not exist"
- flag. */
- pack_int32(PyObject_IsTrue(val_obj), pbuf);
- }
- else if (ch == 'f' || ch == 'P') {
- int size;
- char *sval;
- size = PyString_GET_SIZE(val_obj);
- sval = PyString_AS_STRING(val_obj);
- pack_bytes(size+1, sval, pbuf); /* include nul */
- }
- else if (ch == 'B') {
- int size;
- char *sval;
-
- size = PyString_GET_SIZE(val_obj);
- pack_int32(size, pbuf);
- sval = PyString_AS_STRING(val_obj);
- pack_bytes(size, sval, pbuf); /* do not include nul */
- }
- else {
- /* this ought to be caught while calculating the length, but
- just in case. */
- PyErr_Format(PyExc_ValueError,
- "%s: format character '%c' is not supported",
- __FUNCTION__, ch);
-
+ /* otherwise OK */
+ if (!result)
return NULL;
- }
-
- return Py_None;
+ if (PyList_Append(val_list, result) == -1)
+ return NULL;
+
+ return val_list;
}
-/*
- Pack data according to FORMAT_STR from the elements of VAL_SEQ into
- PACKED_BUF.
-
- The string has already been checked out, so we know that VAL_SEQ is large
- enough to hold the packed data, and that there are enough value items.
- (However, their types may not have been thoroughly checked yet.)
-
- In addition, val_seq is a Python Fast sequence.
-
- Returns NULL for error (with exception set), or None.
-*/
-PyObject *
-pytdbpack_pack_data(const char *format_str,
- PyObject *val_seq,
- unsigned char *packed_buf)
-{
- int i;
-
- for (i = 0; format_str[i]; i++) {
- char ch = format_str[i];
- PyObject *val_obj;
-
- /* borrow a reference to the item */
- val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
- if (!val_obj)
- return NULL;
-
- if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
- return NULL;
- }
-
- return Py_None;
-}
-
static PyMethodDef pytdbpack_methods[] = {
- { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
+ { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
{ "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
};