1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Forward declarations of file-local (static) helpers that are defined
 * further down in this file and called before their definitions. */
30 static int pytdbpack_calc_reqd_len(char *format_str,
33 static PyObject *pytdbpack_unpack_item(char,
37 static PyObject *pytdbpack_pack_data(const char *format_str,
44 static PyObject *pytdbpack_bad_type(char ch,
/* Module-level docstring; it is handed to Py_InitModule3() at the bottom of
 * this file.
 *
 * NOTE(review): the text below lists upper-case P as the pointer flag, but
 * the pack, unpack and length-calculation code in this file all treat
 * lower-case p as the 32-bit pointer value and upper-case P as a
 * nul-terminated string -- confirm which is intended and align the two. */
48 static const char * pytdbpack_docstring =
49 "Convert between Python values and Samba binary encodings.
51 This module is conceptually similar to the standard 'struct' module, but it
52 uses both a different binary format and a different description string.
54 Samba's encoding is based on that used inside DCE-RPC and SMB: a
55 little-endian, unpadded, non-self-describing binary format. It is intended
56 that these functions be as similar as possible to the routines in Samba's
57 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
59 Python strings are used to specify the format of data to be packed or
62 Strings in TDBs are typically stored in DOS codepages. The caller of this
63 module must make appropriate translations if necessary, typically to and from
66 tdbpack format strings:
68 'f': NULL-terminated string in DOS codepage
72 'd': 4 byte little-endian number
74 'w': 2 byte little-endian number
76 'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
77 really just an \"exists\" or \"does not exist\" flag. The boolean
78 value of the Python object is used.
80 'B': 4-byte LE length, followed by that many bytes of binary data.
81 Corresponds to a Python integer giving the length, followed by a byte
82 string of the appropriate length.
84 '$': Special flag indicating that the preceding format code should be
85 repeated while data remains. This is only supported for unpacking.
87 Every code corresponds to a single Python object, except 'B' which
88 corresponds to two values (length and contents), and '$', which produces
89 however many make sense.
/* Docstring for the Python-visible pack() function; referenced from the
 * "pack" entry of pytdbpack_methods. */
93 static char const pytdbpack_pack_doc[] =
94 "pack(format, values) -> buffer
95 Pack Python objects into Samba binary format according to format string.
98 format -- string of tdbpack format characters
99 values -- sequence of value objects corresponding 1:1 to format characters
102 buffer -- string containing packed data
105 IndexError -- if there are too few values for the format
106 ValueError -- if any of the format characters is illegal
107 TypeError -- if the format is not a string, or values is not a sequence,
108 or any of the values is of the wrong type for the corresponding
112 For historical reasons, it is not an error to pass more values than are consumed
/* Docstring for the Python-visible unpack() function; referenced from the
 * "unpack" entry of pytdbpack_methods. */
117 static char const pytdbpack_unpack_doc[] =
118 "unpack(format, buffer) -> (values, rest)
119 Unpack Samba binary data according to format string.
122 format -- string of tdbpack characters
123 buffer -- string of packed binary data
127 values -- sequence of values corresponding 1:1 to format characters
128 rest -- string containing data that was not decoded, or '' if the
129 whole string was consumed
132 IndexError -- if there is insufficient data in the buffer for the
133 format (or if the data is corrupt and contains a variable-length
134 field extending past the end)
135 ValueError -- if any of the format characters is illegal
138 Because unconsumed data is returned, you can feed it back in to the
139 unpacker to extract further fields. Alternatively, if you wish to modify
140 some fields near the start of the data, you may be able to save time by
141 only unpacking and repacking the necessary part.
147 Game plan is to first of all walk through the arguments and calculate the
148 total length that will be required. We allocate a Python string of that
149 size, then walk through again and fill it in.
151 We just borrow references to all the passed arguments, since none of them
152 need to be permanently stored. We transfer ownership to the returned
/* Python entry point for pack(format, values).
 *
 * Parses a format string and a value object out of ARGS, converts the value
 * object to a fast sequence, asks pytdbpack_calc_reqd_len() for the encoded
 * size, fills a heap-allocated scratch buffer through pytdbpack_pack_data(),
 * then builds a Python string from that buffer and frees the scratch copy.
 *
 * NOTE(review): the length helper is documented as possibly returning 0, and
 * malloc(0) is allowed to return NULL -- confirm the allocation check (not
 * visible in this view) does not report that case as a MemoryError.
 * NOTE(review): PySequence_Fast() returns a new reference; its release is
 * not visible in this view -- confirm fast_seq is dropped on every path.
 */
156 pytdbpack_pack(PyObject *self,
160 PyObject *val_seq, *fast_seq, *buf_str;
164 /* TODO: Test passing wrong types or too many arguments */
165 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
168 /* Convert into a list or tuple (if not already one), so that we can
169 * index more easily. */
170 fast_seq = PySequence_Fast(val_seq,
171 __FUNCTION__ ": argument 2 must be sequence");
/* First pass: size the output; -1 signals that an exception is already set. */
175 reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
176 if (reqd_len == -1) /* exception was thrown */
181 This design causes an unnecessary copying of the data when Python
182 constructs an object, and that might possibly be avoided by using a
183 Buffer object of some kind instead. I'm not doing that for now
185 packed_buf = malloc(reqd_len);
187 PyErr_Format(PyExc_MemoryError,
188 "%s: couldn't allocate %d bytes for packed buffer",
189 __FUNCTION__, reqd_len);
193 if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
198 buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
199 free(packed_buf); /* get rid of tmp buf */
/* Python entry point for unpack(format, buffer).
 *
 * Parses the format string and the packed byte string (with its length),
 * creates a result list with one slot per format character, and decodes one
 * item per character with pytdbpack_unpack_item().  A '$' reuses the
 * previous format code (last_format); the error text below shows that '$'
 * is rejected as the first character.  On success the value list and a
 * string built from the remaining data are stored in a 2-tuple.  The
 * Py_XDECREF calls at the end are the failure cleanup.
 *
 * NOTE(review): the list is sized from strlen(format) and the loop is
 * bounded by format_len, so in the visible code each '$' repeats the
 * previous code once, not "while data remains" as the module docstring
 * says -- confirm against the lines missing from this view.
 */
207 pytdbpack_unpack(PyObject *self,
210 char *format_str, *packed_str, *ppacked;
211 PyObject *val_list = NULL, *ret_tuple = NULL;
212 PyObject *rest_string = NULL;
213 int format_len, packed_len;
/* Sentinel meaning "no previous format code yet" (presumably what the '$'
 * check tests -- confirm). */
215 char last_format = '#';
218 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
221 format_len = strlen(format_str);
223 /* allocate list to hold results */
224 val_list = PyList_New(format_len);
227 ret_tuple = PyTuple_New(2);
231 /* For every object, unpack. */
232 for (ppacked = packed_str, i = 0; i < format_len; i++) {
236 format = format_str[i];
239 PyErr_Format(PyExc_ValueError,
240 "%s: '$' may not be first character in format",
245 format = last_format; /* repeat */
249 val_obj = pytdbpack_unpack_item(format,
/* PyList_SET_ITEM stores the pointer without taking a new reference, so
 * ownership of val_obj passes to the list. */
255 PyList_SET_ITEM(val_list, i, val_obj);
256 last_format = format;
259 /* save leftovers for next time */
260 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
264 /* return (values, rest) tuple; give up references to them */
265 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
267 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
272 /* handle failure: deallocate anything */
273 Py_XDECREF(val_list);
274 Py_XDECREF(ret_tuple);
275 Py_XDECREF(rest_string);
281 Internal routine that calculates how many bytes will be required to
282 encode the values in the format.
284 Also checks that the value list is the right size for the format list.
286 Returns number of bytes (may be 0), or -1 if there's something wrong, in
287 which case a Python exception has been raised.
291 val_seq: a Fast Sequence (list or tuple), being all the values
/* Walk FORMAT_STR and the value sequence in parallel (p over the format,
 * val_i over the values) to size the packed output.
 *
 * Visible behaviour: IndexError when the values run out before the format
 * does; d and p are handled together; f and P are sized from
 * PyString_Size() and report a TypeError through pytdbpack_bad_type() when
 * that fails; B reads a number, rejects a non-number through
 * pytdbpack_bad_type(), and bumps val_i an extra step to skip the buffer
 * value; any other character raises ValueError.
 *
 * NOTE(review): the bounds check on val_i runs once per format character,
 * before the extra val_i++ of the B branch -- confirm the second B value is
 * bounds-checked somewhere in the lines missing from this view.
 */
294 pytdbpack_calc_reqd_len(char *format_str,
302 val_len = PySequence_Length(val_seq);
306 for (p = format_str, val_i = 0; *p; p++, val_i++) {
309 if (val_i >= val_len) {
310 PyErr_Format(PyExc_IndexError,
311 "%s: value list is too short for format string",
316 /* borrow a reference to the item */
317 if (ch == 'd' || ch == 'p')
321 else if (ch == 'f' || ch == 'P') {
322 /* nul-terminated 8-bit string */
/* NOTE(review): PySequence_GetItem() returns a NEW reference, not a borrowed
 * one as the comment above suggests; no matching release is visible here, so
 * this may leak one reference per item -- confirm. */
326 str_obj = PySequence_GetItem(val_seq, val_i);
330 item_len = PyString_Size(str_obj);
331 if (item_len == -1) {
332 pytdbpack_bad_type(ch, "String", str_obj);
338 else if (ch == 'B') {
339 /* length-preceded byte buffer: n bytes, plus a preceding
344 len_obj = PySequence_GetItem(val_seq, val_i);
345 val_i++; /* skip over buffer */
347 if (!PyNumber_Check(len_obj)) {
348 pytdbpack_bad_type(ch, "Number", len_obj);
352 len_val = PyInt_AsLong(len_obj);
354 PyErr_Format(PyExc_ValueError,
355 "%s: format 'B' requires positive integer", __FUNCTION__);
362 PyErr_Format(PyExc_ValueError,
363 "%s: format character '%c' is not supported",
/* Raise a TypeError saying that format character CH requires EXPECTED, and
 * include the repr() of the offending value in the message.
 *
 * NOTE(review): PyObject_Repr() can return NULL and returns a new reference;
 * neither a NULL check nor a release of r is visible in this view --
 * confirm both exist. */
374 static PyObject *pytdbpack_bad_type(char ch,
375 const char *expected,
378 PyObject *r = PyObject_Repr(val_obj);
381 PyErr_Format(PyExc_TypeError,
382 "tdbpack: format '%c' requires %s, not %s",
383 ch, expected, PyString_AS_STRING(r));
390 XXX: glib and Samba have quicker macros for doing the endianness conversions,
391 but I don't know of one in plain libc, and it's probably not a big deal. I
392 realize this is kind of dumb because we'll almost always be on x86, but
393 being safe is important.
/* Store the low 32 bits of VAL_LONG at (*pbuf)[0..3], least-significant
 * byte first (little-endian), independent of host byte order.
 * Presumably *pbuf is then advanced past the four bytes; that line is not
 * visible in this view -- confirm. */
395 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
397 (*pbuf)[0] = val_long & 0xff;
398 (*pbuf)[1] = (val_long >> 8) & 0xff;
399 (*pbuf)[2] = (val_long >> 16) & 0xff;
400 (*pbuf)[3] = (val_long >> 24) & 0xff;
/* Copy LEN bytes from FROM to the current output position *pbuf.
 * LEN is a signed long that memcpy() converts to size_t, so callers must
 * not pass a negative value.  Presumably *pbuf is advanced by LEN
 * afterwards; that line is not visible in this view -- confirm. */
405 static void pack_bytes(long len, const char *from,
406 unsigned char **pbuf)
408 memcpy(*pbuf, from, len);
/* Set a Python IndexError reporting that the packed data ended before the
 * format was satisfied.  Shared by the unpack_* helpers.
 *
 * NOTE(review): pasting __FUNCTION__ next to a string literal only works on
 * compilers that treat it as a literal (old GCC); elsewhere in this file it
 * is passed as a "%s" argument instead, which is the portable form. */
414 unpack_err_too_short(void)
416 PyErr_Format(PyExc_IndexError,
417 __FUNCTION__ ": data too short for unpack format");
/* Decode a 4-byte little-endian integer from the data at *pbuf and return it
 * as a Python int; reports "too short" through unpack_err_too_short().
 *
 * NOTE(review): the declaration of b is not visible here.  If it is an
 * unsigned char pointer, b[3]<<24 is evaluated as int and shifts into the
 * sign bit when the top byte is 0x80 or above -- confirm the types. */
422 unpack_int32(char **pbuf, int *plen)
428 unpack_err_too_short();
433 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
438 return PyInt_FromLong(v);
/* 16-bit counterpart of unpack_int32(): returns the decoded value v as a
 * Python int and reports short input through unpack_err_too_short().  The
 * decoding expression itself is not visible in this view. */
442 static PyObject *unpack_int16(char **pbuf, int *plen)
448 unpack_err_too_short();
458 return PyInt_FromLong(v);
/* Decode a nul-terminated string: search at most *plen bytes from start for
 * the terminator, report "too short" when none is found, otherwise advance
 * *pbuf past the string and its terminator and return the bytes before the
 * terminator as a Python string. */
463 unpack_string(char **pbuf, int *plen)
466 char *nul_ptr, *start;
/* memchr bounds the search by the remaining length, so unterminated input
 * cannot run off the end of the buffer. */
470 nul_ptr = memchr(start, '\0', *plen);
472 unpack_err_too_short();
476 len = nul_ptr - start;
478 *pbuf += len + 1; /* skip \0 */
481 return PyString_FromStringAndSize(start, len);
/* Decode a length-prefixed byte buffer: read a 4-byte little-endian length
 * into slen, raise ValueError when it is negative, raise IndexError when
 * fewer than slen bytes remain, and otherwise return those slen bytes as a
 * single Python string.
 *
 * NOTE(review): only the contents are returned here, while the module
 * docstring describes B as two values (length and contents) -- confirm
 * which is intended for unpacking. */
486 unpack_buffer(char **pbuf, int *plen)
488 /* first get 32-bit len */
491 unsigned char *start;
494 unpack_err_too_short();
499 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
501 if (slen < 0) { /* surely you jest */
502 PyErr_Format(PyExc_ValueError,
503 __FUNCTION__ ": buffer seems to have negative length");
512 PyErr_Format(PyExc_IndexError,
513 __FUNCTION__ ": not enough data to unpack buffer: "
514 "need %d bytes, have %d",
522 return PyString_FromStringAndSize(start, slen);
526 /* Unpack a single field from packed data, according to format character CH.
527 Remaining data is at *PBUF, of length *PLEN.
529 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
532 Returns a reference to the unpacked Python object, or NULL for failure.
/* Dispatch on format character CH to the matching unpack_* helper:
 *   w      -> unpack_int16
 *   d, p   -> unpack_int32
 *   f, P   -> unpack_string
 *   B      -> unpack_buffer
 * Any other character raises ValueError. */
534 static PyObject *pytdbpack_unpack_item(char ch,
538 if (ch == 'w') { /* 16-bit int */
539 return unpack_int16(pbuf, plen);
541 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
542 /* pointers can just come through as integers */
543 return unpack_int32(pbuf, plen);
545 else if (ch == 'f' || ch == 'P') { /* nul-term string */
546 return unpack_string(pbuf, plen);
548 else if (ch == 'B') { /* length, buffer */
549 return unpack_buffer(pbuf, plen);
552 PyErr_Format(PyExc_ValueError,
553 __FUNCTION__ ": format character '%c' is not supported",
564 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
567 The format string has already been checked, so we know that the output buffer is large
568 enough to hold the packed data, and that there are enough value items.
569 (However, their types may not have been thoroughly checked yet.)
571 In addition, val_seq is a Python Fast sequence.
573 Returns NULL for error (with exception set), or None.
/* Second pass of pack(): encode each value into PACKED according to the
 * corresponding character of FORMAT_STR.
 *
 * Visible encodings: a 2-byte little-endian number written directly into
 * packed[0..1]; d as a 4-byte number through pack_int32(); p as the truth
 * value of the object through pack_int32(); f and P as the string bytes
 * plus the terminating nul; B as a 4-byte length followed by that many
 * bytes taken from the next value.  Unknown characters raise ValueError.
 *
 * NOTE(review): the single index i is used for both format_str and val_seq,
 * and the B branch does ++i to fetch its second value.  That also skips the
 * next format character, whereas pytdbpack_calc_reqd_len() keeps separate
 * format and value indices -- this looks like a mismatch; confirm.
 * NOTE(review): in the B branch the byte count passed to pack_bytes() is the
 * caller-supplied integer, not the length of the string object; no check
 * that the string is at least that long is visible here -- confirm.
 */
576 pytdbpack_pack_data(const char *format_str,
578 unsigned char *packed)
582 for (i = 0; format_str[i]; i++) {
583 char ch = format_str[i];
586 /* borrow a reference to the item */
/* NOTE(review): PySequence_GetItem() returns a NEW reference, not a borrowed
 * one; no release is visible in this view, so confirm it is not leaked.
 * PySequence_Fast_GET_ITEM() is the borrowing accessor for fast sequences. */
587 val_obj = PySequence_GetItem(val_seq, i);
592 unsigned long val_long = PyInt_AsLong(val_obj);
593 (packed)[0] = val_long & 0xff;
594 (packed)[1] = (val_long >> 8) & 0xff;
597 else if (ch == 'd') {
598 /* 4-byte LE number */
599 pack_int32(PyInt_AsLong(val_obj), &packed);
601 else if (ch == 'p') {
602 /* "Pointer" value -- in the subset of DCERPC used by Samba,
603 this is really just an "exists" or "does not exist"
605 pack_int32(PyObject_IsTrue(val_obj), &packed);
607 else if (ch == 'f' || ch == 'P') {
611 size = PyString_GET_SIZE(val_obj);
612 sval = PyString_AS_STRING(val_obj);
613 pack_bytes(size+1, sval, &packed); /* include nul */
615 else if (ch == 'B') {
619 size = PyInt_AsLong(val_obj);
620 pack_int32(size, &packed);
622 val_obj = PySequence_GetItem(val_seq, ++i);
626 sval = PyString_AsString(val_obj);
630 pack_bytes(size, sval, &packed); /* do not include nul */
633 /* this ought to be caught while calculating the length, but
635 PyErr_Format(PyExc_ValueError,
636 "%s: format character '%c' is not supported",
/* Method table for the module: exposes pack() and unpack(), both taking
 * positional arguments only (METH_VARARGS), each with its docstring. */
648 static PyMethodDef pytdbpack_methods[] = {
649 { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
650 { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
/* Register the module under the name "tdbpack" with the method table and
 * the module docstring defined earlier in this file. */
656 Py_InitModule3("tdbpack", pytdbpack_methods,
657 (char *) pytdbpack_docstring);