source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char,
  34                                        char **pbuf,
  35                                        int *plen);
  36
  37 static PyObject *pytdbpack_pack_data(const char *format_str,
  38                                      PyObject *val_seq,
  39                                      unsigned char *buf);
  40
  41
  42
  43
  44 static PyObject *pytdbpack_bad_type(char ch,
  45                                     const char *expected,
  46                                     PyObject *val_obj);
  47
  48 static const char * pytdbpack_docstring =
  49 "Convert between Python values and Samba binary encodings.
  50
  51 This module is conceptually similar to the standard 'struct' module, but it
  52 uses both a different binary format and a different description string.
  53
  54 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  55 little-endian, unpadded, non-self-describing binary format.  It is intended
  56 that these functions be as similar as possible to the routines in Samba's
  57 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  58
  59 Python strings are used to specify the format of data to be packed or
  60 unpacked.
  61
  62 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  63 module must make appropriate translations if necessary, typically to and from
  64 Unicode objects.
  65
  66 tdbpack format strings:
  67
  68     'f':  NULL-terminated string in DOS codepage
  69
  70     'P':  same as 'f'
  71
  72     'd':  4 byte little-endian number
  73
  74     'w':  2 byte little-endian number
  75
  76     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  77           really just an \"exists\" or \"does not exist\" flag.  The boolean
  78           value of the Python object is used.
  79
  80     'B': 4-byte LE length, followed by that many bytes of binary data.
  81          Corresponds to a Python integer giving the length, followed by a byte
  82          string of the appropriate length.
  83
  84     '$': Special flag indicating that the preceding format code should be
  85          repeated while data remains.  This is only supported for unpacking.
  86
  87     Every code corresponds to a single Python object, except 'B' which
  88     corresponds to two values (length and contents), and '$', which produces
  89     however many make sense.
  90 ";
  91
  92
  93 static char const pytdbpack_pack_doc[] =
  94 "pack(format, values) -> buffer
  95 Pack Python objects into Samba binary format according to format string.
  96
  97 arguments:
  98     format -- string of tdbpack format characters
  99     values -- sequence of value objects corresponding 1:1 to format characters
 100
 101 returns:
 102     buffer -- string containing packed data
 103
 104 raises:
 105     IndexError -- if there are too few values for the format
 106     ValueError -- if any of the format characters is illegal
 107     TypeError  -- if the format is not a string, or values is not a sequence,
 108         or any of the values is of the wrong type for the corresponding
 109         format character
 110
 111 notes:
 112     For historical reasons, it is not an error to pass more values than are consumed
 113     by the format.
 114 ";
 115
 116
 117 static char const pytdbpack_unpack_doc[] =
 118 "unpack(format, buffer) -> (values, rest)
 119 Unpack Samba binary data according to format string.
 120
 121 arguments:
 122     format -- string of tdbpack characters
 123     buffer -- string of packed binary data
 124
 125 returns:
 126     2-tuple of:
 127         values -- sequence of values corresponding 1:1 to format characters
 128         rest -- string containing data that was not decoded, or '' if the
 129             whole string was consumed
 130
 131 raises:
 132     IndexError -- if there is insufficient data in the buffer for the
 133         format (or if the data is corrupt and contains a variable-length
 134         field extending past the end)
 135     ValueError -- if any of the format characters is illegal
 136
 137 notes:
 138     Because unconsumed data is returned, you can feed it back in to the
 139     unpacker to extract further fields.  Alternatively, if you wish to modify
 140     some fields near the start of the data, you may be able to save time by
 141     only unpacking and repacking the necessary part.
 142 ";
 143
 144
 145
 146 /*
 147   Game plan is to first of all walk through the arguments and calculate the
 148   total length that will be required.  We allocate a Python string of that
 149   size, then walk through again and fill it in.
 150
 151   We just borrow references to all the passed arguments, since none of them
 152   need to be permanently stored.  We transfer ownership to the returned
 153   object.
 154  */
 155 static PyObject *
 156 pytdbpack_pack(PyObject *self,
 157                PyObject *args)
 158 {
 159         char *format_str;
 160         PyObject *val_seq, *fast_seq, *buf_str;
 161         int reqd_len;
 162         char *packed_buf;
 163
 164         /* TODO: Test passing wrong types or too many arguments */
 165         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 166                 return NULL;
 167
 168         /* Convert into a list or tuple (if not already one), so that we can
 169          * index more easily. */
 170         fast_seq = PySequence_Fast(val_seq,
 171                                    __FUNCTION__ ": argument 2 must be sequence");
 172         if (!fast_seq)
 173                 return NULL;
 174
 175         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 176         if (reqd_len == -1)     /* exception was thrown */
 177                 return NULL;
 178
 179         /* Allocate space.
 180
 181            This design causes an unnecessary copying of the data when Python
 182            constructs an object, and that might possibly be avoided by using a
 183            Buffer object of some kind instead.  I'm not doing that for now
 184            though.  */
 185         packed_buf = malloc(reqd_len);
 186         if (!packed_buf) {
 187                 PyErr_Format(PyExc_MemoryError,
 188                              "%s: couldn't allocate %d bytes for packed buffer",
 189                              __FUNCTION__, reqd_len);
 190                 return NULL;
 191         }
 192
 193         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 194                 free(packed_buf);
 195                 return NULL;
 196         }
 197
 198         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 199         free(packed_buf);       /* get rid of tmp buf */
 200
 201         return buf_str;
 202 }
 203
 204
 205
 206 static PyObject *
 207 pytdbpack_unpack(PyObject *self,
 208                  PyObject *args)
 209 {
 210         char *format_str, *packed_str, *ppacked;
 211         PyObject *val_list = NULL, *ret_tuple = NULL;
 212         PyObject *rest_string = NULL;
 213         int format_len, packed_len;
 214         int i;
 215         char last_format = '#';
 216
 217         /* get arguments */
 218         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 219                 return NULL;
 220
 221         format_len = strlen(format_str);
 222
 223         /* allocate list to hold results */
 224         val_list = PyList_New(format_len);
 225         if (!val_list)
 226                 goto failed;
 227         ret_tuple = PyTuple_New(2);
 228         if (!ret_tuple)
 229                 goto failed;
 230
 231         /* For every object, unpack.  */
 232         for (ppacked = packed_str, i = 0; i < format_len; i++) {
 233                 PyObject *val_obj;
 234                 char format;
 235
 236                 format = format_str[i];
 237                 if (format == '$') {
 238                         if (i == 0) {
 239                                 PyErr_Format(PyExc_ValueError,
 240                                              "%s: '$' may not be first character in format",
 241                                              __FUNCTION__);
 242                                 goto failed;
 243                         }
 244                         else {
 245                                 format = last_format; /* repeat */
 246                         }
 247                 }
 248
 249                 val_obj = pytdbpack_unpack_item(format,
 250                                                 &ppacked,
 251                                                 &packed_len);
 252                 if (!val_obj)
 253                         goto failed;
 254
 255                 PyList_SET_ITEM(val_list, i, val_obj);
 256                 last_format = format;
 257         }
 258
 259         /* save leftovers for next time */
 260         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 261         if (!rest_string)
 262                 goto failed;
 263
 264         /* return (values, rest) tuple; give up references to them */
 265         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 266         val_list = NULL;
 267         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 268         val_list = NULL;
 269         return ret_tuple;
 270
 271   failed:
 272         /* handle failure: deallocate anything */
 273         Py_XDECREF(val_list);
 274         Py_XDECREF(ret_tuple);
 275         Py_XDECREF(rest_string);
 276         return NULL;
 277 }
 278
 279
 280 /*
 281   Internal routine that calculates how many bytes will be required to
 282   encode the values in the format.
 283
 284   Also checks that the value list is the right size for the format list.
 285
 286   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 287   which case a Python exception has been raised.
 288
 289   Arguments:
 290
 291     val_seq: a Fast Sequence (list or tuple), being all the values
 292 */
 293 static int
 294 pytdbpack_calc_reqd_len(char *format_str,
 295                         PyObject *val_seq)
 296 {
 297         int len = 0;
 298         char *p;
 299         int val_i;
 300         int val_len;
 301
 302         val_len = PySequence_Length(val_seq);
 303         if (val_len == -1)
 304                 return -1;
 305
 306         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 307                 char ch = *p;
 308
 309                 if (val_i >= val_len) {
 310                         PyErr_Format(PyExc_IndexError,
 311                                      "%s: value list is too short for format string",
 312                                      __FUNCTION__);
 313                         return -1;
 314                 }
 315
 316                 /* borrow a reference to the item */
 317                 if (ch == 'd' || ch == 'p')
 318                         len += 4;
 319                 else if (ch == 'w')
 320                         len += 2;
 321                 else if (ch == 'f' || ch == 'P') {
 322                         /* nul-terminated 8-bit string */
 323                         int item_len;
 324                         PyObject *str_obj;
 325
 326                         str_obj = PySequence_GetItem(val_seq, val_i);
 327                         if (!str_obj)
 328                                 return -1;
 329
 330                         item_len = PyString_Size(str_obj);
 331                         if (item_len == -1) {
 332                                 pytdbpack_bad_type(ch, "String", str_obj);
 333                                 return -1;
 334                         }
 335
 336                         len += item_len;
 337                 }
 338                 else if (ch == 'B') {
 339                         /* length-preceded byte buffer: n bytes, plus a preceding
 340                          * word */
 341                         PyObject *len_obj;
 342                         long len_val;
 343
 344                         len_obj = PySequence_GetItem(val_seq, val_i);
 345                         val_i++; /* skip over buffer */
 346
 347                         if (!PyNumber_Check(len_obj)) {
 348                                 pytdbpack_bad_type(ch, "Number", len_obj);
 349                                 return -1;
 350                         }
 351
 352                         len_val = PyInt_AsLong(len_obj);
 353                         if (len_val < 0) {
 354                                 PyErr_Format(PyExc_ValueError,
 355                                              "%s: format 'B' requires positive integer", __FUNCTION__);
 356                                 return -1;
 357                         }
 358
 359                         len += 4 + len_val;
 360                 }
 361                 else {
 362                         PyErr_Format(PyExc_ValueError,
 363                                      "%s: format character '%c' is not supported",
 364                                      __FUNCTION__, ch);
 365
 366                         return -1;
 367                 }
 368         }
 369
 370         return len;
 371 }
 372
 373
 374 static PyObject *pytdbpack_bad_type(char ch,
 375                                     const char *expected,
 376                                     PyObject *val_obj)
 377 {
 378         PyObject *r = PyObject_Repr(val_obj);
 379         if (!r)
 380                 return NULL;
 381         PyErr_Format(PyExc_TypeError,
 382                      "tdbpack: format '%c' requires %s, not %s",
 383                      ch, expected, PyString_AS_STRING(r));
 384         Py_DECREF(r);
 385         return val_obj;
 386 }
 387
 388
 389 /*
 390   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 391   but I don't know of one in plain libc, and it's probably not a big deal.  I
 392   realize this is kind of dumb because we'll almost always be on x86, but
 393   being safe is important.
 394 */
 395 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
 396 {
 397         (*pbuf)[0] =         val_long & 0xff;
 398         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 399         (*pbuf)[2] = (val_long >> 16) & 0xff;
 400         (*pbuf)[3] = (val_long >> 24) & 0xff;
 401         (*pbuf) += 4;
 402 }
 403
 404
 405 static void pack_bytes(long len, const char *from,
 406                        unsigned char **pbuf)
 407 {
 408         memcpy(*pbuf, from, len);
 409         (*pbuf) += len;
 410 }
 411
 412
 413 static void
 414 unpack_err_too_short(void)
 415 {
 416         PyErr_Format(PyExc_IndexError,
 417                      __FUNCTION__ ": data too short for unpack format");
 418 }
 419
 420
 421 static PyObject *
 422 unpack_int32(char **pbuf, int *plen)
 423 {
 424         long v;
 425         unsigned char *b;
 426
 427         if (*plen < 4) {
 428                 unpack_err_too_short();
 429                 return NULL;
 430         }
 431
 432         b = *pbuf;
 433         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 434
 435         (*pbuf) += 4;
 436         (*plen) -= 4;
 437
 438         return PyInt_FromLong(v);
 439 }
 440
 441
 442 static PyObject *unpack_int16(char **pbuf, int *plen)
 443 {
 444         long v;
 445         unsigned char *b;
 446
 447         if (*plen < 2) {
 448                 unpack_err_too_short();
 449                 return NULL;
 450         }
 451
 452         b = *pbuf;
 453         v = b[0] | b[1]<<8;
 454
 455         (*pbuf) += 2;
 456         (*plen) -= 2;
 457
 458         return PyInt_FromLong(v);
 459 }
 460
 461
 462 static PyObject *
 463 unpack_string(char **pbuf, int *plen)
 464 {
 465         int len;
 466         char *nul_ptr, *start;
 467
 468         start = *pbuf;
 469
 470         nul_ptr = memchr(start, '\0', *plen);
 471         if (!nul_ptr) {
 472                 unpack_err_too_short();
 473                 return NULL;
 474         }
 475
 476         len = nul_ptr - start;
 477
 478         *pbuf += len + 1;       /* skip \0 */
 479         *plen -= len + 1;
 480
 481         return PyString_FromStringAndSize(start, len);
 482 }
 483
 484
 485 static PyObject *
 486 unpack_buffer(char **pbuf, int *plen)
 487 {
 488         /* first get 32-bit len */
 489         long slen;
 490         unsigned char *b;
 491         unsigned char *start;
 492
 493         if (*plen < 4) {
 494                 unpack_err_too_short();
 495                 return NULL;
 496         }
 497
 498         b = *pbuf;
 499         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 500
 501         if (slen < 0) { /* surely you jest */
 502                 PyErr_Format(PyExc_ValueError,
 503                              __FUNCTION__ ": buffer seems to have negative length");
 504                 return NULL;
 505         }
 506
 507         (*pbuf) += 4;
 508         (*plen) -= 4;
 509         start = *pbuf;
 510
 511         if (*plen < slen) {
 512                 PyErr_Format(PyExc_IndexError,
 513                              __FUNCTION__ ": not enough data to unpack buffer: "
 514                              "need %d bytes, have %d",
 515                              (int) slen, *plen);
 516                 return NULL;
 517         }
 518
 519         (*pbuf) += slen;
 520         (*plen) -= slen;
 521
 522         return PyString_FromStringAndSize(start, slen);
 523 }
 524
 525
 526 /* Unpack a single field from packed data, according to format character CH.
 527    Remaining data is at *PBUF, of *PLEN.
 528
 529    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 530    been consumed.
 531
 532    Returns a reference to the unpacked Python object, or NULL for failure.
 533 */
 534 static PyObject *pytdbpack_unpack_item(char ch,
 535                                        char **pbuf,
 536                                        int *plen)
 537 {
 538         if (ch == 'w') {        /* 16-bit int */
 539                 return unpack_int16(pbuf, plen);
 540         }
 541         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 542                 /* pointers can just come through as integers */
 543                 return unpack_int32(pbuf, plen);
 544         }
 545         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 546                 return unpack_string(pbuf, plen);
 547         }
 548         else if (ch == 'B') { /* length, buffer */
 549                 return unpack_buffer(pbuf, plen);
 550         }
 551         else {
 552                 PyErr_Format(PyExc_ValueError,
 553                              __FUNCTION__ ": format character '%c' is not supported",
 554                              ch);
 555
 556                 return NULL;
 557         }
 558 }
 559
 560
 561
 562
 563 /*
 564   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 565   PACKED_BUF.
 566
 567   The string has already been checked out, so we know that VAL_SEQ is large
 568   enough to hold the packed data, and that there are enough value items.
 569   (However, their types may not have been thoroughly checked yet.)
 570
 571   In addition, val_seq is a Python Fast sequence.
 572
 573   Returns NULL for error (with exception set), or None.
 574 */
 575 PyObject *
 576 pytdbpack_pack_data(const char *format_str,
 577                     PyObject *val_seq,
 578                     unsigned char *packed)
 579 {
 580         int i;
 581
 582         for (i = 0; format_str[i]; i++) {
 583                 char ch = format_str[i];
 584                 PyObject *val_obj;
 585
 586                 /* borrow a reference to the item */
 587                 val_obj = PySequence_GetItem(val_seq, i);
 588                 if (!val_obj)
 589                         return NULL;
 590
 591                 if (ch == 'w') {
 592                         unsigned long val_long = PyInt_AsLong(val_obj);
 593                         (packed)[0] = val_long & 0xff;
 594                         (packed)[1] = (val_long >> 8) & 0xff;
 595                         (packed) += 2;
 596                 }
 597                 else if (ch == 'd') {
 598                         /* 4-byte LE number */
 599                         pack_int32(PyInt_AsLong(val_obj), &packed);
 600                 }
 601                 else if (ch == 'p') {
 602                         /* "Pointer" value -- in the subset of DCERPC used by Samba,
 603                            this is really just an "exists" or "does not exist"
 604                            flag. */
 605                         pack_int32(PyObject_IsTrue(val_obj), &packed);
 606                 }
 607                 else if (ch == 'f' || ch == 'P') {
 608                         int size;
 609                         char *sval;
 610
 611                         size = PyString_GET_SIZE(val_obj);
 612                         sval = PyString_AS_STRING(val_obj);
 613                         pack_bytes(size+1, sval, &packed); /* include nul */
 614                 }
 615                 else if (ch == 'B') {
 616                         long size;
 617                         char *sval;
 618
 619                         size = PyInt_AsLong(val_obj);
 620                         pack_int32(size, &packed);
 621
 622                         val_obj = PySequence_GetItem(val_seq, ++i);
 623                         if (!val_obj)
 624                                 return NULL;
 625
 626                         sval = PyString_AsString(val_obj);
 627                         if (!sval)
 628                                 return NULL;
 629
 630                         pack_bytes(size, sval, &packed); /* do not include nul */
 631                 }
 632                 else {
 633                         /* this ought to be caught while calculating the length, but
 634                            just in case. */
 635                         PyErr_Format(PyExc_ValueError,
 636                                      "%s: format character '%c' is not supported",
 637                                      __FUNCTION__, ch);
 638
 639                         return NULL;
 640                 }
 641         }
 642
 643         return Py_None;
 644 }
 645
 646
 647
 648 static PyMethodDef pytdbpack_methods[] = {
 649         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 650         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 651 };
 652
 653 DL_EXPORT(void)
 654 inittdbpack(void)
 655 {
 656         Py_InitModule3("tdbpack", pytdbpack_methods,
 657                        (char *) pytdbpack_docstring);
 658 }