source3/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char,
  34                                       char **pbuf,
  35                                       int *plen);
  36 static int
  37 pytdbpack_calc_item_len(char format_ch,
  38                         PyObject *val_obj);
  39
  40 static PyObject *pytdbpack_pack_data(const char *format_str,
  41                                      PyObject *val_seq,
  42                                      unsigned char *buf);
  43
  44
  45
  46 static const char * pytdbpack_docstring =
  47 "Convert between Python values and Samba binary encodings.
  48
  49 This module is conceptually similar to the standard 'struct' module, but it
  50 uses both a different binary format and a different description string.
  51
  52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  53 little-endian, unpadded, non-self-describing binary format.  It is intended
  54 that these functions be as similar as possible to the routines in Samba's
  55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  56
  57 Python strings are used to specify the format of data to be packed or
  58 unpacked.
  59
  60 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  61 module must make appropriate translations if necessary, typically to and from
  62 Unicode objects.
  63
  64 tdbpack format strings:
  65
  66     'f':  NULL-terminated string in DOS codepage
  67
  68     'P':  same as 'f'
  69
  70     'd':  4 byte little-endian number
  71
  72     'w':  2 byte little-endian number
  73
  74     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  75           really just an \"exists\" or \"does not exist\" flag.  The boolean
  76           value of the Python object is used.
  77
  78     'B': 4-byte LE length, followed by that many bytes of binary data.
  79          Corresponds to a Python byte string of the appropriate length.
  80
  81     '$': Special flag indicating that the preceding format code should be
  82          repeated while data remains.  This is only supported for unpacking.
  83
  84     Every code corresponds to a single Python object, except 'B' which
  85     corresponds to two values (length and contents), and '$', which produces
  86     however many make sense.
  87 ";
  88
  89
  90 static char const pytdbpack_pack_doc[] =
  91 "pack(format, values) -> buffer
  92 Pack Python objects into Samba binary format according to format string.
  93
  94 arguments:
  95     format -- string of tdbpack format characters
  96     values -- sequence of value objects corresponding 1:1 to format characters
  97
  98 returns:
  99     buffer -- string containing packed data
 100
 101 raises:
 102     IndexError -- if there are too few values for the format
 103     ValueError -- if any of the format characters is illegal
 104     TypeError  -- if the format is not a string, or values is not a sequence,
 105         or any of the values is of the wrong type for the corresponding
 106         format character
 107
 108 notes:
 109     For historical reasons, it is not an error to pass more values than are consumed
 110     by the format.
 111 ";
 112
 113
 114 static char const pytdbpack_unpack_doc[] =
 115 "unpack(format, buffer) -> (values, rest)
 116 Unpack Samba binary data according to format string.
 117
 118 arguments:
 119     format -- string of tdbpack characters
 120     buffer -- string of packed binary data
 121
 122 returns:
 123     2-tuple of:
 124         values -- sequence of values corresponding 1:1 to format characters
 125         rest -- string containing data that was not decoded, or '' if the
 126             whole string was consumed
 127
 128 raises:
 129     IndexError -- if there is insufficient data in the buffer for the
 130         format (or if the data is corrupt and contains a variable-length
 131         field extending past the end)
 132     ValueError -- if any of the format characters is illegal
 133
 134 notes:
 135     Because unconsumed data is returned, you can feed it back in to the
 136     unpacker to extract further fields.  Alternatively, if you wish to modify
 137     some fields near the start of the data, you may be able to save time by
 138     only unpacking and repacking the necessary part.
 139 ";
 140
 141
 142
 143 /*
 144   Game plan is to first of all walk through the arguments and calculate the
 145   total length that will be required.  We allocate a Python string of that
 146   size, then walk through again and fill it in.
 147
 148   We just borrow references to all the passed arguments, since none of them
 149   need to be permanently stored.  We transfer ownership to the returned
 150   object.
 151  */
 152 static PyObject *
 153 pytdbpack_pack(PyObject *self,
 154                PyObject *args)
 155 {
 156         char *format_str;
 157         PyObject *val_seq, *fast_seq, *buf_str;
 158         int reqd_len;
 159         char *packed_buf;
 160
 161         /* TODO: Test passing wrong types or too many arguments */
 162         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 163                 return NULL;
 164
 165         /* Convert into a list or tuple (if not already one), so that we can
 166          * index more easily. */
 167         fast_seq = PySequence_Fast(val_seq,
 168                                    __FUNCTION__ ": argument 2 must be sequence");
 169         if (!fast_seq)
 170                 return NULL;
 171
 172         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 173         if (reqd_len == -1)     /* exception was thrown */
 174                 return NULL;
 175
 176         /* Allocate space.
 177
 178            This design causes an unnecessary copying of the data when Python
 179            constructs an object, and that might possibly be avoided by using a
 180            Buffer object of some kind instead.  I'm not doing that for now
 181            though.  */
 182         packed_buf = malloc(reqd_len);
 183         if (!packed_buf) {
 184                 PyErr_Format(PyExc_MemoryError,
 185                              "%s: couldn't allocate %d bytes for packed buffer",
 186                              __FUNCTION__, reqd_len);
 187                 return NULL;
 188         }
 189
 190         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 191                 free(packed_buf);
 192                 return NULL;
 193         }
 194
 195         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 196         free(packed_buf);       /* get rid of tmp buf */
 197
 198         return buf_str;
 199 }
 200
 201
 202
 203 static PyObject *
 204 pytdbpack_unpack(PyObject *self,
 205                  PyObject *args)
 206 {
 207         char *format_str, *packed_str, *ppacked;
 208         PyObject *val_list = NULL, *ret_tuple = NULL;
 209         PyObject *rest_string = NULL;
 210         int format_len, packed_len;
 211         int i;
 212         char last_format = '#';
 213
 214         /* get arguments */
 215         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 216                 return NULL;
 217
 218         format_len = strlen(format_str);
 219
 220         /* allocate list to hold results */
 221         val_list = PyList_New(format_len);
 222         if (!val_list)
 223                 goto failed;
 224         ret_tuple = PyTuple_New(2);
 225         if (!ret_tuple)
 226                 goto failed;
 227
 228         /* For every object, unpack.  */
 229         for (ppacked = packed_str, i = 0; i < format_len; i++) {
 230                 PyObject *val_obj;
 231                 char format;
 232
 233                 format = format_str[i];
 234                 if (format == '$') {
 235                         if (i == 0) {
 236                                 PyErr_Format(PyExc_ValueError,
 237                                              "%s: '$' may not be first character in format",
 238                                              __FUNCTION__);
 239                                 goto failed;
 240                         }
 241                         else {
 242                                 format = last_format; /* repeat */
 243                         }
 244                 }
 245
 246                 val_obj = pytdbpack_unpack_item(format,
 247                                                 &ppacked,
 248                                                 &packed_len);
 249                 if (!val_obj)
 250                         goto failed;
 251
 252                 PyList_SET_ITEM(val_list, i, val_obj);
 253                 last_format = format;
 254         }
 255
 256         /* put leftovers in box for lunch tomorrow */
 257         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 258         if (!rest_string)
 259                 goto failed;
 260
 261         /* return (values, rest) tuple; give up references to them */
 262         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 263         val_list = NULL;
 264         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 265         val_list = NULL;
 266         return ret_tuple;
 267
 268   failed:
 269         /* handle failure: deallocate anything */
 270         Py_XDECREF(val_list);
 271         Py_XDECREF(ret_tuple);
 272         Py_XDECREF(rest_string);
 273         return NULL;
 274 }
 275
 276
 277 /*
 278   Internal routine that calculates how many bytes will be required to
 279   encode the values in the format.
 280
 281   Also checks that the value list is the right size for the format list.
 282
 283   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 284   which case a Python exception has been raised.
 285
 286   Arguments:
 287
 288     val_seq: a Fast Sequence (list or tuple), being all the values
 289 */
 290 static int
 291 pytdbpack_calc_reqd_len(char *format_str,
 292                         PyObject *val_seq)
 293 {
 294         int len = 0;
 295         char *p;
 296         int val_i;
 297         int val_len;
 298
 299         val_len = PySequence_Length(val_seq);
 300         if (val_len == -1)
 301                 return -1;
 302
 303         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 304                 char ch = *p;
 305                 PyObject *val_obj;
 306                 int item_len;
 307
 308                 if (val_i >= val_len) {
 309                         PyErr_Format(PyExc_IndexError,
 310                                      "%s: value list is too short for format string",
 311                                      __FUNCTION__);
 312                         return -1;
 313                 }
 314
 315                 /* borrow a reference to the item */
 316                 val_obj = PySequence_GetItem(val_seq, val_i);
 317                 if (!val_obj)
 318                         return -1;
 319
 320                 item_len = pytdbpack_calc_item_len(ch, val_obj);
 321                 if (item_len == -1)
 322                         return -1;
 323                 else
 324                         len += item_len;
 325         }
 326
 327         return len;
 328 }
 329
 330
 331 static PyObject *pytdbpack_bad_type(char ch,
 332                                     const char *expected,
 333                                     PyObject *val_obj)
 334 {
 335         PyObject *r = PyObject_Repr(val_obj);
 336         if (!r)
 337                 return NULL;
 338         PyErr_Format(PyExc_TypeError,
 339                      "tdbpack: format '%c' requires %s, not %s",
 340                      ch, expected, PyString_AS_STRING(r));
 341         Py_DECREF(r);
 342         return val_obj;
 343 }
 344
 345
 346 /*
 347  * Calculate the number of bytes required to pack a single value.  While doing
 348  * this, also conduct some initial checks that the argument types are
 349  * reasonable.
 350  *
 351  * Returns -1 on exception.
 352  */
 353 static int
 354 pytdbpack_calc_item_len(char ch,
 355                         PyObject *val_obj)
 356 {
 357         if (ch == 'd' || ch == 'w') {
 358                 if (!PyInt_Check(val_obj)) {
 359                         pytdbpack_bad_type(ch, "Int", val_obj);
 360                         return -1;
 361                 }
 362                 if (ch == 'w')
 363                         return 2;
 364                 else
 365                         return 4;
 366         } else if (ch == 'p') {
 367                 return 4;
 368         }
 369         else if (ch == 'f' || ch == 'P' || ch == 'B') {
 370                 /* nul-terminated 8-bit string */
 371                 if (!PyString_Check(val_obj)) {
 372                         pytdbpack_bad_type(ch, "String", val_obj);
 373                         return -1;
 374                 }
 375
 376                 if (ch == 'B') {
 377                         /* byte buffer; just use Python string's length, plus
 378                            a preceding word */
 379                         return 4 + PyString_GET_SIZE(val_obj);
 380                 }
 381                 else {
 382                         /* one nul character */
 383                         return 1 + PyString_GET_SIZE(val_obj);
 384                 }
 385         }
 386         else {
 387                 PyErr_Format(PyExc_ValueError,
 388                              "tdbpack: format character '%c' is not supported",
 389                              ch);
 390
 391                 return -1;
 392         }
 393 }
 394
 395
 396 /*
 397   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 398   but I don't know of one in plain libc, and it's probably not a big deal.  I
 399   realize this is kind of dumb because we'll almost always be on x86, but
 400   being safe is important.
 401 */
 402 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
 403 {
 404         (*pbuf)[0] =         val_long & 0xff;
 405         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 406         (*pbuf)[2] = (val_long >> 16) & 0xff;
 407         (*pbuf)[3] = (val_long >> 24) & 0xff;
 408         (*pbuf) += 4;
 409 }
 410
 411
 412 static void pack_bytes(long len, const char *from,
 413                        unsigned char **pbuf)
 414 {
 415         memcpy(*pbuf, from, len);
 416         (*pbuf) += len;
 417 }
 418
 419
 420 static void
 421 unpack_err_too_short(void)
 422 {
 423         PyErr_Format(PyExc_IndexError,
 424                      __FUNCTION__ ": data too short for unpack format");
 425 }
 426
 427
 428 static PyObject *
 429 unpack_int32(char **pbuf, int *plen)
 430 {
 431         long v;
 432         unsigned char *b;
 433
 434         if (*plen < 4) {
 435                 unpack_err_too_short();
 436                 return NULL;
 437         }
 438
 439         b = *pbuf;
 440         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 441
 442         (*pbuf) += 4;
 443         (*plen) -= 4;
 444
 445         return PyInt_FromLong(v);
 446 }
 447
 448
 449 static PyObject *unpack_int16(char **pbuf, int *plen)
 450 {
 451         long v;
 452         unsigned char *b;
 453
 454         if (*plen < 2) {
 455                 unpack_err_too_short();
 456                 return NULL;
 457         }
 458
 459         b = *pbuf;
 460         v = b[0] | b[1]<<8;
 461
 462         (*pbuf) += 2;
 463         (*plen) -= 2;
 464
 465         return PyInt_FromLong(v);
 466 }
 467
 468
 469 static PyObject *
 470 unpack_string(char **pbuf, int *plen)
 471 {
 472         int len;
 473         char *nul_ptr, *start;
 474
 475         start = *pbuf;
 476
 477         nul_ptr = memchr(start, '\0', *plen);
 478         if (!nul_ptr) {
 479                 unpack_err_too_short();
 480                 return NULL;
 481         }
 482
 483         len = nul_ptr - start;
 484
 485         *pbuf += len + 1;       /* skip \0 */
 486         *plen -= len + 1;
 487
 488         return PyString_FromStringAndSize(start, len);
 489 }
 490
 491
 492 static PyObject *
 493 unpack_buffer(char **pbuf, int *plen)
 494 {
 495         /* first get 32-bit len */
 496         long slen;
 497         unsigned char *b;
 498         unsigned char *start;
 499
 500         if (*plen < 4) {
 501                 unpack_err_too_short();
 502                 return NULL;
 503         }
 504
 505         b = *pbuf;
 506         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 507
 508         if (slen < 0) { /* surely you jest */
 509                 PyErr_Format(PyExc_ValueError,
 510                              __FUNCTION__ ": buffer seems to have negative length");
 511                 return NULL;
 512         }
 513
 514         (*pbuf) += 4;
 515         (*plen) -= 4;
 516         start = *pbuf;
 517
 518         if (*plen < slen) {
 519                 PyErr_Format(PyExc_IndexError,
 520                              __FUNCTION__ ": not enough data to unpack buffer: "
 521                              "need %d bytes, have %d",
 522                              (int) slen, *plen);
 523                 return NULL;
 524         }
 525
 526         (*pbuf) += slen;
 527         (*plen) -= slen;
 528
 529         return PyString_FromStringAndSize(start, slen);
 530 }
 531
 532
 533 /* Unpack a single field from packed data, according to format character CH.
 534    Remaining data is at *PBUF, of *PLEN.
 535
 536    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 537    been consumed.
 538
 539    Returns a reference to the unpacked Python object, or NULL for failure.
 540 */
 541 static PyObject *pytdbpack_unpack_item(char ch,
 542                                        char **pbuf,
 543                                        int *plen)
 544 {
 545         if (ch == 'w') {        /* 16-bit int */
 546                 return unpack_int16(pbuf, plen);
 547         }
 548         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 549                 /* pointers can just come through as integers */
 550                 return unpack_int32(pbuf, plen);
 551         }
 552         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 553                 return unpack_string(pbuf, plen);
 554         }
 555         else if (ch == 'B') { /* length, buffer */
 556                 return unpack_buffer(pbuf, plen);
 557         }
 558         else {
 559                 PyErr_Format(PyExc_ValueError,
 560                              __FUNCTION__ ": format character '%c' is not supported",
 561                              ch);
 562
 563                 return NULL;
 564         }
 565 }
 566
 567
 568
 569 /*
 570   Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
 571   and advance the pointer.  Buffer length has been pre-calculated so we are
 572   sure that there is enough space.
 573
 574 */
 575 static PyObject *
 576 pytdbpack_pack_item(char ch,
 577                     PyObject *val_obj,
 578                     unsigned char **pbuf)
 579 {
 580         if (ch == 'w') {
 581                 unsigned long val_long = PyInt_AsLong(val_obj);
 582                 (*pbuf)[0] = val_long & 0xff;
 583                 (*pbuf)[1] = (val_long >> 8) & 0xff;
 584                 (*pbuf) += 2;
 585         }
 586         else if (ch == 'd') {
 587                 /* 4-byte LE number */
 588                 pack_int32(PyInt_AsLong(val_obj), pbuf);
 589         }
 590         else if (ch == 'p') {
 591                 /* "Pointer" value -- in the subset of DCERPC used by Samba,
 592                    this is really just an "exists" or "does not exist"
 593                    flag. */
 594                 pack_int32(PyObject_IsTrue(val_obj), pbuf);
 595         }
 596         else if (ch == 'f' || ch == 'P') {
 597                 int size;
 598                 char *sval;
 599
 600                 size = PyString_GET_SIZE(val_obj);
 601                 sval = PyString_AS_STRING(val_obj);
 602                 pack_bytes(size+1, sval, pbuf); /* include nul */
 603         }
 604         else if (ch == 'B') {
 605                 int size;
 606                 char *sval;
 607
 608                 size = PyString_GET_SIZE(val_obj);
 609                 pack_int32(size, pbuf);
 610                 sval = PyString_AS_STRING(val_obj);
 611                 pack_bytes(size, sval, pbuf); /* do not include nul */
 612         }
 613         else {
 614                 /* this ought to be caught while calculating the length, but
 615                    just in case. */
 616                 PyErr_Format(PyExc_ValueError,
 617                              "%s: format character '%c' is not supported",
 618                              __FUNCTION__, ch);
 619
 620                 return NULL;
 621         }
 622
 623         return Py_None;
 624 }
 625
 626
 627 /*
 628   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 629   PACKED_BUF.
 630
 631   The string has already been checked out, so we know that VAL_SEQ is large
 632   enough to hold the packed data, and that there are enough value items.
 633   (However, their types may not have been thoroughly checked yet.)
 634
 635   In addition, val_seq is a Python Fast sequence.
 636
 637   Returns NULL for error (with exception set), or None.
 638 */
 639 PyObject *
 640 pytdbpack_pack_data(const char *format_str,
 641                     PyObject *val_seq,
 642                     unsigned char *packed_buf)
 643 {
 644         int i;
 645
 646         for (i = 0; format_str[i]; i++) {
 647                 char ch = format_str[i];
 648                 PyObject *val_obj;
 649
 650                 /* borrow a reference to the item */
 651                 val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
 652                 if (!val_obj)
 653                         return NULL;
 654
 655                 if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
 656                         return NULL;
 657         }
 658
 659         return Py_None;
 660 }
 661
 662
 663
 664
 665
 666 static PyMethodDef pytdbpack_methods[] = {
 667         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 668         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 669 };
 670
 671 DL_EXPORT(void)
 672 inittdbpack(void)
 673 {
 674         Py_InitModule3("tdbpack", pytdbpack_methods,
 675                        (char *) pytdbpack_docstring);
 676 }