source3/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
  34
  35 static PyObject *pytdbpack_pack_data(const char *format_str,
  36                                      PyObject *val_seq,
  37                                      unsigned char *buf);
  38
  39
  40
  41
  42 static PyObject *pytdbpack_bad_type(char ch,
  43                                     const char *expected,
  44                                     PyObject *val_obj);
  45
  46 static const char * pytdbpack_docstring =
  47 "Convert between Python values and Samba binary encodings.
  48
  49 This module is conceptually similar to the standard 'struct' module, but it
  50 uses both a different binary format and a different description string.
  51
  52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  53 little-endian, unpadded, non-self-describing binary format.  It is intended
  54 that these functions be as similar as possible to the routines in Samba's
  55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  56
  57 Python strings are used to specify the format of data to be packed or
  58 unpacked.
  59
  60 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  61 module must make appropriate translations if necessary, typically to and from
  62 Unicode objects.
  63
  64 tdbpack format strings:
  65
  66     'f':  NULL-terminated string in DOS codepage
  67
  68     'P':  same as 'f'
  69
  70     'd':  4 byte little-endian unsigned number
  71
  72     'w':  2 byte little-endian unsigned number
  73
  74     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  75           really just an \"exists\" or \"does not exist\" flag.  The boolean
  76           value of the Python object is used.
  77
  78     'B': 4-byte LE length, followed by that many bytes of binary data.
  79          Corresponds to a Python integer giving the length, followed by a byte
  80          string of the appropriate length.
  81
  82     '$': Special flag indicating that the preceding format code should be
  83          repeated while data remains.  This is only supported for unpacking.
  84
  85     Every code corresponds to a single Python object, except 'B' which
  86     corresponds to two values (length and contents), and '$', which produces
  87     however many make sense.
  88 ";
  89
  90
  91 static char const pytdbpack_pack_doc[] =
  92 "pack(format, values) -> buffer
  93 Pack Python objects into Samba binary format according to format string.
  94
  95 arguments:
  96     format -- string of tdbpack format characters
  97     values -- sequence of value objects corresponding 1:1 to format characters
  98
  99 returns:
 100     buffer -- string containing packed data
 101
 102 raises:
 103     IndexError -- if there are too few values for the format
 104     ValueError -- if any of the format characters is illegal
 105     TypeError  -- if the format is not a string, or values is not a sequence,
 106         or any of the values is of the wrong type for the corresponding
 107         format character
 108
 109 notes:
 110     For historical reasons, it is not an error to pass more values than are consumed
 111     by the format.
 112 ";
 113
 114
 115 static char const pytdbpack_unpack_doc[] =
 116 "unpack(format, buffer) -> (values, rest)
 117 Unpack Samba binary data according to format string.
 118
 119 arguments:
 120     format -- string of tdbpack characters
 121     buffer -- string of packed binary data
 122
 123 returns:
 124     2-tuple of:
 125         values -- sequence of values corresponding 1:1 to format characters
 126         rest -- string containing data that was not decoded, or '' if the
 127             whole string was consumed
 128
 129 raises:
 130     IndexError -- if there is insufficient data in the buffer for the
 131         format (or if the data is corrupt and contains a variable-length
 132         field extending past the end)
 133     ValueError -- if any of the format characters is illegal
 134
 135 notes:
 136     Because unconsumed data is returned, you can feed it back in to the
 137     unpacker to extract further fields.  Alternatively, if you wish to modify
 138     some fields near the start of the data, you may be able to save time by
 139     only unpacking and repacking the necessary part.
 140 ";
 141
 142
 143
 144 /*
 145   Game plan is to first of all walk through the arguments and calculate the
 146   total length that will be required.  We allocate a Python string of that
 147   size, then walk through again and fill it in.
 148
 149   We just borrow references to all the passed arguments, since none of them
 150   need to be permanently stored.  We transfer ownership to the returned
 151   object.
 152  */
 153 static PyObject *
 154 pytdbpack_pack(PyObject *self,
 155                PyObject *args)
 156 {
 157         char *format_str;
 158         PyObject *val_seq, *fast_seq, *buf_str;
 159         int reqd_len;
 160         char *packed_buf;
 161
 162         /* TODO: Test passing wrong types or too many arguments */
 163         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 164                 return NULL;
 165
 166         /* Convert into a list or tuple (if not already one), so that we can
 167          * index more easily. */
 168         fast_seq = PySequence_Fast(val_seq,
 169                                    __FUNCTION__ ": argument 2 must be sequence");
 170         if (!fast_seq)
 171                 return NULL;
 172
 173         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 174         if (reqd_len == -1)     /* exception was thrown */
 175                 return NULL;
 176
 177         /* Allocate space.
 178
 179            This design causes an unnecessary copying of the data when Python
 180            constructs an object, and that might possibly be avoided by using a
 181            Buffer object of some kind instead.  I'm not doing that for now
 182            though.  */
 183         packed_buf = malloc(reqd_len);
 184         if (!packed_buf) {
 185                 PyErr_Format(PyExc_MemoryError,
 186                              "%s: couldn't allocate %d bytes for packed buffer",
 187                              __FUNCTION__, reqd_len);
 188                 return NULL;
 189         }
 190
 191         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 192                 free(packed_buf);
 193                 return NULL;
 194         }
 195
 196         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 197         free(packed_buf);       /* get rid of tmp buf */
 198
 199         return buf_str;
 200 }
 201
 202
 203
 204 static PyObject *
 205 pytdbpack_unpack(PyObject *self,
 206                  PyObject *args)
 207 {
 208         char *format_str, *packed_str, *ppacked;
 209         PyObject *val_list = NULL, *ret_tuple = NULL;
 210         PyObject *rest_string = NULL;
 211         int format_len, packed_len;
 212         int i;
 213         char last_format = '#';
 214
 215         /* get arguments */
 216         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 217                 return NULL;
 218
 219         format_len = strlen(format_str);
 220
 221         /* Allocate list to hold results.  Initially empty, and we append
 222            results as we go along. */
 223         val_list = PyList_New(0);
 224         if (!val_list)
 225                 goto failed;
 226         ret_tuple = PyTuple_New(2);
 227         if (!ret_tuple)
 228                 goto failed;
 229
 230         /* For every object, unpack.  */
 231         for (ppacked = packed_str, i = 0; i < format_len; i++) {
 232                 char format;
 233
 234                 format = format_str[i];
 235                 if (format == '$') {
 236                         if (i == 0) {
 237                                 PyErr_Format(PyExc_ValueError,
 238                                              "%s: '$' may not be first character in format",
 239                                              __FUNCTION__);
 240                                 goto failed;
 241                         }
 242                         else {
 243                                 format = last_format; /* repeat */
 244                         }
 245                 }
 246
 247                 if (!pytdbpack_unpack_item(format, &ppacked, &packed_len, val_list))
 248                         goto failed;
 249
 250                 last_format = format;
 251         }
 252
 253         /* save leftovers for next time */
 254         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 255         if (!rest_string)
 256                 goto failed;
 257
 258         /* return (values, rest) tuple; give up references to them */
 259         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 260         val_list = NULL;
 261         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 262         val_list = NULL;
 263         return ret_tuple;
 264
 265   failed:
 266         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 267            pointers for objects that haven't been allocated yet. */
 268         Py_XDECREF(val_list);
 269         Py_XDECREF(ret_tuple);
 270         Py_XDECREF(rest_string);
 271         return NULL;
 272 }
 273
 274
 275 /*
 276   Internal routine that calculates how many bytes will be required to
 277   encode the values in the format.
 278
 279   Also checks that the value list is the right size for the format list.
 280
 281   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 282   which case a Python exception has been raised.
 283
 284   Arguments:
 285
 286     val_seq: a Fast Sequence (list or tuple), being all the values
 287 */
 288 static int
 289 pytdbpack_calc_reqd_len(char *format_str,
 290                         PyObject *val_seq)
 291 {
 292         int len = 0;
 293         char *p;
 294         int val_i;
 295         int val_len;
 296
 297         val_len = PySequence_Length(val_seq);
 298         if (val_len == -1)
 299                 return -1;
 300
 301         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 302                 char ch = *p;
 303
 304                 if (val_i >= val_len) {
 305                         PyErr_Format(PyExc_IndexError,
 306                                      "%s: value list is too short for format string",
 307                                      __FUNCTION__);
 308                         return -1;
 309                 }
 310
 311                 /* borrow a reference to the item */
 312                 if (ch == 'd' || ch == 'p')
 313                         len += 4;
 314                 else if (ch == 'w')
 315                         len += 2;
 316                 else if (ch == 'f' || ch == 'P') {
 317                         /* nul-terminated 8-bit string */
 318                         int item_len;
 319                         PyObject *str_obj;
 320
 321                         str_obj = PySequence_GetItem(val_seq, val_i);
 322                         if (!str_obj)
 323                                 return -1;
 324
 325                         if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
 326                                 pytdbpack_bad_type(ch, "String", str_obj);
 327                                 return -1;
 328                         }
 329
 330                         len += 1 + item_len;
 331                 }
 332                 else if (ch == 'B') {
 333                         /* length-preceded byte buffer: n bytes, plus a preceding
 334                          * word */
 335                         PyObject *len_obj;
 336                         long len_val;
 337
 338                         len_obj = PySequence_GetItem(val_seq, val_i);
 339                         val_i++; /* skip over buffer */
 340
 341                         if (!PyNumber_Check(len_obj)) {
 342                                 pytdbpack_bad_type(ch, "Number", len_obj);
 343                                 return -1;
 344                         }
 345
 346                         len_val = PyInt_AsLong(len_obj);
 347                         if (len_val < 0) {
 348                                 PyErr_Format(PyExc_ValueError,
 349                                              "%s: format 'B' requires positive integer", __FUNCTION__);
 350                                 return -1;
 351                         }
 352
 353                         len += 4 + len_val;
 354                 }
 355                 else {
 356                         PyErr_Format(PyExc_ValueError,
 357                                      "%s: format character '%c' is not supported",
 358                                      __FUNCTION__, ch);
 359
 360                         return -1;
 361                 }
 362         }
 363
 364         return len;
 365 }
 366
 367
 368 static PyObject *pytdbpack_bad_type(char ch,
 369                                     const char *expected,
 370                                     PyObject *val_obj)
 371 {
 372         PyObject *r = PyObject_Repr(val_obj);
 373         if (!r)
 374                 return NULL;
 375         PyErr_Format(PyExc_TypeError,
 376                      "tdbpack: format '%c' requires %s, not %s",
 377                      ch, expected, PyString_AS_STRING(r));
 378         Py_DECREF(r);
 379         return val_obj;
 380 }
 381
 382
 383 /*
 384   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 385   but I don't know of one in plain libc, and it's probably not a big deal.  I
 386   realize this is kind of dumb because we'll almost always be on x86, but
 387   being safe is important.
 388 */
 389 static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
 390 {
 391         (*pbuf)[0] =         val_long & 0xff;
 392         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 393         (*pbuf)[2] = (val_long >> 16) & 0xff;
 394         (*pbuf)[3] = (val_long >> 24) & 0xff;
 395         (*pbuf) += 4;
 396 }
 397
 398
 399 static void pack_bytes(long len, const char *from,
 400                        unsigned char **pbuf)
 401 {
 402         memcpy(*pbuf, from, len);
 403         (*pbuf) += len;
 404 }
 405
 406
 407 static void
 408 unpack_err_too_short(void)
 409 {
 410         PyErr_Format(PyExc_IndexError,
 411                      __FUNCTION__ ": data too short for unpack format");
 412 }
 413
 414
 415 static PyObject *
 416 unpack_uint32(char **pbuf, int *plen)
 417 {
 418         unsigned long v;
 419         unsigned char *b;
 420
 421         if (*plen < 4) {
 422                 unpack_err_too_short();
 423                 return NULL;
 424         }
 425
 426         b = *pbuf;
 427         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 428
 429         (*pbuf) += 4;
 430         (*plen) -= 4;
 431
 432         return PyLong_FromUnsignedLong(v);
 433 }
 434
 435
 436 static PyObject *unpack_int16(char **pbuf, int *plen)
 437 {
 438         long v;
 439         unsigned char *b;
 440
 441         if (*plen < 2) {
 442                 unpack_err_too_short();
 443                 return NULL;
 444         }
 445
 446         b = *pbuf;
 447         v = b[0] | b[1]<<8;
 448
 449         (*pbuf) += 2;
 450         (*plen) -= 2;
 451
 452         return PyInt_FromLong(v);
 453 }
 454
 455
 456 static PyObject *
 457 unpack_string(char **pbuf, int *plen)
 458 {
 459         int len;
 460         char *nul_ptr, *start;
 461
 462         start = *pbuf;
 463
 464         nul_ptr = memchr(start, '\0', *plen);
 465         if (!nul_ptr) {
 466                 unpack_err_too_short();
 467                 return NULL;
 468         }
 469
 470         len = nul_ptr - start;
 471
 472         *pbuf += len + 1;       /* skip \0 */
 473         *plen -= len + 1;
 474
 475         return PyString_FromStringAndSize(start, len);
 476 }
 477
 478
 479 static PyObject *
 480 unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 481 {
 482         /* first get 32-bit len */
 483         long slen;
 484         unsigned char *b;
 485         unsigned char *start;
 486         PyObject *str_obj = NULL, *len_obj = NULL;
 487
 488         if (*plen < 4) {
 489                 unpack_err_too_short();
 490                 return NULL;
 491         }
 492
 493         b = *pbuf;
 494         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 495
 496         if (slen < 0) { /* surely you jest */
 497                 PyErr_Format(PyExc_ValueError,
 498                              __FUNCTION__ ": buffer seems to have negative length");
 499                 return NULL;
 500         }
 501
 502         (*pbuf) += 4;
 503         (*plen) -= 4;
 504         start = *pbuf;
 505
 506         if (*plen < slen) {
 507                 PyErr_Format(PyExc_IndexError,
 508                              __FUNCTION__ ": not enough data to unpack buffer: "
 509                              "need %d bytes, have %d",
 510                              (int) slen, *plen);
 511                 return NULL;
 512         }
 513
 514         (*pbuf) += slen;
 515         (*plen) -= slen;
 516
 517         if (!(len_obj = PyInt_FromLong(slen)))
 518                 goto failed;
 519
 520         if (PyList_Append(val_list, len_obj) == -1)
 521                 goto failed;
 522
 523         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 524                 goto failed;
 525
 526         if (PyList_Append(val_list, str_obj) == -1)
 527                 goto failed;
 528
 529         return val_list;
 530
 531   failed:
 532         Py_XDECREF(len_obj);    /* handles NULL */
 533         Py_XDECREF(str_obj);
 534         return NULL;
 535 }
 536
 537
 538 /* Unpack a single field from packed data, according to format character CH.
 539    Remaining data is at *PBUF, of *PLEN.
 540
 541    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 542    been consumed.
 543
 544    Returns a reference to None, or NULL for failure.
 545 */
 546 static PyObject *pytdbpack_unpack_item(char ch,
 547                                        char **pbuf,
 548                                        int *plen,
 549                                        PyObject *val_list)
 550 {
 551         PyObject *result;
 552
 553         if (ch == 'w') {        /* 16-bit int */
 554                 result = unpack_int16(pbuf, plen);
 555         }
 556         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 557                 /* pointers can just come through as integers */
 558                 result = unpack_uint32(pbuf, plen);
 559         }
 560         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 561                 result = unpack_string(pbuf, plen);
 562         }
 563         else if (ch == 'B') { /* length, buffer */
 564                 return unpack_buffer(pbuf, plen, val_list);
 565         }
 566         else {
 567                 PyErr_Format(PyExc_ValueError,
 568                              __FUNCTION__ ": format character '%c' is not supported",
 569                              ch);
 570
 571                 return NULL;
 572         }
 573
 574         /* otherwise OK */
 575         if (!result)
 576                 return NULL;
 577         if (PyList_Append(val_list, result) == -1)
 578                 return NULL;
 579
 580         return val_list;
 581 }
 582
 583
 584
 585
 586 /*
 587   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 588   PACKED_BUF.
 589
 590   The string has already been checked out, so we know that VAL_SEQ is large
 591   enough to hold the packed data, and that there are enough value items.
 592   (However, their types may not have been thoroughly checked yet.)
 593
 594   In addition, val_seq is a Python Fast sequence.
 595
 596   Returns NULL for error (with exception set), or None.
 597 */
 598 PyObject *
 599 pytdbpack_pack_data(const char *format_str,
 600                     PyObject *val_seq,
 601                     unsigned char *packed)
 602 {
 603         int format_i, val_i = 0;
 604
 605         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 606                 char ch = format_str[format_i];
 607                 PyObject *val_obj;
 608
 609                 /* borrow a reference to the item */
 610                 val_obj = PySequence_GetItem(val_seq, val_i++);
 611                 if (!val_obj)
 612                         return NULL;
 613
 614                 if (ch == 'w') {
 615                         unsigned long val_long;
 616                         PyObject *long_obj;
 617
 618                         if (!(long_obj = PyNumber_Long(val_obj))) {
 619                                 pytdbpack_bad_type(ch, "Long", val_obj);
 620                                 return NULL;
 621                         }
 622
 623                         val_long = PyLong_AsUnsignedLong(long_obj);
 624                         (packed)[0] = val_long & 0xff;
 625                         (packed)[1] = (val_long >> 8) & 0xff;
 626                         (packed) += 2;
 627                         Py_DECREF(long_obj);
 628                 }
 629                 else if (ch == 'd') {
 630                         /* 4-byte LE number */
 631                         PyObject *long_obj;
 632
 633                         if (!(long_obj = PyNumber_Long(val_obj))) {
 634                                 pytdbpack_bad_type(ch, "Long", val_obj);
 635                                 return NULL;
 636                         }
 637
 638                         pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
 639
 640                         Py_DECREF(long_obj);
 641                 }
 642                 else if (ch == 'p') {
 643                         /* "Pointer" value -- in the subset of DCERPC used by Samba,
 644                            this is really just an "exists" or "does not exist"
 645                            flag. */
 646                         pack_uint32(PyObject_IsTrue(val_obj), &packed);
 647                 }
 648                 else if (ch == 'f' || ch == 'P') {
 649                         int size;
 650                         char *sval;
 651
 652                         size = PySequence_Length(val_obj);
 653                         if (size < 0)
 654                                 return NULL;
 655                         sval = PyString_AsString(val_obj);
 656                         if (!sval)
 657                                 return NULL;
 658                         pack_bytes(size+1, sval, &packed); /* include nul */
 659                 }
 660                 else if (ch == 'B') {
 661                         long size;
 662                         char *sval;
 663
 664                         if (!PyInt_Check(val_obj)) {
 665                                 pytdbpack_bad_type(ch, "Integer", val_obj);
 666                                 return NULL;
 667                         }
 668
 669                         size = PyInt_AsLong(val_obj);
 670                         pack_uint32(size, &packed);
 671
 672                         val_obj = PySequence_GetItem(val_seq, val_i++);
 673                         if (!val_obj)
 674                                 return NULL;
 675
 676                         sval = PyString_AsString(val_obj);
 677                         if (!sval)
 678                                 return NULL;
 679
 680                         pack_bytes(size, sval, &packed); /* do not include nul */
 681                 }
 682                 else {
 683                         /* this ought to be caught while calculating the length, but
 684                            just in case. */
 685                         PyErr_Format(PyExc_ValueError,
 686                                      "%s: format character '%c' is not supported",
 687                                      __FUNCTION__, ch);
 688
 689                         return NULL;
 690                 }
 691         }
 692
 693         return Py_None;
 694 }
 695
 696
 697
 698 static PyMethodDef pytdbpack_methods[] = {
 699         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 700         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 701 };
 702
 703 DL_EXPORT(void)
 704 inittdbpack(void)
 705 {
 706         Py_InitModule3("tdbpack", pytdbpack_methods,
 707                        (char *) pytdbpack_docstring);
 708 }