Optional removal of any base64 content-transfer-encoding from a MIME
[metze/wireshark/wip.git] / epan / dissectors / packet-multipart.c
1 /* packet-multipart.c
2  * Routines for multipart media encapsulation dissection
3  * Copyright 2004, Anders Broman.
4  * Copyright 2004, Olivier Biot.
5  *
6  * $Id$
7  *
8  * Refer to the AUTHORS file or the AUTHORS section in the man page
9  * for contacting the author(s) of this file.
10  *
11  * Wireshark - Network traffic analyzer
12  * By Gerald Combs <gerald@wireshark.org>
13  * Copyright 1998 Gerald Combs
14  *
15  *
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License
27  * along with this program; if not, write to the Free Software
28  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29  *
30  * References for media type "multipart/mixed":
31  * http://www.iana.org/assignments/media-types/index.html
32  * http://www.rfc-editor.org/rfc/rfc2045.txt
33  * http://www.rfc-editor.org/rfc/rfc2046.txt
34  * http://www.rfc-editor.org/rfc/rfc2047.txt
35  * http://www.rfc-editor.org/rfc/rfc2048.txt
36  * http://www.rfc-editor.org/rfc/rfc2049.txt
37  *
38  * Part of the code is modeled from the SIP and HTTP dissectors
39  *
40  * General format of a MIME multipart document:
41  *              [ preamble line-end ]
42  *              dash-boundary transport-padding line-end
43  *              body-part
44  *              *encapsulation
45  *              close-delimiter transport-padding
46  *              [ line-end epilogue ]
47  *
48  * Where:
49  *              dash-boundary     := "--" boundary
50  *              encapsulation     := delimiter transport-padding line-end body-part
51  *              delimiter         := line-end dash-boundary
52  *              close-delimiter   := delimiter "--"
53  *              body-part         := MIME-part-headers [ line-end *OCTET ]
54  *              transport-padding := *LWSP-char
55  * 
56  * Note that line-end is often a LF instead of a CRLF.
57 */
58
59 #ifdef HAVE_CONFIG_H
60 #include "config.h"
61 #endif
62
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <epan/prefs.h>
67 #include <glib.h>
68 #include <ctype.h>
69 #include <epan/base64.h>
70 #include <epan/emem.h>
71
72 #include <epan/packet.h>
73
74 #include "packet-imf.h"
75
76 /* Dissector table for media requiring special attention in multipart
77  * encapsulation. */
78 static dissector_table_t multipart_media_subdissector_table;
79
80 /* Initialize the protocol and registered fields */
81 static int proto_multipart = -1;
82
83 /* Initialize the subtree pointers */
84 static gint ett_multipart = -1;
85 static gint ett_multipart_main = -1;
86 static gint ett_multipart_body = -1;
87
88 /* Not sure that compact_name exists for multipart, but choose to keep
89  * the structure from SIP dissector, all the content- is also from SIP */
90
91
92 typedef struct {
93         const char *name;
94         const char *compact_name;
95 } multipart_header_t;
96
97 static const multipart_header_t multipart_headers[] = {
98         { "Unknown-header", NULL },             /* Pad so that the real headers start at index 1 */
99         { "Content-Disposition", NULL },
100         { "Content-Encoding", "e" },
101         { "Content-Id", NULL },
102         { "Content-Language", NULL },
103         { "Content-Length", "l" },
104         { "Content-Transfer-Encoding", NULL },
105         { "Content-Type", "c" },
106 };
107
108 #define POS_CONTENT_DISPOSITION                 1
109 #define POS_CONTENT_ENCODING                    2
110 #define POS_CONTENT_ID                                  3
111 #define POS_CONTENT_LANGUAGE                    4
112 #define POS_CONTENT_LENGTH                              5
113 #define POS_CONTENT_TRANSFER_ENCODING   6
114 #define POS_CONTENT_TYPE                                7
115
116 /* Initialize the header fields */
117 static gint hf_multipart_type = -1;
118 static gint hf_multipart_part = -1;
119
120 static gint hf_header_array[] = {
121         -1, /* "Unknown-header" - Pad so that the real headers start at index 1 */
122         -1, /* "Content-Disposition" */
123         -1, /* "Content-Encoding" */
124         -1, /* "Content-Id" */
125         -1, /* "Content-Language" */
126         -1, /* "Content-Length" */
127         -1, /* "Content-Transfer-Encoding" */
128         -1, /* "Content-Type" */
129 };
130
131 /* Define media_type/Content type table */
132 static dissector_table_t media_type_dissector_table;
133
134 /* Data and media dissector handles */
135 static dissector_handle_t data_handle;
136 static dissector_handle_t media_handle;
137
138 /* Determines if bodies with no media type dissector should be displayed
139  * as raw text; may cause problems with images, sound, etc.
140  * TODO: improve to check for different content types?
141  */
142 static gboolean display_unknown_body_as_text = FALSE;
143 static gboolean remove_base64_encoding = FALSE;
144
145
146 typedef struct {
147         const char *type; /* Type of multipart */
148         char *boundary; /* Boundary string (enclosing quotes removed if any) */
149         guint boundary_length; /* Length of the boundary string */
150 } multipart_info_t;
151
152
153
154 static gint
155 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
156                 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
157 static gint
158 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
159                 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
160 static gint
161 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
162                 gint boundary_len, gboolean *last_boundary);
163 static gint
164 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
165                 gint boundary_len, packet_info *pinfo, gint start,
166                 gboolean *last_boundary);
167 static gint
168 is_known_multipart_header(const char *header_str, guint len);
169 static gint
170 index_of_char(const char *str, const char c);
171 char *
172 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset);
173
174
175 /* Return a tvb that contains the binary representation of a base64
176    string */
177
178 static tvbuff_t *
179 base64_decode(packet_info *pinfo, tvbuff_t *b64_tvb, char *name)
180 {
181         tvbuff_t *tvb;
182         char *data;
183         size_t len;
184
185         data = g_strdup(tvb_get_ephemeral_string(b64_tvb, 0, tvb_length_remaining(b64_tvb, 0)));
186
187         len = epan_base64_decode(data);
188         tvb = tvb_new_real_data((const guint8 *)data, len, len);
189
190         tvb_set_free_cb(tvb, g_free);
191
192         add_new_data_source(pinfo, tvb, name);
193
194         return tvb;
195 }
196
197 /*
198  * Unfold and clean up a MIME-like header, and process LWS as follows:
199  *              o Preserves LWS in quoted text
200  *              o Remove LWS before and after a separator
201  *              o Remove trailing LWS
202  *              o Replace other LWS with a single space
203  * Set value to the start of the value 
204  * Return the cleaned-up RFC2822 header (buffer must be freed).
205  */
206 char *
207 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset)
208 {
209         const char *p = lines;
210         char c;
211         char *ret, *q;
212         char sep_seen = 0; /* Did we see a separator ":;," */
213         char lws = FALSE; /* Did we see LWS (incl. folding) */
214         gint colon = -1;
215
216         if (! lines) return NULL;
217
218         c = *p;
219         ret = g_malloc(strlen(lines) + 1);
220         q = ret;
221
222         while (c) {
223                 if (c == ':') {
224                         lws = FALSE; /* Prevent leading LWS from showing up */
225                         if (colon == -1) {/* First colon */
226                                 colon = q - ret;
227                         }
228                         *(q++) = sep_seen = c;
229                         p++;
230                 } else if (c == ';' || c == ',' || c == '=') {
231                         lws = FALSE; /* Prevent leading LWS from showing up */
232                         *(q++) = sep_seen = c;
233                         p++;
234                 } else if (c == ' ' || c == '\t') {
235                         lws = TRUE;
236                         p++;
237                 } else if (c == '\n') {
238                         lws = FALSE; /* Skip trailing LWS */
239                         if ((c = *(p+1))) {
240                                 if (c == ' ' || c == '\t') { /* Header unfolding */
241                                         lws = TRUE;
242                                         p += 2;
243                                 } else {
244                                         *q = c = 0; /* Stop */
245                                 }
246                         }
247                 } else if (c == '\r') {
248                         lws = FALSE;
249                         if ((c = *(p+1))) {
250                                 if (c == '\n') {
251                                         if ((c = *(p+2))) {
252                                                 if (c == ' ' || c == '\t') { /* Header unfolding */
253                                                         lws = TRUE;
254                                                         p += 3;
255                                                 } else {
256                                                         *q = c = 0; /* Stop */
257                                                 }
258                                         }
259                                 } else if (c == ' ' || c == '\t') { /* Header unfolding */
260                                         lws = TRUE;
261                                         p += 2;
262                                 } else {
263                                         *q = c = 0; /* Stop */
264                                 }
265                         }
266                 } else if (c == '"') { /* Start of quoted-string */
267                         lws = FALSE;
268                         *(q++) = c;
269                         while (c) {
270                                 c = *(q++) = *(++p);
271                                 if (c == '"') {
272                                         p++; /* Skip closing quote */
273                                         break;
274                                 }
275                         }
276                         /* if already zero terminated now, rewind one char to avoid an "off by one" */
277                         if(c == 0) {
278                                 q--;
279                         }
280                 } else { /* Regular character */
281                         if (sep_seen) {
282                                 sep_seen = 0;
283                                 lws = FALSE;
284                         } else {
285                                 if (lws) {
286                                         *(q++) = ' ';
287                                         lws = FALSE;
288                                 }
289                         }
290                         lws = FALSE;
291                         *(q++) = c;
292                         p++; /* OK */
293                 }
294
295                 if (c) {
296                         c = *p;
297                 }
298         }
299         *q = 0;
300
301         *first_colon_offset = colon;
302         return (ret);
303 }
304
305 /* Return the index of a given char in the given string,
306  * or -1 if not found.
307  */
308 static gint
309 index_of_char(const char *str, const char c)
310 {
311         gint len = 0;
312         const char *p = str;
313
314         while (*p && *p != c) {
315                 p++;
316                 len++;
317         }
318
319         if (*p)
320                 return len;
321         return -1;
322 }
323
324 static char *find_parameter(char *parameters, const char *key, int *retlen)
325 {
326         char *start, *p;
327         int   keylen = 0;
328         int   len = 0;
329
330         if(!parameters || !*parameters || !key || !(keylen = strlen(key)))
331                 /* we won't be able to find anything */
332                 return NULL;
333
334         p = parameters;
335
336         while (*p) {
337
338                 while ((*p) && isspace((guchar)*p))
339                         p++; /* Skip white space */
340                 
341                 if (strncasecmp(p, key, keylen) == 0)
342                         break;
343                 /* Skip to next parameter */
344                 p = strchr(p, ';');
345                 if (p == NULL)
346                 {
347                         return NULL;
348                 }
349                 p++; /* Skip semicolon */
350
351         }
352         start = p + keylen;
353         if (start[0] == 0) {
354                 return NULL;
355         }
356
357         /*
358          * Process the parameter value
359          */
360         if (start[0] == '"') {
361                 /*
362                  * Parameter value is a quoted-string
363                  */
364                 start++; /* Skip the quote */
365                 len = index_of_char(start, '"');
366                 if (len < 0) {
367                         /*
368                          * No closing quote
369                          */
370                         return NULL;
371                 }
372         } else {
373                 /*
374                  * Look for end of boundary
375                  */
376                 p = start;
377                 while (*p) {
378                         if (*p == ';' || isspace((guchar)*p))
379                                 break;
380                         p++;
381                         len++;
382                 }
383         }
384
385         if(retlen)
386                 (*retlen) = len;
387
388         return start;
389 }
390
391 /* Retrieve the media information from pinfo->private_data,
392  * and compute the boundary string and its length.
393  * Return a pointer to a filled-in multipart_info_t, or NULL on failure.
394  * 
395  * Boundary delimiters must not appear within the encapsulated material,
396  * and must be no longer than 70 characters, not counting the two
397  * leading hyphens. (quote from rfc2046)
398  */
399 static multipart_info_t *
400 get_multipart_info(packet_info *pinfo)
401 {
402         const char *start;
403         int len = 0;
404         multipart_info_t *m_info = NULL;
405         const char *type = pinfo->match_string;
406         char *parameters;
407         gint dummy;
408
409         if ((type == NULL) || (pinfo->private_data == NULL)) {
410                 /*
411                  * We need both a content type AND parameters
412                  * for multipart dissection.
413                  */
414                 return NULL;
415         }
416
417         /* Clean up the parameters */
418         parameters = unfold_and_compact_mime_header(pinfo->private_data, &dummy);
419
420         start = find_parameter(parameters, "boundary=", &len);
421
422         if(!start) {
423                 g_free(parameters);
424                 return NULL;
425         }
426         
427         /*
428          * There is a value for the boundary string
429          */
430         m_info = g_malloc(sizeof(multipart_info_t));
431         m_info->type = type;
432         m_info->boundary = g_strndup(start, len);
433         m_info->boundary_length = len;
434         g_free(parameters);
435
436         return m_info;
437 }
438
439 static void
440 cleanup_multipart_info(void *data)
441 {
442         multipart_info_t *m_info = data;
443         if (m_info) {
444                 if (m_info->boundary)
445                         g_free(m_info->boundary);
446                 g_free(m_info);
447         }
448 }
449
450 /*
451  * The first boundary does not implicitly contain the leading
452  * line-end sequence.
453  *
454  * Return the offset to the 1st byte of the boundary delimiter line.
455  * Set boundary_line_len to the length of the entire boundary delimiter.
456  * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
457  */
458 static gint
459 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
460                 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
461 {
462         gint offset = start, next_offset, line_len, boundary_start;
463
464         while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
465                 boundary_start = offset;
466                 if (((tvb_strneql(tvb, offset, (const guint8 *)"--", 2) == 0)
467                                         && (tvb_strneql(tvb, offset + 2, boundary,      boundary_len) == 0)))
468                 {
469                         /* Boundary string; now check if last */
470                         if ((tvb_length_remaining(tvb, offset + 2 + boundary_len + 2) >= 0)
471                                         && (tvb_strneql(tvb, offset + 2 + boundary_len,
472                                                         (const guint8 *)"--", 2) == 0)) {
473                                 *last_boundary = TRUE;
474                         } else {
475                                 *last_boundary = FALSE;
476                         }
477                         /* Look for line end of the boundary line */
478                         line_len =  tvb_find_line_end(tvb, offset, -1, &offset, FALSE);
479                         if (line_len == -1) {
480                                 *boundary_line_len = -1;
481                         } else {
482                                 *boundary_line_len = offset - boundary_start;
483                         }
484                         return boundary_start;
485                 }
486                 line_len =  tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
487                 if (line_len == -1) {
488                         return -1;
489                 }
490                 offset = next_offset;
491         }
492
493         return -1;
494 }
495
496 /*
497  * Unless the first boundary, subsequent boundaries include a line-end sequence
498  * before the dashed boundary string.
499  *
500  * Return the offset to the 1st byte of the boundary delimiter line.
501  * Set boundary_line_len to the length of the entire boundary delimiter.
502  * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
503  */
504 static gint
505 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
506                 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
507 {
508         gint offset = start, next_offset, line_len, boundary_start;
509
510         while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
511                 line_len =  tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
512                 if (line_len == -1) {
513                         return -1;
514                 }
515                 boundary_start = offset + line_len;
516                 if (((tvb_strneql(tvb, next_offset, (const guint8 *)"--", 2) == 0)
517                                         && (tvb_strneql(tvb, next_offset + 2, boundary, boundary_len) == 0)))
518                 {
519                         /* Boundary string; now check if last */
520                         if ((tvb_length_remaining(tvb, next_offset + 2 + boundary_len + 2) >= 0)
521                                         && (tvb_strneql(tvb, next_offset + 2 + boundary_len,
522                                                         (const guint8 *)"--", 2) == 0)) {
523                                 *last_boundary = TRUE;
524                         } else {
525                                 *last_boundary = FALSE;
526                         }
527                         /* Look for line end of the boundary line */
528                         line_len =  tvb_find_line_end(tvb, next_offset, -1, &offset, FALSE);
529                         if (line_len == -1) {
530                                 *boundary_line_len = -1;
531                         } else {
532                                 *boundary_line_len = offset - boundary_start;
533                         }
534                         return boundary_start;
535                 }
536                 offset = next_offset;
537         }
538
539         return -1;
540 }
541
542 /*
543  * Process the multipart preamble:
544  *              [ preamble line-end ] dashed-boundary transport-padding line-end
545  *
546  * Return the offset to the start of the first body-part.
547  */
548 static gint
549 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
550                 gint boundary_len, gboolean *last_boundary)
551 {
552         gint boundary_start, boundary_line_len, body_part_start;
553
554         body_part_start = 0;
555         boundary_start = find_first_boundary(tvb, 0, boundary, boundary_len,
556                         &boundary_line_len, last_boundary);
557         if (boundary_start == 0) {
558                 if (tree) {
559                         proto_tree_add_text(tree, tvb, boundary_start, boundary_line_len,
560                                         "First boundary: %s",
561                                         tvb_format_text(tvb, boundary_start, boundary_line_len));
562                 }
563                 return boundary_start + boundary_line_len;
564         } else if (boundary_start > 0) {
565                 if (boundary_line_len > 0) {
566                         gint body_part_start = boundary_start + boundary_line_len;
567
568                         if (tree) {
569                                 if (body_part_start > 0) {
570                                         proto_tree_add_text(tree, tvb, 0, body_part_start,
571                                                         "Preamble");
572                                 }
573                                 proto_tree_add_text(tree, tvb, boundary_start,
574                                                 boundary_line_len, "First boundary: %s",
575                                                 tvb_format_text(tvb, boundary_start,
576                                                         boundary_line_len));
577                         }
578                         return body_part_start;
579                 }
580         }
581         return -1;
582 }
583
584 /*
585  * Process a multipart body-part:
586  *              MIME-part-headers [ line-end *OCTET ]
587  *              line-end dashed-boundary transport-padding line-end
588  *
589  * If applicable, call a media subdissector.
590  *
591  * Return the offset to the start of the next body-part.
592  */
593 static gint
594 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
595                 gint boundary_len, packet_info *pinfo, gint start,
596                 gboolean *last_boundary)
597 {
598         proto_tree *subtree = NULL;
599         proto_item *ti = NULL;
600         gint offset = start, next_offset;
601         char *parameters = NULL;
602         gint body_start, boundary_start, boundary_line_len;
603
604         char *content_type_str = NULL;
605         char *content_encoding_str = NULL;
606         char *filename = NULL;
607         char *typename = NULL;
608         int  len = 0;
609         gboolean last_field = FALSE;
610
611         if (tree) {
612                 ti = proto_tree_add_item(tree, hf_multipart_part, tvb, start, 0, FALSE);
613                 subtree = proto_item_add_subtree(ti, ett_multipart_body);
614         }
615         /*
616          * Process the MIME-part-headers
617          */
618
619         while (!last_field)
620         {
621                 gint colon_offset;
622                 char *hdr_str;
623                 char *header_str;
624
625                 next_offset = imf_find_field_end(tvb, offset, tvb_length_remaining(tvb, offset), &last_field);
626
627                 hdr_str = tvb_get_ephemeral_string(tvb, offset, next_offset - offset);
628
629                 header_str = unfold_and_compact_mime_header(hdr_str, &colon_offset);
630                 if (colon_offset <= 0) {
631                         if (tree) {
632                                 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
633                                                 "%s",
634                                                 tvb_format_text(tvb, offset, next_offset - offset));
635                         }
636                 } else {
637                         gint hf_index;
638
639                         /* Split header name from header value */
640                         header_str[colon_offset] = '\0';
641                         hf_index = is_known_multipart_header(header_str, colon_offset);
642
643                         if (hf_index == -1) {
644                                 if (tree) {
645                                         proto_tree_add_text(subtree, tvb, offset,
646                                                         next_offset - offset,
647                                                         "%s",
648                                                         tvb_format_text(tvb, offset, next_offset - offset));
649                                 }
650                         } else {
651                                 char *value_str = header_str + colon_offset + 1;
652
653                                 if (tree) {
654                                         proto_tree_add_string_format(subtree,
655                                                         hf_header_array[hf_index], tvb,
656                                                         offset, next_offset - offset,
657                                                         (const char *)value_str, "%s",
658                                                         tvb_format_text(tvb, offset, next_offset - offset));
659                                 }
660
661                                 switch (hf_index) {
662                                         case POS_CONTENT_TYPE:
663                                                 {
664                                                         /* The Content-Type starts at colon_offset + 1 */
665                                                         gint semicolon_offset = index_of_char(
666                                                                         value_str, ';');
667
668                                                         if (semicolon_offset > 0) {
669                                                                 value_str[semicolon_offset] = '\0';
670                                                                 parameters = ep_strdup(value_str + semicolon_offset + 1);
671                                                         } else {
672                                                                 parameters = NULL;
673                                                         }
674 #if GLIB_MAJOR_VERSION < 2
675                                                         content_type_str = g_strdup(value_str);
676                                                         g_strdown(content_type_str);
677 #else
678                                                         content_type_str = g_ascii_strdown(value_str, -1);
679 #endif
680                                                         /* Show content-type in root 'part' label */
681                                                         proto_item_append_text(ti, " (%s)", content_type_str);
682                                                         
683                                                         /* find the "name" parameter in case we don't find a content disposition "filename" */
684                                                         if(typename = find_parameter(parameters, "name=", &len)) {
685                                                           typename = g_strndup(typename, len);
686                                                         }
687                                                 }
688
689
690                                                 break;
691                                         case POS_CONTENT_TRANSFER_ENCODING:
692                                                 {
693                                                         /* The Content-Transfeing starts at colon_offset + 1 */
694                                                         gint cr_offset = index_of_char(value_str, '\r');
695
696                                                         if (cr_offset > 0) {
697                                                                 value_str[cr_offset] = '\0';
698                                                         }
699 #if GLIB_MAJOR_VERSION < 2
700                                                         content_encoding_str = g_strdup(value_str);
701                                                         g_strdown(content_encoding_str);
702 #else
703                                                         content_encoding_str = g_ascii_strdown(value_str, -1);
704 #endif
705                                                 }
706                                                 break;
707                                         case POS_CONTENT_DISPOSITION:
708                                                 {
709                                                         /* find the "filename" parameter */
710                                                         if(filename = find_parameter(value_str, "filename=", &len)) {
711                                                                 filename = g_strndup(filename, len);
712                                                         }
713                                                 }
714                                                 break;
715                                         default:
716                                                 break;
717                                 }
718                         }
719                 }
720                 g_free(header_str);
721                 offset = next_offset;
722         }
723
724         body_start = next_offset;
725
726         /*
727          * Process the body
728          */
729
730         boundary_start = find_next_boundary(tvb, body_start, boundary, boundary_len,
731                         &boundary_line_len, last_boundary);
732         if (boundary_start > 0) {
733                 gint body_len = boundary_start - body_start;
734                 tvbuff_t *tmp_tvb = tvb_new_subset(tvb, body_start,
735                                 body_len, body_len);
736
737                 if (content_type_str) {
738
739                         /*
740                          * subdissection
741                          */
742                         void *save_private_data = pinfo->private_data;
743                         gboolean dissected;
744
745                         /* 
746                          * Try and remove any content transfer encoding so that each sub-dissector
747                          * doesn't have to do it itself 
748                          *
749                          */
750
751                         if(content_encoding_str && remove_base64_encoding) {
752
753                                 if(!strncasecmp(content_encoding_str, "base64", 6))
754                                         tmp_tvb = base64_decode(pinfo, tmp_tvb, filename ? filename : (typename ? typename : content_type_str));
755
756                         }
757
758                         pinfo->private_data = parameters;
759                         /*
760                          * First try the dedicated multipart dissector table
761                          */
762                         dissected = dissector_try_string(multipart_media_subdissector_table,
763                                                 content_type_str, tmp_tvb, pinfo, subtree);
764                         if (! dissected) {
765                                 /*
766                                  * Fall back to the default media dissector table
767                                  */
768                                 dissected = dissector_try_string(media_type_dissector_table,
769                                                 content_type_str, tmp_tvb, pinfo, subtree);
770                         }
771                         if (! dissected) {
772                                 const char *save_match_string = pinfo->match_string;
773                                 pinfo->match_string = content_type_str;
774                                 call_dissector(media_handle, tmp_tvb, pinfo, subtree);
775                                 pinfo->match_string = save_match_string;
776                         }
777                         pinfo->private_data = save_private_data;
778                         g_free(content_type_str);
779                         content_type_str = NULL;
780                         parameters = NULL; /* Shares same memory as content_type_str */
781                 } else {
782                         call_dissector(data_handle, tmp_tvb, pinfo, subtree);
783                 }
784                 if (tree) {
785                         proto_item_set_len(ti, boundary_start - start);
786                         if (*last_boundary == TRUE) {
787                                 proto_tree_add_text(tree, tvb,
788                                                 boundary_start, boundary_line_len,
789                                                 "Last boundary: %s",
790                                                 tvb_format_text(tvb, boundary_start,
791                                                         boundary_line_len));
792                         } else {
793                                 proto_tree_add_text(tree, tvb,
794                                                 boundary_start, boundary_line_len,
795                                                 "Boundary: %s",
796                                                 tvb_format_text(tvb, boundary_start,
797                                                         boundary_line_len));
798                         }
799                 }
800
801                 if(filename)
802                         g_free(filename);
803                 if(typename)
804                         g_free(typename);
805
806                 return boundary_start + boundary_line_len;
807         }
808
809         return -1;
810 }
811
812 /*
813  * Call this method to actually dissect the multipart body.
814  * NOTE - Only do so if a boundary string has been found!
815  */
816 static void dissect_multipart(tvbuff_t *tvb, packet_info *pinfo,
817                 proto_tree *tree)
818 {
819         proto_tree *subtree = NULL;
820         proto_item *ti = NULL;
821         multipart_info_t *m_info = get_multipart_info(pinfo);
822         gint header_start = 0;
823         guint8 *boundary;
824         gint boundary_len;
825         gint offset = 0;
826         gboolean last_boundary = FALSE;
827
828         if (m_info == NULL) {
829                 /*
830                  * We can't get the required multipart information
831                  */
832                 proto_tree_add_text(tree, tvb, 0, -1,
833                                 "The multipart dissector could not find "
834                                 "the required boundary parameter.");
835                 call_dissector(data_handle, tvb, pinfo, tree);
836                 return;
837         }
838         boundary = (guint8 *)m_info->boundary;
839         boundary_len = m_info->boundary_length;
840         /* Clean up the memory if an exception is thrown */
841         /* CLEANUP_PUSH(cleanup_multipart_info, m_info); */
842
843         /* Add stuff to the protocol tree */
844         if (tree) {
845                 proto_item *type_ti;
846                 ti = proto_tree_add_item(tree, proto_multipart,
847                                 tvb, 0, -1, FALSE);
848                 subtree = proto_item_add_subtree(ti, ett_multipart);
849                 proto_item_append_text(ti, ", Type: %s, Boundary: \"%s\"",
850                                 m_info->type, m_info->boundary);
851
852                 /* Show multi-part type as a generated field */
853                 type_ti = proto_tree_add_string(subtree, hf_multipart_type,
854                                                 tvb, 0, 0, pinfo->match_string);
855                 PROTO_ITEM_SET_GENERATED(type_ti);
856         }
857
858         /*
859          * Make no entries in Protocol column and Info column on summary display,
860          * but stop sub-dissectors from clearing entered text in summary display.
861          */
862         if (check_col(pinfo->cinfo, COL_INFO))
863                 col_set_fence(pinfo->cinfo, COL_INFO);
864
865         offset = 0;
866
867         /*
868          * Process the multipart preamble
869          */
870         header_start = process_preamble(subtree, tvb, boundary,
871                         boundary_len, &last_boundary);
872         if (header_start == -1) {
873                 call_dissector(data_handle, tvb, pinfo, subtree);
874                 /* Clean up the dynamically allocated memory */
875                 cleanup_multipart_info(m_info);
876                 return;
877         }
878         /*
879          * Process the encapsulated bodies
880          */
881         while (last_boundary == FALSE) {
882                 header_start = process_body_part(subtree, tvb, boundary, boundary_len,
883                                 pinfo, header_start, &last_boundary);
884                 if (header_start == -1) {
885                         /* Clean up the dynamically allocated memory */
886                         cleanup_multipart_info(m_info);
887                         return;
888                 }
889         }
890         /*
891          * Process the multipart trailer
892          */
893         if (tree) {
894                 if (tvb_length_remaining(tvb, header_start) > 0) {
895                         proto_tree_add_text(subtree, tvb, header_start, -1, "Trailer");
896                 }
897         }
898         /* Clean up the dynamically allocated memory */
899         cleanup_multipart_info(m_info);
900         return;
901 }
902
903 /* Returns index of method in multipart_headers */
904 static gint
905 is_known_multipart_header(const char *header_str, guint len)
906 {
907         guint i;
908
909         for (i = 1; i < array_length(multipart_headers); i++) {
910                 if (len == strlen(multipart_headers[i].name) &&
911                     strncasecmp(header_str, multipart_headers[i].name, len) == 0)
912                         return i;
913                 if (multipart_headers[i].compact_name != NULL &&
914                     len == strlen(multipart_headers[i].compact_name) &&
915                     strncasecmp(header_str, multipart_headers[i].compact_name, len) == 0)
916                         return i;
917         }
918
919         return -1;
920 }
921
922 /*
923  * Register the protocol with Wireshark.
924  *
925  * This format is required because a script is used to build the C function
926  * that calls all the protocol registration.
927  */
928
929 void
930 proto_register_multipart(void)
931 {
932
933 /* Setup list of header fields  See Section 1.6.1 for details */
934         static hf_register_info hf[] = {
935                 { &hf_multipart_type,
936                         {       "Type",
937                                 "mime_multipart.type",
938                                 FT_STRING, BASE_NONE, NULL, 0x00,
939                                 "MIME multipart encapsulation type", HFILL
940                         }
941                 },
942                 { &hf_multipart_part,
943                         {       "Encapsulated multipart part",
944                                 "mime_multipart.part",
945                                 FT_STRING, BASE_NONE, NULL, 0x00,
946                                 "Encapsulated multipart part", HFILL
947                         }
948                 },
949                 { &hf_header_array[POS_CONTENT_DISPOSITION],
950                         {       "Content-Disposition",
951                                 "mime_multipart.header.content-disposition",
952                                 FT_STRING, BASE_NONE, NULL, 0x00,
953                                 "RFC 2183: Content-Disposition Header", HFILL
954                         }
955                 },
956                 { &hf_header_array[POS_CONTENT_ENCODING],
957                         {       "Content-Encoding",
958                                 "mime_multipart.header.content-encoding",
959                                 FT_STRING, BASE_NONE, NULL, 0x00,
960                                 "Content-Encoding Header", HFILL
961                         }
962                 },
963                 { &hf_header_array[POS_CONTENT_ID],
964                         {       "Content-Id",
965                                 "mime_multipart.header.content-id",
966                                 FT_STRING, BASE_NONE, NULL, 0x00,
967                                 "RFC 2045: Content-Id Header", HFILL
968                         }
969                 },
970                 { &hf_header_array[POS_CONTENT_LANGUAGE],
971                         {       "Content-Language",
972                                 "mime_multipart.header.content-language",
973                                 FT_STRING, BASE_NONE, NULL, 0x00,
974                                 "Content-Language Header", HFILL
975                         }
976                 },
977                 { &hf_header_array[POS_CONTENT_LENGTH],
978                         {       "Content-Length",
979                                 "mime_multipart.header.content-length",
980                                 FT_STRING, BASE_NONE, NULL, 0x0,
981                                 "Content-Length Header", HFILL
982                         }
983                 },
984                 { &hf_header_array[POS_CONTENT_TRANSFER_ENCODING],
985                         {       "Content-Transfer-Encoding",
986                                 "mime_multipart.header.content-transfer-encoding",
987                                 FT_STRING, BASE_NONE, NULL, 0x00,
988                                 "RFC 2045: Content-Transfer-Encoding Header", HFILL
989                         }
990                 },
991                 { &hf_header_array[POS_CONTENT_TYPE],
992                         {       "Content-Type",
993                                 "mime_multipart.header.content-type",
994                                 FT_STRING, BASE_NONE,NULL,0x0,
995                                 "Content-Type Header", HFILL
996                         }
997                 },
998         };
999
1000         /*
1001          * Preferences
1002          */
1003         module_t *multipart_module;
1004
1005         /*
1006          * Setup protocol subtree array
1007          */
1008         static gint *ett[] = {
1009                 &ett_multipart,
1010                 &ett_multipart_main,
1011                 &ett_multipart_body,
1012         };
1013
1014         /*
1015          * Register the protocol name and description
1016          */
1017         proto_multipart = proto_register_protocol(
1018                         "MIME Multipart Media Encapsulation",
1019                         "MIME multipart",
1020                         "mime_multipart");
1021
1022         /*
1023          * Required function calls to register
1024          * the header fields and subtrees used.
1025          */
1026         proto_register_field_array(proto_multipart, hf, array_length(hf));
1027         proto_register_subtree_array(ett, array_length(ett));
1028
1029         /*
1030          * Get the content type and Internet media type table
1031          */
1032         media_type_dissector_table = find_dissector_table("media_type");
1033
1034         multipart_module = prefs_register_protocol(proto_multipart, NULL);
1035
1036         prefs_register_bool_preference(multipart_module,
1037                         "display_unknown_body_as_text",
1038                         "Display bodies without media type as text",
1039                         "Display multipart bodies with no media type dissector"
1040                         " as raw text (may cause problems with binary data).",
1041                         &display_unknown_body_as_text);
1042
1043         prefs_register_bool_preference(multipart_module,
1044                                        "remove_base64_encoding",
1045                                        "Remove base64 encoding from bodies",
1046                                        "Remove any base64 content-transfer encoding from bodies. "
1047                                        "This supports export of the body and its further dissection.",
1048                                        &remove_base64_encoding);
1049
1050         /*
1051          * Dissectors requiring different behavior in cases where the media
1052          * is contained in a multipart entity should register their multipart
1053          * dissector in the dissector table below, which is similar to the
1054          * "media_type" dissector table defined in the HTTP dissector code.
1055          */
1056         multipart_media_subdissector_table = register_dissector_table(
1057                         "multipart_media_type",
1058                         "Internet media type (for multipart processing)",
1059                         FT_STRING, BASE_NONE);
1060 }
1061
1062
1063 /* If this dissector uses sub-dissector registration add a registration routine.
1064    This format is required because a script is used to find these routines and
1065    create the code that calls these routines.
1066 */
1067 void
1068 proto_reg_handoff_multipart(void)
1069 {
1070         dissector_handle_t multipart_handle;
1071
1072         /*
1073          * When we cannot display the data, call the data dissector.
1074          * When there is no dissector for the given media, call the media dissector.
1075          */
1076         data_handle = find_dissector("data");
1077         media_handle = find_dissector("media");
1078
1079         /*
1080          * Handle for multipart dissection
1081          */
1082         multipart_handle = create_dissector_handle(
1083                         dissect_multipart, proto_multipart);
1084
1085         dissector_add_string("media_type",
1086                         "multipart/mixed", multipart_handle);
1087         dissector_add_string("media_type",
1088                         "multipart/related", multipart_handle);
1089         dissector_add_string("media_type",
1090                         "multipart/alternative", multipart_handle);
1091         dissector_add_string("media_type",
1092                         "multipart/form-data", multipart_handle);
1093
1094 }