2 * Routines for multipart media encapsulation dissection
3 * Copyright 2004, Anders Broman.
4 * Copyright 2004, Olivier Biot.
6 * $Id: packet-multipart.c,v 1.9 2004/04/30 17:07:21 obiot Exp $
8 * Refer to the AUTHORS file or the AUTHORS section in the man page
9 * for contacting the author(s) of this file.
11 * Ethereal - Network traffic analyzer
12 * By Gerald Combs <gerald@ethereal.com>
13 * Copyright 1998 Gerald Combs
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
30 * References for media type "multipart/mixed":
31 * http://www.iana.org/assignments/media-types/index.html
32 * http://www.rfc-editor.org/rfc/rfc2045.txt
33 * http://www.rfc-editor.org/rfc/rfc2046.txt
34 * http://www.rfc-editor.org/rfc/rfc2047.txt
35 * http://www.rfc-editor.org/rfc/rfc2048.txt
36 * http://www.rfc-editor.org/rfc/rfc2049.txt
38 * Part of the code is modeled from the SIP and HTTP dissectors
40 * General format of a MIME multipart document:
41 * [ preamble line-end ]
42 * dash-boundary transport-padding line-end
45 * close-delimiter transport-padding
46 * [ line-end epilogue ]
49 * dash-boundary := "--" boundary
50 * encapsulation := delimiter transport-padding line-end body-part
51 * delimiter := line-end dash-boundary
52 * close-delimiter := delimiter "--"
53 * body-part := MIME-part-headers [ line-end *OCTET ]
54 * transport-padding := *LWSP-char
56 * Note that line-end is often a LF instead of a CRLF.
70 #ifdef NEED_SNPRINTF_H
71 # include "snprintf.h"
74 #include <epan/packet.h>
76 /* Dissector table for media requiring special attention in multipart
78 static dissector_table_t multipart_media_subdissector_table;
80 /* Initialize the protocol and registered fields */
81 static int proto_multipart = -1;
83 /* Initialize the subtree pointers */
84 static gint ett_multipart = -1;
85 static gint ett_multipart_main = -1;
86 static gint ett_multipart_body = -1;
88 /* Not sure that compact_name exists for multipart, but choose to keep
89 * the structure from SIP dissector, all the content- is also from SIP */
92 static const char *multipart_headers[] = {
93 "Unknown-header", /* Pad so that the real headers start at index 1 */
94 "Content-Disposition",
101 #define POS_CONTENT_DISPOSITION 1
102 #define POS_CONTENT_ENCODING 2
103 #define POS_CONTENT_LANGUAGE 3
104 #define POS_CONTENT_LENGTH 4
105 #define POS_CONTENT_TYPE 5
107 /* Initialize the header fields */
108 static gint hf_multipart_type = -1;
109 static gint hf_header_array[] = {
110 -1, /* "Unknown-header" - Pad so that the real headers start at index 1 */
111 -1, /* "Content-Disposition" */
112 -1, /* "Content-Encoding" */
113 -1, /* "Content-Language" */
114 -1, /* "Content-Length" */
115 -1, /* "Content-Type" */
118 /* Define media_type/Content type table */
119 static dissector_table_t media_type_dissector_table;
121 /* Data and media dissector handles */
122 static dissector_handle_t data_handle;
123 static dissector_handle_t media_handle;
125 /* Determines whether bodies with no media type dissector should be displayed
126 * as raw text; may cause problems with images, sound, etc.
127 * TODO improve to check for different content types ?
129 static gboolean display_unknown_body_as_text = FALSE;
133 const char *type; /* Type of multipart */
134 char *boundary; /* Boundary string (enclosing quotes removed if any) */
135 guint boundary_length; /* Length of the boundary string */
141 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
142 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
144 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
145 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
147 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
148 gint boundary_len, gboolean *last_boundary);
150 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
151 gint boundary_len, packet_info *pinfo, gint start,
152 gboolean *last_boundary);
154 is_known_multipart_header(const char *header_str, guint len);
156 index_of_char(const char *str, const char c);
158 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset);
161 * Unfold and clean up a MIME-like header, and process LWS as follows:
162 * o Preserves LWS in quoted text
163 * o Remove LWS before and after a separator
164 * o Remove trailing LWS
165 * o Replace other LWS with a single space
166 * Set *first_colon_offset to the offset of the first colon in the returned header
167 * Return the cleaned-up RFC2822 header (buffer must be freed).
170 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset)
172 const char *p = lines;
175 char sep_seen = 0; /* Did we see a separator ":;," */
176 char lws = FALSE; /* Did we see LWS (incl. folding) */
179 if (! lines) return NULL;
182 ret = g_malloc(strlen(lines) + 1);
187 lws = FALSE; /* Prevent leading LWS from showing up */
188 if (colon == -1) {/* First colon */
191 *(q++) = sep_seen = c;
193 } else if (c == ';' || c == ',' || c == '=') {
194 lws = FALSE; /* Prevent leading LWS from showing up */
195 *(q++) = sep_seen = c;
197 } else if (c == ' ' || c == '\t') {
200 } else if (c == '\n') {
201 lws = FALSE; /* Skip trailing LWS */
203 if (c == ' ' || c == '\t') { /* Header unfolding */
207 *q = c = 0; /* Stop */
210 } else if (c == '\r') {
215 if (c == ' ' || c == '\t') { /* Header unfolding */
219 *q = c = 0; /* Stop */
222 } else if (c == ' ' || c == '\t') { /* Header unfolding */
226 *q = c = 0; /* Stop */
229 } else if (c == '"') { /* Start of quoted-string */
235 p++; /* Skip closing quote */
239 } else { /* Regular character */
260 *first_colon_offset = colon;
264 /* Return the index of a given char in the given string,
265 * or -1 if not found.
268 index_of_char(const char *str, const char c)
273 while (*p && *p != c) {
283 /* Retrieve the media information from pinfo->private_data,
284 * and compute the boundary string and its length.
285 * Return a pointer to a filled-in multipart_info_t, or NULL on failure.
287 * Boundary delimiters must not appear within the encapsulated material,
288 * and must be no longer than 70 characters, not counting the two
289 * leading hyphens. (quote from rfc2046)
291 static multipart_info_t *
292 get_multipart_info(packet_info *pinfo)
294 const char *start, *p;
296 multipart_info_t *m_info = NULL;
297 const char *type = pinfo->match_string;
298 const char *parameters = pinfo->private_data;
301 if ((type == NULL) || (parameters == NULL)) {
303 * We need both a content type AND parameters
304 * for multipart dissection.
309 /* Clean up the parameters */
310 parameters = unfold_and_compact_mime_header(parameters, &dummy);
313 * Process the private data
314 * The parameters must contain the boundary string
318 if (strncasecmp(p, "boundary=", 9) == 0)
320 /* Skip to next parameter */
324 p++; /* Skip semicolon */
325 while ((*p) && isspace((guchar)*p))
326 p++; /* Skip white space */
334 * Process the parameter value
336 if (start[0] == '"') {
338 * Boundary string is a quoted-string
340 start++; /* Skip the quote */
341 len = index_of_char(start, '"');
350 * Look for end of boundary
354 if (*p == ';' || isspace((guchar)*p))
361 * There is a value for the boundary string
363 m_info = g_malloc(sizeof(multipart_info_t));
365 m_info->boundary = g_strndup(start, len);
366 m_info->boundary_length = len;
372 cleanup_multipart_info(void *data)
374 multipart_info_t *m_info = data;
376 if (m_info->boundary)
377 g_free(m_info->boundary);
383 * The first boundary does not implicitly contain the leading
386 * Return the offset to the 1st byte of the boundary delimiter line.
387 * Set boundary_line_len to the length of the entire boundary delimiter.
388 * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
391 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
392 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
394 gint offset = start, next_offset, line_len, boundary_start;
396 while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
397 boundary_start = offset;
398 if (((tvb_strneql(tvb, offset, (const guint8 *)"--", 2) == 0)
399 && (tvb_strneql(tvb, offset + 2, boundary, boundary_len) == 0)))
401 /* Boundary string; now check if last */
402 if ((tvb_length_remaining(tvb, offset + 2 + boundary_len + 2) >= 0)
403 && (tvb_strneql(tvb, offset + 2 + boundary_len,
404 (const guint8 *)"--", 2) == 0)) {
405 *last_boundary = TRUE;
407 *last_boundary = FALSE;
409 /* Look for line end of the boundary line */
410 line_len = tvb_find_line_end(tvb, offset, -1, &offset, FALSE);
411 if (line_len == -1) {
412 *boundary_line_len = -1;
414 *boundary_line_len = offset - boundary_start;
416 return boundary_start;
418 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
419 if (line_len == -1) {
422 offset = next_offset;
429 * Unlike the first boundary, subsequent boundaries include a line-end sequence
430 * before the dashed boundary string.
432 * Return the offset to the 1st byte of the boundary delimiter line.
433 * Set boundary_line_len to the length of the entire boundary delimiter.
434 * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
437 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
438 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
440 gint offset = start, next_offset, line_len, boundary_start;
442 while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
443 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
444 if (line_len == -1) {
447 boundary_start = offset + line_len;
448 if (((tvb_strneql(tvb, next_offset, (const guint8 *)"--", 2) == 0)
449 && (tvb_strneql(tvb, next_offset + 2, boundary, boundary_len) == 0)))
451 /* Boundary string; now check if last */
452 if ((tvb_length_remaining(tvb, next_offset + 2 + boundary_len + 2) >= 0)
453 && (tvb_strneql(tvb, next_offset + 2 + boundary_len,
454 (const guint8 *)"--", 2) == 0)) {
455 *last_boundary = TRUE;
457 *last_boundary = FALSE;
459 /* Look for line end of the boundary line */
460 line_len = tvb_find_line_end(tvb, next_offset, -1, &offset, FALSE);
461 if (line_len == -1) {
462 *boundary_line_len = -1;
464 *boundary_line_len = offset - boundary_start;
466 return boundary_start;
468 offset = next_offset;
475 * Process the multipart preamble:
476 * [ preamble line-end ] dashed-boundary transport-padding line-end
478 * Return the offset to the start of the first body-part.
481 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
482 gint boundary_len, gboolean *last_boundary)
484 gint boundary_start, boundary_line_len, body_part_start;
487 boundary_start = find_first_boundary(tvb, 0, boundary, boundary_len,
488 &boundary_line_len, last_boundary);
489 if (boundary_start == 0) {
491 proto_tree_add_text(tree, tvb, boundary_start, boundary_line_len,
492 "First boundary: %s",
493 tvb_format_text(tvb, boundary_start, boundary_line_len));
495 return boundary_start + boundary_line_len;
496 } else if (boundary_start > 0) {
497 if (boundary_line_len > 0) {
498 gint body_part_start = boundary_start + boundary_line_len;
501 if (body_part_start > 0) {
502 proto_tree_add_text(tree, tvb, 0, body_part_start,
505 proto_tree_add_text(tree, tvb, boundary_start,
506 boundary_line_len, "First boundary: %s",
507 tvb_format_text(tvb, boundary_start,
510 return body_part_start;
517 * Process a multipart body-part:
518 * MIME-part-headers [ line-end *OCTET ]
519 * line-end dashed-boundary transport-padding line-end
521 * If applicable, call a media subdissector.
523 * Return the offset to the start of the next body-part.
526 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
527 gint boundary_len, packet_info *pinfo, gint start,
528 gboolean *last_boundary)
530 proto_tree *subtree = NULL;
531 proto_item *ti = NULL;
532 gint offset = start, next_offset;
533 gint line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
534 char *parameters = NULL;
535 gint body_start, boundary_start, boundary_line_len;
537 char *content_type_str = NULL;
540 ti = proto_tree_add_text(tree, tvb, start, 0,
541 "Encapsulated multipart part");
542 subtree = proto_item_add_subtree(ti, ett_multipart_body);
545 * Process the MIME-part-headers
551 char *header_str = tvb_get_string(tvb, offset, next_offset - offset);
553 header_str = unfold_and_compact_mime_header(header_str, &colon_offset);
554 if (colon_offset <= 0) {
556 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
558 tvb_format_text(tvb, offset, next_offset - offset));
563 /* Split header name from header value */
564 header_str[colon_offset] = '\0';
565 hf_index = is_known_multipart_header(header_str, colon_offset);
567 if (hf_index == -1) {
569 proto_tree_add_text(subtree, tvb, offset,
570 next_offset - offset,
572 tvb_format_text(tvb, offset, next_offset - offset));
575 char *value_str = header_str + colon_offset + 1;
578 proto_tree_add_string_format(subtree,
579 hf_header_array[hf_index], tvb,
580 offset, next_offset - offset,
581 (const char *)value_str, "%s",
582 tvb_format_text(tvb, offset, next_offset - offset));
586 case POS_CONTENT_TYPE:
588 /* The Content-Type starts at colon_offset + 1 */
589 gint semicolon_offset = index_of_char(
592 if (semicolon_offset > 0) {
593 value_str[semicolon_offset] = '\0';
594 parameters = value_str + semicolon_offset + 1;
598 #if GLIB_MAJOR_VERSION < 2
599 content_type_str = g_strdup(value_str);
600 g_strdown(content_type_str);
602 content_type_str = g_ascii_strdown(value_str, -1);
612 offset = next_offset;
613 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
619 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
620 "%s", tvb_format_text(tvb, offset, next_offset - offset));
622 body_start = next_offset;
628 boundary_start = find_next_boundary(tvb, body_start, boundary, boundary_len,
629 &boundary_line_len, last_boundary);
630 if (boundary_start > 0) {
631 gint body_len = boundary_start - body_start;
632 tvbuff_t *tmp_tvb = tvb_new_subset(tvb, body_start,
635 if (content_type_str) {
639 void *save_private_data = pinfo->private_data;
642 pinfo->private_data = parameters;
644 * First try the dedicated multipart dissector table
646 dissected = dissector_try_string(multipart_media_subdissector_table,
647 content_type_str, tmp_tvb, pinfo, subtree);
650 * Fall back to the default media dissector table
652 dissected = dissector_try_string(media_type_dissector_table,
653 content_type_str, tmp_tvb, pinfo, subtree);
655 pinfo->private_data = save_private_data;
656 g_free(content_type_str);
657 content_type_str = NULL;
658 parameters = NULL; /* Shares same memory as content_type_str */
660 call_dissector(media_handle, tmp_tvb, pinfo, tree);
663 call_dissector(data_handle, tmp_tvb, pinfo, subtree);
666 if (*last_boundary == TRUE) {
667 proto_tree_add_text(tree, tvb,
668 boundary_start, boundary_line_len,
670 tvb_format_text(tvb, boundary_start,
673 proto_tree_add_text(tree, tvb,
674 boundary_start, boundary_line_len,
676 tvb_format_text(tvb, boundary_start,
680 return boundary_start + boundary_line_len;
687 * Call this method to actually dissect the multipart body.
688 * NOTE - Only do so if a boundary string has been found!
690 static void dissect_multipart(tvbuff_t *tvb, packet_info *pinfo,
693 proto_tree *subtree = NULL;
694 proto_item *ti = NULL;
695 multipart_info_t *m_info = get_multipart_info(pinfo);
696 gint header_start = 0;
700 gboolean last_boundary = FALSE;
702 if (m_info == NULL) {
704 * We can't get the required multipart information
706 proto_tree_add_text(tree, tvb, 0, -1,
707 "The multipart dissector could not find "
708 "the required boundary parameter.");
709 call_dissector(data_handle, tvb, pinfo, tree);
712 boundary = (guint8 *)m_info->boundary;
713 boundary_len = m_info->boundary_length;
714 /* Clean up the memory if an exception is thrown */
715 /* CLEANUP_PUSH(cleanup_multipart_info, m_info); */
717 /* Add stuff to the protocol tree */
719 ti = proto_tree_add_item(tree, proto_multipart,
721 subtree = proto_item_add_subtree(ti, ett_multipart);
722 proto_item_append_text(ti, ", Type: %s, Boundary: \"%s\"",
723 m_info->type, m_info->boundary);
724 proto_tree_add_string(subtree, hf_multipart_type,
725 tvb, 0, 0, pinfo->match_string);
729 * Make no entries in Protocol column and Info column on summary display,
730 * but stop sub-dissectors from clearing entered text in summary display.
732 if (check_col(pinfo->cinfo, COL_INFO))
733 col_set_fence(pinfo->cinfo, COL_INFO);
738 * Process the multipart preamble
740 header_start = process_preamble(subtree, tvb, boundary,
741 boundary_len, &last_boundary);
742 if (header_start == -1) {
743 call_dissector(data_handle, tvb, pinfo, subtree);
744 /* Clean up the dynamically allocated memory */
745 cleanup_multipart_info(m_info);
749 * Process the encapsulated bodies
751 while (last_boundary == FALSE) {
752 header_start = process_body_part(subtree, tvb, boundary, boundary_len,
753 pinfo, header_start, &last_boundary);
754 if (header_start == -1) {
755 /* Clean up the dynamically allocated memory */
756 cleanup_multipart_info(m_info);
761 * Process the multipart trailer
764 if (tvb_length_remaining(tvb, header_start) > 0) {
765 proto_tree_add_text(subtree, tvb, header_start, -1, "Trailer");
768 /* Clean up the dynamically allocated memory */
769 cleanup_multipart_info(m_info);
773 /* Returns the index of the given header name in multipart_headers */
775 is_known_multipart_header(const char *header_str, guint len)
779 for (i = 1; i < array_length(multipart_headers); i++) {
780 if (len == strlen(multipart_headers[i]) &&
781 strncasecmp(header_str, multipart_headers[i], len) == 0) {
790 * Register the protocol with Ethereal.
792 * This format is required because a script is used to build the C function
793 * that calls all the protocol registration.
797 proto_register_multipart(void)
800 /* Setup list of header fields See Section 1.6.1 for details */
801 static hf_register_info hf[] = {
802 { &hf_multipart_type,
804 "mime_multipart.type",
805 FT_STRING, BASE_NONE, NULL, 0x00,
806 "RFC 3261: MIME multipart encapsulation type", HFILL
809 { &hf_header_array[POS_CONTENT_DISPOSITION],
810 { "Content-Disposition",
811 "mime_multipart.header.content-disposition",
812 FT_STRING, BASE_NONE, NULL, 0x00,
813 "RFC 3261: Content-Disposition Header", HFILL
816 { &hf_header_array[POS_CONTENT_ENCODING],
817 { "Content-Encoding",
818 "mime_multipart.header.content-encoding",
819 FT_STRING, BASE_NONE, NULL, 0x00,
820 "RFC 3261: Content-Encoding Header", HFILL
823 { &hf_header_array[POS_CONTENT_LANGUAGE],
824 { "Content-Language",
825 "mime_multipart.header.content-language",
826 FT_STRING, BASE_NONE, NULL, 0x00,
827 "RFC 3261: Content-Language Header", HFILL
830 { &hf_header_array[POS_CONTENT_LENGTH],
832 "mime_multipart.header.content-length",
833 FT_STRING, BASE_NONE, NULL, 0x0,
834 "RFC 3261: Content-Length Header", HFILL
837 { &hf_header_array[POS_CONTENT_TYPE],
839 "mime_multipart.header.content-type",
840 FT_STRING, BASE_NONE,NULL,0x0,
841 "RFC 3261: Content-Type Header", HFILL
849 module_t *multipart_module;
852 * Setup protocol subtree array
854 static gint *ett[] = {
861 * Register the protocol name and description
863 proto_multipart = proto_register_protocol(
864 "MIME Multipart Media Encapsulation",
869 * Required function calls to register
870 * the header fields and subtrees used.
872 proto_register_field_array(proto_multipart, hf, array_length(hf));
873 proto_register_subtree_array(ett, array_length(ett));
876 * Get the content type and Internet media type table
878 media_type_dissector_table = find_dissector_table("media_type");
880 multipart_module = prefs_register_protocol(proto_multipart, NULL);
882 prefs_register_bool_preference(multipart_module,
883 "display_unknown_body_as_text",
884 "Display bodies without media type as text",
885 "Display multipart bodies with no media type dissector"
886 " as raw text (may cause problems with binary data).",
887 &display_unknown_body_as_text);
890 * Dissectors requiring different behavior in cases where the media
891 * is contained in a multipart entity should register their multipart
892 * dissector in the dissector table below, which is similar to the
893 * "media_type" dissector table defined in the HTTP dissector code.
895 multipart_media_subdissector_table = register_dissector_table(
896 "multipart_media_type",
897 "Internet media type (for multipart processing)",
898 FT_STRING, BASE_NONE);
902 /* If this dissector uses sub-dissector registration add a registration routine.
903 This format is required because a script is used to find these routines and
904 create the code that calls these routines.
907 proto_reg_handoff_multipart(void)
909 dissector_handle_t multipart_handle;
912 * When we cannot display the data, call the data dissector.
913 * When there is no dissector for the given media, call the media dissector.
915 data_handle = find_dissector("data");
916 media_handle = find_dissector("media");
919 * Handle for multipart dissection
921 multipart_handle = create_dissector_handle(
922 dissect_multipart, proto_multipart);
924 dissector_add_string("media_type",
925 "multipart/mixed", multipart_handle);
926 dissector_add_string("media_type",
927 "multipart/related", multipart_handle);
928 dissector_add_string("media_type",
929 "multipart/alternative", multipart_handle);
930 dissector_add_string("media_type",
931 "multipart/form-data", multipart_handle);