2 * Routines for multipart media encapsulation dissection
3 * Copyright 2004, Anders Broman.
4 * Copyright 2004, Olivier Biot.
8 * Refer to the AUTHORS file or the AUTHORS section in the man page
9 * for contacting the author(s) of this file.
11 * Ethereal - Network traffic analyzer
12 * By Gerald Combs <gerald@ethereal.com>
13 * Copyright 1998 Gerald Combs
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
30 * References for media type "multipart/mixed":
31 * http://www.iana.org/assignments/media-types/index.html
32 * http://www.rfc-editor.org/rfc/rfc2045.txt
33 * http://www.rfc-editor.org/rfc/rfc2046.txt
34 * http://www.rfc-editor.org/rfc/rfc2047.txt
35 * http://www.rfc-editor.org/rfc/rfc2048.txt
36 * http://www.rfc-editor.org/rfc/rfc2049.txt
38 * Part of the code is modeled from the SIP and HTTP dissectors
40 * General format of a MIME multipart document:
41 * [ preamble line-end ]
42 * dash-boundary transport-padding line-end
45 * close-delimiter transport-padding
46 * [ line-end epilogue ]
49 * dash-boundary := "--" boundary
50 * encapsulation := delimiter transport-padding line-end body-part
51 * delimiter := line-end body-part
52 * close-delimiter := delimiter "--"
53 * body-part := MIME-part-headers [ line-end *OCTET ]
54 * transport-padding := *LWSP-char
56 * Note that line-end is often a LF instead of a CRLF.
66 #include <epan/prefs.h>
70 #ifdef NEED_SNPRINTF_H
71 # include "snprintf.h"
74 #include <epan/packet.h>
76 /* Dissector table for media requiring special attention in multipart
78 static dissector_table_t multipart_media_subdissector_table;
80 /* Initialize the protocol and registered fields */
81 static int proto_multipart = -1;
83 /* Initialize the subtree pointers */
84 static gint ett_multipart = -1;
85 static gint ett_multipart_main = -1;
86 static gint ett_multipart_body = -1;
88 /* Not sure that compact_name exists for multipart, but choose to keep
89 * the structure from SIP dissector, all the content- is also from SIP */
/* Recognized MIME part headers. Each entry pairs the full header name with
 * its compact (short) form, or NULL when the header has none; both forms
 * are compared against in is_known_multipart_header(). */
97 static const multipart_header_t multipart_headers[] = {
98 { "Unknown-header", NULL }, /* Pad so that the real headers start at index 1 */
99 { "Content-Disposition", NULL },
100 { "Content-Encoding", "e" },
101 { "Content-Language", NULL },
102 { "Content-Length", "l" },
103 { "Content-Type", "c" },
/* Index of each header in multipart_headers; the same values are used to
 * index hf_header_array. */
106 #define POS_CONTENT_DISPOSITION 1
107 #define POS_CONTENT_ENCODING 2
108 #define POS_CONTENT_LANGUAGE 3
109 #define POS_CONTENT_LENGTH 4
110 #define POS_CONTENT_TYPE 5
112 /* Initialize the header fields */
/* Header field handles, all starting out as -1. hf_header_array has one
 * slot per entry of multipart_headers and is indexed with the POS_*
 * constants; the address of each real slot is placed in the hf[] table
 * in proto_register_multipart(). */
113 static gint hf_multipart_type = -1;
114 static gint hf_header_array[] = {
115 -1, /* "Unknown-header" - Pad so that the real headers start at index 1 */
116 -1, /* "Content-Disposition" */
117 -1, /* "Content-Encoding" */
118 -1, /* "Content-Language" */
119 -1, /* "Content-Length" */
120 -1, /* "Content-Type" */
123 /* Define media_type/Content type table */
124 static dissector_table_t media_type_dissector_table;
126 /* Data and media dissector handles */
127 static dissector_handle_t data_handle;
128 static dissector_handle_t media_handle;
130 /* Determines if bodies with no media type dissector should be displayed
131 * as raw text; may cause problems with images, sound, etc.
132 * TODO: improve to check for different content types?
134 static gboolean display_unknown_body_as_text = FALSE;
138 const char *type; /* Type of multipart */
139 char *boundary; /* Boundary string (enclosing quotes removed if any) */
140 guint boundary_length; /* Length of the boundary string */
146 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
147 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
149 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
150 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
152 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
153 gint boundary_len, gboolean *last_boundary);
155 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
156 gint boundary_len, packet_info *pinfo, gint start,
157 gboolean *last_boundary);
159 is_known_multipart_header(const char *header_str, guint len);
161 index_of_char(const char *str, const char c);
163 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset);
166 * Unfold and clean up a MIME-like header, and process LWS as follows:
167 * o Preserves LWS in quoted text
168 * o Remove LWS before and after a separator
169 * o Remove trailing LWS
170 * o Replace other LWS with a single space
171 * Set *first_colon_offset to the offset of the first colon in the returned header
172 * Return the cleaned-up RFC2822 header (buffer must be freed).
/* NOTE(review): several lines of this function (loop header, local
 * declarations, some branch bodies) are not visible in this excerpt; the
 * comments below describe only the visible statements. */
175 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset)
177 const char *p = lines;
180 char sep_seen = 0; /* Did we see a separator ":;," */
181 char lws = FALSE; /* Did we see LWS (incl. folding) */
/* A NULL input yields a NULL result. */
184 if (! lines) return NULL;
/* Output buffer sized to the input length plus a terminator.
 * NOTE(review): this relies on the cleaned-up text never being longer
 * than the input - confirm against the full loop. */
187 ret = g_malloc(strlen(lines) + 1);
192 lws = FALSE; /* Prevent leading LWS from showing up */
193 if (colon == -1) {/* First colon */
196 *(q++) = sep_seen = c;
/* Other separators are copied to the output and remembered in sep_seen. */
198 } else if (c == ';' || c == ',' || c == '=') {
199 lws = FALSE; /* Prevent leading LWS from showing up */
200 *(q++) = sep_seen = c;
202 } else if (c == ' ' || c == '\t') {
/* Line ends: the visible branches either treat a following SP/TAB as a
 * folded continuation line, or terminate the output string. */
205 } else if (c == '\n') {
206 lws = FALSE; /* Skip trailing LWS */
208 if (c == ' ' || c == '\t') { /* Header unfolding */
212 *q = c = 0; /* Stop */
215 } else if (c == '\r') {
220 if (c == ' ' || c == '\t') { /* Header unfolding */
224 *q = c = 0; /* Stop */
227 } else if (c == ' ' || c == '\t') { /* Header unfolding */
231 *q = c = 0; /* Stop */
234 } else if (c == '"') { /* Start of quoted-string */
240 p++; /* Skip closing quote */
244 } else { /* Regular character */
/* Hand the position of the first colon back to the caller. */
265 *first_colon_offset = colon;
269 /* Return the index of a given char in the given string,
270 * or -1 if not found.
273 index_of_char(const char *str, const char c)
/* Advance p until it points at c or at the terminating NUL, whichever
 * comes first. */
278 while (*p && *p != c) {
288 /* Retrieve the media information from pinfo->private_data,
289 * and compute the boundary string and its length.
290 * Return a pointer to a filled-in multipart_info_t, or NULL on failure.
292 * Boundary delimiters must not appear within the encapsulated material,
293 * and must be no longer than 70 characters, not counting the two
294 * leading hyphens. (quote from rfc2046)
/* The result and its boundary string are allocated with g_malloc() and
 * g_strndup(); dissect_multipart() hands the result to
 * cleanup_multipart_info() when it is done with it.
 * NOTE(review): parts of this function are not visible in this excerpt. */
296 static multipart_info_t *
297 get_multipart_info(packet_info *pinfo)
299 const char *start, *p;
301 multipart_info_t *m_info = NULL;
/* The multipart type is the string this dissector was matched on. */
302 const char *type = pinfo->match_string;
306 if ((type == NULL) || (pinfo->private_data == NULL)) {
308 * We need both a content type AND parameters
309 * for multipart dissection.
314 /* Clean up the parameters */
/* NOTE(review): unfold_and_compact_mime_header() returns an allocated
 * buffer that must be freed; the matching g_free() calls are not visible
 * here - confirm every return path releases `parameters`. */
315 parameters = unfold_and_compact_mime_header(pinfo->private_data, &dummy);
318 * Process the private data
319 * The parameters must contain the boundary string
/* Case-insensitive match on the 9-character parameter name "boundary=". */
323 if (strncasecmp(p, "boundary=", 9) == 0)
325 /* Skip to next parameter */
332 p++; /* Skip semicolon */
333 while ((*p) && isspace((guchar)*p))
334 p++; /* Skip white space */
343 * Process the parameter value
345 if (start[0] == '"') {
347 * Boundary string is a quoted-string
349 start++; /* Skip the quote */
/* Length up to the closing quote; index_of_char() yields -1 when there
 * is no closing quote. */
350 len = index_of_char(start, '"');
360 * Look for end of boundary
/* An unquoted boundary ends at the first ';' or white space character. */
364 if (*p == ';' || isspace((guchar)*p))
371 * There is a value for the boundary string
373 m_info = g_malloc(sizeof(multipart_info_t));
/* Keep a private copy of the boundary: `start` points into `parameters`. */
375 m_info->boundary = g_strndup(start, len);
376 m_info->boundary_length = len;
/* Release a multipart_info_t; dissect_multipart() calls this with the
 * value obtained from get_multipart_info(). `data` is dereferenced
 * unconditionally, so it must not be NULL. */
383 cleanup_multipart_info(void *data)
385 multipart_info_t *m_info = data;
/* Free the duplicated boundary string, if one was stored. */
387 if (m_info->boundary)
388 g_free(m_info->boundary);
394 * The first boundary does not implicitly contain the leading
397 * Return the offset to the 1st byte of the boundary delimiter line.
398 * Set boundary_line_len to the length of the entire boundary delimiter.
399 * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
402 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
403 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
405 gint offset = start, next_offset, line_len, boundary_start;
/* Scan line by line while data remains beyond a would-be "--boundary"
 * located at `offset`. */
407 while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
408 boundary_start = offset;
/* A boundary line starts with "--" immediately followed by the boundary
 * string. */
409 if (((tvb_strneql(tvb, offset, (const guint8 *)"--", 2) == 0)
410 && (tvb_strneql(tvb, offset + 2, boundary, boundary_len) == 0)))
412 /* Boundary string; now check if last */
/* A further "--" right after the boundary string marks the
 * close-delimiter. */
413 if ((tvb_length_remaining(tvb, offset + 2 + boundary_len + 2) >= 0)
414 && (tvb_strneql(tvb, offset + 2 + boundary_len,
415 (const guint8 *)"--", 2) == 0)) {
416 *last_boundary = TRUE;
418 *last_boundary = FALSE;
420 /* Look for line end of the boundary line */
/* `offset` is overwritten with the position reported through the fourth
 * argument, so the difference computed below spans the boundary line. */
421 line_len = tvb_find_line_end(tvb, offset, -1, &offset, FALSE);
422 if (line_len == -1) {
423 *boundary_line_len = -1;
425 *boundary_line_len = offset - boundary_start;
427 return boundary_start;
/* Not a boundary line: move on to the next line. */
429 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
430 if (line_len == -1) {
433 offset = next_offset;
440 * Unlike the first boundary, subsequent boundaries include a line-end sequence
441 * before the dashed boundary string.
443 * Return the offset to the 1st byte of the boundary delimiter line.
444 * Set boundary_line_len to the length of the entire boundary delimiter.
445 * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
448 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
449 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
451 gint offset = start, next_offset, line_len, boundary_start;
/* Walk the data one line at a time, testing whether the line that
 * follows the current one is a boundary line. */
453 while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
454 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
455 if (line_len == -1) {
/* The delimiter is taken to start right after the current line's text,
 * i.e. presumably at the line-end sequence that precedes "--boundary". */
458 boundary_start = offset + line_len;
459 if (((tvb_strneql(tvb, next_offset, (const guint8 *)"--", 2) == 0)
460 && (tvb_strneql(tvb, next_offset + 2, boundary, boundary_len) == 0)))
462 /* Boundary string; now check if last */
/* A further "--" right after the boundary string marks the
 * close-delimiter. */
463 if ((tvb_length_remaining(tvb, next_offset + 2 + boundary_len + 2) >= 0)
464 && (tvb_strneql(tvb, next_offset + 2 + boundary_len,
465 (const guint8 *)"--", 2) == 0)) {
466 *last_boundary = TRUE;
468 *last_boundary = FALSE;
470 /* Look for line end of the boundary line */
471 line_len = tvb_find_line_end(tvb, next_offset, -1, &offset, FALSE);
472 if (line_len == -1) {
473 *boundary_line_len = -1;
/* Length runs from the start of the delimiter to the position reported
 * for the boundary line by the call above. */
475 *boundary_line_len = offset - boundary_start;
477 return boundary_start;
/* No boundary after this line: continue with the next line. */
479 offset = next_offset;
486 * Process the multipart preamble:
487 * [ preamble line-end ] dashed-boundary transport-padding line-end
489 * Return the offset to the start of the first body-part.
492 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
493 gint boundary_len, gboolean *last_boundary)
495 gint boundary_start, boundary_line_len, body_part_start;
/* The search for the first boundary always starts at offset 0 of tvb. */
498 boundary_start = find_first_boundary(tvb, 0, boundary, boundary_len,
499 &boundary_line_len, last_boundary);
/* Boundary at the very start: only the boundary line itself is added to
 * the tree. */
500 if (boundary_start == 0) {
502 proto_tree_add_text(tree, tvb, boundary_start, boundary_line_len,
503 "First boundary: %s",
504 tvb_format_text(tvb, boundary_start, boundary_line_len));
506 return boundary_start + boundary_line_len;
/* Boundary found further in: data precedes the first boundary. */
507 } else if (boundary_start > 0) {
508 if (boundary_line_len > 0) {
/* NOTE(review): this declaration shadows the function-scope
 * body_part_start declared above. */
509 gint body_part_start = boundary_start + boundary_line_len;
512 if (body_part_start > 0) {
513 proto_tree_add_text(tree, tvb, 0, body_part_start,
516 proto_tree_add_text(tree, tvb, boundary_start,
517 boundary_line_len, "First boundary: %s",
518 tvb_format_text(tvb, boundary_start,
521 return body_part_start;
528 * Process a multipart body-part:
529 * MIME-part-headers [ line-end *OCTET ]
530 * line-end dashed-boundary transport-padding line-end
532 * If applicable, call a media subdissector.
534 * Return the offset to the start of the next body-part.
/* NOTE(review): a number of lines of this function (loop headers, some
 * declarations, closing braces, format strings) are not visible in this
 * excerpt; the comments below describe only the visible statements. */
537 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
538 gint boundary_len, packet_info *pinfo, gint start,
539 gboolean *last_boundary)
541 proto_tree *subtree = NULL;
542 proto_item *ti = NULL;
543 gint offset = start, next_offset;
544 gint line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
545 char *parameters = NULL;
546 gint body_start, boundary_start, boundary_line_len;
548 char *content_type_str = NULL;
/* Each body part gets its own item and subtree under `tree`. */
551 ti = proto_tree_add_text(tree, tvb, start, 0,
552 "Encapsulated multipart part");
553 subtree = proto_item_add_subtree(ti, ett_multipart_body);
556 * Process the MIME-part-headers
/* Copy the current header line out of the tvb, then unfold and compact
 * it; colon_offset receives the position of the first colon. */
562 char *hdr_str = ep_tvb_get_string(tvb, offset, next_offset - offset);
565 header_str = unfold_and_compact_mime_header(hdr_str, &colon_offset);
/* No colon, or a colon in the first position: shown as plain text. */
566 if (colon_offset <= 0) {
568 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
570 tvb_format_text(tvb, offset, next_offset - offset));
575 /* Split header name from header value */
576 header_str[colon_offset] = '\0';
577 hf_index = is_known_multipart_header(header_str, colon_offset);
/* Headers not listed in multipart_headers are shown as plain text. */
579 if (hf_index == -1) {
581 proto_tree_add_text(subtree, tvb, offset,
582 next_offset - offset,
584 tvb_format_text(tvb, offset, next_offset - offset));
/* The value starts right after the colon, inside header_str's buffer. */
587 char *value_str = header_str + colon_offset + 1;
590 proto_tree_add_string_format(subtree,
591 hf_header_array[hf_index], tvb,
592 offset, next_offset - offset,
593 (const char *)value_str, "%s",
594 tvb_format_text(tvb, offset, next_offset - offset));
598 case POS_CONTENT_TYPE:
600 /* The Content-Type starts at colon_offset + 1 */
601 gint semicolon_offset = index_of_char(
/* Cut the value at the first semicolon: the media type stays in
 * value_str and `parameters` points at the remainder, still inside
 * header_str's buffer. */
604 if (semicolon_offset > 0) {
605 value_str[semicolon_offset] = '\0';
606 parameters = value_str + semicolon_offset + 1;
/* content_type_str becomes a separately allocated, lower-cased copy of
 * the media type; the call used depends on the GLib major version. */
610 #if GLIB_MAJOR_VERSION < 2
611 content_type_str = g_strdup(value_str);
612 g_strdown(content_type_str);
614 content_type_str = g_ascii_strdown(value_str, -1);
/* Advance to the next header line. */
625 offset = next_offset;
626 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
632 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
633 "%s", tvb_format_text(tvb, offset, next_offset - offset));
/* The body begins after the line just added above. */
635 body_start = next_offset;
641 boundary_start = find_next_boundary(tvb, body_start, boundary, boundary_len,
642 &boundary_line_len, last_boundary);
643 if (boundary_start > 0) {
/* The body runs from body_start up to the start of the next delimiter
 * and is handed to subdissectors as its own tvb. */
644 gint body_len = boundary_start - body_start;
645 tvbuff_t *tmp_tvb = tvb_new_subset(tvb, body_start,
648 if (content_type_str) {
/* pinfo->private_data is pointed at the Content-Type parameters for the
 * duration of the subdissector calls and restored afterwards. */
652 void *save_private_data = pinfo->private_data;
655 pinfo->private_data = parameters;
657 * First try the dedicated multipart dissector table
659 dissected = dissector_try_string(multipart_media_subdissector_table,
660 content_type_str, tmp_tvb, pinfo, subtree);
663 * Fall back to the default media dissector table
665 dissected = dissector_try_string(media_type_dissector_table,
666 content_type_str, tmp_tvb, pinfo, subtree);
668 pinfo->private_data = save_private_data;
669 g_free(content_type_str);
670 content_type_str = NULL;
/* NOTE(review): `parameters` was set to point into header_str's buffer in
 * the Content-Type case above, while content_type_str is a separate copy;
 * the comment on the next line looks inaccurate - confirm. */
671 parameters = NULL; /* Shares same memory as content_type_str */
/* NOTE(review): this call passes `tree` while the neighbouring dissector
 * calls pass `subtree` - confirm that this is intended. */
673 call_dissector(media_handle, tmp_tvb, pinfo, tree);
676 call_dissector(data_handle, tmp_tvb, pinfo, subtree);
/* Add the delimiter line that ended this body part to the tree. */
679 if (*last_boundary == TRUE) {
680 proto_tree_add_text(tree, tvb,
681 boundary_start, boundary_line_len,
683 tvb_format_text(tvb, boundary_start,
686 proto_tree_add_text(tree, tvb,
687 boundary_start, boundary_line_len,
689 tvb_format_text(tvb, boundary_start,
/* The next body part starts right after the delimiter. */
693 return boundary_start + boundary_line_len;
700 * Call this method to actually dissect the multipart body.
701 * NOTE - Only do so if a boundary string has been found!
/* Entry point registered for the multipart media types in
 * proto_reg_handoff_multipart(). */
703 static void dissect_multipart(tvbuff_t *tvb, packet_info *pinfo,
706 proto_tree *subtree = NULL;
707 proto_item *ti = NULL;
708 multipart_info_t *m_info = get_multipart_info(pinfo);
709 gint header_start = 0;
713 gboolean last_boundary = FALSE;
/* Without a boundary the payload cannot be split: say so in the tree and
 * hand the whole tvb to the data dissector. */
715 if (m_info == NULL) {
717 * We can't get the required multipart information
719 proto_tree_add_text(tree, tvb, 0, -1,
720 "The multipart dissector could not find "
721 "the required boundary parameter.");
722 call_dissector(data_handle, tvb, pinfo, tree);
725 boundary = (guint8 *)m_info->boundary;
726 boundary_len = m_info->boundary_length;
727 /* Clean up the memory if an exception is thrown */
/* NOTE(review): the CLEANUP_PUSH below is commented out, so m_info is
 * released only by the explicit cleanup_multipart_info() calls further
 * down; presumably it is leaked if a called dissector throws - confirm. */
728 /* CLEANUP_PUSH(cleanup_multipart_info, m_info); */
730 /* Add stuff to the protocol tree */
732 ti = proto_tree_add_item(tree, proto_multipart,
734 subtree = proto_item_add_subtree(ti, ett_multipart);
735 proto_item_append_text(ti, ", Type: %s, Boundary: \"%s\"",
736 m_info->type, m_info->boundary);
737 proto_tree_add_string(subtree, hf_multipart_type,
738 tvb, 0, 0, pinfo->match_string);
742 * Make no entries in Protocol column and Info column on summary display,
743 * but stop sub-dissectors from clearing entered text in summary display.
745 if (check_col(pinfo->cinfo, COL_INFO))
746 col_set_fence(pinfo->cinfo, COL_INFO);
751 * Process the multipart preamble
753 header_start = process_preamble(subtree, tvb, boundary,
754 boundary_len, &last_boundary);
/* -1 from process_preamble(): fall back to the data dissector. */
755 if (header_start == -1) {
756 call_dissector(data_handle, tvb, pinfo, subtree);
757 /* Clean up the dynamically allocated memory */
758 cleanup_multipart_info(m_info);
762 * Process the encapsulated bodies
/* Each call consumes one body part and yields the offset of the next;
 * the loop ends once the closing boundary has been seen. */
764 while (last_boundary == FALSE) {
765 header_start = process_body_part(subtree, tvb, boundary, boundary_len,
766 pinfo, header_start, &last_boundary);
767 if (header_start == -1) {
768 /* Clean up the dynamically allocated memory */
769 cleanup_multipart_info(m_info);
774 * Process the multipart trailer
/* Whatever follows the closing boundary is labelled as the trailer. */
777 if (tvb_length_remaining(tvb, header_start) > 0) {
778 proto_tree_add_text(subtree, tvb, header_start, -1, "Trailer");
781 /* Clean up the dynamically allocated memory */
782 cleanup_multipart_info(m_info);
786 /* Returns the index of the header in multipart_headers */
/* header_str is compared over exactly `len` characters, ignoring case,
 * against both the full and the compact name of each table entry. */
788 is_known_multipart_header(const char *header_str, guint len)
/* Start at 1: entry 0 of multipart_headers is the "Unknown-header" pad. */
792 for (i = 1; i < array_length(multipart_headers); i++) {
/* Full name: same length and case-insensitive match. */
793 if (len == strlen(multipart_headers[i].name) &&
794 strncasecmp(header_str, multipart_headers[i].name, len) == 0)
/* Compact name, for the entries that define one. */
796 if (multipart_headers[i].compact_name != NULL &&
797 len == strlen(multipart_headers[i].compact_name) &&
798 strncasecmp(header_str, multipart_headers[i].compact_name, len) == 0)
806 * Register the protocol with Ethereal.
808 * This format is required because a script is used to build the C function
809 * that calls all the protocol registration.
/* Registers the protocol, its header fields and subtrees, the
 * "display_unknown_body_as_text" preference and the "multipart_media_type"
 * dissector table.
 * NOTE(review): the field descriptions below cite RFC 3261, whereas the
 * references at the top of this file are RFC 2045-2049 - confirm which
 * citation is intended. */
813 proto_register_multipart(void)
816 /* Setup list of header fields See Section 1.6.1 for details */
817 static hf_register_info hf[] = {
818 { &hf_multipart_type,
820 "mime_multipart.type",
821 FT_STRING, BASE_NONE, NULL, 0x00,
822 "RFC 3261: MIME multipart encapsulation type", HFILL
825 { &hf_header_array[POS_CONTENT_DISPOSITION],
826 { "Content-Disposition",
827 "mime_multipart.header.content-disposition",
828 FT_STRING, BASE_NONE, NULL, 0x00,
829 "RFC 3261: Content-Disposition Header", HFILL
832 { &hf_header_array[POS_CONTENT_ENCODING],
833 { "Content-Encoding",
834 "mime_multipart.header.content-encoding",
835 FT_STRING, BASE_NONE, NULL, 0x00,
836 "RFC 3261: Content-Encoding Header", HFILL
839 { &hf_header_array[POS_CONTENT_LANGUAGE],
840 { "Content-Language",
841 "mime_multipart.header.content-language",
842 FT_STRING, BASE_NONE, NULL, 0x00,
843 "RFC 3261: Content-Language Header", HFILL
846 { &hf_header_array[POS_CONTENT_LENGTH],
848 "mime_multipart.header.content-length",
849 FT_STRING, BASE_NONE, NULL, 0x0,
850 "RFC 3261: Content-Length Header", HFILL
853 { &hf_header_array[POS_CONTENT_TYPE],
855 "mime_multipart.header.content-type",
856 FT_STRING, BASE_NONE,NULL,0x0,
857 "RFC 3261: Content-Type Header", HFILL
865 module_t *multipart_module;
868 * Setup protocol subtree array
870 static gint *ett[] = {
877 * Register the protocol name and description
879 proto_multipart = proto_register_protocol(
880 "MIME Multipart Media Encapsulation",
885 * Required function calls to register
886 * the header fields and subtrees used.
888 proto_register_field_array(proto_multipart, hf, array_length(hf));
889 proto_register_subtree_array(ett, array_length(ett));
892 * Get the content type and Internet media type table
/* Looked up by name here rather than created; it serves as the fallback
 * table in process_body_part(). */
894 media_type_dissector_table = find_dissector_table("media_type");
896 multipart_module = prefs_register_protocol(proto_multipart, NULL);
/* Boolean preference stored in display_unknown_body_as_text. */
898 prefs_register_bool_preference(multipart_module,
899 "display_unknown_body_as_text",
900 "Display bodies without media type as text",
901 "Display multipart bodies with no media type dissector"
902 " as raw text (may cause problems with binary data).",
903 &display_unknown_body_as_text);
906 * Dissectors requiring different behavior in cases where the media
907 * is contained in a multipart entity should register their multipart
908 * dissector in the dissector table below, which is similar to the
909 * "media_type" dissector table defined in the HTTP dissector code.
/* String-keyed table that process_body_part() tries first. */
911 multipart_media_subdissector_table = register_dissector_table(
912 "multipart_media_type",
913 "Internet media type (for multipart processing)",
914 FT_STRING, BASE_NONE);
918 /* If this dissector uses sub-dissector registration add a registration routine.
919 This format is required because a script is used to find these routines and
920 create the code that calls these routines.
/* Looks up the "data" and "media" dissector handles and registers
 * dissect_multipart() in the "media_type" table for four multipart
 * subtypes. */
923 proto_reg_handoff_multipart(void)
925 dissector_handle_t multipart_handle;
928 * When we cannot display the data, call the data dissector.
929 * When there is no dissector for the given media, call the media dissector.
931 data_handle = find_dissector("data");
932 media_handle = find_dissector("media");
935 * Handle for multipart dissection
937 multipart_handle = create_dissector_handle(
938 dissect_multipart, proto_multipart);
/* One registration per supported subtype: mixed, related, alternative
 * and form-data. */
940 dissector_add_string("media_type",
941 "multipart/mixed", multipart_handle);
942 dissector_add_string("media_type",
943 "multipart/related", multipart_handle);
944 dissector_add_string("media_type",
945 "multipart/alternative", multipart_handle);
946 dissector_add_string("media_type",
947 "multipart/form-data", multipart_handle);