2 * Routines for multipart media encapsulation dissection
3 * Copyright 2004, Anders Broman.
4 * Copyright 2004, Olivier Biot.
6 * $Id: packet-multipart.c,v 1.8 2004/03/08 22:03:59 obiot Exp $
8 * Refer to the AUTHORS file or the AUTHORS section in the man page
9 * for contacting the author(s) of this file.
11 * Ethereal - Network traffic analyzer
12 * By Gerald Combs <gerald@ethereal.com>
13 * Copyright 1998 Gerald Combs
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
30 * References for media type "multipart/mixed":
31 * http://www.iana.org/assignments/media-types/index.html
32 * http://www.rfc-editor.org/rfc/rfc2045.txt
33 * http://www.rfc-editor.org/rfc/rfc2046.txt
34 * http://www.rfc-editor.org/rfc/rfc2047.txt
35 * http://www.rfc-editor.org/rfc/rfc2048.txt
36 * http://www.rfc-editor.org/rfc/rfc2049.txt
38 * Part of the code is modeled from the SIP and HTTP dissectors
40 * General format of a MIME multipart document:
41 * [ preamble line-end ]
42 * dash-boundary transport-padding line-end
45 * close-delimiter transport-padding
46 * [ line-end epilogue ]
49 * dash-boundary := "--" boundary
50 * encapsulation := delimiter transport-padding line-end body-part
51 * delimiter := line-end dash-boundary
52 * close-delimiter := delimiter "--"
53 * body-part := MIME-part-headers [ line-end *OCTET ]
54 * transport-padding := *LWSP-char
56 * Note that line-end is often a LF instead of a CRLF.
70 #ifdef NEED_SNPRINTF_H
71 # include "snprintf.h"
74 #include <epan/packet.h>
/*
 * File-scope state and forward declarations for the multipart dissector:
 * dissector tables and the data handle, the protocol / subtree / field ids
 * (all initialised to -1), the list of recognised MIME part header names
 * with their POS_* indexes, the preference flag for unknown bodies, the
 * multipart_info_t fields, and the prototypes of the static helpers.
 * NOTE(review): multipart_headers[] and hf_header_array[] are indexed with
 * the same value (the result of is_known_multipart_header() is used as the
 * hf_header_array[] index), so both tables presumably have to list the
 * headers in the same order as the POS_* constants -- confirm when adding
 * a header.
 */
76 /* Dissector table for media requiring special attention in multipart
78 static dissector_table_t multipart_media_subdissector_table;
80 /* Initialize the protocol and registered fields */
81 static int proto_multipart = -1;
83 /* Initialize the subtree pointers */
84 static gint ett_multipart = -1;
85 static gint ett_multipart_main = -1;
86 static gint ett_multipart_body = -1;
88 /* Not sure that compact_name exists for multipart, but choose to keep
89 * the structure from SIP dissector, all the content- is also from SIP */
92 static const char *multipart_headers[] = {
93 "Unknown-header", /* Pad so that the real headers start at index 1 */
94 "Content-Disposition",
101 #define POS_CONTENT_DISPOSITION 1
102 #define POS_CONTENT_ENCODING 2
103 #define POS_CONTENT_LANGUAGE 3
104 #define POS_CONTENT_LENGTH 4
105 #define POS_CONTENT_TYPE 5
107 /* Initialize the header fields */
108 static gint hf_multipart_type = -1;
109 static gint hf_header_array[] = {
110 -1, /* "Unknown-header" - Pad so that the real headers start at index 1 */
111 -1, /* "Content-Disposition" */
112 -1, /* "Content-Encoding" */
113 -1, /* "Content-Language" */
114 -1, /* "Content-Length" */
115 -1, /* "Content-Type" */
118 /* Define media_type/Content type table */
119 static dissector_table_t media_type_dissector_table;
121 /* Data dissector handle */
122 static dissector_handle_t data_handle;
124 /* Determines if bodies with no media type dissector should be displayed
125 * as raw text, may cause problems with images sound etc
126 * TODO improve to check for different content types ?
128 static gboolean display_unknown_body_as_text = FALSE;
132 const char *type; /* Type of multipart */
133 char *boundary; /* Boundary string (enclosing quotes removed if any) */
134 guint boundary_length; /* Length of the boundary string */
140 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
141 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
143 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
144 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary);
146 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
147 gint boundary_len, gboolean *last_boundary);
149 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
150 gint boundary_len, packet_info *pinfo, gint start,
151 gboolean *last_boundary);
153 is_known_multipart_header(const char *header_str, guint len);
155 index_of_char(const char *str, const char c);
157 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset);
/*
 * unfold_and_compact_mime_header(): copies `lines` into a newly g_malloc()ed
 * buffer of strlen(lines) + 1 bytes, one character at a time, with separate
 * handling for ':' , the ';' ',' '=' separators, space/tab, LF and CR
 * (including folded continuation lines) and double-quoted strings.
 * Returns NULL when `lines` is NULL. The value of the local `colon`
 * (tested against -1 when a ':' is seen) is stored in *first_colon_offset.
 * NOTE(review): '=' is handled as a separator although the sep_seen comment
 * only lists ":;,".
 * NOTE(review): "Set value to the start of the value" in the description
 * below does not match the parameter list; the out-parameter is
 * first_colon_offset, presumably the offset of the first ':' within the
 * returned string -- TODO confirm.
 */
160 * Unfold and clean up a MIME-like header, and process LWS as follows:
161 * o Preserves LWS in quoted text
162 * o Remove LWS before and after a separator
163 * o Remove trailing LWS
164 * o Replace other LWS with a single space
165 * Set value to the start of the value
166 * Return the cleaned-up RFC2822 header (buffer must be freed).
169 unfold_and_compact_mime_header(const char *lines, gint *first_colon_offset)
171 const char *p = lines;
174 char sep_seen = 0; /* Did we see a separator ":;," */
175 char lws = FALSE; /* Did we see LWS (incl. folding) */
178 if (! lines) return NULL;
181 ret = g_malloc(strlen(lines) + 1);
186 lws = FALSE; /* Prevent leading LWS from showing up */
187 if (colon == -1) {/* First colon */
190 *(q++) = sep_seen = c;
192 } else if (c == ';' || c == ',' || c == '=') {
193 lws = FALSE; /* Prevent leading LWS from showing up */
194 *(q++) = sep_seen = c;
196 } else if (c == ' ' || c == '\t') {
199 } else if (c == '\n') {
200 lws = FALSE; /* Skip trailing LWS */
202 if (c == ' ' || c == '\t') { /* Header unfolding */
206 *q = c = 0; /* Stop */
209 } else if (c == '\r') {
214 if (c == ' ' || c == '\t') { /* Header unfolding */
218 *q = c = 0; /* Stop */
221 } else if (c == ' ' || c == '\t') { /* Header unfolding */
225 *q = c = 0; /* Stop */
228 } else if (c == '"') { /* Start of quoted-string */
234 p++; /* Skip closing quote */
238 } else { /* Regular character */
259 *first_colon_offset = colon;
/*
 * Return the index of the first occurrence of c in str,
 * or -1 if c does not occur before the terminating NUL.
 *
 * str  NUL-terminated string to search (must not be NULL)
 * c    character to look for; searching for '\0' yields -1
 *
 * The result type is plain int, which is what GLib's gint is a typedef for.
 */
static int
index_of_char(const char *str, const char c)
{
    int len = 0;
    const char *p = str;

    /* Advance until we hit either the wanted character or the end */
    while (*p && *p != c) {
        p++;
        len++;
    }

    if (*p)
        return len;     /* Stopped on a match */
    return -1;          /* Stopped on the terminator */
}
/*
 * get_multipart_info(): takes the media type from pinfo->match_string and
 * the Content-Type parameters from pinfo->private_data (both are required),
 * cleans the parameters with unfold_and_compact_mime_header(), looks for a
 * "boundary=" parameter (compared case-insensitively), skips an opening
 * double quote of the value if present, and stores a g_strndup()ed copy of
 * the value plus its length in a g_malloc()ed multipart_info_t.
 * NOTE(review): `parameters` is re-pointed at the buffer returned by
 * unfold_and_compact_mime_header(), which is described as needing to be
 * freed; no matching g_free() appears in the lines reviewed -- confirm the
 * buffer is not leaked.
 */
282 /* Retrieve the media information from pinfo->private_data,
283 * and compute the boundary string and its length.
284 * Return a pointer to a filled-in multipart_info_t, or NULL on failure.
286 * Boundary delimiters must not appear within the encapsulated material,
287 * and must be no longer than 70 characters, not counting the two
288 * leading hyphens. (quote from rfc2046)
290 static multipart_info_t *
291 get_multipart_info(packet_info *pinfo)
293 const char *start, *p;
295 multipart_info_t *m_info = NULL;
296 const char *type = pinfo->match_string;
297 const char *parameters = pinfo->private_data;
300 if ((type == NULL) || (parameters == NULL)) {
302 * We need both a content type AND parameters
303 * for multipart dissection.
308 /* Clean up the parameters */
309 parameters = unfold_and_compact_mime_header(parameters, &dummy);
312 * Process the private data
313 * The parameters must contain the boundary string
317 if (strncasecmp(p, "boundary=", 9) == 0)
319 /* Skip to next parameter */
323 p++; /* Skip semicolon */
324 while ((*p) && isspace((guchar)*p))
325 p++; /* Skip white space */
333 * Process the parameter value
335 if (start[0] == '"') {
337 * Boundary string is a quoted-string
339 start++; /* Skip the quote */
340 len = index_of_char(start, '"');
349 * Look for end of boundary
353 if (*p == ';' || isspace((guchar)*p))
360 * There is a value for the boundary string
362 m_info = g_malloc(sizeof(multipart_info_t));
364 m_info->boundary = g_strndup(start, len);
365 m_info->boundary_length = len;
371 cleanup_multipart_info(void *data)
373 multipart_info_t *m_info = data;
375 if (m_info->boundary)
376 g_free(m_info->boundary);
382 * The first boundary does not implicitly contain the leading
385 * Return the offset to the 1st byte of the boundary delimiter line.
386 * Set boundary_line_len to the length of the entire boundary delimiter.
387 * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
390 find_first_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
391 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
393 gint offset = start, next_offset, line_len, boundary_start;
395 while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
396 boundary_start = offset;
397 if (((tvb_strneql(tvb, offset, (const guint8 *)"--", 2) == 0)
398 && (tvb_strneql(tvb, offset + 2, boundary, boundary_len) == 0)))
400 /* Boundary string; now check if last */
401 if ((tvb_length_remaining(tvb, offset + 2 + boundary_len + 2) >= 0)
402 && (tvb_strneql(tvb, offset + 2 + boundary_len,
403 (const guint8 *)"--", 2) == 0)) {
404 *last_boundary = TRUE;
406 *last_boundary = FALSE;
408 /* Look for line end of the boundary line */
409 line_len = tvb_find_line_end(tvb, offset, -1, &offset, FALSE);
410 if (line_len == -1) {
411 *boundary_line_len = -1;
413 *boundary_line_len = offset - boundary_start;
415 return boundary_start;
417 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
418 if (line_len == -1) {
421 offset = next_offset;
428 * Unless the first boundary, subsequent boundaries include a line-end sequence
429 * before the dashed boundary string.
431 * Return the offset to the 1st byte of the boundary delimiter line.
432 * Set boundary_line_len to the length of the entire boundary delimiter.
433 * Set last_boundary to TRUE if we've seen the last-boundary delimiter.
436 find_next_boundary(tvbuff_t *tvb, gint start, const guint8 *boundary,
437 gint boundary_len, gint *boundary_line_len, gboolean *last_boundary)
439 gint offset = start, next_offset, line_len, boundary_start;
441 while (tvb_length_remaining(tvb, offset + 2 + boundary_len) > 0) {
442 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
443 if (line_len == -1) {
446 boundary_start = offset + line_len;
447 if (((tvb_strneql(tvb, next_offset, (const guint8 *)"--", 2) == 0)
448 && (tvb_strneql(tvb, next_offset + 2, boundary, boundary_len) == 0)))
450 /* Boundary string; now check if last */
451 if ((tvb_length_remaining(tvb, next_offset + 2 + boundary_len + 2) >= 0)
452 && (tvb_strneql(tvb, next_offset + 2 + boundary_len,
453 (const guint8 *)"--", 2) == 0)) {
454 *last_boundary = TRUE;
456 *last_boundary = FALSE;
458 /* Look for line end of the boundary line */
459 line_len = tvb_find_line_end(tvb, next_offset, -1, &offset, FALSE);
460 if (line_len == -1) {
461 *boundary_line_len = -1;
463 *boundary_line_len = offset - boundary_start;
465 return boundary_start;
467 offset = next_offset;
/*
 * process_preamble(): locates the first boundary line by calling
 * find_first_boundary() from offset 0 and adds a "First boundary: ..." text
 * item for it. When the boundary is not at offset 0 and its line length is
 * positive, a text item spanning offset 0 for body_part_start bytes is
 * added as well. On the paths shown, the value returned is
 * boundary_start + boundary_line_len.
 * NOTE(review): body_part_start is declared in the function-level
 * declaration list and again inside the inner block, so the inner
 * declaration shadows the outer one.
 */
474 * Process the multipart preamble:
475 * [ preamble line-end ] dashed-boundary transport-padding line-end
477 * Return the offset to the start of the first body-part.
480 process_preamble(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
481 gint boundary_len, gboolean *last_boundary)
483 gint boundary_start, boundary_line_len, body_part_start;
486 boundary_start = find_first_boundary(tvb, 0, boundary, boundary_len,
487 &boundary_line_len, last_boundary);
488 if (boundary_start == 0) {
490 proto_tree_add_text(tree, tvb, boundary_start, boundary_line_len,
491 "First boundary: %s",
492 tvb_format_text(tvb, boundary_start, boundary_line_len));
494 return boundary_start + boundary_line_len;
495 } else if (boundary_start > 0) {
496 if (boundary_line_len > 0) {
497 gint body_part_start = boundary_start + boundary_line_len;
500 if (body_part_start > 0) {
501 proto_tree_add_text(tree, tvb, 0, body_part_start,
504 proto_tree_add_text(tree, tvb, boundary_start,
505 boundary_line_len, "First boundary: %s",
506 tvb_format_text(tvb, boundary_start,
509 return body_part_start;
/*
 * process_body_part(): adds an "Encapsulated multipart part" item with its
 * own subtree at `start`, then goes through the part's header lines: each
 * line is fetched, unfolded/compacted and split at the first colon, and
 * known headers are added as string fields via hf_header_array[]. For
 * Content-Type, the value is cut at the first ';', the remainder is kept as
 * `parameters`, and a lower-cased copy of the type is kept in
 * content_type_str. The body (from body_start up to the next boundary found
 * by find_next_boundary()) is wrapped in a sub-tvb and offered first to the
 * multipart-specific dissector table and then to the generic media type
 * table; the data dissector is called in the fallback branches shown.
 * pinfo->private_data is set to `parameters` around the subdissector calls
 * and restored afterwards. The value returned on the path shown is
 * boundary_start + boundary_line_len.
 * NOTE(review): header_str first receives the tvb_get_string() result and
 * is then overwritten with the unfold_and_compact_mime_header() result; if
 * tvb_get_string() hands back caller-owned memory, that first buffer can no
 * longer be freed -- confirm.
 * NOTE(review): `parameters` is assigned a pointer into value_str (which
 * points into header_str), while a later comment says it shares memory with
 * content_type_str -- verify that the buffer it points into is still alive
 * when the subdissectors run.
 */
516 * Process a multipart body-part:
517 * MIME-part-headers [ line-end *OCTET ]
518 * line-end dashed-boundary transport-padding line-end
520 * If applicable, call a media subdissector.
522 * Return the offset to the start of the next body-part.
525 process_body_part(proto_tree *tree, tvbuff_t *tvb, const guint8 *boundary,
526 gint boundary_len, packet_info *pinfo, gint start,
527 gboolean *last_boundary)
529 proto_tree *subtree = NULL;
530 proto_item *ti = NULL;
531 gint offset = start, next_offset;
532 gint line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
533 char *parameters = NULL;
534 gint body_start, boundary_start, boundary_line_len;
536 char *content_type_str = NULL;
539 ti = proto_tree_add_text(tree, tvb, start, 0,
540 "Encapsulated multipart part");
541 subtree = proto_item_add_subtree(ti, ett_multipart_body);
544 * Process the MIME-part-headers
550 char *header_str = tvb_get_string(tvb, offset, next_offset - offset);
552 header_str = unfold_and_compact_mime_header(header_str, &colon_offset);
553 if (colon_offset <= 0) {
555 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
557 tvb_format_text(tvb, offset, next_offset - offset));
562 /* Split header name from header value */
563 header_str[colon_offset] = '\0';
564 hf_index = is_known_multipart_header(header_str, colon_offset);
566 if (hf_index == -1) {
568 proto_tree_add_text(subtree, tvb, offset,
569 next_offset - offset,
571 tvb_format_text(tvb, offset, next_offset - offset));
574 char *value_str = header_str + colon_offset + 1;
577 proto_tree_add_string_format(subtree,
578 hf_header_array[hf_index], tvb,
579 offset, next_offset - offset,
580 (const char *)value_str, "%s",
581 tvb_format_text(tvb, offset, next_offset - offset));
585 case POS_CONTENT_TYPE:
587 /* The Content-Type starts at colon_offset + 1 */
588 gint semicolon_offset = index_of_char(
591 if (semicolon_offset > 0) {
592 value_str[semicolon_offset] = '\0';
593 parameters = value_str + semicolon_offset + 1;
597 #if GLIB_MAJOR_VERSION < 2
598 content_type_str = g_strdup(value_str);
599 g_strdown(content_type_str);
601 content_type_str = g_ascii_strdown(value_str, -1);
611 offset = next_offset;
612 line_len = tvb_find_line_end(tvb, offset, -1, &next_offset, FALSE);
618 proto_tree_add_text(subtree, tvb, offset, next_offset - offset,
619 "%s", tvb_format_text(tvb, offset, next_offset - offset));
621 body_start = next_offset;
627 boundary_start = find_next_boundary(tvb, body_start, boundary, boundary_len,
628 &boundary_line_len, last_boundary);
629 if (boundary_start > 0) {
630 gint body_len = boundary_start - body_start;
631 tvbuff_t *tmp_tvb = tvb_new_subset(tvb, body_start,
634 if (content_type_str) {
638 void *save_private_data = pinfo->private_data;
641 pinfo->private_data = parameters;
643 * First try the dedicated multipart dissector table
645 dissected = dissector_try_string(multipart_media_subdissector_table,
646 content_type_str, tmp_tvb, pinfo, subtree);
649 * Fall back to the default media dissector table
651 dissected = dissector_try_string(media_type_dissector_table,
652 content_type_str, tmp_tvb, pinfo, subtree);
654 pinfo->private_data = save_private_data;
655 g_free(content_type_str);
656 content_type_str = NULL;
657 parameters = NULL; /* Shares same memory as content_type_str */
659 call_dissector(data_handle, tmp_tvb, pinfo, subtree);
662 call_dissector(data_handle, tmp_tvb, pinfo, subtree);
665 if (*last_boundary == TRUE) {
666 proto_tree_add_text(tree, tvb,
667 boundary_start, boundary_line_len,
669 tvb_format_text(tvb, boundary_start,
672 proto_tree_add_text(tree, tvb,
673 boundary_start, boundary_line_len,
675 tvb_format_text(tvb, boundary_start,
679 return boundary_start + boundary_line_len;
/*
 * dissect_multipart(): obtains the boundary through get_multipart_info().
 * If that returns NULL, an explanatory text item is added and the whole tvb
 * is passed to the data dissector. Otherwise the protocol item is added
 * (with type and boundary appended to its text) together with the
 * hf_multipart_type string, COL_INFO is fenced, the preamble is processed,
 * and process_body_part() is called repeatedly until last_boundary becomes
 * TRUE; any bytes remaining after that are labelled "Trailer".
 * cleanup_multipart_info() is called on the -1 error paths and at the end.
 * NOTE(review): the CLEANUP_PUSH call is commented out, so m_info is
 * presumably not released if a callee throws an exception -- confirm.
 */
686 * Call this method to actually dissect the multipart body.
687 * NOTE - Only do so if a boundary string has been found!
689 static void dissect_multipart(tvbuff_t *tvb, packet_info *pinfo,
692 proto_tree *subtree = NULL;
693 proto_item *ti = NULL;
694 multipart_info_t *m_info = get_multipart_info(pinfo);
695 gint header_start = 0;
699 gboolean last_boundary = FALSE;
701 if (m_info == NULL) {
703 * We can't get the required multipart information
705 proto_tree_add_text(tree, tvb, 0, -1,
706 "The multipart dissector could not find "
707 "the required boundary parameter.");
708 call_dissector(data_handle, tvb, pinfo, tree);
711 boundary = (guint8 *)m_info->boundary;
712 boundary_len = m_info->boundary_length;
713 /* Clean up the memory if an exception is thrown */
714 /* CLEANUP_PUSH(cleanup_multipart_info, m_info); */
716 /* Add stuff to the protocol tree */
718 ti = proto_tree_add_item(tree, proto_multipart,
720 subtree = proto_item_add_subtree(ti, ett_multipart);
721 proto_item_append_text(ti, ", Type: %s, Boundary: \"%s\"",
722 m_info->type, m_info->boundary);
723 proto_tree_add_string(subtree, hf_multipart_type,
724 tvb, 0, 0, pinfo->match_string);
728 * Make no entries in Protocol column and Info column on summary display,
729 * but stop sub-dissectors from clearing entered text in summary display.
731 if (check_col(pinfo->cinfo, COL_INFO))
732 col_set_fence(pinfo->cinfo, COL_INFO);
737 * Process the multipart preamble
739 header_start = process_preamble(subtree, tvb, boundary,
740 boundary_len, &last_boundary);
741 if (header_start == -1) {
742 call_dissector(data_handle, tvb, pinfo, subtree);
743 /* Clean up the dynamically allocated memory */
744 cleanup_multipart_info(m_info);
748 * Process the encapsulated bodies
750 while (last_boundary == FALSE) {
751 header_start = process_body_part(subtree, tvb, boundary, boundary_len,
752 pinfo, header_start, &last_boundary);
753 if (header_start == -1) {
754 /* Clean up the dynamically allocated memory */
755 cleanup_multipart_info(m_info);
760 * Process the multipart trailer
763 if (tvb_length_remaining(tvb, header_start) > 0) {
764 proto_tree_add_text(subtree, tvb, header_start, -1, "Trailer");
767 /* Clean up the dynamically allocated memory */
768 cleanup_multipart_info(m_info);
772 /* Returns index of method in multipart_headers */
774 is_known_multipart_header(const char *header_str, guint len)
778 for (i = 1; i < array_length(multipart_headers); i++) {
779 if (len == strlen(multipart_headers[i]) &&
780 strncasecmp(header_str, multipart_headers[i], len) == 0) {
/*
 * proto_register_multipart(): registers the protocol, the hf[] header
 * fields (the multipart type plus one FT_STRING field per POS_* header
 * index) and the ett[] subtrees, looks up the "media_type" dissector table,
 * registers the display_unknown_body_as_text boolean preference, and
 * creates the "multipart_media_type" string dissector table.
 * NOTE(review): the field descriptions cite RFC 3261, whereas the
 * references at the top of this file are RFCs 2045-2049 -- presumably
 * carried over from the SIP dissector; confirm the intended citation.
 * NOTE(review): find_dissector_table("media_type") is called here rather
 * than in the handoff routine; this presumably requires that table to have
 * been registered before this routine runs -- confirm.
 */
789 * Register the protocol with Ethereal.
791 * This format is required because a script is used to build the C function
792 * that calls all the protocol registration.
796 proto_register_multipart(void)
799 /* Setup list of header fields See Section 1.6.1 for details */
800 static hf_register_info hf[] = {
801 { &hf_multipart_type,
803 "mime_multipart.type",
804 FT_STRING, BASE_NONE, NULL, 0x00,
805 "RFC 3261: MIME multipart encapsulation type", HFILL
808 { &hf_header_array[POS_CONTENT_DISPOSITION],
809 { "Content-Disposition",
810 "mime_multipart.header.content-disposition",
811 FT_STRING, BASE_NONE, NULL, 0x00,
812 "RFC 3261: Content-Disposition Header", HFILL
815 { &hf_header_array[POS_CONTENT_ENCODING],
816 { "Content-Encoding",
817 "mime_multipart.header.content-encoding",
818 FT_STRING, BASE_NONE, NULL, 0x00,
819 "RFC 3261: Content-Encoding Header", HFILL
822 { &hf_header_array[POS_CONTENT_LANGUAGE],
823 { "Content-Language",
824 "mime_multipart.header.content-language",
825 FT_STRING, BASE_NONE, NULL, 0x00,
826 "RFC 3261: Content-Language Header", HFILL
829 { &hf_header_array[POS_CONTENT_LENGTH],
831 "mime_multipart.header.content-length",
832 FT_STRING, BASE_NONE, NULL, 0x0,
833 "RFC 3261: Content-Length Header", HFILL
836 { &hf_header_array[POS_CONTENT_TYPE],
838 "mime_multipart.header.content-type",
839 FT_STRING, BASE_NONE,NULL,0x0,
840 "RFC 3261: Content-Type Header", HFILL
848 module_t *multipart_module;
851 * Setup protocol subtree array
853 static gint *ett[] = {
860 * Register the protocol name and description
862 proto_multipart = proto_register_protocol(
863 "MIME Multipart Media Encapsulation",
868 * Required function calls to register
869 * the header fields and subtrees used.
871 proto_register_field_array(proto_multipart, hf, array_length(hf));
872 proto_register_subtree_array(ett, array_length(ett));
875 * Get the content type and Internet media type table
877 media_type_dissector_table = find_dissector_table("media_type");
879 multipart_module = prefs_register_protocol(proto_multipart, NULL);
881 prefs_register_bool_preference(multipart_module,
882 "display_unknown_body_as_text",
883 "Display bodies without media type as text",
884 "Display multipart bodies with no media type dissector"
885 " as raw text (may cause problems with binary data).",
886 &display_unknown_body_as_text);
889 * Dissectors requiring different behavior in cases where the media
890 * is contained in a multipart entity should register their multipart
891 * dissector in the dissector table below, which is similar to the
892 * "media_type" dissector table defined in the HTTP dissector code.
894 multipart_media_subdissector_table = register_dissector_table(
895 "multipart_media_type",
896 "Internet media type (for multipart processing)",
897 FT_STRING, BASE_NONE);
901 /* If this dissector uses sub-dissector registration add a registration routine.
902 This format is required because a script is used to find these routines and
903 create the code that calls these routines.
906 proto_reg_handoff_multipart(void)
908 dissector_handle_t multipart_handle;
911 * When we cannot display the data, call the data dissector
913 data_handle = find_dissector("data");
916 * Handle for multipart dissection
918 multipart_handle = create_dissector_handle(
919 dissect_multipart, proto_multipart);
921 dissector_add_string("media_type",
922 "multipart/mixed", multipart_handle);
923 dissector_add_string("media_type",
924 "multipart/related", multipart_handle);
925 dissector_add_string("media_type",
926 "multipart/alternative", multipart_handle);
927 dissector_add_string("media_type",
928 "multipart/form-data", multipart_handle);