1 /**-*-C-*-**********************************************************************
5 * Utility to convert an ASCII hexdump into a libpcap-format capture file
7 * (c) Copyright 2001 Ashok Narayanan <ashokn@cisco.com>
9 * $Id: text2pcap.c,v 1.7 2001/11/24 08:14:10 guy Exp $
11 * Ethereal - Network traffic analyzer
12 * By Gerald Combs <gerald@ethereal.com>
13 * Copyright 1998 Gerald Combs
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 *******************************************************************************/
31 /*******************************************************************************
33 * This utility reads in an ASCII hexdump of this common format:
35 * 00000000 00 E0 1E A7 05 6F 00 10 5A A0 B9 12 08 00 46 00 .....o..Z.....F.
36 * 00000010 03 68 00 00 00 00 0A 2E EE 33 0F 19 08 7F 0F 19 .h.......3......
37 * 00000020 03 80 94 04 00 00 10 01 16 A2 0A 00 03 50 00 0C .............P..
38 * 00000030 01 01 0F 19 03 80 11 01 1E 61 00 0C 03 01 0F 19 .........a......
40 * Each bytestring line consists of an offset, one or more bytes, and
41 * text at the end. An offset is defined as a hex string of more than
42 * two characters. A byte is defined as a hex string of exactly two
43 * characters. The text at the end is ignored, as is any text before
44 * the offset. Bytes read from a bytestring line are added to the
45 * current packet only if all the following conditions are satisfied:
47 * - No text appears between the offset and the bytes (any bytes appearing after
48 * such text would be ignored)
50 * - The offset must be arithmetically correct, i.e. if the offset is 00000020, then
51 * exactly 32 bytes must have been read into this packet before this. If the offset
52 * is wrong, the packet is immediately terminated
54 * A packet start is signalled by a zero offset.
56 * Lines starting with #TEXT2PCAP are directives. These allow the user
57 * to embed instructions into the capture file which allow text2pcap
58 * to take some actions (e.g. specifying the encapsulation).
59 * Currently no directives are implemented.
61 * Lines beginning with # which are not directives are ignored as
62 * comments. Currently all non-hexdump text is ignored by text2pcap;
63 * in the future, text processing may be added, but lines prefixed
64 * with '#' will still be ignored.
66 * The output is a libpcap capture file containing Ethernet frames by
67 * default. This program takes options which allow the user to add
68 * dummy Ethernet, IP and UDP headers to the packets in order to allow
69 * dumps of L3 or higher protocols to be decoded.
71 * Considerable flexibility is built into this code to read hexdumps
72 * of slightly different formats. For example, any text prefixing the
73 * hexdump line is dropped (including mail forwarding '>'). The offset
74 * can be any hex number of four digits or greater.
76 * This converter cannot read a single packet greater than 64K. Packet
77 * snaplength is automatically set to 64K.
89 #include <sys/types.h>
93 #ifdef HAVE_NETINET_IN_H
94 # include <netinet/in.h>
106 #ifdef NEED_STRPTIME_H
107 # include "strptime.h"
118 #include "text2pcap.h"
120 /*--- Options --------------------------------------------------------------------*/
127 /* Dummy Ethernet header */
128 int hdr_ethernet = FALSE;
129 unsigned long hdr_ethernet_proto = 0;
131 /* Dummy IP header */
133 unsigned long hdr_ip_proto = 0;
135 /* Dummy UDP header */
137 unsigned long hdr_udp_dest = 0;
138 unsigned long hdr_udp_src = 0;
140 /*--- Local data -----------------------------------------------------------------*/
142 /* This is where we store the packet currently being built */
143 #define MAX_PACKET 64000
144 unsigned char packet_buf[MAX_PACKET];
145 unsigned long curr_offset = 0;
147 /* This buffer contains strings present before the packet offset 0 */
148 #define PACKET_PREAMBLE_MAX_LEN 2048
149 static unsigned char packet_preamble[PACKET_PREAMBLE_MAX_LEN+1];
150 static int packet_preamble_len = 0;
152 /* Number of packets read and written */
153 unsigned long num_packets_read = 0;
154 unsigned long num_packets_written = 0;
156 /* Time code of packet, derived from packet_preamble */
157 static unsigned long ts_sec = 0;
158 static unsigned long ts_usec = 0;
159 static char *ts_fmt = NULL;
162 char *input_filename;
163 FILE *input_file = NULL;
165 char *output_filename;
166 FILE *output_file = NULL;
168 /* Offset base to parse */
169 unsigned long offset_base = 16;
173 /* ----- State machine -----------------------------------------------------------*/
175 /* Current state of parser */
177 INIT, /* Waiting for start of new packet */
178 START_OF_LINE, /* Starting from beginning of line */
179 READ_OFFSET, /* Just read the offset */
180 READ_BYTE, /* Just read a byte */
181 READ_TEXT, /* Just read text - ignore until EOL */
183 parser_state_t state = INIT;
185 const char *state_str[] = {"Init",
192 const char *token_str[] = {"",
200 /* ----- Skeleton Packet Headers --------------------------------------------------*/
203 unsigned char src_addr[6];
204 unsigned char dest_addr[6];
205 unsigned short l3pid;
208 hdr_ethernet_t HDR_ETHERNET = {
209 {0x01, 0x01, 0x01, 0x01, 0x01, 0x01},
210 {0x02, 0x02, 0x02, 0x02, 0x02, 0x02},
214 unsigned char ver_hdrlen;
216 unsigned short packet_length;
217 unsigned short identification;
219 unsigned char fragment;
221 unsigned char protocol;
222 unsigned short hdr_checksum;
223 unsigned long src_addr;
224 unsigned long dest_addr;
227 hdr_ip_t HDR_IP = {0x45, 0, 0, 0x3412, 0, 0, 0xff, 0, 0, 0x01010101, 0x02020202};
230 unsigned short source_port;
231 unsigned short dest_port;
232 unsigned short length;
233 unsigned short checksum;
236 hdr_udp_t HDR_UDP = {0, 0, 0, 0};
240 /*----------------------------------------------------------------------
241 * Stuff for writing a PCap file
243 #define PCAP_MAGIC 0xa1b2c3d4
245 /* "libpcap" file header (including magic number). */
247 unsigned long magic; /* magic */
248 unsigned short version_major; /* major version number */
249 unsigned short version_minor; /* minor version number */
250 unsigned long thiszone; /* GMT to local correction */
251 unsigned long sigfigs; /* accuracy of timestamps */
252 unsigned long snaplen; /* max length of captured packets, in octets */
253 unsigned long network; /* data link type */
256 /* "libpcap" record header. */
258 unsigned long ts_sec; /* timestamp seconds */
259 unsigned long ts_usec; /* timestamp microseconds */
260 unsigned long incl_len; /* number of octets of packet saved in file */
261 unsigned long orig_len; /* actual length of packet */
264 /* Link-layer type; see net/bpf.h for details */
265 unsigned long pcap_link_type = 1; /* Default is DLT-EN10MB */
267 /*----------------------------------------------------------------------
268 * Parse a single hex number
269 * Will abort the program if it can't parse the number
270 * Pass in TRUE if this is an offset, FALSE if not
273 parse_num (char *str, int offset)
278 num = strtoul(str, &c, offset ? offset_base : 16);
280 fprintf(stderr, "FATAL ERROR: Bad hex number? [%s]\n", str);
286 /*----------------------------------------------------------------------
287 * Write this byte into current packet
290 write_byte (char *str)
294 num = parse_num(str, FALSE);
295 packet_buf[curr_offset] = num;
299 /*----------------------------------------------------------------------
300 * Remove bytes from the current packet
303 unwrite_bytes (unsigned long nbytes)
305 curr_offset -= nbytes;
308 /*----------------------------------------------------------------------
309 * Compute one's complement checksum (from RFC1071)
311 static unsigned short
312 in_checksum (void *buf, unsigned long count)
314 unsigned long sum = 0;
315 unsigned short *addr = buf;
318 /* This is the inner loop */
319 sum += ntohs(* (unsigned short *) addr++);
323 /* Add left-over byte, if any */
325 sum += * (unsigned char *) addr;
327 /* Fold 32-bit sum to 16 bits */
329 sum = (sum & 0xffff) + (sum >> 16);
334 /*----------------------------------------------------------------------
335 * Write current packet out
338 write_current_packet (void)
343 int eth_trailer_length = 0;
344 struct pcaprec_hdr ph;
346 if (curr_offset > 0) {
347 /* Write the packet */
349 /* Compute packet length */
350 length = curr_offset;
351 if (hdr_udp) { length += sizeof(HDR_UDP); udp_length = length; }
352 if (hdr_ip) { length += sizeof(HDR_IP); ip_length = length; }
354 length += sizeof(HDR_ETHERNET);
357 eth_trailer_length = 60 - length;
362 /* Write PCap header */
364 ph.ts_usec = ts_usec;
365 ph.incl_len = length;
366 ph.orig_len = length;
367 fwrite(&ph, sizeof(ph), 1, output_file);
369 /* Write Ethernet header */
371 HDR_ETHERNET.l3pid = htons(hdr_ethernet_proto);
372 fwrite(&HDR_ETHERNET, sizeof(HDR_ETHERNET), 1, output_file);
375 /* Write IP header */
377 HDR_IP.packet_length = htons(ip_length);
378 HDR_IP.protocol = hdr_ip_proto;
379 HDR_IP.hdr_checksum = 0;
380 HDR_IP.hdr_checksum = in_checksum(&HDR_IP, sizeof(HDR_IP));
381 fwrite(&HDR_IP, sizeof(HDR_IP), 1, output_file);
384 /* Write UDP header */
386 HDR_UDP.source_port = htons(hdr_udp_src);
387 HDR_UDP.dest_port = htons(hdr_udp_dest);
388 HDR_UDP.length = htons(udp_length);
390 fwrite(&HDR_UDP, sizeof(HDR_UDP), 1, output_file);
394 fwrite(packet_buf, curr_offset, 1, output_file);
396 /* Write Ethernet trailer */
397 if (hdr_ethernet && eth_trailer_length > 0) {
398 memset(tempbuf, 0, eth_trailer_length);
399 fwrite(tempbuf, eth_trailer_length, 1, output_file);
403 fprintf(stderr, "Wrote packet of %lu bytes\n", curr_offset);
404 num_packets_written ++;
409 /*----------------------------------------------------------------------
410 * Write the PCap file header
413 write_file_header (void)
417 fh.magic = PCAP_MAGIC;
418 fh.version_major = 2;
419 fh.version_minor = 4;
423 fh.network = pcap_link_type;
425 fwrite(&fh, sizeof(fh), 1, output_file);
428 /*----------------------------------------------------------------------
429 * Append a token to the packet preamble.
432 append_to_preamble(char *str)
436 if (packet_preamble_len != 0) {
437 if (packet_preamble_len == PACKET_PREAMBLE_MAX_LEN)
438 return; /* no room to add more preamble */
439 /* Add a blank separator between the previous token and this token. */
440 packet_preamble[packet_preamble_len++] = ' ';
442 toklen = strlen(str);
444 if (packet_preamble_len + toklen > PACKET_PREAMBLE_MAX_LEN)
445 return; /* no room to add the token to the preamble */
446 strcpy(&packet_preamble[packet_preamble_len], str);
447 packet_preamble_len += toklen;
451 /*----------------------------------------------------------------------
452 * Parse the preamble to get the timecode.
455 parse_preamble (void)
464 * If no "-t" flag was specified, don't attempt to parse a packet
465 * preamble to extract a time stamp.
474 * Null-terminate the preamble.
476 packet_preamble[packet_preamble_len] = '\0';
478 /* Ensure preamble has more than two chars before attempting to parse.
479 * This should cover line breaks etc that get counted.
481 if ( strlen(packet_preamble) > 2 ) {
482 memset(&timecode, '\0', sizeof timecode);
484 /* Get Time leaving subseconds */
485 subsecs = strptime( packet_preamble, ts_fmt, &timecode );
486 if (subsecs != NULL) {
487 /* Get the long time from the tm structure */
488 ts_sec = (unsigned long)mktime( &timecode );
490 ts_sec = -1; /* we failed to parse it */
492 /* This will ensure incorrectly parsed dates get set to zero */
493 if ( -1L == (long)ts_sec )
500 /* Parse subseconds */
501 ts_usec = strtol(subsecs, &p, 10);
507 * Convert that number to a number
508 * of microseconds; if it's N digits
509 * long, it's in units of 10^(-N) seconds,
510 * so, to convert it to units of
511 * 10^-6 seconds, we multiply by
514 subseclen = p - subsecs;
517 * *More* than 6 digits; 6-N is
518 * negative, so we divide by
521 for (i = subseclen - 6; i != 0; i--)
523 } else if (subseclen < 6) {
524 for (i = 6 - subseclen; i != 0; i--)
532 /*printf("Format(%s), time(%u), subsecs(%u)\n\n", ts_fmt, ts_sec, ts_usec);*/
535 packet_preamble_len = 0;
538 /*----------------------------------------------------------------------
542 start_new_packet (void)
545 fprintf(stderr, "Start new packet\n");
547 /* Write out the current packet, if required */
548 write_current_packet();
552 /* Ensure we parse the packet preamble as it may contain the time */
556 /*----------------------------------------------------------------------
557 * Process a directive
560 process_directive (char *str)
562 fprintf(stderr, "\n--- Directive [%s] currently unsupported ---\n", str+10);
566 /*----------------------------------------------------------------------
567 * Parse a single token (called from the scanner)
570 parse_token (token_t token, char *str)
575 * This is implemented as a simple state machine of five states.
576 * State transitions are caused by tokens being received from the
577 * scanner. The code should be self-documenting.
581 /* Sanitize - remove all '\r' */
583 if (str!=NULL) { while ((c = strchr(str, '\r')) != NULL) *c=' '; }
585 fprintf(stderr, "(%s, %s \"%s\") -> (",
586 state_str[state], token_str[token], str ? str : "");
591 /* ----- Waiting for new packet -------------------------------------------*/
595 append_to_preamble(str);
598 process_directive(str);
601 num = parse_num(str, TRUE);
603 /* New packet starts here */
613 /* ----- Processing packet, start of new line -----------------------------*/
617 append_to_preamble(str);
620 process_directive(str);
623 num = parse_num(str, TRUE);
625 /* New packet starts here */
628 } else if (num != curr_offset) {
630 * The offset we read isn't the one we expected.
631 * This may only mean that we mistakenly interpreted
632 * some text as byte values (e.g., if the text dump
633 * of packet data included a number with spaces around
634 * it). If the offset is less than what we expected,
635 * assume that's the problem, and throw away the putative
638 if (num < curr_offset) {
639 unwrite_bytes(curr_offset - num);
642 /* Bad offset; switch to INIT state */
644 fprintf(stderr, "Inconsistent offset. Expecting %0lX, got %0lX. Ignoring rest of packet\n",
646 write_current_packet();
657 /* ----- Processing packet, read offset -----------------------------------*/
661 /* Record the byte */
671 state = START_OF_LINE;
678 /* ----- Processing packet, read byte -------------------------------------*/
682 /* Record the byte */
691 state = START_OF_LINE;
698 /* ----- Processing packet, read text -------------------------------------*/
702 state = START_OF_LINE;
710 fprintf(stderr, "FATAL ERROR: Bad state (%d)", state);
715 fprintf(stderr, ", %s)\n", state_str[state]);
719 /*----------------------------------------------------------------------
720 * Print helpstring and exit
723 help (char *progname)
727 "Usage: %s [-d] [-q] [-o h|o] [-l typenum] [-e l3pid] [-i proto] \n"
728 " [-u srcp destp] [-t timefmt] <input-filename> <output-filename>\n"
730 "where <input-filename> specifies input filename (use - for standard input)\n"
731 " <output-filename> specifies output filename (use - for standard output)\n"
733 "[options] are one or more of the following \n"
735 " -w filename : Write capfile to <filename>. Default is standard output\n"
736 " -h : Display this help message \n"
737 " -d : Generate detailed debug of parser states \n"
738 " -o hex|oct : Parse offsets as (h)ex or (o)ctal. Default is hex\n"
739 " -l typenum : Specify link-layer type number. Default is 1 (Ethernet). \n"
740 " See net/bpf.h for list of numbers.\n"
741 " -q : Generate no output at all (automatically turns off -d)\n"
742 " -e l3pid : Prepend dummy Ethernet II header with specified L3PID (in HEX)\n"
743 " Example: -e 0x800\n"
744 " -i proto : Prepend dummy IP header with specified IP protocol (in DECIMAL). \n"
745 " Automatically prepends Ethernet header as well. Example: -i 46\n"
746 " -u srcp destp: Prepend dummy UDP header with specified dest and source ports (in DECIMAL).\n"
747 " Automatically prepends Ethernet and IP headers as well\n"
748 " Example: -u 30 40\n"
749 " -t timefmt : Treats the text before the packet as a time code parsed by strptime format patterns.\n"
750 " Example: The time \"10:15:14.5476\" has the format code \"%%H:%%M:%%S.\"\n"
751 " NOTE: The subsecond component delimiter must be specified (.) but no\n"
752 " pattern is required; the remaining number is assumed to be subseconds."
759 /*----------------------------------------------------------------------
763 parse_options (int argc, char *argv[])
768 /* Scan CLI parameters */
769 while ((c = getopt(argc, argv, "dqr:w:e:i:l:o:u:t:")) != -1) {
771 case '?': help(argv[0]); break;
772 case 'h': help(argv[0]); break;
773 case 'd': if (!quiet) debug++; break;
774 case 'q': quiet = TRUE; debug = FALSE; break;
775 case 'l': pcap_link_type = atoi(optarg); break;
777 if (optarg[0]!='h' && optarg[0] != 'o') {
778 fprintf(stderr, "Bad argument for '-e': %s\n", optarg);
781 offset_base = (optarg[0]=='o') ? 8 : 16;
785 if (sscanf(optarg, "%lx", &hdr_ethernet_proto) < 1) {
786 fprintf(stderr, "Bad argument for '-e': %s\n", optarg);
793 if (sscanf(optarg, "%ld", &hdr_ip_proto) < 1) {
794 fprintf(stderr, "Bad argument for '-i': %s\n", optarg);
798 hdr_ethernet_proto = 0x800;
807 hdr_udp_src = strtol(optarg, &p, 10);
808 if (p == optarg || (*p != ',' && *p != '\0')) {
809 fprintf(stderr, "Bad src port for '-u'\n");
813 fprintf(stderr, "No dest port specified for '-u'\n");
818 hdr_udp_dest = strtol(optarg, &p, 10);
819 if (p == optarg || *p != '\0') {
820 fprintf(stderr, "Bad dest port for '-u'\n");
826 hdr_ethernet_proto = 0x800;
834 if (optind >= argc || argc-optind < 2) {
835 fprintf(stderr, "Must specify input and output filename\n");
839 if (strcmp(argv[optind], "-")) {
840 input_filename = strdup(argv[optind]);
841 input_file = fopen(input_filename, "rb");
843 fprintf(stderr, "Cannot open file [%s] for reading: %s\n",
844 input_filename, strerror(errno));
848 input_filename = "Standard input";
852 if (strcmp(argv[optind+1], "-")) {
853 output_filename = strdup(argv[optind+1]);
854 output_file = fopen(output_filename, "wb");
856 fprintf(stderr, "Cannot open file [%s] for writing: %s\n",
857 output_filename, strerror(errno));
861 output_filename = "Standard output";
862 output_file = stdout;
865 /* Some validation */
866 if (pcap_link_type != 1 && hdr_ethernet) {
867 fprintf(stderr, "Dummy headers (-e, -i, -u) cannot be specified with link type override (-l)\n");
871 /* Set up our variables */
874 input_filename = "Standard input";
877 output_file = stdout;
878 output_filename = "Standard output";
881 /* Display summary of our state */
883 fprintf(stderr, "Input from: %s\n", input_filename);
884 fprintf(stderr, "Output to: %s\n", output_filename);
886 if (hdr_ethernet) fprintf(stderr, "Generate dummy Ethernet header: Protocol: 0x%0lX\n",
888 if (hdr_ip) fprintf(stderr, "Generate dummy IP header: Protocol: %ld\n",
890 if (hdr_udp) fprintf(stderr, "Generate dummy UDP header: Source port: %ld. Dest port: %ld\n",
891 hdr_udp_src, hdr_udp_dest);
895 int main(int argc, char *argv[])
897 parse_options(argc, argv);
899 assert(input_file != NULL);
900 assert(output_file != NULL);
906 write_current_packet();
908 fprintf(stderr, "\n-------------------------\n");
910 fprintf(stderr, "Read %ld potential packets, wrote %ld packets\n",
911 num_packets_read, num_packets_written);