7554e6e7f67c3dfcb4f6b4a6e13148bff9f26597
[metze/wireshark/wip.git] / epan / dfilter / gencode.c
1 /*
2  * Wireshark - Network traffic analyzer
3  * By Gerald Combs <gerald@wireshark.org>
4  * Copyright 2001 Gerald Combs
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8
9 #include "config.h"
10
11 #include "dfilter-int.h"
12 #include "gencode.h"
13 #include "dfvm.h"
14 #include "syntax-tree.h"
15 #include "sttype-range.h"
16 #include "sttype-test.h"
17 #include "sttype-set.h"
18 #include "sttype-function.h"
19 #include "ftypes/ftypes.h"
20
21 static void
22 gencode(dfwork_t *dfw, stnode_t *st_node);
23
24 static int
25 gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp);
26
27 static void
28 dfw_append_insn(dfwork_t *dfw, dfvm_insn_t *insn)
29 {
30         insn->id = dfw->next_insn_id;
31         dfw->next_insn_id++;
32         g_ptr_array_add(dfw->insns, insn);
33 }
34
35 static void
36 dfw_append_const(dfwork_t *dfw, dfvm_insn_t *insn)
37 {
38         insn->id = dfw->next_const_id;
39         dfw->next_const_id++;
40         g_ptr_array_add(dfw->consts, insn);
41 }
42
43 /* returns register number */
44 static int
45 dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo)
46 {
47         dfvm_insn_t     *insn;
48         dfvm_value_t    *val1, *val2;
49         int             reg = -1;
50         gboolean        added_new_hfinfo = FALSE;
51
52         /* Rewind to find the first field of this name. */
53         while (hfinfo->same_name_prev_id != -1) {
54                 hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
55         }
56
57         /* Keep track of which registers
58          * were used for which hfinfo's so that we
59          * can re-use registers. */
60         reg = GPOINTER_TO_INT(
61                         g_hash_table_lookup(dfw->loaded_fields, hfinfo));
62         if (reg) {
63                 /* Reg's are stored in has as reg+1, so
64                  * that the non-existence of a hfinfo in
65                  * the hash, or 0, can be differentiated from
66                  * a hfinfo being loaded into register #0. */
67                 reg--;
68         }
69         else {
70                 reg = dfw->next_register++;
71                 g_hash_table_insert(dfw->loaded_fields,
72                         hfinfo, GINT_TO_POINTER(reg + 1));
73
74                 added_new_hfinfo = TRUE;
75         }
76
77         insn = dfvm_insn_new(READ_TREE);
78         val1 = dfvm_value_new(HFINFO);
79         val1->value.hfinfo = hfinfo;
80         val2 = dfvm_value_new(REGISTER);
81         val2->value.numeric = reg;
82
83         insn->arg1 = val1;
84         insn->arg2 = val2;
85         dfw_append_insn(dfw, insn);
86
87         if (added_new_hfinfo) {
88                 while (hfinfo) {
89                         /* Record the FIELD_ID in hash of interesting fields. */
90                         g_hash_table_insert(dfw->interesting_fields,
91                             GINT_TO_POINTER(hfinfo->id),
92                             GUINT_TO_POINTER(TRUE));
93                         hfinfo = hfinfo->same_name_next;
94                 }
95         }
96
97         return reg;
98 }
99
100 /* returns register number */
101 static int
102 dfw_append_put_fvalue(dfwork_t *dfw, fvalue_t *fv)
103 {
104         dfvm_insn_t     *insn;
105         dfvm_value_t    *val1, *val2;
106         int             reg;
107
108         insn = dfvm_insn_new(PUT_FVALUE);
109         val1 = dfvm_value_new(FVALUE);
110         val1->value.fvalue = fv;
111         val2 = dfvm_value_new(REGISTER);
112         reg = dfw->first_constant--;
113         val2->value.numeric = reg;
114         insn->arg1 = val1;
115         insn->arg2 = val2;
116         dfw_append_const(dfw, insn);
117
118         return reg;
119 }
120
121 /* returns register number */
122 static int
123 dfw_append_mk_range(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
124 {
125         int                     hf_reg, reg;
126         stnode_t                *entity;
127         dfvm_insn_t             *insn;
128         dfvm_value_t            *val;
129
130         entity = sttype_range_entity(node);
131
132         /* XXX, check if p_jmp logic is OK */
133         hf_reg = gen_entity(dfw, entity, p_jmp);
134
135         insn = dfvm_insn_new(MK_RANGE);
136
137         val = dfvm_value_new(REGISTER);
138         val->value.numeric = hf_reg;
139         insn->arg1 = val;
140
141         val = dfvm_value_new(REGISTER);
142         reg =dfw->next_register++;
143         val->value.numeric = reg;
144         insn->arg2 = val;
145
146         val = dfvm_value_new(DRANGE);
147         val->value.drange = sttype_range_drange(node);
148         insn->arg3 = val;
149
150         sttype_range_remove_drange(node);
151
152         dfw_append_insn(dfw, insn);
153
154         return reg;
155 }
156
157 /* returns register number that the functions's result will be in. */
158 static int
159 dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
160 {
161         GSList *params;
162         int i, num_params, reg;
163         dfvm_value_t **jmps;
164         dfvm_insn_t     *insn;
165         dfvm_value_t    *val1, *val2, *val;
166
167         params = sttype_function_params(node);
168         num_params = g_slist_length(params);
169
170         /* Array to hold the instructions that need to jump to
171          * an instruction if they fail. */
172         jmps = (dfvm_value_t **)g_malloc(num_params * sizeof(dfvm_value_t*));
173
174         /* Create the new DFVM instruction */
175         insn = dfvm_insn_new(CALL_FUNCTION);
176
177         val1 = dfvm_value_new(FUNCTION_DEF);
178         val1->value.funcdef = sttype_function_funcdef(node);
179         insn->arg1 = val1;
180         val2 = dfvm_value_new(REGISTER);
181         val2->value.numeric = dfw->next_register++;
182         insn->arg2 = val2;
183         insn->arg3 = NULL;
184         insn->arg4 = NULL;
185
186         i = 0;
187         while (params) {
188                 jmps[i] = NULL;
189                 reg = gen_entity(dfw, (stnode_t *)params->data, &jmps[i]);
190
191                 val = dfvm_value_new(REGISTER);
192                 val->value.numeric = reg;
193
194                 switch(i) {
195                         case 0:
196                                 insn->arg3 = val;
197                                 break;
198                         case 1:
199                                 insn->arg4 = val;
200                                 break;
201                         default:
202                                 g_assert_not_reached();
203                 }
204
205                 params = params->next;
206                 i++;
207         }
208
209         dfw_append_insn(dfw, insn);
210
211         /* If any of our parameters failed, send them to
212          * our own failure instruction. This *has* to be done
213          * after we caled dfw_append_insn above so that
214          * we know what the next DFVM insruction is, via
215          * dfw->next_insn_id */
216         for (i = 0; i < num_params; i++) {
217                 if (jmps[i]) {
218                         jmps[i]->value.numeric = dfw->next_insn_id;
219                 }
220         }
221
222         /* We need another instruction to jump to another exit
223          * place, if the call() of our function failed for some reaosn */
224         insn = dfvm_insn_new(IF_FALSE_GOTO);
225         g_assert(p_jmp);
226         *p_jmp = dfvm_value_new(INSN_NUMBER);
227         insn->arg1 = *p_jmp;
228         dfw_append_insn(dfw, insn);
229
230         g_free(jmps);
231
232         return val2->value.numeric;
233 }
234
235
236 static void
237 gen_relation(dfwork_t *dfw, dfvm_opcode_t op, stnode_t *st_arg1, stnode_t *st_arg2)
238 {
239         dfvm_insn_t     *insn;
240         dfvm_value_t    *val1, *val2;
241         dfvm_value_t    *jmp1 = NULL, *jmp2 = NULL;
242         int             reg1 = -1, reg2 = -1;
243
244     /* Create code for the LHS and RHS of the relation */
245     reg1 = gen_entity(dfw, st_arg1, &jmp1);
246     reg2 = gen_entity(dfw, st_arg2, &jmp2);
247
248     /* Then combine them in a DFVM insruction */
249         insn = dfvm_insn_new(op);
250         val1 = dfvm_value_new(REGISTER);
251         val1->value.numeric = reg1;
252         val2 = dfvm_value_new(REGISTER);
253         val2->value.numeric = reg2;
254         insn->arg1 = val1;
255         insn->arg2 = val2;
256         dfw_append_insn(dfw, insn);
257
258     /* If either of the relation argumnents need an "exit" instruction
259      * to jump to (on failure), mark them */
260         if (jmp1) {
261                 jmp1->value.numeric = dfw->next_insn_id;
262         }
263
264         if (jmp2) {
265                 jmp2->value.numeric = dfw->next_insn_id;
266         }
267 }
268
269 static void
270 fixup_jumps(gpointer data, gpointer user_data)
271 {
272         dfvm_value_t *jmp = (dfvm_value_t*)data;
273         dfwork_t *dfw = (dfwork_t*)user_data;
274
275         if (jmp) {
276                 jmp->value.numeric = dfw->next_insn_id;
277         }
278 }
279
280 /* Generate the code for the in operator.  It behaves much like an OR-ed
281  * series of == tests, but without the redundant existence checks. */
282 static void
283 gen_relation_in(dfwork_t *dfw, stnode_t *st_arg1, stnode_t *st_arg2)
284 {
285         dfvm_insn_t     *insn;
286         dfvm_value_t    *val1, *val2;
287         dfvm_value_t    *jmp1 = NULL, *jmp2 = NULL;
288         int             reg1 = -1, reg2 = -1;
289         stnode_t        *node;
290         GSList          *nodelist;
291         GSList          *jumplist = NULL;
292
293         /* Create code for the LHS of the relation */
294         reg1 = gen_entity(dfw, st_arg1, &jmp1);
295
296         /* Create code for the set on the RHS of the relation */
297         nodelist = (GSList*)stnode_data(st_arg2);
298         while (nodelist) {
299                 node = (stnode_t*)nodelist->data;
300                 reg2 = gen_entity(dfw, node, &jmp2);
301
302                 /* Add test to see if the item matches */
303                 insn = dfvm_insn_new(ANY_EQ);
304                 val1 = dfvm_value_new(REGISTER);
305                 val1->value.numeric = reg1;
306                 val2 = dfvm_value_new(REGISTER);
307                 val2->value.numeric = reg2;
308                 insn->arg1 = val1;
309                 insn->arg2 = val2;
310                 dfw_append_insn(dfw, insn);
311
312                 nodelist = g_slist_next(nodelist);
313
314                 /* Exit as soon as we find a match */
315                 if (nodelist) {
316                         insn = dfvm_insn_new(IF_TRUE_GOTO);
317                         val1 = dfvm_value_new(INSN_NUMBER);
318                         insn->arg1 = val1;
319                         dfw_append_insn(dfw, insn);
320                         jumplist = g_slist_prepend(jumplist, val1);
321                 }
322
323                 /* If an item is not present, just jump to the next item */
324                 if (jmp2) {
325                         jmp2->value.numeric = dfw->next_insn_id;
326                         jmp2 = NULL;
327                 }
328         }
329
330         /* Jump here if the LHS entity was not present */
331         if (jmp1) {
332                 jmp1->value.numeric = dfw->next_insn_id;
333         }
334         /* Jump here if any of the items in the set matched */
335         g_slist_foreach(jumplist, fixup_jumps, dfw);
336
337         /* Clean up */
338         g_slist_free(jumplist);
339         nodelist = (GSList*)stnode_data(st_arg2);
340         set_nodelist_free(nodelist);
341 }
342
343 /* Parse an entity, returning the reg that it gets put into.
344  * p_jmp will be set if it has to be set by the calling code; it should
345  * be set to the place to jump to, to return to the calling code,
346  * if the load of a field from the proto_tree fails. */
347 static int
348 gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp)
349 {
350         sttype_id_t       e_type;
351         dfvm_insn_t       *insn;
352         header_field_info *hfinfo;
353         int reg = -1;
354         e_type = stnode_type_id(st_arg);
355
356         if (e_type == STTYPE_FIELD) {
357                 hfinfo = (header_field_info*)stnode_data(st_arg);
358                 reg = dfw_append_read_tree(dfw, hfinfo);
359
360                 insn = dfvm_insn_new(IF_FALSE_GOTO);
361                 g_assert(p_jmp);
362                 *p_jmp = dfvm_value_new(INSN_NUMBER);
363                 insn->arg1 = *p_jmp;
364                 dfw_append_insn(dfw, insn);
365         }
366         else if (e_type == STTYPE_FVALUE) {
367                 reg = dfw_append_put_fvalue(dfw, (fvalue_t *)stnode_data(st_arg));
368         }
369         else if (e_type == STTYPE_RANGE) {
370                 reg = dfw_append_mk_range(dfw, st_arg, p_jmp);
371         }
372         else if (e_type == STTYPE_FUNCTION) {
373                 reg = dfw_append_function(dfw, st_arg, p_jmp);
374         }
375         else {
376                 /* printf("sttype_id is %u\n", (unsigned)e_type); */
377                 g_assert_not_reached();
378         }
379         return reg;
380 }
381
382
383 static void
384 gen_test(dfwork_t *dfw, stnode_t *st_node)
385 {
386         test_op_t       st_op;
387         stnode_t        *st_arg1, *st_arg2;
388         dfvm_value_t    *val1;
389         dfvm_insn_t     *insn;
390
391         header_field_info       *hfinfo;
392
393         sttype_test_get(st_node, &st_op, &st_arg1, &st_arg2);
394
395         switch (st_op) {
396                 case TEST_OP_UNINITIALIZED:
397                         g_assert_not_reached();
398                         break;
399
400                 case TEST_OP_EXISTS:
401                         val1 = dfvm_value_new(HFINFO);
402                         hfinfo = (header_field_info*)stnode_data(st_arg1);
403
404                         /* Rewind to find the first field of this name. */
405                         while (hfinfo->same_name_prev_id != -1) {
406                                 hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
407                         }
408                         val1->value.hfinfo = hfinfo;
409                         insn = dfvm_insn_new(CHECK_EXISTS);
410                         insn->arg1 = val1;
411                         dfw_append_insn(dfw, insn);
412
413                         /* Record the FIELD_ID in hash of interesting fields. */
414                         while (hfinfo) {
415                                 g_hash_table_insert(dfw->interesting_fields,
416                                         GINT_TO_POINTER(hfinfo->id),
417                                         GUINT_TO_POINTER(TRUE));
418                                 hfinfo = hfinfo->same_name_next;
419                         }
420
421                         break;
422
423                 case TEST_OP_NOT:
424                         gencode(dfw, st_arg1);
425                         insn = dfvm_insn_new(NOT);
426                         dfw_append_insn(dfw, insn);
427                         break;
428
429                 case TEST_OP_AND:
430                         gencode(dfw, st_arg1);
431
432                         insn = dfvm_insn_new(IF_FALSE_GOTO);
433                         val1 = dfvm_value_new(INSN_NUMBER);
434                         insn->arg1 = val1;
435                         dfw_append_insn(dfw, insn);
436
437                         gencode(dfw, st_arg2);
438                         val1->value.numeric = dfw->next_insn_id;
439                         break;
440
441                 case TEST_OP_OR:
442                         gencode(dfw, st_arg1);
443
444                         insn = dfvm_insn_new(IF_TRUE_GOTO);
445                         val1 = dfvm_value_new(INSN_NUMBER);
446                         insn->arg1 = val1;
447                         dfw_append_insn(dfw, insn);
448
449                         gencode(dfw, st_arg2);
450                         val1->value.numeric = dfw->next_insn_id;
451                         break;
452
453                 case TEST_OP_EQ:
454                         gen_relation(dfw, ANY_EQ, st_arg1, st_arg2);
455                         break;
456
457                 case TEST_OP_NE:
458                         gen_relation(dfw, ANY_NE, st_arg1, st_arg2);
459                         break;
460
461                 case TEST_OP_GT:
462                         gen_relation(dfw, ANY_GT, st_arg1, st_arg2);
463                         break;
464
465                 case TEST_OP_GE:
466                         gen_relation(dfw, ANY_GE, st_arg1, st_arg2);
467                         break;
468
469                 case TEST_OP_LT:
470                         gen_relation(dfw, ANY_LT, st_arg1, st_arg2);
471                         break;
472
473                 case TEST_OP_LE:
474                         gen_relation(dfw, ANY_LE, st_arg1, st_arg2);
475                         break;
476
477                 case TEST_OP_BITWISE_AND:
478                         gen_relation(dfw, ANY_BITWISE_AND, st_arg1, st_arg2);
479                         break;
480
481                 case TEST_OP_CONTAINS:
482                         gen_relation(dfw, ANY_CONTAINS, st_arg1, st_arg2);
483                         break;
484
485                 case TEST_OP_MATCHES:
486                         gen_relation(dfw, ANY_MATCHES, st_arg1, st_arg2);
487                         break;
488
489                 case TEST_OP_IN:
490                         gen_relation_in(dfw, st_arg1, st_arg2);
491                         break;
492         }
493 }
494
495 static void
496 gencode(dfwork_t *dfw, stnode_t *st_node)
497 {
498         /* const char   *name; */
499
500         /* name = */stnode_type_name(st_node);  /* XXX: is this being done just for the side-effect ? */
501
502         switch (stnode_type_id(st_node)) {
503                 case STTYPE_TEST:
504                         gen_test(dfw, st_node);
505                         break;
506                 default:
507                         g_assert_not_reached();
508         }
509 }
510
511
512 void
513 dfw_gencode(dfwork_t *dfw)
514 {
515         int             id, id1, length;
516         dfvm_insn_t     *insn, *insn1, *prev;
517         dfvm_value_t    *arg1;
518
519         dfw->insns = g_ptr_array_new();
520         dfw->consts = g_ptr_array_new();
521         dfw->loaded_fields = g_hash_table_new(g_direct_hash, g_direct_equal);
522         dfw->interesting_fields = g_hash_table_new(g_direct_hash, g_direct_equal);
523         gencode(dfw, dfw->st_root);
524         dfw_append_insn(dfw, dfvm_insn_new(RETURN));
525
526         /* fixup goto */
527         length = dfw->insns->len;
528
529         for (id = 0, prev = NULL; id < length; prev = insn, id++) {
530                 insn = (dfvm_insn_t     *)g_ptr_array_index(dfw->insns, id);
531                 arg1 = insn->arg1;
532                 if (insn->op == IF_TRUE_GOTO || insn->op == IF_FALSE_GOTO) {
533                         dfvm_opcode_t revert = (insn->op == IF_FALSE_GOTO)?IF_TRUE_GOTO:IF_FALSE_GOTO;
534                         id1 = arg1->value.numeric;
535                         do {
536                                 insn1 = (dfvm_insn_t*)g_ptr_array_index(dfw->insns, id1);
537                                 if (insn1->op == revert) {
538                                         /* this one is always false and the branch is not taken*/
539                                         id1 = id1 +1;
540                                         continue;
541                                 }
542                                 else if (insn1->op == READ_TREE && prev && prev->op == READ_TREE &&
543                                                 prev->arg2->value.numeric == insn1->arg2->value.numeric) {
544                                         /* hack if it's the same register it's the same field
545                                          * and it returns the same value
546                                          */
547                                         id1 = id1 +1;
548                                         continue;
549                                 }
550                                 else if (insn1->op != insn->op) {
551                                         /* bail out */
552                                         arg1 = insn->arg1;
553                                         arg1->value.numeric = id1;
554                                         break;
555                                 }
556                                 arg1 = insn1->arg1;
557                                 id1 = arg1->value.numeric;
558                         } while (1);
559                 }
560         }
561
562         /* move constants after registers*/
563         if (dfw->first_constant == -1) {
564                 /* NONE */
565                 dfw->first_constant = dfw->next_register;
566                 return;
567         }
568
569         id = -dfw->first_constant -1;
570         dfw->first_constant = dfw->next_register;
571         dfw->next_register += id;
572
573         length = dfw->consts->len;
574         for (id = 0; id < length; id++) {
575                 insn = (dfvm_insn_t     *)g_ptr_array_index(dfw->consts, id);
576                 if (insn->arg2 && insn->arg2->type == REGISTER && (int)insn->arg2->value.numeric < 0 )
577                         insn->arg2->value.numeric = dfw->first_constant - insn->arg2->value.numeric -1;
578         }
579
580         length = dfw->insns->len;
581         for (id = 0; id < length; id++) {
582                 insn = (dfvm_insn_t     *)g_ptr_array_index(dfw->insns, id);
583                 if (insn->arg1 && insn->arg1->type == REGISTER && (int)insn->arg1->value.numeric < 0 )
584                         insn->arg1->value.numeric = dfw->first_constant - insn->arg1->value.numeric -1;
585
586                 if (insn->arg2 && insn->arg2->type == REGISTER && (int)insn->arg2->value.numeric < 0 )
587                         insn->arg2->value.numeric = dfw->first_constant - insn->arg2->value.numeric -1;
588
589                 if (insn->arg3 && insn->arg3->type == REGISTER && (int)insn->arg3->value.numeric < 0 )
590                         insn->arg3->value.numeric = dfw->first_constant - insn->arg3->value.numeric -1;
591
592                 if (insn->arg4 && insn->arg4->type == REGISTER && (int)insn->arg4->value.numeric < 0 )
593                         insn->arg4->value.numeric = dfw->first_constant - insn->arg4->value.numeric -1;
594         }
595
596
597 }
598
599
600
601 typedef struct {
602         int i;
603         int *fields;
604 } hash_key_iterator;
605
606 static void
607 get_hash_key(gpointer key, gpointer value _U_, gpointer user_data)
608 {
609         int field_id = GPOINTER_TO_INT(key);
610         hash_key_iterator *hki = (hash_key_iterator *)user_data;
611
612         hki->fields[hki->i] = field_id;
613         hki->i++;
614 }
615
616 int*
617 dfw_interesting_fields(dfwork_t *dfw, int *caller_num_fields)
618 {
619         int num_fields = g_hash_table_size(dfw->interesting_fields);
620
621         hash_key_iterator hki;
622
623         if (num_fields == 0) {
624                 *caller_num_fields = 0;
625                 return NULL;
626         }
627
628         hki.fields = g_new(int, num_fields);
629         hki.i = 0;
630
631         g_hash_table_foreach(dfw->interesting_fields, get_hash_key, &hki);
632         *caller_num_fields = num_fields;
633         return hki.fields;
634 }
635
636 /*
637  * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
638  *
639  * Local variables:
640  * c-basic-offset: 8
641  * tab-width: 8
642  * indent-tabs-mode: t
643  * End:
644  *
645  * vi: set shiftwidth=8 tabstop=8 noexpandtab:
646  * :indentSize=8:tabSize=8:noTabs=false:
647  */