lib/util Use lib/util/ms_fnmatch.c in common for gen_fnmatch()
[abartlet/samba.git/.git] / source3 / lib / ms_fnmatch.c
index 72f61c021c391e4a29f7c44e9f5d19ab10c373c9..272355b7d203d707c2aa943eba6c85480408fb86 100644 (file)
-/* 
-   Unix SMB/Netbios implementation.
-   Version 3.0
+/*
+   Unix SMB/CIFS implementation.
    filename matching routine
-   Copyright (C) Andrew Tridgell 1992-1998 
+   Copyright (C) Andrew Tridgell 1992-2004
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
-   
+
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
-   
+
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
 
 /*
    This module was originally based on fnmatch.c copyright by the Free
-   Software Foundation. It bears little resemblence to that code now 
-*/  
+   Software Foundation. It bears little (if any) resemblence to that
+   code now
+*/
 
 
-#if FNMATCH_TEST
-#include <stdio.h>
-#include <stdlib.h>
-#else
 #include "includes.h"
-#endif
 
+static int null_match(const smb_ucs2_t *p)
+{
+       for (;*p;p++) {
+               if (*p != UCS2_CHAR('*') &&
+                   *p != UCS2_CHAR('<') &&
+                   *p != UCS2_CHAR('"') &&
+                   *p != UCS2_CHAR('>')) return -1;
+       }
+       return 0;
+}
 
+/*
+  the max_n structure is purely for efficiency, it doesn't contribute
+  to the matching algorithm except by ensuring that the algorithm does
+  not grow exponentially
+*/
+struct max_n {
+       const smb_ucs2_t *predot;
+       const smb_ucs2_t *postdot;
+};
 
-/* 
-   bugger. we need a separate wildcard routine for older versions
-   of the protocol. This is not yet perfect, but its a lot
-   better thaan what we had */
-static int ms_fnmatch_lanman_core(const char *pattern, const char *string)
-{
-       const char *p = pattern, *n = string;
-       char c;
 
-       if (strcmp(p,"?")==0 && strcmp(n,".")==0) goto match;
+/*
+  p and n are the pattern and string being matched. The max_n array is
+  an optimisation only. The ldot pointer is NULL if the string does
+  not contain a '.', otherwise it points at the last dot in 'n'.
+*/
+static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n,
+                          struct max_n *max_n, const smb_ucs2_t *ldot,
+                          bool is_case_sensitive)
+{
+       smb_ucs2_t c;
+       int i;
 
        while ((c = *p++)) {
                switch (c) {
-               case '.':
-                       if (! *n) goto next;
-                       /* if (! *n && ! *p) goto match; */
-                       if (*n != '.') goto nomatch;
-                       n++;
-                       break;
+                       /* a '*' matches zero or more characters of any type */
+               case UCS2_CHAR('*'):
+                       if (max_n->predot && max_n->predot <= n) {
+                               return null_match(p);
+                       }
+                       for (i=0; n[i]; i++) {
+                               if (ms_fnmatch_core(p, n+i, max_n+1, ldot, is_case_sensitive) == 0) {
+                                       return 0;
+                               }
+                       }
+                       if (!max_n->predot || max_n->predot > n) max_n->predot = n;
+                       return null_match(p);
+
+                       /* a '<' matches zero or more characters of
+                          any type, but stops matching at the last
+                          '.' in the string. */
+               case UCS2_CHAR('<'):
+                       if (max_n->predot && max_n->predot <= n) {
+                               return null_match(p);
+                       }
+                       if (max_n->postdot && max_n->postdot <= n && n <= ldot) {
+                               return -1;
+                       }
+                       for (i=0; n[i]; i++) {
+                               if (ms_fnmatch_core(p, n+i, max_n+1, ldot, is_case_sensitive) == 0) return 0;
+                               if (n+i == ldot) {
+                                       if (ms_fnmatch_core(p, n+i+1, max_n+1, ldot, is_case_sensitive) == 0) return 0;
+                                       if (!max_n->postdot || max_n->postdot > n) max_n->postdot = n;
+                                       return -1;
+                               }
+                       }
+                       if (!max_n->predot || max_n->predot > n) max_n->predot = n;
+                       return null_match(p);
 
-               case '?':
-                       if (! *n) goto next;
-                       if ((*n == '.' && n[1] != '.') || ! *n) goto next;
+                       /* a '?' matches any single character */
+               case UCS2_CHAR('?'):
+                       if (! *n) {
+                               return -1;
+                       }
                        n++;
                        break;
 
-               case '>':
-                       if (! *n) goto next;
-                       if (n[0] == '.') {
-                               if (! n[1] && ms_fnmatch_lanman_core(p, n+1) == 0) goto match;
-                               if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
-                               goto nomatch;
+                       /* a '?' matches any single character */
+               case UCS2_CHAR('>'):
+                       if (n[0] == UCS2_CHAR('.')) {
+                               if (! n[1] && null_match(p) == 0) {
+                                       return 0;
+                               }
+                               break;
                        }
+                       if (! *n) return null_match(p);
                        n++;
                        break;
 
-               case '*':
-                       if (! *n) goto next;
-                       if (! *p) goto match;
-                       for (; *n; n++) {
-                               if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
+               case UCS2_CHAR('"'):
+                       if (*n == 0 && null_match(p) == 0) {
+                               return 0;
                        }
+                       if (*n != UCS2_CHAR('.')) return -1;
+                       n++;
                        break;
 
-               case '<':
-                       for (; *n; n++) {
-                               if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
-                               if (*n == '.' && !strchr(n+1,'.')) {
-                                       n++;
-                                       break;
+               default:
+                       if (c != *n) {
+                               if (is_case_sensitive) {
+                                       return -1;
+                               }
+                               if (toupper_m(c) != toupper_m(*n)) {
+                                       return -1;
                                }
                        }
-                       break;
-
-               case '"':
-                       if (*n == 0 && ms_fnmatch_lanman_core(p, n) == 0) goto match;
-                       if (*n != '.') goto nomatch;
                        n++;
                        break;
-
-               default:
-                       if (c != *n) goto nomatch;
-                       n++;
                }
        }
-       
-       if (! *n) goto match;
-       
- nomatch:
-       /*
-       if (verbose) printf("NOMATCH pattern=[%s] string=[%s]\n", pattern, string);
-       */
-       return -1;
 
-next:
-       if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
-        goto nomatch;
+       if (! *n) {
+               return 0;
+       }
 
- match:
-       /*
-       if (verbose) printf("MATCH   pattern=[%s] string=[%s]\n", pattern, string);
-       */
-       return 0;
+       return -1;
 }
 
-static int ms_fnmatch_lanman1(const char *pattern, const char *string)
+int ms_fnmatch(const char *pattern, const char *string, bool translate_pattern,
+              bool is_case_sensitive)
 {
-       if (!strpbrk(pattern, "?*<>\"")) {
-               if (strcmp(string,"..") == 0) string = ".";
-               return strcasecmp(pattern, string);
+       smb_ucs2_t *p = NULL;
+       smb_ucs2_t *s = NULL;
+       int ret, count, i;
+       struct max_n *max_n = NULL;
+       struct max_n *max_n_free = NULL;
+       struct max_n one_max_n;
+       size_t converted_size;
+
+       if (ISDOTDOT(string)) {
+               string = ".";
        }
 
-       if (strcmp(string,"..") == 0 || strcmp(string,".") == 0) {
-               return ms_fnmatch_lanman_core(pattern, "..") &&
-                       ms_fnmatch_lanman_core(pattern, ".");
+       if (strpbrk(pattern, "<>*?\"") == NULL) {
+               /* this is not just an optmisation - it is essential
+                  for LANMAN1 correctness */
+               if (is_case_sensitive) {
+                       return strcmp(pattern, string);
+               } else {
+                       return StrCaseCmp(pattern, string);
+               }
        }
 
-       return ms_fnmatch_lanman_core(pattern, string);
-}
-
-
-/* the following function was derived using the masktest utility -
-   after years of effort we finally have a perfect MS wildcard
-   matching routine! 
-
-   NOTE: this matches only filenames with no directory component
-
-   Returns 0 on match, -1 on fail.
-*/
-int ms_fnmatch(const char *pattern, const char *string)
-{
-       const char *p = pattern, *n = string;
-       char c;
-       extern int Protocol;
-
-       if (Protocol <= PROTOCOL_LANMAN2) {
-               return ms_fnmatch_lanman1(pattern, string);
+       if (!push_ucs2_talloc(talloc_tos(), &p, pattern, &converted_size)) {
+               return -1;
        }
 
-       while ((c = *p++)) {
-               switch (c) {
-               case '?':
-                       if (! *n) return -1;
-                       n++;
-                       break;
+       if (!push_ucs2_talloc(talloc_tos(), &s, string, &converted_size)) {
+               TALLOC_FREE(p);
+               return -1;
+       }
 
-               case '>':
-                       if (n[0] == '.') {
-                               if (! n[1] && ms_fnmatch(p, n+1) == 0) return 0;
-                               if (ms_fnmatch(p, n) == 0) return 0;
-                               return -1;
+       if (translate_pattern) {
+               /*
+                 for older negotiated protocols it is possible to
+                 translate the pattern to produce a "new style"
+                 pattern that exactly matches w2k behaviour
+               */
+               for (i=0;p[i];i++) {
+                       if (p[i] == UCS2_CHAR('?')) {
+                               p[i] = UCS2_CHAR('>');
+                       } else if (p[i] == UCS2_CHAR('.') &&
+                                  (p[i+1] == UCS2_CHAR('?') ||
+                                   p[i+1] == UCS2_CHAR('*') ||
+                                   p[i+1] == 0)) {
+                               p[i] = UCS2_CHAR('"');
+                       } else if (p[i] == UCS2_CHAR('*') && p[i+1] == UCS2_CHAR('.')) {
+                               p[i] = UCS2_CHAR('<');
                        }
-                       if (! *n) return ms_fnmatch(p, n);
-                       n++;
-                       break;
+               }
+       }
 
-               case '*':
-                       for (; *n; n++) {
-                               if (ms_fnmatch(p, n) == 0) return 0;
-                       }
-                       break;
+       for (count=i=0;p[i];i++) {
+               if (p[i] == UCS2_CHAR('*') || p[i] == UCS2_CHAR('<')) count++;
+       }
 
-               case '<':
-                       for (; *n; n++) {
-                               if (ms_fnmatch(p, n) == 0) return 0;
-                               if (*n == '.' && !strchr(n+1,'.')) {
-                                       n++;
-                                       break;
-                               }
+       if (count != 0) {
+               if (count == 1) {
+                       /*
+                        * We're doing this a LOT, so save the effort to allocate
+                        */
+                       ZERO_STRUCT(one_max_n);
+                       max_n = &one_max_n;
+               }
+               else {
+                       max_n = SMB_CALLOC_ARRAY(struct max_n, count);
+                       if (!max_n) {
+                               TALLOC_FREE(p);
+                               TALLOC_FREE(s);
+                               return -1;
                        }
-                       break;
-
-               case '"':
-                       if (*n == 0 && ms_fnmatch(p, n) == 0) return 0;
-                       if (*n != '.') return -1;
-                       n++;
-                       break;
-
-               default:
-                       if (c != *n) return -1;
-                       n++;
+                       max_n_free = max_n;
                }
        }
-       
-       if (! *n) return 0;
-       
-       return -1;
-}
-
-
-#if FNMATCH_TEST
-
-static int match_one(char *pattern, char *file)
-{
-       if (strcmp(file,"..") == 0) file = ".";
-       if (strcmp(pattern,".") == 0) return -1;
-
-       return ms_fnmatch(pattern, file);
-}
 
-static char *match_test(char *pattern, char *file, char *short_name)
-{
-       static char ret[4];
-       strncpy(ret, "---", 3);
+       ret = ms_fnmatch_core(p, s, max_n, strrchr_w(s, UCS2_CHAR('.')), is_case_sensitive);
 
-       if (match_one(pattern, ".") == 0) ret[0] = '+';
-       if (match_one(pattern, "..") == 0) ret[1] = '+';
-       if (match_one(pattern, file) == 0 || 
-           (*short_name && match_one(pattern, short_name)==0)) ret[2] = '+';
+       SAFE_FREE(max_n_free);
+       TALLOC_FREE(p);
+       TALLOC_FREE(s);
        return ret;
 }
-
- int main(int argc, char *argv[])
-{
-       int ret;
-       char ans[4], mask[100], file[100], mfile[100];
-       char *ans2;
-       int n, i=0;
-       char line[200];
-
-       if (argc == 3) {
-               ret = ms_fnmatch(argv[1], argv[2]);
-               if (ret == 0) 
-                       printf("YES\n");
-               else printf("NO\n");
-               return ret;
-       }
-       mfile[0] = 0;
-
-       while (fgets(line, sizeof(line)-1, stdin)) {
-               n = sscanf(line, "%3s %s %s %s\n", ans, mask, file, mfile);
-               if (n < 3) continue;
-               ans2 = match_test(mask, file, mfile);
-               if (strcmp(ans2, ans)) {
-                       printf("%s %s %d mask=[%s]  file=[%s]  mfile=[%s]\n",
-                              ans, ans2, i, mask, file, mfile);
-               }
-               i++;
-               mfile[0] = 0;
-       }
-       return 0;
-}
-#endif /* FNMATCH_TEST */
-