r23784: use the GPLv3 boilerplate as recommended by the FSF and the license text
[samba.git] / source3 / lib / ms_fnmatch.c
index 39b3e0013c5e09e7aedb9af6b80041a63bc8d8c9..bdfaca143cfd3a6e293b3142d479f0d04a93a020 100644 (file)
@@ -1,12 +1,11 @@
 /* 
-   Unix SMB/Netbios implementation.
-   Version 3.0
+   Unix SMB/CIFS implementation.
    filename matching routine
-   Copyright (C) Andrew Tridgell 1992-1998 
+   Copyright (C) Andrew Tridgell 1992-2004
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  
+*/
 
 /*
    This module was originally based on fnmatch.c copyright by the Free
-   Software Foundation. It bears little resemblence to that code now 
+   Software Foundation. It bears little (if any) resemblence to that
+   code now
 */  
 
 
-#if FNMATCH_TEST
-#include <stdio.h>
-#include <stdlib.h>
-#else
 #include "includes.h"
-#endif
 
-/* 
-   bugger. we need a separate wildcard routine for older versions
-   of the protocol. This is not yet perfect, but its a lot
-   better than what we had */
-static int ms_fnmatch_lanman_core(const smb_ucs2_t *pattern, 
-                                 const smb_ucs2_t *string)
+static int null_match(const smb_ucs2_t *p)
 {
-       const smb_ucs2_t *p = pattern, *n = string;
-       smb_ucs2_t c;
+       for (;*p;p++) {
+               if (*p != UCS2_CHAR('*') &&
+                   *p != UCS2_CHAR('<') &&
+                   *p != UCS2_CHAR('"') &&
+                   *p != UCS2_CHAR('>')) return -1;
+       }
+       return 0;
+}
 
-       if (strcmp_wa(p, "?")==0 && strcmp_wa(n, ".")) goto match;
+/*
+  the max_n structure is purely for efficiency, it doesn't contribute
+  to the matching algorithm except by ensuring that the algorithm does
+  not grow exponentially
+*/
+struct max_n {
+       const smb_ucs2_t *predot;
+       const smb_ucs2_t *postdot;
+};
+
+
+/*
+  p and n are the pattern and string being matched. The max_n array is
+  an optimisation only. The ldot pointer is NULL if the string does
+  not contain a '.', otherwise it points at the last dot in 'n'.
+*/
+static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n, 
+                          struct max_n *max_n, const smb_ucs2_t *ldot,
+                          BOOL is_case_sensitive)
+{
+       smb_ucs2_t c;
+       int i;
 
        while ((c = *p++)) {
                switch (c) {
-               case UCS2_CHAR('.'):
-                       if (! *n) goto next;
-                       if (*n != UCS2_CHAR('.')) goto nomatch;
-                       n++;
-                       break;
+                       /* a '*' matches zero or more characters of any type */
+               case UCS2_CHAR('*'):
+                       if (max_n->predot && max_n->predot <= n) {
+                               return null_match(p);
+                       }
+                       for (i=0; n[i]; i++) {
+                               if (ms_fnmatch_core(p, n+i, max_n+1, ldot, is_case_sensitive) == 0) {
+                                       return 0;
+                               }
+                       }
+                       if (!max_n->predot || max_n->predot > n) max_n->predot = n;
+                       return null_match(p);
+
+                       /* a '<' matches zero or more characters of
+                          any type, but stops matching at the last
+                          '.' in the string. */
+               case UCS2_CHAR('<'):
+                       if (max_n->predot && max_n->predot <= n) {
+                               return null_match(p);
+                       }
+                       if (max_n->postdot && max_n->postdot <= n && n <= ldot) {
+                               return -1;
+                       }
+                       for (i=0; n[i]; i++) {
+                               if (ms_fnmatch_core(p, n+i, max_n+1, ldot, is_case_sensitive) == 0) return 0;
+                               if (n+i == ldot) {
+                                       if (ms_fnmatch_core(p, n+i+1, max_n+1, ldot, is_case_sensitive) == 0) return 0;
+                                       if (!max_n->postdot || max_n->postdot > n) max_n->postdot = n;
+                                       return -1;
+                               }
+                       }
+                       if (!max_n->predot || max_n->predot > n) max_n->predot = n;
+                       return null_match(p);
 
+                       /* a '?' matches any single character */
                case UCS2_CHAR('?'):
-                       if (! *n) goto next;
-                       if ((*n == UCS2_CHAR('.') && 
-                            n[1] != UCS2_CHAR('.')) || ! *n) 
-                               goto next;
+                       if (! *n) {
+                               return -1;
+                       }
                        n++;
                        break;
 
+                       /* a '?' matches any single character */
                case UCS2_CHAR('>'):
-                       if (! *n) goto next;
                        if (n[0] == UCS2_CHAR('.')) {
-                               if (! n[1] && ms_fnmatch_lanman_core(p, n+1) == 0) goto match;
-                               if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
-                               goto nomatch;
+                               if (! n[1] && null_match(p) == 0) {
+                                       return 0;
+                               }
+                               break;
                        }
+                       if (! *n) return null_match(p);
                        n++;
                        break;
 
-               case UCS2_CHAR('*'):
-                       if (! *n) goto next;
-                       if (! *p) goto match;
-                       for (; *n; n++) {
-                               if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
+               case UCS2_CHAR('"'):
+                       if (*n == 0 && null_match(p) == 0) {
+                               return 0;
                        }
+                       if (*n != UCS2_CHAR('.')) return -1;
+                       n++;
                        break;
 
-               case UCS2_CHAR('<'):
-                       for (; *n; n++) {
-                               if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
-                               if (*n == UCS2_CHAR('.') && 
-                                   !strchr_w(n+1,UCS2_CHAR('.'))) {
-                                       n++;
-                                       break;
+               default:
+                       if (c != *n) {
+                               if (is_case_sensitive) {
+                                       return -1;
+                               }
+                               if (toupper_w(c) != toupper_w(*n)) {
+                                       return -1;
                                }
                        }
-                       break;
-
-               case UCS2_CHAR('"'):
-                       if (*n == 0 && ms_fnmatch_lanman_core(p, n) == 0) goto match;
-                       if (*n != UCS2_CHAR('.')) goto nomatch;
                        n++;
                        break;
-
-               default:
-                       if (c != *n) goto nomatch;
-                       n++;
                }
        }
        
-       if (! *n) goto match;
+       if (! *n) {
+               return 0;
+       }
        
- nomatch:
-       /*
-       if (verbose) printf("NOMATCH pattern=[%s] string=[%s]\n", pattern, string);
-       */
        return -1;
-
-next:
-       if (ms_fnmatch_lanman_core(p, n) == 0) goto match;
-        goto nomatch;
-
- match:
-       /*
-       if (verbose) printf("MATCH   pattern=[%s] string=[%s]\n", pattern, string);
-       */
-       return 0;
 }
 
-static int ms_fnmatch_lanman1(const smb_ucs2_t *pattern, const smb_ucs2_t *string)
+int ms_fnmatch(const char *pattern, const char *string, BOOL translate_pattern,
+              BOOL is_case_sensitive)
 {
-       if (!strpbrk_wa(pattern, "?*<>\"")) {
-               smb_ucs2_t s[] = {UCS2_CHAR('.'), 0};
-               if (strcmp_wa(string,"..") == 0) string = s;
-               return strcasecmp_w(pattern, string);
-       }
+       wpstring p, s;
+       int ret, count, i;
+       struct max_n *max_n = NULL;
 
-       if (strcmp_wa(string,"..") == 0 || strcmp_wa(string,".") == 0) {
-               smb_ucs2_t dot[] = {UCS2_CHAR('.'), 0};
-               smb_ucs2_t dotdot[] = {UCS2_CHAR('.'), UCS2_CHAR('.'), 0};
-               return ms_fnmatch_lanman_core(pattern, dotdot) &&
-                       ms_fnmatch_lanman_core(pattern, dot);
+       if (strcmp(string, "..") == 0) {
+               string = ".";
        }
 
-       return ms_fnmatch_lanman_core(pattern, string);
-}
-
-
-/* the following function was derived using the masktest utility -
-   after years of effort we finally have a perfect MS wildcard
-   matching routine! 
-
-   NOTE: this matches only filenames with no directory component
-
-   Returns 0 on match, -1 on fail.
-*/
-static int ms_fnmatch_w(const smb_ucs2_t *pattern, const smb_ucs2_t *string)
-{
-       const smb_ucs2_t *p = pattern, *n = string;
-       smb_ucs2_t c;
-       extern int Protocol;
+       if (strpbrk(pattern, "<>*?\"") == NULL) {
+               /* this is not just an optmisation - it is essential
+                  for LANMAN1 correctness */
+               if (is_case_sensitive) {
+                       return strcmp(pattern, string);
+               } else {
+                       return StrCaseCmp(pattern, string);
+               }
+       }
 
-       if (Protocol <= PROTOCOL_LANMAN2) {
-               return ms_fnmatch_lanman1(pattern, string);
+       if (push_ucs2(NULL, p, pattern, sizeof(p), STR_TERMINATE) == (size_t)-1) {
+               /* Not quite the right answer, but finding the right one
+                 under this failure case is expensive, and it's pretty close */
+               return -1;
        }
 
-       while ((c = *p++)) {
-               switch (c) {
-               case UCS2_CHAR('?'):
-                       if (! *n) return -1;
-                       n++;
-                       break;
+       if (push_ucs2(NULL, s, string, sizeof(s), STR_TERMINATE) == (size_t)-1) {
+               /* Not quite the right answer, but finding the right one
+                  under this failure case is expensive, and it's pretty close */
+               return -1;
+       }
 
-               case UCS2_CHAR('>'):
-                       if (n[0] == UCS2_CHAR('.')) {
-                               if (! n[1] && ms_fnmatch_w(p, n+1) == 0) return 0;
-                               if (ms_fnmatch_w(p, n) == 0) return 0;
-                               return -1;
+       if (translate_pattern) {
+               /*
+                 for older negotiated protocols it is possible to
+                 translate the pattern to produce a "new style"
+                 pattern that exactly matches w2k behaviour
+               */
+               for (i=0;p[i];i++) {
+                       if (p[i] == UCS2_CHAR('?')) {
+                               p[i] = UCS2_CHAR('>');
+                       } else if (p[i] == UCS2_CHAR('.') && 
+                                  (p[i+1] == UCS2_CHAR('?') || 
+                                   p[i+1] == UCS2_CHAR('*') ||
+                                   p[i+1] == 0)) {
+                               p[i] = UCS2_CHAR('"');
+                       } else if (p[i] == UCS2_CHAR('*') && p[i+1] == UCS2_CHAR('.')) {
+                               p[i] = UCS2_CHAR('<');
                        }
-                       if (! *n) return ms_fnmatch_w(p, n);
-                       n++;
-                       break;
+               }
+       }
 
-               case UCS2_CHAR('*'):
-                       for (; *n; n++) {
-                               if (ms_fnmatch_w(p, n) == 0) return 0;
-                       }
-                       break;
+       for (count=i=0;p[i];i++) {
+               if (p[i] == UCS2_CHAR('*') || p[i] == UCS2_CHAR('<')) count++;
+       }
 
-               case UCS2_CHAR('<'):
-                       for (; *n; n++) {
-                               if (ms_fnmatch_w(p, n) == 0) return 0;
-                               if (*n == UCS2_CHAR('.') && !strchr_wa(n+1,'.')) {
-                                       n++;
-                                       break;
-                               }
-                       }
-                       break;
+       if (count != 0) {
+               max_n = SMB_CALLOC_ARRAY(struct max_n, count);
+               if (!max_n) {
+                       return -1;
+               }
+       }
 
-               case UCS2_CHAR('"'):
-                       if (*n == 0 && ms_fnmatch_w(p, n) == 0) return 0;
-                       if (*n != UCS2_CHAR('.')) return -1;
-                       n++;
-                       break;
+       ret = ms_fnmatch_core(p, s, max_n, strrchr_w(s, UCS2_CHAR('.')), is_case_sensitive);
 
-               default:
-                       if (c != *n) return -1;
-                       n++;
-               }
+       if (max_n) {
+               free(max_n);
        }
-       
-       if (! *n) return 0;
-       
-       return -1;
+
+       return ret;
 }
 
 
-int ms_fnmatch(const char *pattern, const char *string)
+/* a generic fnmatch function - uses for non-CIFS pattern matching */
+int gen_fnmatch(const char *pattern, const char *string)
 {
-       wpstring p, s;
-
-       pstrcpy_wa(p, pattern);
-       pstrcpy_wa(s, string);
-
-       return ms_fnmatch_w(p, s);
+       return ms_fnmatch(pattern, string, PROTOCOL_NT1, False);
 }