算法设计:通配符模式匹配算法原理和实现

2021年4月13日10:29:10 发表评论 1,054 次浏览

本文概述

给定文本和通配符模式, 请实现通配符模式匹配算法, 以查找通配符模式是否与文本匹配。匹配项应覆盖整个文本(而非部分文本)。

通配符模式可以包含字符"?"和" *"

‘?’–匹配任何单个字符

‘*’–匹配任何字符序列(包括空序列)

例如,

Text = "baaabab", Pattern = "*****ba*****ab", output : true
Pattern = "baaa?ab", output : true
Pattern = "ba*a?", output : true
Pattern = "a*ab", output : false
通配符模式匹配

通配符模式中每次出现的"?"字符都可以用任何其他字符替换, 而每次出现" *"时都可以使用一系列字符, 以使通配符模式在替换后变得与输入字符串相同。

让我们考虑一下模式中的任何字符。

情况1:字符为" *"

这里出现两种情况

  1. 我们可以忽略" *"字符, 然后移至图案中的下一个字符。
  2. " *"字符与"文本"中的一个或多个字符匹配。在这里, 我们将移至字符串中的下一个字符。

情况2:字符是"?"

我们可以忽略文本中的当前字符, 而移至图案和文本中的下一个字符。

情况3:字符不是通配符

如果Text中的当前字符与Pattern中的当前字符匹配, 我们将移至Pattern和Text中的下一个字符。如果它们不匹配, 则通配符模式和文本不匹配。

我们可以使用动态编程来解决此问题–

如果给定字符串中的第一个i字符与pattern的第一个j字符匹配,则设T[i][j]为真。

DP初始化:

//both text and pattern are null
T[0][0] = true; 

//pattern is null
T[i][0] = false; 

//text is null
T[0][j] = T[0][j - 1] if pattern[j – 1] is '*'

DP关系:

//If current characters match, result is same as 
//result for lengths minus one. Characters match
//in two cases:
//a) If pattern character is '?' then it matches  
//with any character of text. 
//b) If current characters in both match
if ( pattern[j – 1] == ‘?’) || 
     (pattern[j – 1] == text[i - 1])
    T[i][j] = T[i-1][j-1]   
 
//If we encounter ‘*’, two choices are possible-
//a) We ignore ‘*’ character and move to next 
//character in the pattern, i.e., ‘*’ 
//indicates an empty sequence.
//b) '*' character matches with ith character in
// input 
else if (pattern[j – 1] == ‘*’)
    T[i][j] = T[i][j-1] || T[i-1][j]  

else //if (pattern[j – 1] != text[i - 1])
    T[i][j]  = false

下面是上述动态编程方法的实现。

C++

//C++ program to implement wildcard
//pattern matching algorithm
#include <bits/stdc++.h>
using namespace std;
 
//Function that matches input str with
//given wildcard pattern
bool strmatch( char str[], char pattern[], int n, int m)
{
     //empty pattern can only match with
     //empty string
     if (m == 0)
         return (n == 0);
 
     //lookup table for storing results of
     //subproblems
     bool lookup[n + 1][m + 1];
 
     //initailze lookup table to false
     memset (lookup, false , sizeof (lookup));
 
     //empty pattern can match with empty string
     lookup[0][0] = true ;
 
     //Only '*' can match with empty string
     for ( int j = 1; j <= m; j++)
         if (pattern[j - 1] == '*' )
             lookup[0][j] = lookup[0][j - 1];
 
     //fill the table in bottom-up fashion
     for ( int i = 1; i <= n; i++) {
         for ( int j = 1; j <= m; j++) {
             //Two cases if we see a '*'
             //a) We ignore ‘*’ character and move
             //to next  character in the pattern, // i.e., ‘*’ indicates an empty sequence.
             //b) '*' character matches with ith
             // character in input
             if (pattern[j - 1] == '*' )
                 lookup[i][j]
                     = lookup[i][j - 1] || lookup[i - 1][j];
 
             //Current characters are considered as
             //matching in two cases
             //(a) current character of pattern is '?'
             //(b) characters actually match
             else if (pattern[j - 1] == '?'
                      || str[i - 1] == pattern[j - 1])
                 lookup[i][j] = lookup[i - 1][j - 1];
 
             //If characters don't match
             else
                 lookup[i][j] = false ;
         }
     }
 
     return lookup[n][m];
}
 
int main()
{
     char str[] = "baaabab" ;
     char pattern[] = "*****ba*****ab" ;
     //char pattern[] = "ba*****ab";
     //char pattern[] = "ba*ab";
     //char pattern[] = "a*ab";
     //char pattern[] = "a*****ab";
     //char pattern[] = "*a*****ab";
     //char pattern[] = "ba*ab****";
     //char pattern[] = "****";
     //char pattern[] = "*";
     //char pattern[] = "aa?ab";
     //char pattern[] = "b*b";
     //char pattern[] = "a*a";
     //char pattern[] = "baaabab";
     //char pattern[] = "?baaabab";
     //char pattern[] = "*baaaba*";
 
     if (strmatch(str, pattern, strlen (str), strlen (pattern)))
         cout <<"Yes" <<endl;
     else
         cout <<"No" <<endl;
 
     return 0;
}

Java

//Java program to implement wildcard
//pattern matching algorithm
import java.util.Arrays;
public class GFG {
 
     //Function that matches input str with
     //given wildcard pattern
     static boolean strmatch(String str, String pattern, int n, int m)
     {
         //empty pattern can only match with
         //empty string
         if (m == 0 )
             return (n == 0 );
 
         //lookup table for storing results of
         //subproblems
         boolean [][] lookup = new boolean [n + 1 ][m + 1 ];
 
         //initailze lookup table to false
         for ( int i = 0 ; i <n + 1 ; i++)
             Arrays.fill(lookup[i], false );
 
         //empty pattern can match with empty string
         lookup[ 0 ][ 0 ] = true ;
 
         //Only '*' can match with empty string
         for ( int j = 1 ; j <= m; j++)
             if (pattern.charAt(j - 1 ) == '*' )
                 lookup[ 0 ][j] = lookup[ 0 ][j - 1 ];
 
         //fill the table in bottom-up fashion
         for ( int i = 1 ; i <= n; i++)
         {
             for ( int j = 1 ; j <= m; j++)
             {
                 //Two cases if we see a '*'
                 //a) We ignore '*'' character and move
                 //to next  character in the pattern, // i.e., '*' indicates an empty
                 // sequence.
                 //b) '*' character matches with ith
                 // character in input
                 if (pattern.charAt(j - 1 ) == '*' )
                     lookup[i][j] = lookup[i][j - 1 ]
                                    || lookup[i - 1 ][j];
 
                 //Current characters are considered as
                 //matching in two cases
                 //(a) current character of pattern is '?'
                 //(b) characters actually match
                 else if (pattern.charAt(j - 1 ) == '?'
                          || str.charAt(i - 1 )
                                 == pattern.charAt(j - 1 ))
                     lookup[i][j] = lookup[i - 1 ][j - 1 ];
 
                 //If characters don't match
                 else
                     lookup[i][j] = false ;
             }
         }
 
         return lookup[n][m];
     }
 
   
     //Driver code
     public static void main(String args[])
     {
         String str = "baaabab" ;
         String pattern = "*****ba*****ab" ;
         //String pattern = "ba*****ab";
         //String pattern = "ba*ab";
         //String pattern = "a*ab";
         //String pattern = "a*****ab";
         //String pattern = "*a*****ab";
         //String pattern = "ba*ab****";
         //String pattern = "****";
         //String pattern = "*";
         //String pattern = "aa?ab";
         //String pattern = "b*b";
         //String pattern = "a*a";
         //String pattern = "baaabab";
         //String pattern = "?baaabab";
         //String pattern = "*baaaba*";
 
         if (strmatch(str, pattern, str.length(), pattern.length()))
             System.out.println( "Yes" );
         else
             System.out.println( "No" );
     }
}
//This code is contributed by Sumit Ghosh

Python3

# Python program to implement wildcard
# pattern matching algorithm
 
# Function that matches input strr with
# given wildcard pattern
 
 
def strrmatch(strr, pattern, n, m):
 
     # empty pattern can only match with
     # empty strring
     if (m = = 0 ):
         return (n = = 0 )
 
     # lookup table for storing results of
     # subproblems
     lookup = [[ False for i in range (m + 1 )] for j in range (n + 1 )]
 
     # empty pattern can match with empty strring
     lookup[ 0 ][ 0 ] = True
 
     # Only '*' can match with empty strring
     for j in range ( 1 , m + 1 ):
         if (pattern[j - 1 ] = = '*' ):
             lookup[ 0 ][j] = lookup[ 0 ][j - 1 ]
 
     # fill the table in bottom-up fashion
     for i in range ( 1 , n + 1 ):
         for j in range ( 1 , m + 1 ):
 
             # Two cases if we see a '*'
             # a) We ignore ‘*’ character and move
             # to next character in the pattern, # i.e., ‘*’ indicates an empty sequence.
             # b) '*' character matches with ith
             # character in input
             if (pattern[j - 1 ] = = '*' ):
                 lookup[i][j] = lookup[i][j - 1 ] or lookup[i - 1 ][j]
 
             # Current characters are considered as
             # matching in two cases
             # (a) current character of pattern is '?'
             # (b) characters actually match
             elif (pattern[j - 1 ] = = '?' or strr[i - 1 ] = = pattern[j - 1 ]):
                 lookup[i][j] = lookup[i - 1 ][j - 1 ]
 
             # If characters don't match
             else :
                 lookup[i][j] = False
 
     return lookup[n][m]
 
# Driver code
 
 
strr = "baaabab"
pattern = "*****ba*****ab"
# char pattern[] = "ba*****ab"
# char pattern[] = "ba*ab"
# char pattern[] = "a*ab"
# char pattern[] = "a*****ab"
# char pattern[] = "*a*****ab"
# char pattern[] = "ba*ab****"
# char pattern[] = "****"
# char pattern[] = "*"
# char pattern[] = "aa?ab"
# char pattern[] = "b*b"
# char pattern[] = "a*a"
# char pattern[] = "baaabab"
# char pattern[] = "?baaabab"
# char pattern[] = "*baaaba*"
 
if (strrmatch(strr, pattern, len (strr), len (pattern))):
     print ( "Yes" )
else :
     print ( "No" )
 
# This code is contributed by shubhamsingh10

C#

//C# program to implement wildcard
//pattern matching algorithm
using System;
 
class GFG {
 
     //Function that matches input str with
     //given wildcard pattern
     static Boolean strmatch(String str, String pattern, int n, int m)
     {
         //empty pattern can only match with
         //empty string
         if (m == 0)
             return (n == 0);
 
         //lookup table for storing results of
         //subproblems
         Boolean[, ] lookup = new Boolean[n + 1, m + 1];
 
         //initailze lookup table to false
         for ( int i = 0; i <n + 1; i++)
             for ( int j = 0; j <m + 1; j++)
                 lookup[i, j] = false ;
 
         //empty pattern can match with
         //empty string
         lookup[0, 0] = true ;
 
         //Only '*' can match with empty string
         for ( int j = 1; j <= m; j++)
             if (pattern[j - 1] == '*' )
                 lookup[0, j] = lookup[0, j - 1];
 
         //fill the table in bottom-up fashion
         for ( int i = 1; i <= n; i++) {
             for ( int j = 1; j <= m; j++) {
                 //Two cases if we see a '*'
                 //a) We ignore '*'' character and move
                 //to next character in the pattern, // i.e., '*' indicates an empty
                 // sequence.
                 //b) '*' character matches with ith
                 // character in input
                 if (pattern[j - 1] == '*' )
                     lookup[i, j] = lookup[i, j - 1]
                                    || lookup[i - 1, j];
 
                 //Current characters are considered as
                 //matching in two cases
                 //(a) current character of pattern is '?'
                 //(b) characters actually match
                 else if (pattern[j - 1] == '?'
                          || str[i - 1] == pattern[j - 1])
                     lookup[i, j] = lookup[i - 1, j - 1];
 
                 //If characters don't match
                 else
                     lookup[i, j] = false ;
             }
         }
         return lookup[n, m];
     }
 
     //Driver Code
     public static void Main(String[] args)
     {
         String str = "baaabab" ;
         String pattern = "*****ba*****ab" ;
         //String pattern = "ba*****ab";
         //String pattern = "ba*ab";
         //String pattern = "a*ab";
         //String pattern = "a*****ab";
         //String pattern = "*a*****ab";
         //String pattern = "ba*ab****";
         //String pattern = "****";
         //String pattern = "*";
         //String pattern = "aa?ab";
         //String pattern = "b*b";
         //String pattern = "a*a";
         //String pattern = "baaabab";
         //String pattern = "?baaabab";
         //String pattern = "*baaaba*";
 
         if (strmatch(str, pattern, str.Length, pattern.Length))
             Console.WriteLine( "Yes" );
         else
             Console.WriteLine( "No" );
     }
}
 
//This code is contributed by Rajput-Ji

输出如下

Yes

时间复杂度:O(m x n)

辅助空间:O(m x n)

DP缓存解决方案:-

C++

//C++ program to implement wildcard
//pattern matching algorithm
#include <bits/stdc++.h>
using namespace std;
 
//Function that matches input str with
//given wildcard pattern
vector<vector<int>> dp;
int finding(string& s, string& p, int n, int m)
{
     //return 1 if n and m are negative
     if (n <0 && m <0)
         return 1;
   
     //return 0 if m is negative
     if (m <0)
         return 0;
   
     //return n if n is negative
     if (n <0)
     {
         //while m is positve
         while (m>= 0)
         {
             if (p[m] != '*' )
                 return 0;
             m--;
         }
         return 1;
     }
    
     //if dp state is not visited
     if (dp[n][m] == -1)
     {
         if (p[m] == '*' )
         {
             return dp[n][m] = finding(s, p, n - 1, m)
                               || finding(s, p, n, m - 1);
         }
         else
         {
             if (p[m] != s[n] && p[m] != '?' )
                 return dp[n][m] = 0;
             else
                 return dp[n][m]
                        = finding(s, p, n - 1, m - 1);
         }
     }
   
     //return dp[n][m] if dp state is previsited
     return dp[n][m];
}
 
 
bool isMatch(string s, string p)
{
     dp.clear();
     
     //resize the dp array
     dp.resize(s.size() + 1, vector<int>(p.size() + 1, -1));
     return dp[s.size()][p.size()]
            = finding(s, p, s.size() - 1, p.size() - 1);
}
 
//Driver code
int main()
{
     string str = "baaabab" ;
     string pattern = "*****ba*****ab" ;
     //char pattern[] = "ba*****ab";
     //char pattern[] = "ba*ab";
     //char pattern[] = "a*ab";
     //char pattern[] = "a*****ab";
     //char pattern[] = "*a*****ab";
     //char pattern[] = "ba*ab****";
     //char pattern[] = "****";
     //char pattern[] = "*";
     //char pattern[] = "aa?ab";
     //char pattern[] = "b*b";
     //char pattern[] = "a*a";
     //char pattern[] = "baaabab";
     //char pattern[] = "?baaabab";
     //char pattern[] = "*baaaba*";
 
     if (isMatch(str, pattern))
         cout <<"Yes" <<endl;
     else
         cout <<"No" <<endl;
 
     return 0;
}

输出如下

Yes

时间复杂度:O(m x n)

辅助空间:O(m x n)

进一步改进:

通过仅使用最后一行的结果, 可以提高空间的复杂性。

另一个改进是, 你将模式中的连续" *"合并为单个" *", 因为它们含义相同。例如, 对于模式" ***** ba ***** ab", 如果我们合并连续的星星, 则结果字符串将为" * ba * ab"。因此, m的值从14减少到6。

如果发现任何不正确的地方, 或者想分享有关上述主题的更多信息, 请发表评论。

木子山

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: