[leetcode] Regular Expression Matching


Regular Expression Matching

Implement regular expression matching with support for '.' and '*'.

'.' Matches any single character.
'*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

The function prototype should be:
bool isMatch(const char *s, const char *p)

Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true

tags: backtracking, dynamic programming, string

9/25/2015 update dynamic programming

O(p*s*s)

//Dynamic programming
class Solution {
public:
    bool isMatch(string s, string p) {
        //base condition
        if(s.size() == 0){
            if(p.size() == 0) return true;
            while(p.size() >= 2 && p[1] == '*'){
                p = p.substr(2, -1);
            }
            if(p.size() == 0) return true;
            else{
                return false;
            }
        }
        if(p.size() == 0) return false;
        //this ensures s.size() and p.size() not equal to 0 
        bool dp[100][100];
        vector<string> tokens;
        memset(dp, false, 100 * 100 * sizeof(bool));
        parseToken(p, tokens);
        //match a space
        if(tokens[0].size() == 2) dp[0][0] = true;
        
        //initialize first column
        for(int i = 1; i <= s.size(); i++){
            //dp[0][j] means whether p[0-j] matches an empty string
            if(i == 1){
                if(tokens[0][0] == '.' || tokens[0][0] == s[i - 1]){
                    dp[i][0] = true;
                }
                else{
                    break;
                }
            }
            else{
                if(tokens[0].size() == 2 && (tokens[0][0] == s[i - 1] || tokens[0][0] == '.')){
                    dp[i][0] = true;
                }
                else{
                    break;
                }
            }
        }
        for(int j = 1; j < tokens.size(); j++){
            string token = tokens[j];
            for(int i = 0; i <= s.size(); i++){
                if(dp[i][j - 1] == false){
                    continue;
                }
                //match space
                if(token.size() == 2){
                    dp[i][j] = true;
                }

                for(int k = i + 1; k <= s.size(); k++){
                    if(k == i + 1){
                        if(token[0] == '.' || token[0] == s[k - 1]){
                            dp[k][j] = true;
                        }
                        else{
                            break;
                        }
                    }
                    else{
                        if(token.size() == 2 && (token[0] == s[k - 1] || token[0] == '.')){
                            dp[k][j] = true;
                        }
                        else{
                            break;
                        }    
                    }
                    
                }
            }
        }
        return dp[s.size()][tokens.size() - 1];
    }
    void parseToken(string& p, vector<string>& tokens){
        int i = 0;
        while(i < p.size()){
            if(p[i] == '*'){
                tokens.back().push_back('*');
            }
            else{
                tokens.push_back(string(1, p[i]));
            }
            i++;
        }
    }
};

 

很有味道的一道题,看了tag才做出来。
考虑用递归构造一个在string域内包含全部可能的,正则表达式展开结果的树。如果展开到某一节点,发现无法匹配,则回溯。
效率很低,但我们不得不用深搜考虑所有可能的情况。估计工业上的正则表达式匹配也用的是这样的方案把。
记string长度为m,正则表达式长度为n。则最坏情况下的时间复杂度为m^n(感觉,待论证)。

class Solution {
public:
    bool isMatch(const char *s, const char *p) {
        return _isMatch(s, p, 0, 0);
    }
    bool _isMatch(const char * s, const char * p, int idxS, int idxP){
        if(idxS == strlen(s) && idxP == strlen(p)){//string and reString all end
            return true;
        }
        else if(idxS != strlen(s) && idxP == strlen(p)){//reString end but string not end
            return false;
        }
        else if(idxS == strlen(s) && idxP != strlen(p)){//string end but reString not end, 
            char token[3];
            getToken(p, idxP, token);
            if(strlen(token) == 2){//if token is (a*) or (.*) ignore it and search deeper
                return _isMatch(s, p, idxS, idxP);
            }
            else{
                return false;
            }
            
        }
        else{
            char token[3];
            getToken(p, idxP, token);
            if(strlen(token) == 1){
                if(matchChar(s,idxS, token[0]) == false){
                    return false;
                }
                else{
                    return _isMatch(s, p, idxS, idxP);
                }
            }
            else{// .* or a*
                bool state;
                if(_isMatch(s, p, idxS, idxP) == true){//ignore this token, since an empty string also matches (a*) or (.*)
                    return true;
                }
                while(matchChar(s, idxS, token[0])){
                    state = _isMatch(s, p, idxS, idxP);
                    if(state == true){
                        return true;
                    }
                }
                return false;
            }
        }
    }
    bool getToken(const char * p, int &pos, char * token){
        if(p[pos] == '\0'){
            return false;
        }
        if(p[pos + 1] == '*'){
            token[0] = p[pos];
            token[1] = p[pos + 1];
            token[2] = '\0';
            pos += 2;
            return true;
        }
        else{
            token[0] = p[pos];
            token[1] = '\0';
            pos += 1;
            return true;
        }
    }
    bool matchChar(const char *s, int &pos, char c){
        if(pos == strlen(s)){
            return false;
        }
        else if(s[pos] == c || c == '.'){
            pos +=1;
            return true;
        }
        else{
            return false;
        }
    }
};

Selection_006

 

 

Leave a comment

Your email address will not be published.

This site uses Akismet to reduce spam. Learn how your comment data is processed.