Search results for '2010/07'. 1 post(s) found.

  1. 2010/07/21 KMP String Matching Algorithm
2010/07/21 10:10

KMP String Matching Algorithm


KMP String Matching Algorithm is well-known algorithm for faster string search. KMP is, as you know, Knuth, Morris, and Pratt. It's complexity is O(n). n is the length of target string and O is big-O notation.

Here's the implemented algorithm.

////////////////////////////////////////////////////////////////////
// programmed by Kurapa Chunun Kang (kurapa@kurapa.com)
#ifndef __GLOBAL_H__
#define __GLOBAL_H__
 
#define MAX_STRING_SIZE                        100
#define MAX_PATTERN_SIZE                     100
 
int g_failure[MAX_PATTERN_SIZE];
int pattern_matching(char *str, char *pat);
void make_failure(char *pat);                                      

//////////////////////////////////////////////////////////////
#endif // __GLOBAL_H__
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
// Knuth,Morris,Pratt String Matching Algorithm
int pattern_matching(char *str, char *pat)
{
        int i = 0, j = 0;  // index
        int intStrLen = strlen(str);
        int intPatLen = strlen(pat);

        // debug
        // printf("intStrLen = %d intPatLen = %d\n", intStrLen, intPatLen);
 
        // index should be longer than intStrLen & intPatLen
        while ((i < intStrLen) && (j < intPatLen))
        {
               if (str[i] == pat[j])
               {
                       i++;
                       j++;
               }
               else if (j == 0) i++;
               else j = g_failure[j-1] + 1;
        }
        return ((j == intPatLen) ? (i - intPatLen) : -1);
}
 
void make_failure(char *pat) // create failure function for target string
{
        int j = 0;
        int intPatLen = strlen(pat);
        g_failure[j] = -1; // the first value should be -1

        int i = 0;
        for ( j=1; j<intPatLen; j ++)
        {
               i = g_failure[j - 1];
               while ((pat[j] != pat[i+1]) && (i >= 0))
               {
                       i = g_failure[i];
               } /// end while

               if (pat[j] == pat[i+1]) g_failure[j] = i+1;
               else g_failure[j] = -1;
        } // end for


        // below is for debug
        printf( "g_failure[]={");
        for (j = 0; j < intPatLen; j ++)
        {
            if (j) printf( ",%d", g_failure[j]);
            else printf("%d", g_failure[j]);
        }
        printf( "} - where pattern=%s\n", pat);

}

int main(int argc, char **argv)
{
        char string[] = "aababaaasdasdasdasdcas";
        char *pattern = "ababaa";
        int result = 0;
 
        make_failure(pattern);
 
        result = pattern_matching(string, pattern);
        // debug
        printf ("string is %s \n", string);
        printf ("pattern is %s \n", pattern);
        printf ("result = %d \n", result);


        make_failure(pattern);

        pattern = "aabba";
        make_failure(pattern);

        pattern = "abababab";
        make_failure(pattern);

        pattern = "ababbaab";
        make_failure(pattern);

        return 0;
}

/* run result
Microsoft Windows XP [Version 5.1.2600]
(C) Copyright 1985-2001 Microsoft Corp.

Z:\내 문서\Debug>KMP
g_failure[]={-1,-1,0,1,2,0} - where pattern=ababaa
intStrLen = 22 intPatLen = 6
string is aababaaasdasdasdasdcas
pattern is ababaa
result = 1
g_failure[]={-1,-1,0,1,2,0} - where pattern=ababaa
g_failure[]={-1,0,-1,-1,0} - where pattern=aabba
g_failure[]={-1,-1,0,1,2,3,4,5} - where pattern=abababab
g_failure[]={-1,-1,0,1,-1,0,0,1} - where pattern=ababbaab

Z:\내 문서\Debug>
*/

Trackback 0 Comment 0

Trackback : Cannot send a trackbact to this post.