The KMP algorithm for string pattern matching proceeds like this:
//
#include "stdafx.h"
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
using namespace std;
int* PreProcess( string pattern) {
int patternLength = pattern.length();
if (patternLength == 0) return 0;
int * next = new int[patternLength + 1];
if (next == 0) return 0;
next[0] = -1; // set up for loop below; unused by KMP
int i = 0;
int j = -1;
while (i < patternLength) {
next[i + 1] = next[i] + 1;
while ( next[i+1] > 0 &&
pattern[i] != pattern[next[i + 1] - 1])
next[i + 1] = next[next[i + 1] - 1] + 1;
i++;
}
return next;
}
void KMP(string pattern, string text, vector<int> *positions) {
int patternLength = pattern.length();
int textLength = text.length();
int* next = PreProcess(pattern);
if (next == 0) return;
int i = 0;
int j = 0;
while ( j < textLength )
{
while(true)
if (text[j] == pattern[i]) //matches
{
i++; // yes, move on to the next state
if (i == patternLength) // maybe that was the last state
{
// found a match;
positions->push_back(j-(i-1));
i = next[i];
}
break;
}
else if (i == 0) break; // no match in state j = 0, give up
else i = next[i];
j++;
}
}
char CharCode(char chr) {
if ( 'a' <= chr && chr <= 'z' )
return 'a';
if ( 'A' <= chr && chr <= 'Z' )
return 'A';
if ( '0' <= chr && chr <= '9' )
return '0';
if ( chr == '.' || chr == '?' || chr == '!' || chr == ',' || chr == ':' || chr == ';' || chr == '-' )
return '.';
}
string CodeText(string text) {
string ret = text;
for (int i = 0; i < text.length(); ++i) {
ret[i] = CharCode(text[i]);
}
return ret;
}
void FancyOutput(string pattern, string code, vector<int> *positions) {
cout << "Matched positions: ";
for (int i = 0; i < positions->size()-1; ++i)
cout << (*positions)[i] + 1 << ", ";
cout << (*positions)[positions->size()-1] + 1 << "." << endl;
std::cerr << "Text: " << code.c_str() << endl;
for (int i = 0; i < positions->size(); ++i) {
printf("%5d ", i+1);
for (int j = 0; j < (*positions)[i]; ++j) cout << " ";
cout << pattern.c_str() << endl;
}
}
int _tmain(int argc, _TCHAR* argv[])
{
string pattern, text, code;
char pattext[1024];
char txttext[1024];
cout << "Input pattern:" << endl;
cin.getline(pattext, 1024, '\n');
pattern.assign(pattext);
cout << "Input text:" << endl;
cin.getline(txttext, 1024, '\n');
text.assign(txttext);
cout << endl;
code = CodeText(text);
cout << "Processed text:" << endl;
cout << code.c_str() << endl;
cout << endl;
vector<int> *positions = new vector<int>();
KMP(pattern, code, positions);
if ( positions->size() )
cout << "Y" << endl;
else
cout << "N" << endl;
if ( positions->size() )
FancyOutput(pattern, code, positions);
return 0;
}
//
#include "stdafx.h"
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
using namespace std;
int* PreProcess( string pattern) {
int patternLength = pattern.length();
if (patternLength == 0) return 0;
int * next = new int[patternLength + 1];
if (next == 0) return 0;
next[0] = -1; // set up for loop below; unused by KMP
int i = 0;
int j = -1;
while (i < patternLength) {
next[i + 1] = next[i] + 1;
while ( next[i+1] > 0 &&
pattern[i] != pattern[next[i + 1] - 1])
next[i + 1] = next[next[i + 1] - 1] + 1;
i++;
}
return next;
}
void KMP(string pattern, string text, vector<int> *positions) {
int patternLength = pattern.length();
int textLength = text.length();
int* next = PreProcess(pattern);
if (next == 0) return;
int i = 0;
int j = 0;
while ( j < textLength )
{
while(true)
if (text[j] == pattern[i]) //matches
{
i++; // yes, move on to the next state
if (i == patternLength) // maybe that was the last state
{
// found a match;
positions->push_back(j-(i-1));
i = next[i];
}
break;
}
else if (i == 0) break; // no match in state j = 0, give up
else i = next[i];
j++;
}
}
char CharCode(char chr) {
if ( 'a' <= chr && chr <= 'z' )
return 'a';
if ( 'A' <= chr && chr <= 'Z' )
return 'A';
if ( '0' <= chr && chr <= '9' )
return '0';
if ( chr == '.' || chr == '?' || chr == '!' || chr == ',' || chr == ':' || chr == ';' || chr == '-' )
return '.';
}
string CodeText(string text) {
string ret = text;
for (int i = 0; i < text.length(); ++i) {
ret[i] = CharCode(text[i]);
}
return ret;
}
void FancyOutput(string pattern, string code, vector<int> *positions) {
cout << "Matched positions: ";
for (int i = 0; i < positions->size()-1; ++i)
cout << (*positions)[i] + 1 << ", ";
cout << (*positions)[positions->size()-1] + 1 << "." << endl;
std::cerr << "Text: " << code.c_str() << endl;
for (int i = 0; i < positions->size(); ++i) {
printf("%5d ", i+1);
for (int j = 0; j < (*positions)[i]; ++j) cout << " ";
cout << pattern.c_str() << endl;
}
}
int _tmain(int argc, _TCHAR* argv[])
{
string pattern, text, code;
char pattext[1024];
char txttext[1024];
cout << "Input pattern:" << endl;
cin.getline(pattext, 1024, '\n');
pattern.assign(pattext);
cout << "Input text:" << endl;
cin.getline(txttext, 1024, '\n');
text.assign(txttext);
cout << endl;
code = CodeText(text);
cout << "Processed text:" << endl;
cout << code.c_str() << endl;
cout << endl;
vector<int> *positions = new vector<int>();
KMP(pattern, code, positions);
if ( positions->size() )
cout << "Y" << endl;
else
cout << "N" << endl;
if ( positions->size() )
FancyOutput(pattern, code, positions);
return 0;
}
No comments:
Post a Comment