CodeArchive SuffixTree
From Ubcacm
Yury's implementation...
#include <cstdio> #include <string> #include <cctype> using namespace std; char s[1 << 20]; // the actual, global string struct edge{ int begin, end; edge *child[64], *slink, *parent; edge(int b = 0, int e = 0, edge *p = 0, edge *sl = 0) { memset(child, 0, sizeof(child)), slink = sl, begin = b, end = e, parent = p; } ~edge(){ for(int i = 0; i < 64; i++) if(child[i]) delete child[i]; } }; void get_suffix(edge *&e, int &ep, int sp){ // s[e->begin + ep] == s[sp] if((e = e->parent)->slink) e = e->slink, ep += (e->end - e->begin); else ep += (e->end - e->begin - 1); while(ep > e->end - e->begin) ep -= (e->end - e->begin), e = e->child[s[sp - ep]]; } edge *build_tree(){ // assume non-zero length for s edge *t = new edge(0, 0), *cur = t->child[s[0]] = new edge(0, (1 << 30), t); for(int phase = 1, pos = 1, f = 1; s[phase]; phase++, pos++){ edge **needslink = 0; while(true){ if(f) get_suffix(cur, pos, phase); else f = 1; if(cur->end == (1 << 30) && pos + cur->begin == phase) continue; if(cur->end - cur->begin > pos && s[cur->begin + pos] == s[phase] || cur->child[s[phase]]) { f = 0; break; } if(needslink) *needslink = cur, needslink = 0; if(cur->end - cur->begin > pos){ edge *oedge = new edge(pos, cur->end, cur, cur->slink); memcpy(oedge->child, cur->child, sizeof(cur->child)); edge *nedge = new edge(phase, (1 << 30), cur); needslink = &(nedge->slink); cur->end = pos, memset(cur->child, 0, sizeof(cur->child)); cur->child[s[phase]] = nedge, cur->child[s[cur->end]] = oedge; }else cur->child[s[phase]] = new edge(phase, (1 << 30), cur); if(cur->parent == 0 && !(f = 0)) break; } } return t; } bool find_str(edge *tree, char *w){ if(*w == 0) return true; if(tree == 0) return false; for(int i = 0; i + tree->begin < tree->end; i++) if(w[i] == 0) return true; else if(s[tree->begin + i] != w[i]) return false; w += (tree->end - tree->begin); return find_str(tree->child[*w], w); }
Suffix Tree
Just implementation anyway...
#include <cstdio> #include <cstdlib> #include <iostream> #include <string> using namespace std; struct node_t { node_t *parent, *suffixLink; bool isLeaf; int sIdx, eIdx; node_t** children; node_t*& operator[] (const int k) { return children[k]; } node_t(node_t* p, bool il, int ss, int ee):parent(p), suffixLink(NULL), isLeaf(il), sIdx(ss), eIdx(ee) { if (!il) { children = new node_t*[32]; memset(children, 0, sizeof(children)); } } ~node_t() { if (children) { for (int i = 0; i < 32; ++i) if (children[i]) delete children[i]; delete [] children; } }; // The str is available globally node_t* build_suffix_tree(const char* str) { int n = strlen(str); node_t *root = new node_t(NULL, false, 0, 0); root->suffixLink = root; // Initialize suffix link (*root)[str[0]] = new node_t(root, true, 0, n); node_t* nextRoot = root, *unknown = NULL, *tmp = NULL; int lts = 0, ltc = n-1; // Those part that I can skip for(int i = 1; i < n; ++i) { tmp = nextRoot; if(lts > 0) { // Fill up the suffix link of the unknown while(lts >= // There must be an edge waiting for me (*tmp)[str[ n-ltc-lts ]]->eIdx - (*tmp)[str[ n-ltc-lts ]]->sIdx ) { // Skip as much as we know tmp = (*tmp)[str[ n-ltc-lts ]]; lts -= (tmp->eIdx - tmp->sIdx); } if(lts > 0) { // Create an internal node node_t* newInternal = new node_t(tmp, false, n-ltc-lts, n-ltc); node_t* originalInternal = (*tmp)[str[ n-ltc-lts ]]; originalInternal->parent = newInternal; originalInternal->sIdx += lts; (*tmp)[str[ n-ltc-lts ]] = newInternal; (*newInternal)[ str[originalInternal->sIdx] ] = originalInternal; tmp = newInternal; lts = 0; } unknown->suffixLink = tmp; // unknown != NULL, the suffixLink of unknown should be found here } while( ltc > 0 ) { // tmp is the default parent, tmp must be an internal node? node_t *nextTrial = (*tmp)[str[n-ltc]]; if(nextTrial == NULL) break; // There is no match already! int idx = nextTrial->sIdx; for(; idx < nextTrial->eIdx; ++idx) if( str[ n-ltc + (idx - nextTrial->sIdx) ] != str[idx] ) break; if(idx < nextTrial->eIdx) { // Ultra troublesome case! I have to create a new internal node node_t* newInternal = new node_t(tmp, false, nextTrial->sIdx, idx); (*tmp)[str[n-ltc]] = newInternal; (*newInternal)[str[idx]] = nextTrial; ltc -= (idx - nextTrial->sIdx); nextTrial->sIdx = idx; tmp = newInternal; break; } else { // idx == nextTrial->eIdx tmp = nextTrial; ltc -= (nextTrial->eIdx - nextTrial->sIdx); } } node_t* newLeaf = new node_t(tmp, true, n-ltc, n); if(ltc == 0) (*tmp)[ 31 ] = newLeaf; else (*tmp)[str[n-ltc]] = newLeaf; nextRoot = newLeaf->parent; // Find my parent first if(nextRoot->suffixLink) { // Suffix link found, I don't have unknown now unknown = NULL; lts = 0; // I can skip nothing } else { unknown = nextRoot; // Fill in the suffix Link here lts = nextRoot->eIdx - nextRoot->sIdx; nextRoot = nextRoot->parent; } if(nextRoot == root) { // I must skip 1 less if(lts > 0) --lts; ltc = n - lts - 1 - i; } nextRoot = nextRoot->suffixLink; } } int longest_prefix(node_t* root, const char* str, const char *test) { int idx = 0; for(;;) { if (root->isLeaf) return idx; node_t* next = (*root)[test[idx]]; if (!next) return idx; for (int i = next->sIdx; test[idx] && i < next->eIdx; ++i, ++idx) if (str[i] != test[idx]) return idx; if (!test[idx]) return idx; root = next; } } int main() { return 0; }
-- Main.MatthewChan - 29 Mar 2006