~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2005 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/*
17
  Implementation of trie and Aho-Corasick automaton.
18
  Supports only charsets that can be compared byte-wise.
19
  
20
  TODO:
21
    Add character frequencies. Can increase lookup speed
22
    up to 30%.
23
    Implement character-wise comparision.
24
*/
25
26
27
#include "mysys_priv.h"
28
#include <m_string.h>
29
#include <my_trie.h>
30
#include <my_base.h>
31
32
33
/*
34
  SYNOPSIS
35
    TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset);
36
37
  DESCRIPTION
38
    Allocates or initializes a `TRIE' object. If `trie' is a `NULL'
39
    pointer, the function allocates, initializes, and returns a new
40
    object. Otherwise, the object is initialized and the address of
41
    the object is returned.  If `trie_init()' allocates a new object,
42
    it will be freed when `trie_free()' is called.
43
44
  RETURN VALUE
45
    An initialized `TRIE*' object.  `NULL' if there was insufficient
46
    memory to allocate a new object.
47
*/
48
49
TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset)
50
{
51
  MEM_ROOT mem_root;
52
  DBUG_ENTER("trie_init");
53
  DBUG_ASSERT(charset);
54
  init_alloc_root(&mem_root,
55
                  (sizeof(TRIE_NODE) * 128) + ALLOC_ROOT_MIN_BLOCK_SIZE,
56
                  sizeof(TRIE_NODE) * 128);
57
  if (! trie)
58
  {
59
    if (! (trie= (TRIE *)alloc_root(&mem_root, sizeof(TRIE))))
60
    {
61
      free_root(&mem_root, MYF(0));
62
      DBUG_RETURN(NULL);
63
    }
64
  }
65
66
  memcpy(&trie->mem_root, &mem_root, sizeof(MEM_ROOT));
67
  trie->root.leaf= 0;
68
  trie->root.c= 0;
69
  trie->root.next= NULL;
70
  trie->root.links= NULL;
71
  trie->root.fail= NULL;
72
  trie->charset= charset;
73
  trie->nnodes= 0;
74
  trie->nwords= 0;
75
  DBUG_RETURN(trie);
76
}
77
78
79
/*
80
  SYNOPSIS
81
    void trie_free (TRIE *trie);
82
    trie - valid pointer to `TRIE'
83
84
  DESCRIPTION
85
    Frees the memory allocated for a `trie'.
86
87
  RETURN VALUE
88
    None.
89
*/
90
91
void trie_free (TRIE *trie)
92
{
93
  MEM_ROOT mem_root;
94
  DBUG_ENTER("trie_free");
95
  DBUG_ASSERT(trie);
96
  memcpy(&mem_root, &trie->mem_root, sizeof(MEM_ROOT));
97
  free_root(&mem_root, MYF(0));
98
  DBUG_VOID_RETURN;
99
}
100
101
102
/*
103
  SYNOPSIS
104
    my_bool trie_insert (TRIE *trie, const uchar *key, uint keylen);
105
    trie - valid pointer to `TRIE'
106
    key - valid pointer to key to insert
107
    keylen - non-0 key length
108
109
  DESCRIPTION
110
    Inserts new key into trie.
111
112
  RETURN VALUE
113
    Upon successful completion, `trie_insert' returns `FALSE'. Otherwise
114
    `TRUE' is returned.
115
116
  NOTES
117
    If this function fails you must assume `trie' is broken.
118
    However it can be freed with trie_free().
119
*/
120
121
my_bool trie_insert (TRIE *trie, const uchar *key, uint keylen)
122
{
123
  TRIE_NODE *node;
124
  TRIE_NODE *next;
125
  uchar p;
126
  uint k;
127
  DBUG_ENTER("trie_insert");
128
  DBUG_ASSERT(trie && key && keylen);
129
  node= &trie->root;
130
  trie->root.fail= NULL;
131
  for (k= 0; k < keylen; k++)
132
  {
133
    p= key[k];
134
    for (next= node->links; next; next= next->next)
135
      if (next->c == p)
136
        break;
137
138
    if (! next)
139
    {
140
      TRIE_NODE *tmp= (TRIE_NODE *)alloc_root(&trie->mem_root,
141
                                              sizeof(TRIE_NODE));
142
      if (! tmp)
143
        DBUG_RETURN(TRUE);
144
      tmp->leaf= 0;
145
      tmp->c= p;
146
      tmp->links= tmp->fail= tmp->next= NULL;
147
      trie->nnodes++;
148
      if (! node->links)
149
      {
150
        node->links= tmp;
151
      }
152
      else
153
      {
154
        for (next= node->links; next->next; next= next->next) /* no-op */;
155
        next->next= tmp;
156
      }
157
      node= tmp;
158
    }
159
    else
160
    {
161
      node= next;
162
    }
163
  }
164
  node->leaf= keylen;
165
  trie->nwords++;
166
  DBUG_RETURN(FALSE);
167
}
168
169
170
/*
171
  SYNOPSIS
172
    my_bool trie_prepare (TRIE *trie);
173
    trie - valid pointer to `TRIE'
174
175
  DESCRIPTION
176
    Constructs Aho-Corasick automaton.
177
178
  RETURN VALUE
179
    Upon successful completion, `trie_prepare' returns `FALSE'. Otherwise
180
    `TRUE' is returned.
181
*/
182
183
my_bool ac_trie_prepare (TRIE *trie)
184
{
185
  TRIE_NODE **tmp_nodes;
186
  TRIE_NODE *node;
187
  uint32 fnode= 0;
188
  uint32 lnode= 0;
189
  DBUG_ENTER("trie_prepare");
190
  DBUG_ASSERT(trie);
191
192
  tmp_nodes= (TRIE_NODE **)my_malloc(trie->nnodes * sizeof(TRIE_NODE *), MYF(0));
193
  if (! tmp_nodes)
194
    DBUG_RETURN(TRUE);
195
196
  trie->root.fail= &trie->root;
197
  for (node= trie->root.links; node; node= node->next)
198
  {
199
    node->fail= &trie->root;
200
    tmp_nodes[lnode++]= node;
201
  }
202
203
  while (fnode < lnode)
204
  {
205
    TRIE_NODE *current= (TRIE_NODE *)tmp_nodes[fnode++];
206
    for (node= current->links; node; node= node->next)
207
    {
208
      TRIE_NODE *fail= current->fail;
209
      tmp_nodes[lnode++]= node;
210
      while (! (node->fail= trie_goto(&trie->root, fail, node->c)))
211
        fail= fail->fail;
212
    }
213
  }
214
  my_free((uchar*)tmp_nodes, MYF(0));
215
  DBUG_RETURN(FALSE);
216
}
217
218
219
/*
220
  SYNOPSIS
221
    void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state);
222
    trie - valid pointer to `TRIE'
223
    state - value pointer to `AC_TRIE_STATE'
224
225
  DESCRIPTION
226
    Initializes `AC_TRIE_STATE' object.
227
*/
228
229
void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state)
230
{
231
  DBUG_ENTER("ac_trie_init");
232
  DBUG_ASSERT(trie && state);
233
  state->trie= trie;
234
  state->node= &trie->root;
235
  DBUG_VOID_RETURN;
236
}