5
* @version $Id: fulltext_mysql.php,v 1.51 2007/11/29 18:26:20 davidmj Exp $
6
* @copyright (c) 2005 phpBB Group
7
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
14
if (!defined('IN_PHPBB'))
22
include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
26
* Fulltext search for MySQL
29
class fulltext_mysql extends search_backend
32
var $word_length = array();
33
var $split_words = array();
35
var $common_words = array();
36
var $pcre_properties = false;
37
var $mbstring_regex = false;
39
function fulltext_mysql(&$error)
43
$this->word_length = array('min' => $config['fulltext_mysql_min_word_len'], 'max' => $config['fulltext_mysql_max_word_len']);
45
if (version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>=')))
47
// While this is the proper range of PHP versions, PHP may not be linked with the bundled PCRE lib and instead with an older version
48
if (@preg_match('/\p{L}/u', 'a') !== false)
50
$this->pcre_properties = true;
54
if (function_exists('mb_ereg'))
56
$this->mbstring_regex = true;
57
mb_regex_encoding('UTF-8');
64
* Checks for correct MySQL version and stores min/max word length in the config
70
if ($db->sql_layer != 'mysql4' && $db->sql_layer != 'mysqli')
72
return $user->lang['FULLTEXT_MYSQL_INCOMPATIBLE_VERSION'];
75
$result = $db->sql_query('SHOW TABLE STATUS LIKE \'' . POSTS_TABLE . '\'');
76
$info = $db->sql_fetchrow($result);
77
$db->sql_freeresult($result);
80
if (isset($info['Engine']))
82
$engine = $info['Engine'];
84
else if (isset($info['Type']))
86
$engine = $info['Type'];
89
if ($engine != 'MyISAM')
91
return $user->lang['FULLTEXT_MYSQL_NOT_MYISAM'];
94
$sql = 'SHOW VARIABLES
96
$result = $db->sql_query($sql);
98
$mysql_info = array();
99
while ($row = $db->sql_fetchrow($result))
101
$mysql_info[$row['Variable_name']] = $row['Value'];
103
$db->sql_freeresult($result);
105
set_config('fulltext_mysql_max_word_len', $mysql_info['ft_max_word_len']);
106
set_config('fulltext_mysql_min_word_len', $mysql_info['ft_min_word_len']);
112
* Splits keywords entered by a user into an array of words stored in $this->split_words
113
* Stores the tidied search query in $this->search_query
115
* @param string &$keywords Contains the keyword as entered by the user
116
* @param string $terms is either 'all' or 'any'
117
* @return bool false if no valid keywords were found and otherwise true
119
function split_keywords(&$keywords, $terms)
125
$match = array('#\sand\s#iu', '#\sor\s#iu', '#\snot\s#iu', '#\+#', '#-#', '#\|#');
126
$replace = array(' +', ' |', ' -', ' +', ' -', ' |');
128
$keywords = preg_replace($match, $replace, $keywords);
131
// Filter out as above
132
$split_keywords = preg_replace("#[\n\r\t]+#", ' ', trim(htmlspecialchars_decode($keywords)));
135
if ($this->pcre_properties)
137
$split_keywords = preg_replace('#([^\p{L}\p{N}\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
139
else if ($this->mbstring_regex)
141
$split_keywords = mb_ereg_replace('([^\w\'*"()])', '\\1\\1', str_replace('\'\'', '\' \'', trim($split_keywords)));
145
$split_keywords = preg_replace('#([^\w\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
148
if ($this->pcre_properties)
151
preg_match_all('#(?:[^\p{L}\p{N}*"()]|^)([+\-|]?(?:[\p{L}\p{N}*"()]+\'?)*[\p{L}\p{N}*"()])(?:[^\p{L}\p{N}*"()]|$)#u', $split_keywords, $matches);
152
$this->split_words = $matches[1];
154
else if ($this->mbstring_regex)
156
mb_ereg_search_init($split_keywords, '(?:[^\w*"()]|^)([+\-|]?(?:[\w*"()]+\'?)*[\w*"()])(?:[^\w*"()]|$)');
158
while (($word = mb_ereg_search_regs()))
160
$this->split_words[] = $word[1];
166
preg_match_all('#(?:[^\w*"()]|^)([+\-|]?(?:[\w*"()]+\'?)*[\w*"()])(?:[^\w*"()]|$)#u', $split_keywords, $matches);
167
$this->split_words = $matches[1];
170
// to allow phrase search, we need to concatenate quoted words
171
$tmp_split_words = array();
173
foreach ($this->split_words as $word)
177
$phrase .= ' ' . $word;
178
if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
180
$tmp_split_words[] = $phrase;
184
else if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
190
$tmp_split_words[] = $word . ' ';
195
$tmp_split_words[] = $phrase;
198
$this->split_words = $tmp_split_words;
200
unset($tmp_split_words);
203
foreach ($this->split_words as $i => $word)
205
$clean_word = preg_replace('#^[+\-|"]#', '', $word);
208
$clean_len = utf8_strlen(str_replace('*', '', $clean_word));
209
if (($clean_len < $config['fulltext_mysql_min_word_len']) || ($clean_len > $config['fulltext_mysql_max_word_len']))
211
$this->common_words[] = $word;
212
unset($this->split_words[$i]);
218
$this->search_query = '';
219
foreach ($this->split_words as $word)
221
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
223
$word = substr($word, 1);
225
$this->search_query .= $word . ' ';
230
$this->search_query = '';
231
foreach ($this->split_words as $word)
233
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
235
$this->search_query .= $word . ' ';
237
else if (strpos($word, '|') === 0)
239
$this->search_query .= substr($word, 1) . ' ';
243
$this->search_query .= '+' . $word . ' ';
248
$this->search_query = utf8_htmlspecialchars($this->search_query);
250
if ($this->search_query)
252
$this->split_words = array_values($this->split_words);
253
sort($this->split_words);
260
* Turns text into an array of words
262
function split_message($text)
267
if ($this->pcre_properties)
269
$text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
271
else if ($this->mbstring_regex)
273
$text = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($text)));
277
$text = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
280
if ($this->pcre_properties)
283
preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches);
286
else if ($this->mbstring_regex)
288
mb_ereg_search_init($text, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');
291
while (($word = mb_ereg_search_regs()))
299
preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $text, $matches);
303
// remove too short or too long words
304
$text = array_values($text);
305
for ($i = 0, $n = sizeof($text); $i < $n; $i++)
307
$text[$i] = trim($text[$i]);
308
if (utf8_strlen($text[$i]) < $config['fulltext_mysql_min_word_len'] || utf8_strlen($text[$i]) > $config['fulltext_mysql_max_word_len'])
314
return array_values($text);
318
* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
320
* @param string $type contains either posts or topics depending on what should be searched for
321
* @param string &$fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
322
* @param string &$terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
323
* @param array &$sort_by_sql contains SQL code for the ORDER BY part of a query
324
* @param string &$sort_key is the key of $sort_by_sql for the selected sorting
325
* @param string &$sort_dir is either a or d representing ASC and DESC
326
* @param string &$sort_days specifies the maximum amount of days a post may be old
327
* @param array &$ex_fid_ary specifies an array of forum ids which should not be searched
328
* @param array &$m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts
329
* @param int &$topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
330
* @param array &$author_ary an array of author ids if the author should be ignored during the search the array is empty
331
* @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
332
* @param int $start indicates the first index of the page
333
* @param int $per_page number of ids each page is supposed to contain
334
* @return boolean|int total number of results
338
function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
342
// No keywords? No posts.
343
if (!$this->search_query)
348
// generate a search_key from all the options to identify the results
349
$search_key = md5(implode('#', array(
350
implode(', ', $this->split_words),
357
implode(',', $ex_fid_ary),
358
implode(',', $m_approve_fid_ary),
359
implode(',', $author_ary)
362
// try reading the results from cache
364
if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
366
return $result_count;
371
$join_topic = ($type == 'posts') ? false : true;
373
// Build sql strings for sorting
374
$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
375
$sql_sort_table = $sql_sort_join = '';
377
switch ($sql_sort[0])
380
$sql_sort_table = USERS_TABLE . ' u, ';
381
$sql_sort_join = ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
389
$sql_sort_table = FORUMS_TABLE . ' f, ';
390
$sql_sort_join = ' AND f.forum_id = p.forum_id ';
394
// Build some display specific sql strings
398
$sql_match = 'p.post_subject';
399
$sql_match_where = ' AND p.post_id = t.topic_first_post_id';
404
$sql_match = 'p.post_text';
405
$sql_match_where = '';
409
$sql_match = 'p.post_subject, p.post_text';
410
$sql_match_where = ' AND p.post_id = t.topic_first_post_id';
415
$sql_match = 'p.post_subject, p.post_text';
416
$sql_match_where = '';
420
if (!sizeof($m_approve_fid_ary))
422
$m_approve_fid_sql = ' AND p.post_approved = 1';
424
else if ($m_approve_fid_ary === array(-1))
426
$m_approve_fid_sql = '';
430
$m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
433
$sql_select = (!$result_count) ? 'SQL_CALC_FOUND_ROWS ' : '';
434
$sql_select = ($type == 'posts') ? $sql_select . 'p.post_id' : 'DISTINCT ' . $sql_select . 't.topic_id';
435
$sql_from = ($join_topic) ? TOPICS_TABLE . ' t, ' : '';
436
$field = ($type == 'posts') ? 'post_id' : 'topic_id';
437
$sql_author = (sizeof($author_ary) == 1) ? ' = ' . $author_ary[0] : 'IN (' . implode(', ', $author_ary) . ')';
439
$sql_where_options = $sql_sort_join;
440
$sql_where_options .= ($topic_id) ? ' AND p.topic_id = ' . $topic_id : '';
441
$sql_where_options .= ($join_topic) ? ' AND t.topic_id = p.topic_id' : '';
442
$sql_where_options .= (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
443
$sql_where_options .= $m_approve_fid_sql;
444
$sql_where_options .= (sizeof($author_ary)) ? ' AND p.poster_id ' . $sql_author : '';
445
$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
446
$sql_where_options .= $sql_match_where;
448
$sql = "SELECT $sql_select
449
FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p
450
WHERE MATCH ($sql_match) AGAINST ('" . $db->sql_escape(htmlspecialchars_decode($this->search_query)) . "' IN BOOLEAN MODE)
453
$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
455
while ($row = $db->sql_fetchrow($result))
457
$id_ary[] = $row[$field];
459
$db->sql_freeresult($result);
461
$id_ary = array_unique($id_ary);
463
if (!sizeof($id_ary))
468
// if the total result count is not cached yet, retrieve it from the db
471
$sql = 'SELECT FOUND_ROWS() as result_count';
472
$result = $db->sql_query($sql);
473
$result_count = (int) $db->sql_fetchfield('result_count');
474
$db->sql_freeresult($result);
482
// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
483
$this->save_ids($search_key, implode(' ', $this->split_words), $author_ary, $result_count, $id_ary, $start, $sort_dir);
484
$id_ary = array_slice($id_ary, 0, (int) $per_page);
486
return $result_count;
490
* Performs a search on an author's posts without caring about message contents. Depends on display specific params
492
* @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
493
* @param int $start indicates the first index of the page
494
* @param int $per_page number of ids each page is supposed to contain
495
* @return total number of results
497
function author_search($type, $firstpost_only, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
501
// No author? No posts.
502
if (!sizeof($author_ary))
507
// generate a search_key from all the options to identify the results
508
$search_key = md5(implode('#', array(
511
($firstpost_only) ? 'firstpost' : '',
517
implode(',', $ex_fid_ary),
518
implode(',', $m_approve_fid_ary),
519
implode(',', $author_ary)
522
// try reading the results from cache
524
if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
526
return $result_count;
531
// Create some display specific sql strings
532
$sql_author = $db->sql_in_set('p.poster_id', $author_ary);
533
$sql_fora = (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
534
$sql_topic_id = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
535
$sql_time = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
536
$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
538
// Build sql strings for sorting
539
$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
540
$sql_sort_table = $sql_sort_join = '';
541
switch ($sql_sort[0])
544
$sql_sort_table = USERS_TABLE . ' u, ';
545
$sql_sort_join = ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
549
$sql_sort_table = ($type == 'posts') ? TOPICS_TABLE . ' t, ' : '';
550
$sql_sort_join = ($type == 'posts') ? ' AND t.topic_id = p.topic_id ' : '';
554
$sql_sort_table = FORUMS_TABLE . ' f, ';
555
$sql_sort_join = ' AND f.forum_id = p.forum_id ';
559
if (!sizeof($m_approve_fid_ary))
561
$m_approve_fid_sql = ' AND p.post_approved = 1';
563
else if ($m_approve_fid_ary == array(-1))
565
$m_approve_fid_sql = '';
569
$m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
572
// If the cache was completely empty count the results
573
$calc_results = ($result_count) ? '' : 'SQL_CALC_FOUND_ROWS ';
575
// Build the query for really selecting the post_ids
576
if ($type == 'posts')
578
$sql = "SELECT {$calc_results}p.post_id
579
FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
592
$sql = "SELECT {$calc_results}t.topic_id
593
FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
599
AND t.topic_id = p.topic_id
607
// Only read one block of posts from the db and then cache it
608
$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
610
while ($row = $db->sql_fetchrow($result))
612
$id_ary[] = $row[$field];
614
$db->sql_freeresult($result);
616
// retrieve the total result count if needed
619
$sql = 'SELECT FOUND_ROWS() as result_count';
620
$result = $db->sql_query($sql);
621
$result_count = (int) $db->sql_fetchfield('result_count');
622
$db->sql_freeresult($result);
632
$this->save_ids($search_key, '', $author_ary, $result_count, $id_ary, $start, $sort_dir);
633
$id_ary = array_slice($id_ary, 0, $per_page);
635
return $result_count;
641
* Destroys cached search results, that contained one of the new words in a post so the results won't be outdated.
643
* @param string $mode contains the post mode: edit, post, reply, quote ...
645
function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
649
// Split old and new post/subject to obtain array of words
650
$split_text = $this->split_message($message);
651
$split_title = ($subject) ? $this->split_message($subject) : array();
653
$words = array_unique(array_merge($split_text, $split_title));
658
// destroy cached search results containing any of the words removed or added
659
$this->destroy_cache($words, array($poster_id));
665
* Destroy cached results, that might be outdated after deleting a post
667
function index_remove($post_ids, $author_ids, $forum_ids)
669
$this->destroy_cache(array(), $author_ids);
673
* Destroy old cache entries
679
// destroy too old cached search results
680
$this->destroy_cache(array());
682
set_config('search_last_gc', time(), true);
686
* Create fulltext index
688
function create_index($acp_module, $u_action)
692
// Make sure we can actually use MySQL with fulltext indexes
693
if ($error = $this->init())
698
if (empty($this->stats))
705
if (!isset($this->stats['post_subject']))
707
if ($db->sql_layer == 'mysqli' || version_compare($db->mysql_version, '4.1.3', '>='))
709
//$alter[] = 'MODIFY post_subject varchar(100) COLLATE utf8_unicode_ci DEFAULT \'\' NOT NULL';
713
$alter[] = 'MODIFY post_subject text NOT NULL';
715
$alter[] = 'ADD FULLTEXT (post_subject)';
718
if (!isset($this->stats['post_text']))
720
if ($db->sql_layer == 'mysqli' || version_compare($db->mysql_version, '4.1.3', '>='))
722
$alter[] = 'MODIFY post_text mediumtext COLLATE utf8_unicode_ci NOT NULL';
726
$alter[] = 'MODIFY post_text mediumtext NOT NULL';
728
$alter[] = 'ADD FULLTEXT (post_text)';
731
if (!isset($this->stats['post_content']))
733
$alter[] = 'ADD FULLTEXT post_content (post_subject, post_text)';
738
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
741
$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
747
* Drop fulltext index
749
function delete_index($acp_module, $u_action)
753
// Make sure we can actually use MySQL with fulltext indexes
754
if ($error = $this->init())
759
if (empty($this->stats))
766
if (isset($this->stats['post_subject']))
768
$alter[] = 'DROP INDEX post_subject';
771
if (isset($this->stats['post_text']))
773
$alter[] = 'DROP INDEX post_text';
776
if (isset($this->stats['post_content']))
778
$alter[] = 'DROP INDEX post_content';
783
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
786
$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
792
* Returns true if both FULLTEXT indexes exist
794
function index_created()
796
if (empty($this->stats))
801
return (isset($this->stats['post_text']) && isset($this->stats['post_subject']) && isset($this->stats['post_content'])) ? true : false;
805
* Returns an associative array containing information about the indexes
807
function index_stats()
811
if (empty($this->stats))
817
$user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0,
825
if (strpos($db->sql_layer, 'mysql') === false)
827
$this->stats = array();
832
FROM ' . POSTS_TABLE;
833
$result = $db->sql_query($sql);
835
while ($row = $db->sql_fetchrow($result))
837
// deal with older MySQL versions which didn't use Index_type
838
$index_type = (isset($row['Index_type'])) ? $row['Index_type'] : $row['Comment'];
840
if ($index_type == 'FULLTEXT')
842
if ($row['Key_name'] == 'post_text')
844
$this->stats['post_text'] = $row;
846
else if ($row['Key_name'] == 'post_subject')
848
$this->stats['post_subject'] = $row;
850
else if ($row['Key_name'] == 'post_content')
852
$this->stats['post_content'] = $row;
856
$db->sql_freeresult($result);
858
$sql = 'SELECT COUNT(post_id) as total_posts
859
FROM ' . POSTS_TABLE;
860
$result = $db->sql_query($sql);
861
$this->stats['total_posts'] = (int) $db->sql_fetchfield('total_posts');
862
$db->sql_freeresult($result);
866
* Display a note, that UTF-8 support is not available with certain versions of PHP
870
global $user, $config;
874
<dt><label>' . $user->lang['FULLTEXT_MYSQL_PCRE'] . '</label><br /><span>' . $user->lang['FULLTEXT_MYSQL_PCRE_EXPLAIN'] . '</span></dt>
875
<dd>' . (($this->pcre_properties) ? $user->lang['YES'] : $user->lang['NO']) . ' (PHP ' . PHP_VERSION . ')</dd>
878
<dt><label>' . $user->lang['FULLTEXT_MYSQL_MBSTRING'] . '</label><br /><span>' . $user->lang['FULLTEXT_MYSQL_MBSTRING_EXPLAIN'] . '</span></dt>
879
<dd>' . (($this->mbstring_regex) ? $user->lang['YES'] : $user->lang['NO']). '</dd>
883
// These are fields required in the config table
b'\\ No newline at end of file'