~azzar1/unity/add-show-desktop-key

443 by dcoles
Added Forum application along with unmodified version of phpBB3 "Olympus" 3.0.0
1
<?php
2
/**
3
*
4
* @package search
5
* @version $Id: fulltext_native.php,v 1.60 2007/10/05 14:36:33 acydburn Exp $
6
* @copyright (c) 2005 phpBB Group
7
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
8
*
9
*/
10
11
/**
12
* @ignore
13
*/
14
// Refuse to run when this file is requested directly instead of being loaded by phpBB.
if (defined('IN_PHPBB') === false)
{
	exit;
}

/**
* Shared search include; presumably where the search_backend base class is declared (verify).
* @ignore
*/
include_once $phpbb_root_path . 'includes/search/search.' . $phpEx;
23
24
/**
25
* fulltext_native
26
* phpBB's own db driven fulltext search, version 2
27
* @package search
28
*/
29
class fulltext_native extends search_backend
30
{
31
	var $stats = array();
32
	var $word_length = array();
33
	var $search_query;
34
	var $common_words = array();
35
36
	var $must_contain_ids = array();
37
	var $must_not_contain_ids = array();
38
	var $must_exclude_one_ids = array();
39
40
	/**
41
	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
42
	*
43
	* @param	boolean|string	&$error	is passed by reference and should either be set to false on success or an error message on failure.
44
	*
45
	* @access	public
46
	*/
47
	function fulltext_native(&$error)
	{
		global $phpbb_root_path, $phpEx, $config;

		// Remember the configured lower/upper bounds for word lengths
		$this->word_length = array(
			'min'	=> $config['fulltext_native_min_chars'],
			'max'	=> $config['fulltext_native_max_chars'],
		);

		// The UTF tools are only included when no utf_normalizer class exists yet
		$normalizer_available = class_exists('utf_normalizer');
		if ($normalizer_available === false)
		{
			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
		}

		// Nothing above reports a failure, so the caller is always told "no error"
		$error = false;
	}
64
65
	/**
66
	* This function fills $this->search_query with the cleaned user search query.
67
	*
68
	* If $terms is 'any' then the words will be extracted from the search query
69
	* and combined with | inside brackets. They will afterwards be treated like
70
	* a standard search query.
71
	*
72
	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
73
	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
74
	*
75
	* @param	string	$keywords	contains the search query string as entered by the user
76
	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post')
77
	* 	or 'any' (find all posts containing at least one of the given words)
78
	* @return	boolean				false if no valid keywords were found and otherwise true
79
	*
80
	* @access	public
81
	*/
82
	function split_keywords($keywords, $terms)
	{
		global $db, $user;

		// cleanup() is handed the operator characters "+-|()*" - presumably so that they survive cleaning (verify against search_backend)
		$keywords = trim($this->cleanup($keywords, '+-|()*'));

		// allow word|word|word without brackets
		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
		{
			$keywords = '(' . $keywords . ')';
		}

		// Walk the query byte by byte and rewrite it into the internal format:
		// - inside a bracket every separator (space, +, -, nested bracket) becomes "|"
		// - outside a bracket stray ")" and "|" become spaces, and whitespace following
		//   a +/- prefix is replaced by that prefix so the prefix sticks to the next word
		// $open_bracket holds the offset of the currently open "(" or false if there is none
		$open_bracket = $space = false;
		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
		{
			if ($open_bracket !== false)
			{
				switch ($keywords[$i])
				{
					case ')':
						// "()" without content: neutralise both brackets
						if ($open_bracket + 1 == $i)
						{
							$keywords[$i - 1] = '|';
							$keywords[$i] = '|';
						}
						$open_bracket = false;
					break;
					case '(':
						$keywords[$i] = '|';
					break;
					case '+':
					case '-':
					case ' ':
						$keywords[$i] = '|';
					break;
				}
			}
			else
			{
				switch ($keywords[$i])
				{
					case ')':
						$keywords[$i] = ' ';
					break;
					case '(':
						$open_bracket = $i;
						$space = false;
					break;
					case '|':
						$keywords[$i] = ' ';
					break;
					case '-':
					case '+':
						$space = $keywords[$i];
					break;
					case ' ':
						if ($space !== false)
						{
							$keywords[$i] = $space;
						}
					break;
					default:
						$space = false;
				}
			}
		}

		// Close a bracket the user left open. The comparison has to be strict: a bracket
		// opened at offset 0 is stored as 0, which a loose truth test would treat as
		// "no bracket open" and the group would lose its last character further below.
		if ($open_bracket !== false)
		{
			$keywords .= ')';
		}

		// Collapse the repeated separators/prefixes the rewriting above may have produced
		$match = array(
			'#  +#',
			'#\|\|+#',
			'#(\+|\-)(?:\+|\-)+#',
			'#\(\|#',
			'#\|\)#',
		);
		$replace = array(
			' ',
			'|',
			'$1',
			'(',
			')',
		);

		$keywords = preg_replace($match, $replace, $keywords);

		// $keywords input format: each word separated by a space, words in a bracket are not separated

		// the user wants to search for any word, convert the search query
		if ($terms == 'any')
		{
			$words = array();

			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
			if (sizeof($words[1]))
			{
				$keywords = '(' . implode('|', $words[1]) . ')';
			}
		}

		// set the search_query which is shown to the user
		$this->search_query = $keywords;

		// Words without wildcards can be looked up directly in the wordlist
		$exact_words = array();
		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
		$exact_words = $exact_words[1];

		$common_ids = $words = array();

		if (sizeof($exact_words))
		{
			$sql = 'SELECT word_id, word_text, word_common
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $exact_words);
			$result = $db->sql_query($sql);

			// store an array of words and ids, remove common words
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['word_common'])
				{
					$this->common_words[] = $row['word_text'];
					$common_ids[$row['word_text']] = (int) $row['word_id'];
					continue;
				}

				$words[$row['word_text']] = (int) $row['word_id'];
			}
			$db->sql_freeresult($result);
		}
		unset($exact_words);

		// now analyse the search query, first split it using the spaces
		$query = explode(' ', $keywords);

		$this->must_contain_ids = array();
		$this->must_not_contain_ids = array();
		$this->must_exclude_one_ids = array();

		$mode = '';
		$ignore_no_id = true;

		foreach ($query as $word)
		{
			if (empty($word))
			{
				continue;
			}

			// words which should not be included
			if ($word[0] == '-')
			{
				$word = substr($word, 1);

				// a group of which at least one may not be in the resulting posts
				// (isset guard: a lone "-" leaves nothing behind to inspect)
				if (isset($word[0]) && $word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
					$mode = 'must_exclude_one';
				}
				// one word which should not be in the resulting posts
				else
				{
					$mode = 'must_not_contain';
				}
				$ignore_no_id = true;
			}
			// words which have to be included
			else
			{
				// no prefix is the same as a +prefix
				if ($word[0] == '+')
				{
					$word = substr($word, 1);
				}

				// a group of words of which at least one word should be in every resulting post
				// (isset guard: a lone "+" leaves nothing behind to inspect)
				if (isset($word[0]) && $word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
				}
				$ignore_no_id = false;
				$mode = 'must_contain';
			}

			// nothing left once the prefix has been stripped
			if (empty($word))
			{
				continue;
			}

			// if this is an array of words then retrieve an id for each
			if (is_array($word))
			{
				$non_common_words = array();
				$id_words = array();
				foreach ($word as $i => $word_part)
				{
					if (strpos($word_part, '*') !== false)
					{
						// wildcards are stored as quoted LIKE patterns (strings) instead of integer ids
						$id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
						$non_common_words[] = $word_part;
					}
					else if (isset($words[$word_part]))
					{
						$id_words[] = $words[$word_part];
						$non_common_words[] = $word_part;
					}
					else
					{
						$len = utf8_strlen($word_part);
						if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
						{
							$this->common_words[] = $word_part;
						}
					}
				}
				if (sizeof($id_words))
				{
					sort($id_words);
					if (sizeof($id_words) > 1)
					{
						$this->{$mode . '_ids'}[] = $id_words;
					}
					else
					{
						// a group with a single usable member is the same as a plain word
						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
						$this->{$mode . '_ids'}[] = $id_words[0];
					}
				}
				// throw an error if we shall not ignore non-existent words
				else if (!$ignore_no_id && sizeof($non_common_words))
				{
					trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
				}
				unset($non_common_words);
			}
			// else we only need one id
			// ($wildcard receives the boolean result of the !== comparison)
			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
			{
				if ($wildcard)
				{
					// the length limits apply to the word without its wildcard characters
					$len = utf8_strlen(str_replace('*', '', $word));
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						$this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
				else
				{
					$this->{$mode . '_ids'}[] = $words[$word];
				}
			}
			// throw an error if we shall not ignore non-existent words
			else if (!$ignore_no_id)
			{
				if (!isset($common_ids[$word]))
				{
					$len = utf8_strlen($word);
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
			}
			else
			{
				$len = utf8_strlen($word);
				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
				{
					$this->common_words[] = $word;
				}
			}
		}

		// we can't search for negatives only
		if (!sizeof($this->must_contain_ids))
		{
			return false;
		}

		// sorted so that the same set of conditions always serialises the same way
		sort($this->must_contain_ids);
		sort($this->must_not_contain_ids);
		sort($this->must_exclude_one_ids);

		if (!empty($this->search_query))
		{
			return true;
		}
		return false;
	}
383
384
	/**
385
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
386
	*
387
	* @param	string		$type				contains either posts or topics depending on what should be searched for
388
	* @param	string		&$fields			contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
389
	* @param	string		&$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
390
	* @param	array		&$sort_by_sql		contains SQL code for the ORDER BY part of a query
391
	* @param	string		&$sort_key			is the key of $sort_by_sql for the selected sorting
392
	* @param	string		&$sort_dir			is either a or d representing ASC and DESC
393
	* @param	string		&$sort_days			specifies the maximum amount of days a post may be old
394
	* @param	array		&$ex_fid_ary		specifies an array of forum ids which should not be searched
395
	* @param	array		&$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
396
	* @param	int			&$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
397
	* @param	array		&$author_ary		an array of author ids if the author should be ignored during the search the array is empty
398
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
399
	* @param	int			$start				indicates the first index of the page
400
	* @param	int			$per_page			number of ids each page is supposed to contain
401
	* @return	boolean|int						total number of results
402
	*
403
	* @access	public
404
	*/
405
	function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($this->must_contain_ids),
			serialize($this->must_not_contain_ids),
			serialize($this->must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		$group_by = false;
		// counters used to generate unique aliases: m<N> for wordmatch, w<N> for wordlist
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
				POSTS_TABLE				=> 'p'
			),
			'LEFT_JOIN'	=> array()
		);
		// m0 is the anchor every other wordmatch alias is tied to
		$sql_where[] = 'm0.post_id = p.post_id';

		$title_match = '';
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$sql_array['FROM'][TOPICS_TABLE] = 't';
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			if (!isset($sql_array['FROM'][TOPICS_TABLE]))
			{
				$sql_array['FROM'][TOPICS_TABLE] = 't';
				$sql_where[] = 'p.topic_id = t.topic_id';
			}
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// Every "must contain" condition gets its own wordmatch alias joined to the same post.
		// Integer entries are word ids, string entries are quoted LIKE patterns (wildcards).
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				$sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		// Wildcards among the excluded words are resolved through a joined wordlist alias;
		// the pattern entry is replaced by a reference to that alias' word_id column
		foreach ($this->must_not_contain_ids as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				$this->must_not_contain_ids[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		// Excluded words: left join their matches and only keep posts where the join found nothing
		if (sizeof($this->must_not_contain_ids))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}

		// Groups of which at least one word has to be missing: one left join per word,
		// the post qualifies as soon as any of those joins found nothing
		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		// array(-1) is the one value for which no approval restriction is added at all
		if (!sizeof($m_approve_fid_ary))
		{
			$sql_where[] = 'p.post_approved = 1';
		}
		else if ($m_approve_fid_ary !== array(-1))
		{
			$sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		if ($topic_id)
		{
			// cast before concatenating into SQL, the same way author_search() does
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			$sql_where[] = $db->sql_in_set('p.poster_id', $author_ary);
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					// the total is fetched with FOUND_ROWS() after the main query further below
					$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					$is_mysql = true;

				break;

				case 'sqlite':
					// sqlite counts the rows of a DISTINCT subselect instead of using COUNT(DISTINCT ...)
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					// keep the sqlite statement if one was prepared above
					$sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort expression is its table alias; join that table if necessary
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				if (!isset($sql_array['FROM'][TOPICS_TABLE]))
				{
					$sql_array['FROM'][TOPICS_TABLE] = 't';
					$sql_where[] = 'p.topic_id = t.topic_id';
				}
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		// read one block of ids (search_block_size of them), starting at $start
		$sql = $db->sql_build_query('SELECT', $sql_array);
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$db->sql_freeresult($result);

		if (!sizeof($id_ary))
		{
			return false;
		}

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}
743
744
	/**
745
	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
746
	*
747
	* @param	string		$type				contains either posts or topics depending on what should be searched for
748
	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
749
	* @param	array		&$sort_by_sql		contains SQL code for the ORDER BY part of a query
750
	* @param	string		&$sort_key			is the key of $sort_by_sql for the selected sorting
751
	* @param	string		&$sort_dir			is either a or d representing ASC and DESC
752
	* @param	string		&$sort_days			specifies the maximum amount of days a post may be old
753
	* @param	array		&$ex_fid_ary		specifies an array of forum ids which should not be searched
754
	* @param	array		&$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
755
	* @param	int			&$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
756
	* @param	array		&$author_ary		an array of author ids
757
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
758
	* @param	int			$start				indicates the first index of the page
759
	* @param	int			$per_page			number of ids each page is supposed to contain
760
	* @return	boolean|int						total number of results
761
	*
762
	* @access	public
763
	*/
764
	function author_search($type, $firstpost_only, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No author? No posts.
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// generate a search_key from all the options to identify the results
		// (the empty strings keep the field positions in line with keyword_search's key)
		$search_key = md5(implode('#', array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// Create some display specific sql strings
		$sql_author		= $db->sql_in_set('p.poster_id', $author_ary);
		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';
		// the first character of the sort expression is its table alias; join that table if necessary
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
			break;

			case 't':
				// The topics table only has to be added for the sort when the post query does not
				// already join it for $firstpost_only; adding it twice would reuse the alias "t"
				// and make the statement invalid. Topic queries always contain it anyway.
				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		// array(-1) is the one value for which no approval restriction is added at all
		if (!sizeof($m_approve_fid_ary))
		{
			$m_approve_fid_sql = ' AND p.post_approved = 1';
		}
		else if ($m_approve_fid_ary == array(-1))
		{
			$m_approve_fid_sql = '';
		}
		else
		{
			$m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$is_mysql = false;

		// If the cache was completely empty count the results
		if (!$total_results)
		{
			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':
					// the total is fetched with FOUND_ROWS() after the main query further below
					$select = 'SQL_CALC_FOUND_ROWS ' . $select;
					$is_mysql = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$m_approve_fid_sql
								$sql_fora
								$sql_time";
					}
					else
					{
						// sqlite counts the rows of a DISTINCT subselect instead of using COUNT(DISTINCT ...)
						if ($db->sql_layer == 'sqlite')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$m_approve_fid_sql
								$sql_fora
								AND t.topic_id = p.topic_id
								$sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
					}
					$result = $db->sql_query($sql);

					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			// The topics table is only needed here for the first post restriction. The topic
			// filter works on p.topic_id alone, so joining topics for it without any join
			// condition would multiply every post by the number of topics.
			$sql = "SELECT $select
				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $sql_sort;
			$field = 'topic_id';
		}

		// Only read one block of posts from the db and then cache it
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[$field];
		}
		$db->sql_freeresult($result);

		// mysql: the total was calculated by the query above, fetch it now
		if (!$total_results && $is_mysql)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		if (sizeof($id_ary))
		{
			// cache the whole block, then hand back only the ids of the requested page
			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, $per_page);

			return $total_results;
		}
		return false;
	}
962
963
	/**
964
	* Split a text into words of a given length
965
	*
966
	* The text is converted to UTF-8, cleaned up, and split. Then, words that
967
	* conform to the defined length range are returned in an array.
968
	*
969
	* NOTE: duplicates are NOT removed from the return array
970
	*
971
	* @param	string	$text	Text to split, encoded in UTF-8
972
	* @return	array			Array of UTF-8 words
973
	*
974
	* @access	private
975
	*/
976
	function split_message($text)
	{
		global $phpbb_root_path, $phpEx, $user;

		// Content that must never reach the index
		$strip_patterns = array(
			// [code] blocks including everything between the tags
			'#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
			// any other BBCode tag
			'#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#',
		);

		$min_chars = $this->word_length['min'];
		// NOTE: the configured maximum is read but not enforced in this method, only the 255 byte cap below is
		$max_chars = $this->word_length['max'];

		// Largest byte length that is rejected outright as too short
		$short_bytes = $min_chars - 1;

		$collected = array();

		// Remove HTML tags, blank out code blocks and BBCode, let cleanup() process the rest
		// and then walk through the result one space separated token at a time
		$stripped = preg_replace($strip_patterns, ' ', strip_tags($text));
		$token = strtok($this->cleanup($stripped, -1), ' ');

		while (strlen($token))
		{
			$bytes = strlen($token);

			// Tokens longer than 255 bytes are dropped; this has to follow the length of
			// search_wordlist.word_text. Tokens of at most $short_bytes bytes are dropped as well.
			$indexable = ($bytes <= 255 && $bytes > $short_bytes);

			// Enough bytes but too few characters: only Hangul and CJK tokens are exempt from the minimum
			if ($indexable && utf8_strlen($token) < $min_chars)
			{
				$in_hangul	= (strncmp($token, UTF8_HANGUL_FIRST, 3) >= 0 && strncmp($token, UTF8_HANGUL_LAST, 3) <= 0);
				$in_cjk		= (strncmp($token, UTF8_CJK_FIRST, 3) >= 0 && strncmp($token, UTF8_CJK_LAST, 3) <= 0);
				$in_cjk_b	= (strncmp($token, UTF8_CJK_B_FIRST, 4) >= 0 && strncmp($token, UTF8_CJK_B_LAST, 4) <= 0);

				$indexable = ($in_hangul || $in_cjk || $in_cjk_b);
			}

			if ($indexable)
			{
				// duplicates are kept on purpose
				$collected[] = $token;
			}

			$token = strtok(' ');
		}

		return $collected;
	}
1042
1043
	/**
1044
	* Updates wordlist and wordmatch tables when a message is posted or changed
1045
	*
1046
	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
1047
	* @param	int		$post_id	The id of the post which is modified/created
1048
	* @param	string	&$message	New or updated post content
1049
	* @param	string	&$subject	New or updated post subject
1050
	* @param	int		$poster_id	Post author's user id
1051
	* @param	int		$forum_id	The id of the forum in which the post is located
1052
	*
1053
	* @access	public
1054
	*/
1055
	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
	{
		global $config, $db;

		if (!$config['fulltext_native_load_upd'])
		{
			/**
			* The search indexer is disabled, return
			*/
			return;
		}

		// $post_id is written directly into the SQL statements below, so it is
		// forced to an integer once here instead of relying on each statement to cast it
		$post_id = (int) $post_id;

		// Split the new post text and subject to obtain arrays of 'words'
		$split_text = $this->split_message($message);
		$split_title = $this->split_message($subject);

		// Words currently stored for this post (only filled when editing): word_text => word_id
		$cur_words = array('post' => array(), 'title' => array());

		$words = array();
		if ($mode == 'edit')
		{
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
				WHERE m.post_id = $post_id
					AND w.word_id = m.word_id";
			$result = $db->sql_query($sql);

			while ($row = $db->sql_fetchrow($result))
			{
				$which = ($row['title_match']) ? 'title' : 'post';
				$cur_words[$which][$row['word_text']] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			// Only the difference between the stored words and the new words is written
			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
		}
		else
		{
			$words['add']['post'] = $split_text;
			$words['add']['title'] = $split_title;
			$words['del']['post'] = array();
			$words['del']['title'] = array();
		}
		unset($split_text);
		unset($split_title);

		// Get unique words from the above arrays
		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

		// We now have a unique array of all words to be added and
		// individual arrays of added and removed words for text and title. What
		// we need to do now is add the new words (if they don't already exist)
		// and then add (or remove) matches between the words and this post
		$new_words = array();
		if (sizeof($unique_add_words))
		{
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
			$result = $db->sql_query($sql);

			$word_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				$word_ids[$row['word_text']] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			// Words that have no row in the word list yet
			$new_words = array_diff($unique_add_words, array_keys($word_ids));
		}

		// The transaction is opened after the lookup above and covers every write below
		$db->sql_transaction('begin');

		if (sizeof($new_words))
		{
			$sql_ary = array();

			foreach ($new_words as $word)
			{
				$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
			}

			// Errors from this insert are returned instead of halting the request
			$db->sql_return_on_error(true);
			$db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
			$db->sql_return_on_error(false);

			unset($sql_ary);
		}
		unset($new_words);

		// now update the search match table, remove links to removed words and add links to new words
		foreach ($words['del'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				// Translate the removed word texts back into the ids read from the database
				$sql_in = array();
				foreach ($word_ary as $word)
				{
					$sql_in[] = $cur_words[$word_in][$word];
				}

				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
						AND post_id = ' . intval($post_id) . "
						AND title_match = $title_match";
				$db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
						AND word_count > 0';
				$db->sql_query($sql);

				unset($sql_in);
			}
		}

		$db->sql_return_on_error(true);
		foreach ($words['add'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				// Link every added word to the post in a single INSERT ... SELECT
				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
					SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
					FROM ' . SEARCH_WORDLIST_TABLE . '
					WHERE ' . $db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count + 1
					WHERE ' . $db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);
			}
		}
		$db->sql_return_on_error(false);

		$db->sql_transaction('commit');

		// destroy cached search results containing any of the words removed or added
		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));

		unset($unique_add_words);
		unset($words);
		unset($cur_words);
	}
1210
1211
	/**
1212
	* Removes entries from the wordmatch table for the specified post_ids
1213
	*/
1214
	function index_remove($post_ids, $author_ids, $forum_ids)
	{
		global $db;

		// Initialised before the branch: the cache invalidation at the end runs even
		// when $post_ids is empty and must not read an undefined variable
		$word_texts = array();

		if (sizeof($post_ids))
		{
			// Collect every word linked to the given posts, split by title/message match
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
				WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
					AND w.word_id = m.word_id';
			$result = $db->sql_query($sql);

			$message_word_ids = $title_word_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['title_match'])
				{
					$title_word_ids[] = $row['word_id'];
				}
				else
				{
					$message_word_ids[] = $row['word_id'];
				}
				$word_texts[] = $row['word_text'];
			}
			$db->sql_freeresult($result);

			// Lower the usage counter of the words found in titles ...
			if (sizeof($title_word_ids))
			{
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $title_word_ids) . '
						AND word_count > 0';
				$db->sql_query($sql);
			}

			// ... and, separately, of the words found in message bodies
			if (sizeof($message_word_ids))
			{
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $message_word_ids) . '
						AND word_count > 0';
				$db->sql_query($sql);
			}

			unset($title_word_ids);
			unset($message_word_ids);

			// Finally drop the word <-> post links themselves
			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $db->sql_in_set('post_id', $post_ids);
			$db->sql_query($sql);
		}

		// Destroy cached search results for the affected words and authors
		$this->destroy_cache(array_unique($word_texts), $author_ids);
	}
1269
1270
	/**
1271
	* Tidy up indexes: Tag 'common words' and remove
1272
	* words no longer referenced in the match table
1273
	*/
1274
	function tidy()
	{
		global $db, $config;

		// With the indexer switched off there is nothing to tidy;
		// only the time of this garbage collection run is recorded
		if (!$config['fulltext_native_load_upd'])
		{
			set_config('search_last_gc', time(), true);
			return;
		}

		// Texts of the words whose cached search results have to be thrown away
		$stale_words = array();

		// Common words are only looked for once the board has at least
		// 100 posts and a non-zero threshold is configured
		$check_common = ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres']);

		if ($check_common)
		{
			// The threshold is configured as a percentage of the number of posts
			$ratio = ((double) $config['fulltext_native_common_thres']) / 100.0;
			$max_count = floor($config['num_posts'] * $ratio);

			// Words used more often than allowed, plus those already flagged as common
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE word_count > ' . $max_count . '
					OR word_common = 1';
			$result = $db->sql_query($sql);

			$common_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				$common_ids[] = $row['word_id'];
				$stale_words[] = $row['word_text'];
			}
			$db->sql_freeresult($result);

			if (sizeof($common_ids))
			{
				// Flag the words as common
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_common = 1
					WHERE ' . $db->sql_in_set('word_id', $common_ids);
				$db->sql_query($sql);

				// search_last_gc is updated before the delete below, so that a user who
				// reloads because that query takes too long does not trigger it again
				set_config('search_last_gc', time(), true);

				// Remove the links between the common words and posts
				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $common_ids);
				$db->sql_query($sql);
			}
			unset($common_ids);
		}

		if (sizeof($stale_words))
		{
			// destroy cached search results containing any of the common words
			$this->destroy_cache(array_unique($stale_words));
		}

		set_config('search_last_gc', time(), true);
	}
1335
1336
	/**
1337
	* Deletes all words from the index
1338
	*/
1339
	function delete_index($acp_module, $u_action)
	{
		global $db;

		// The sqlite and firebird layers are emptied with a plain DELETE,
		// every other layer with TRUNCATE TABLE
		$statement = in_array($db->sql_layer, array('sqlite', 'firebird')) ? 'DELETE FROM ' : 'TRUNCATE TABLE ';

		// Word list first, then the word matches, then the cached search results
		foreach (array(SEARCH_WORDLIST_TABLE, SEARCH_WORDMATCH_TABLE, SEARCH_RESULTS_TABLE) as $table)
		{
			$db->sql_query($statement . $table);
		}
	}
1359
1360
	/**
1361
	* Returns true if both the word list and the word match tables contain entries
1362
	*/
1363
	function index_created()
	{
		// The table counts are loaded on first use only
		if (sizeof($this->stats) == 0)
		{
			$this->get_stats();
		}

		// The index counts as created only when both tables hold at least one row
		if (!$this->stats['total_words'])
		{
			return false;
		}

		return (bool) $this->stats['total_matches'];
	}
1372
1373
	/**
1374
	* Returns an associative array containing information about the indexes
1375
	*/
1376
	function index_stats()
	{
		global $user;

		// The table counts are loaded on first use only
		if (sizeof($this->stats) == 0)
		{
			$this->get_stats();
		}

		// Translated label => row count
		$report = array();
		$report[$user->lang['TOTAL_WORDS']] = $this->stats['total_words'];
		$report[$user->lang['TOTAL_MATCHES']] = $this->stats['total_matches'];

		return $report;
	}
1389
1390
	function get_stats()
	{
		global $db;

		// Statistic name => table whose rows are counted for it
		$counted_tables = array(
			'total_words'	=> SEARCH_WORDLIST_TABLE,
			'total_matches'	=> SEARCH_WORDMATCH_TABLE,
		);

		foreach ($counted_tables as $stat => $table)
		{
			// The COUNT(*) column is aliased to the statistic name so it can be fetched by that name
			$sql = sprintf("SELECT COUNT(*) as %s\n\t\t\tFROM %s", $stat, $table);
			$result = $db->sql_query($sql);
			$this->stats[$stat] = (int) $db->sql_fetchfield($stat);
			$db->sql_freeresult($result);
		}
	}
1406
1407
	/**
1408
	* Clean up a text to remove non-alphanumeric characters
1409
	*
1410
	* This method receives a UTF-8 string, normalizes and validates it, replaces all
1411
	* non-alphanumeric characters with spaces, then returns the result.
1412
	*
1413
	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1414
	*
1415
	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
1416
	* @param	string	$allowed_chars	String of special chars to allow
1417
	* @param	string	$encoding		Text encoding
1418
	* @return	string					Cleaned up text, only alphanumeric chars are left
1419
	*
1420
	* @todo normalizer::cleanup being able to be used?
1421
	*/
1422
	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
	{
		global $phpbb_root_path, $phpEx;

		// Per-request caches shared by all calls: UTF-8 char => replacement string,
		// and the conversion table block numbers that have already been looked for
		static $conv = array(), $conv_loaded = array();
		$allow = array();

		// Convert the text to UTF-8
		$encoding = strtolower($encoding);
		if ($encoding != 'utf-8')
		{
			$text = utf8_recode($text, $encoding);
		}

		// High nibble of a UTF-8 lead byte => length of the sequence in bytes
		$utf_len_mask = array(
			"\xC0"	=>	2,
			"\xD0"	=>	2,
			"\xE0"	=>	3,
			"\xF0"	=>	4
		);

		/**
		* Replace HTML entities and NCRs
		*/
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

		/**
		* Normalize the text to NFC
		*/
		utf_normalizer::nfc($text);

		/**
		* The first thing we do is:
		*
		* - convert ASCII-7 letters to lowercase
		* - remove the ASCII-7 non-alpha characters
		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
		*   0xC1 and 0xF5-0xFF
		*
		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
		*/
		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";

		// The 26 uppercase letters map to their lowercase form, every other byte of
		// $sb_match maps to a space. The padding is computed so that both tables
		// always have the same length.
		$sb_replace	= 'istcpamelrdojbnhfgvwuqkyxz' . str_repeat(' ', strlen($sb_match) - 26);

		/**
		* This is the list of legal ASCII chars, it is automatically extended
		* with ASCII chars from $allowed_chars
		*/
		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';

		/**
		* Prepare an array containing the extra chars to allow
		*/
		if (isset($allowed_chars[0]))
		{
			$pos = 0;
			$len = strlen($allowed_chars);
			do
			{
				$c = $allowed_chars[$pos];

				if ($c < "\x80")
				{
					/**
					* ASCII char
					*/
					$sb_pos = strpos($sb_match, $c);
					if (is_int($sb_pos))
					{
						/**
						* Remove the char from $sb_match and its corresponding
						* replacement in $sb_replace
						*/
						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
						$legal_ascii .= $c;
					}

					++$pos;
				}
				else
				{
					/**
					* UTF-8 char. A byte that is not a known lead byte is taken as a
					* single byte, so that $pos always advances and the loop ends.
					*/
					$lead = $c & "\xF0";
					$utf_len = isset($utf_len_mask[$lead]) ? $utf_len_mask[$lead] : 1;
					$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
					$pos += $utf_len;
				}
			}
			while ($pos < $len);
		}

		$text = strtr($text, $sb_match, $sb_replace);
		$ret = '';

		$pos = 0;
		$len = strlen($text);

		// The loop is left through the return below, once the whole text has been consumed
		do
		{
			/**
			* Do all consecutive ASCII chars at once
			*/
			if ($spn = strspn($text, $legal_ascii, $pos))
			{
				$ret .= substr($text, $pos, $spn);
				$pos += $spn;
			}

			if ($pos >= $len)
			{
				return $ret;
			}

			/**
			* Capture the UTF char
			*/
			$lead = $text[$pos] & "\xF0";
			if (!isset($utf_len_mask[$lead]))
			{
				/**
				* Not a lead byte of a multi-byte sequence (e.g. a stray
				* continuation byte): replace this single byte with a space and
				* move on. Without this the length would be undefined and $pos
				* would never advance.
				*/
				$ret .= ' ';
				++$pos;
				continue;
			}

			$utf_len = $utf_len_mask[$lead];
			$utf_char = substr($text, $pos, $utf_len);
			$pos += $utf_len;

			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
			 || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
			 || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
			{
				/**
				* All characters within these ranges are valid
				*
				* We separate them with a space in order to index each character
				* individually
				*/
				$ret .= ' ' . $utf_char . ' ';
				continue;
			}

			if (isset($allow[$utf_char]))
			{
				/**
				* The char is explicitly allowed
				*/
				$ret .= $utf_char;
				continue;
			}

			if (isset($conv[$utf_char]))
			{
				/**
				* The char is mapped to something, maybe to itself actually
				*/
				$ret .= $conv[$utf_char];
				continue;
			}

			/**
			* The char isn't mapped, but did we load its conversion table?
			*
			* The search indexer table is split into blocks. The block number of
			* each char is equal to its codepoint right-shifted for 11 bits. It
			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
			* all UTF chars encoded in 2 bytes are in the same first block.
			*/
			if (isset($utf_char[2]))
			{
				if (isset($utf_char[3]))
				{
					/**
					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0011 1111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
				}
				else
				{
					/**
					* 1110 nnnn 10nx xxxx 10xx xxxx
					* 0000 1111 0010 0000
					*
					* The lead byte of a 3-byte sequence carries four payload
					* bits, so all four are kept (mask 0x0F). Keeping only three
					* drops the top bit of the block number for every codepoint
					* at or above U+8000.
					*/
					$idx = ((ord($utf_char[0]) & 0x0F) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
				}
			}
			else
			{
				/**
				* 110x xxxx 10xx xxxx
				* 0000 0000 0000 0000
				*/
				$idx = 0;
			}

			/**
			* Check if the required conv table has been loaded already
			*/
			if (!isset($conv_loaded[$idx]))
			{
				// Marked as loaded even if the file does not exist, so it is looked for only once
				$conv_loaded[$idx] = 1;
				$file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;

				if (file_exists($file))
				{
					$conv += include($file);
				}
			}

			if (isset($conv[$utf_char]))
			{
				$ret .= $conv[$utf_char];
			}
			else
			{
				/**
				* We add an entry to the conversion table so that we
				* don't have to convert to codepoint and perform the checks
				* that are above this block
				*/
				$conv[$utf_char] = ' ';
				$ret .= ' ';
			}
		}
		while (1);
	}
1648
1649
	/**
1650
	* Returns a list of options for the ACP to display
1651
	*/
1652
	function acp()
	{
		global $user, $config;

		$lang = $user->lang;

		// Exactly one of the two "update search index" radio buttons is pre-selected
		$checked_attr = ' checked="checked"';
		$upd_on = ($config['fulltext_native_load_upd']) ? $checked_attr : '';
		$upd_off = (!$config['fulltext_native_load_upd']) ? $checked_attr : '';

		// The numeric settings are always printed as integers
		$min_chars = (int) $config['fulltext_native_min_chars'];
		$max_chars = (int) $config['fulltext_native_max_chars'];
		$common_thres = (int) $config['fulltext_native_common_thres'];

		/**
		* Form fields for the options of this search backend
		*/
		$tpl = '
		<dl>
			<dt><label for="fulltext_native_load_upd">' . $lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
			<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . $upd_on . ' class="radio" /> ' . $lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . $upd_off . ' class="radio" /> ' . $lang['NO'] . '</label></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_min_chars">' . $lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . $min_chars . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_max_chars">' . $lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . $max_chars . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_common_thres">' . $lang['COMMON_WORD_THRESHOLD'] . ':</label><br /><span>' . $lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . $common_thres . '" /> %</dd>
		</dl>
		';

		// These are fields required in the config table, with their validation rules
		$config_fields = array(
			'fulltext_native_load_upd'		=> 'bool',
			'fulltext_native_min_chars'		=> 'integer:0:255',
			'fulltext_native_max_chars'		=> 'integer:0:255',
			'fulltext_native_common_thres'	=> 'double:0:100',
		);

		return array(
			'tpl'		=> $tpl,
			'config'	=> $config_fields
		);
	}
1686
}
1687
1688
?>