1
by brian
clean slate |
1 |
#!/usr/bin/perl -w
|
2 |
# Copyright Abandoned 1998 TCX DataKonsult AB & Monty Program KB & Detron HB
|
|
3 |
# This file is public domain and comes with NO WARRANTY of any kind
|
|
4 |
#
|
|
5 |
# This program is brought to you by Janne-Petteri Koilo with the
|
|
6 |
# administration of Michael Widenius.
|
|
7 |
#
|
|
8 |
# Rewritten with a lot of bug fixes by Jani Tolonen and Thimble Smith
|
|
9 |
# 15.12.2000
|
|
10 |
#
|
|
11 |
# This program takes your mails and puts them into your database. It ignores
|
|
12 |
# messages with the same from, date and message text.
|
|
13 |
# You can use mail-files that are compressed or gzipped and end with
|
|
14 |
# -.gz or -.Z.
|
|
15 |
||
16 |
use DBI; |
|
17 |
use Getopt::Long; |
|
18 |
||
19 |
# Turn on autoflush for the currently selected output handle.
$| = 1;
$VER = "3.0";

# Defaults for the command line options.  They are plain package globals
# because main() calls GetOptions() without explicit destinations, which
# makes Getopt::Long store each value in the matching $opt_<name> variable.
$opt_help = $opt_version = $opt_debug = 0;
$opt_host = $opt_port = $opt_socket = undef;
$opt_db = "mail";
$opt_user = $opt_password = undef;
$opt_max_mail_size = 65536;
$opt_create = $opt_test = $opt_no_path = 0;
$opt_stop_on_error = $opt_stdin = 0;

# File-scoped state: the database handle, the program name used in messages
# and the counters that main() prints in its final report.
my ($dbh, $progname);
my ($mail_no_from_f, $mail_no_txt_f, $mail_too_big, $mail_forwarded,
    $mail_duplicates, $mail_no_subject_f, $mail_inserted) = (0) x 7;

# Incremented by update_table() each time a known list footer is stripped.
$mail_fixed = 0;
|
44 |
||
45 |
#
|
|
46 |
# Remove the following message-ends from message
|
|
47 |
#
|
|
48 |
# Each entry is a regular expression (kept as a string) describing a mailing
# list footer; update_table() strips whatever these match from the end of a
# message body.
@remove_tail= (
"\n-*\nSend a mail to .*\n.*\n.*\$",
"\n-*\nPlease check .*\n.*\n\nTo unsubscribe, .*\n.*\n.*\nIf you have a broken.*\n.*\n.*\$",
"\n-*\nPlease check .*\n(.*\n){1,3}\nTo unsubscribe.*\n.*\n.*\$",
"\n-*\nPlease check .*\n.*\n\nTo unsubscribe.*\n.*\$",
"\n-*\nTo request this thread.*\nTo unsubscribe.*\n.*\.*\n.*\$",
"\n -*\n.*Send a mail to.*\n.*\n.*unsubscribe.*\$",
"\n-*\nTo request this thread.*\n\nTo unsubscribe.*\n.*\$"
);

# Generate regexp to remove tails where the unsubscribed is quoted
{
  my @quoted_tails;
  foreach my $tail (@remove_tail)
  {
    # foreach aliases the loop variable to the array element, so the
    # substitution has to run on a copy; otherwise the unquoted patterns
    # above would be overwritten and every pattern would be stored twice.
    (my $quoted = $tail) =~ s/\n/\n[> ]*/g;
    push(@quoted_tails, $quoted);
  }
  push(@remove_tail, @quoted_tails);
}
|
|
69 |
||
70 |
# Month abbreviation => month number; date_parser() uses this to translate
# the month name found in a "Date:" header.
my %months;
@months{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = (1 .. 12);

# Program name without any leading directories, used in messages.
$progname = $0;
$progname =~ s{.*/}{};

main();
|
78 |
||
79 |
####
|
|
80 |
#### main sub routine
|
|
81 |
####
|
|
82 |
||
83 |
# Program entry point.
#
# Prepends the options printed by "my_print_defaults mail_to_db" (when that
# command can be found) to @ARGV, parses the command line, connects to the
# database, optionally creates the table, feeds every inbox (or standard
# input) to process_mail_file() and finally prints a report.  Always ends
# the program through exit() or die().
sub main
{
  my ($connect_arg, @args, $ignored, @defops, $total);

  if (defined(my_which("my_print_defaults")))
  {
    @defops = `my_print_defaults mail_to_db`;
    # chomp, not chop: only a trailing newline may be removed
    chomp @defops;
    unshift @ARGV, @defops;
  }
  else
  {
    print "WARNING: No command 'my_print_defaults' found; unable to read\n";
    print "the my.cnf file. This command is available from the latest MySQL\n";
    print "distribution.\n";
  }
  GetOptions("help","version","host=s","port=i","socket=s","db=s",
             "user=s","password=s","max_mail_size=i","create","test",
             "no_path","debug","stop_on_error","stdin")
    || die "Wrong option! See $progname --help\n";

  usage($VER) if ($opt_help || $opt_version ||
                  (!$ARGV[0] && !$opt_create && !$opt_stdin));

  # Check that the given inbox files exist and are regular files
  if (!$opt_stdin)
  {
    foreach my $inbox (@ARGV)
    {
      die "FATAL: Can't find inbox file: $inbox\n" if (! -f $inbox);
    }
  }

  # This has to be checked before the connection string is built from it.
  die "You must specify the database; use --db=" if (!defined($opt_db));

  $connect_arg = "DBI:mysql:";
  push @args, "database=$opt_db" if defined($opt_db);
  push @args, "host=$opt_host" if defined($opt_host);
  push @args, "port=$opt_port" if defined($opt_port);
  push @args, "mysql_socket=$opt_socket" if defined($opt_socket);
  push @args, "mysql_read_default_group=mail_to_db";
  $connect_arg .= join ';', @args;
  $dbh = DBI->connect("$connect_arg", $opt_user, $opt_password,
                      { PrintError => 0})
    || die "Couldn't connect: $DBI::errstr\n";

  create_table($dbh) if ($opt_create);

  # process_mail_file() reads from the global FILE handle, so every input
  # is opened on that handle.  Opens are checked: a failed open would
  # otherwise just look like an empty inbox.
  if ($opt_stdin)
  {
    open(FILE, '<&', \*STDIN)
      || die "FATAL: Can't read standard input: $!\n";
    process_mail_file($dbh, "READ-FROM-STDIN");
    close(FILE);
  }
  else
  {
    foreach my $inbox (@ARGV)
    {
      # Check if the file is compressed
      if ($inbox =~ /^(.*)\.(gz|Z)$/)
      {
        my $plain_name = $1;
        # List form of the pipe open: the file name is handed to zcat as a
        # single argument and is never interpreted by a shell.
        open(FILE, '-|', 'zcat', $inbox)
          || die "FATAL: Can't run zcat on $inbox: $!\n";
        process_mail_file($dbh, $plain_name);
      }
      else
      {
        open(FILE, '<', $inbox)
          || die "FATAL: Can't open inbox file $inbox: $!\n";
        process_mail_file($dbh, $inbox);
      }
      close(FILE);
    }
  }
  $dbh->disconnect if (!$opt_test);

  # NOTE(review): update_table() bumps $mail_fixed when it strips a list
  # footer and then still goes on to insert the mail, so such mails look
  # like they are counted both as ignored and as inserted - confirm intent.
  $ignored = ($mail_no_from_f + $mail_no_subject_f + $mail_no_txt_f +
              $mail_too_big + $mail_duplicates + $mail_fixed);
  $total = $mail_inserted + $ignored;

  print "################################ Mail Report #################################\n\n";
  print "Mails inserted:\t\t\t\t\t$mail_inserted\n";
  print "--------------- ";
  print "=" . "=" x length("$mail_inserted") . "=\n\n";
  if ($ignored)
  {
    print "Ignored mails\n";
    print "-------------\n";
    print "Reason: mail without \"From:\" -field:\t\t$mail_no_from_f\n"
      if ($mail_no_from_f);
    print "Reason: mail without message:\t\t\t$mail_no_txt_f\n"
      if ($mail_no_txt_f);
    print "Reason: mail without subject:\t\t\t$mail_no_subject_f\n"
      if ($mail_no_subject_f);
    if ($mail_too_big)
    {
      print "Reason: mail too big, over $opt_max_mail_size bytes:\t\t";
      print $mail_too_big;
      print " (see --max_mail_size=#)\n";
    }
    print "Reason: duplicate mail, or in db already:\t$mail_duplicates\n"
      if ($mail_duplicates);
    print "Reason: mail was an unsubscribe - mail:\t\t$mail_fixed\n"
      if ($mail_fixed);
    print " ";
    print "=" . "=" x length("$ignored") . "=\n";
    print "Total number of ignored mails:\t\t\t$ignored\n\n";
  }
  print "Total number of mails:\t\t\t\t";
  print $total;
  print " (OK: ";
  print sprintf("%.1f", $total ? (($mail_inserted / $total) * 100) : 0.0);
  print "% Ignored: ";
  print sprintf("%.1f", $total ? (($ignored / $total) * 100) : 0);
  print "%)\n";
  print "################################ End Report ##################################\n";
  exit(0);
}
|
|
226 |
||
227 |
####
|
|
228 |
#### table creation
|
|
229 |
####
|
|
230 |
||
231 |
# Creates the my_mail table through the given database handle.
# Dies if the statement cannot be prepared or executed; otherwise returns
# whatever execute() returned.
sub create_table
{
  my ($dbh)= @_;

  my $ddl= <<EOF;
CREATE TABLE my_mail
(
mail_id MEDIUMINT UNSIGNED NOT NULL auto_increment,
message_id VARCHAR(255),
in_reply_to VARCHAR(255),
date DATETIME NOT NULL,
time_zone VARCHAR(20),
mail_from VARCHAR(120) NOT NULL,
reply VARCHAR(120),
mail_to TEXT,
cc TEXT,
sbj VARCHAR(200),
txt MEDIUMTEXT NOT NULL,
file VARCHAR(64) NOT NULL,
hash INTEGER NOT NULL,
KEY (mail_id),
KEY (message_id),
KEY (in_reply_to),
PRIMARY KEY (mail_from, date, hash))
ENGINE=MyISAM COMMENT=''
EOF

  my $statement = $dbh->prepare($ddl);
  die $DBI::errstr unless ($statement);

  my $result = $statement->execute();
  die "Couldn't create table: $DBI::errstr\n" unless ($result);
  return $result;
}
|
|
261 |
||
262 |
####
|
|
263 |
#### inbox processing. Can be either a real file, or standard input.
|
|
264 |
####
|
|
265 |
||
266 |
# Reads one mbox-style inbox from the global FILE handle (opened by the
# caller), splits it into mails and hands each mail to update_table().
#
#   $dbh        database handle, passed through to update_table()
#   $file_name  name stored with every mail; directories are stripped
#               when --no_path is given
#
# Header lines are collected into %values until the first empty line; after
# that everything is message text until the next "From ... <date>" line.
sub process_mail_file
{
  my ($dbh, $file_name) = @_;
  my (%values, $type, $check);

  # Headers whose value is stored as it stands: [ pattern, key in %values ].
  # For Message-Id and In-Reply-To the old code ran an extra substitution on
  # the whole header line; it could never match (the line starts with the
  # header name), so the stored value was always this capture.
  my @value_headers = (
    [ qr/^Reply-To:\s*(.*)/i,    "reply"       ],
    [ qr/^From: (.*)/i,          "from"        ],
    [ qr/^To: (.*)/i,            "to"          ],
    [ qr/^Cc: (.*)/i,            "cc"          ],
    [ qr/^Subject: (.*)/i,       "subject"     ],
    [ qr/^Message-Id:\s*(.*)/i,  "message_id"  ],
    [ qr/^In-Reply-To:\s*(.*)/i, "in_reply_to" ],
  );

  $file_name =~ s/.*[\/]// if ($opt_no_path);

  %values = ();
  $type = "";
  $check = 0;

  # A lexical line variable keeps the caller's $_ untouched.
  LINE:
  while (defined(my $line = <FILE>))
  {
    # chomp instead of chop, so that a last line without a newline does not
    # lose a character; then drop the CR of a CRLF line ending.
    chomp($line);
    $line =~ s/\r\z//;

    if ($type ne "message")
    {
      foreach my $header (@value_headers)
      {
        if ($line =~ $header->[0])
        {
          $type = $header->[1];
          $values{$type} = $1;
          next LINE;
        }
      }

      if ($line =~ /^Date: (.*)/i)
      {
        my $raw_date = $1;
        date_parser($raw_date, \%values, $file_name);
        $type = "rubbish";
      }
      # Catch those fields that we don't or can't handle (yet)
      elsif ($line =~ /^[\w\W-]+:/)
      {
        $type = "rubbish";
      }
      elsif ($line eq "")
      {
        # An empty line ends the headers
        $type = "message";
        $values{$type} = "";
      }
      else
      {
        # Continuation of the previous header line
        $line =~ s/^\s*/ /;
        if ($type eq 'message_id' || $type eq 'in_reply_to')
        {
          $line =~ s/^\s*(<.*>)\s*/$1/;
        }
        $values{$type} .= $line;
      }
    }
    elsif ($check != 0 && $line ne "")  # in case of forwarded messages
    {
      $values{$type} .= "\n" . $line;
      $check--;
    }
    elsif ($line =~ /^From .* \d\d:\d\d:\d\d\s\d\d\d\d/ ||
           $line =~ /^From .* \d\d\d\d\s\d\d:\d\d:\d\d/)
    {
      # Separator line of the next mail: store the mail collected so far
      $values{'hash'} = checksum("$values{'message'}");
      update_table($dbh, $file_name, \%values);
      %values = ();
      $type = "";
      $check = 0;
    }
    elsif ($line =~ /-* forwarded message .*-*/i)  # in case of forwarded messages
    {
      $values{$type} .= "\n" . $line;
      $check++;
      $mail_forwarded++;
    }
    else
    {
      $values{$type} .= "\n" . $line;
    }
  }

  # Store the last mail of the file
  if (defined($values{'message'}))
  {
    $values{'hash'} = checksum("$values{'message'}");
    update_table($dbh, $file_name, \%values);
  }
}
|
|
375 |
||
376 |
####
|
|
377 |
#### get date and timezone
|
|
378 |
####
|
|
379 |
||
380 |
# Parses the value of a "Date:" header.
#
#   $date_raw   header value, e.g. "Fri, 15 Dec 2000 10:11:12 +0200 (EET)"
#   $values     hash reference; 'date' and 'time_zone' are stored in it
#   $file_name  inbox name, only used in the diagnostics
#
# On success 'date' becomes "year-month-day hh:mm:ss" (seconds default to
# ":00") and 'time_zone' whatever followed the time (undef if nothing did).
# When the date cannot be parsed both are set to "", or the program exits
# with status 1 if --stop_on_error was given.
sub date_parser
{
  my ($date_raw, $values, $file_name) = @_;
  my ($day, $month, $year, $hhmm, $secs, $zone, $tmp);

  # If you ever need to change this test, be especially careful with
  # the timezone; it may be just a number (-0600), or just a name (EET), or
  # both (-0600 (EET), or -0600 (EET GMT)), or without parenthesis: GMT.
  # You probably should use a 'greedy' regexp in the end
  #
  # The captures are taken from the return value of the match: after a
  # failed match $1..$6 would still hold the values of an earlier match.
  if (defined($date_raw))
  {
    ($day, $month, $year, $hhmm, $secs, $zone) =
      $date_raw =~ /^\D*(\d{1,2})\s+(\w+)\s+(\d{2,4})\s+(\d+:\d+)(:\d+)?\s*(\S+.*)?/;
  }

  # $hhmm is only defined when the whole pattern matched
  if (!defined($hhmm) || !defined($months{$month}))
  {
    if ($opt_debug || $opt_stop_on_error)
    {
      my @shown = map { defined($_) ? $_ : "" }
                      ($day, $month, $year, $hhmm, $secs);
      my $month_no = (defined($month) && defined($months{$month})) ?
                     $months{$month} : "";
      my $raw_shown = defined($date_raw) ? $date_raw : "";
      print "FAILED: date_parser: 1: $shown[0] 2: $shown[1] 3: $shown[2] 4: $shown[3] 5: $shown[4]\n";
      print "months{2}: $month_no\n";
      print "date_raw: $raw_shown\n";
      print "Inbox filename: $file_name\n";
    }
    exit(1) if ($opt_stop_on_error);
    $values->{'date'} = "";
    $values->{'time_zone'} = "";
    return;
  }
  $tmp = $year . "-" . $months{$month} . "-" . "$day $hhmm";
  $tmp.= defined($secs) ? $secs : ":00";
  $values->{'date'} = $tmp;
  print "INSERTING DATE: $tmp\n" if ($opt_debug);
  $values->{'time_zone'} = $zone;
}
|
|
411 |
||
412 |
####
|
|
413 |
#### Insert to table
|
|
414 |
####
|
|
415 |
||
416 |
# Inserts one parsed mail into my_mail, or counts why it was not inserted.
#
#   $dbh        database handle (only quote() and do() are used)
#   $file_name  stored in the 'file' column
#   $values     hash reference with the parsed fields: message, subject, to,
#               from, reply, cc, message_id, in_reply_to, date, time_zone
#               and hash.  'message' is overwritten with the trimmed text.
#
# Updates the $mail_* counters.  Dies if the insert fails for any reason
# other than a duplicate key.  With --test the query is only printed.
sub update_table
{
  my($dbh, $file_name, $values) = @_;
  my($q, $message);

  if (!defined($values->{'subject'}) || !defined($values->{'to'}))
  {
    $mail_no_subject_f++;
    return;                     # Ignore these
  }
  $message = $values->{'message'};
  $message =~ s/^\s*//;         # removes whitespaces from the beginning

  # Strip known list footers until nothing changes any more; some mails
  # may have duplicated messages.
  while (1)
  {
    $message =~ s/[\s\n>]*$//;  # removes whitespaces and '>' from the end
    $values->{'message'} = $message;
    foreach my $tail (@remove_tail)
    {
      $message =~ s/$tail//;
    }
    last if ($message eq $values->{'message'});
    $message =~ s/\s*$//;       # removes whitespaces from the end
    $mail_fixed++;
  }

  # Reject checks come first, so no query is built for a mail that is
  # thrown away, and a --test run counts exactly what a real run would.

  # Don't insert mails bigger than $opt_max_mail_size
  if (length($message) > $opt_max_mail_size)
  {
    $mail_too_big++;
    return;
  }
  # Don't insert mails without 'From' field
  if (!defined($values->{'from'}) || $values->{'from'} eq "")
  {
    $mail_no_from_f++;
    return;
  }
  # Don't insert mails without the 'message'
  if ($message eq "")
  {
    $mail_no_txt_f++;
    return;
  }

  # Optional fields become NULL when the header was missing
  my $quote_or_null = sub {
    my ($field) = @_;
    return defined($values->{$field}) ? $dbh->quote($values->{$field}) : "NULL";
  };
  my @columns = qw(mail_id message_id in_reply_to date time_zone mail_from
                   reply mail_to cc sbj txt file hash);
  my @sql_values = (
    "NULL",                     # mail_id is auto_increment
    $quote_or_null->('message_id'),
    $quote_or_null->('in_reply_to'),
    # a missing date is still sent as an empty string, not as NULL
    $dbh->quote(defined($values->{'date'}) ? $values->{'date'} : ""),
    $quote_or_null->('time_zone'),
    $quote_or_null->('from'),
    $quote_or_null->('reply'),
    $quote_or_null->('to'),
    $quote_or_null->('cc'),
    $dbh->quote($values->{'subject'}),
    $dbh->quote($message),
    $dbh->quote($file_name),
    $dbh->quote($values->{'hash'}),
  );
  $q = "INSERT INTO my_mail (" . join(",", @columns) . ") VALUES (" .
       join(",", @sql_values) . ")";

  if ($opt_test)
  {
    print "$q\n";
    $mail_inserted++;
  }
  elsif ($dbh->do($q))
  {
    $mail_inserted++;
  }
  # This should never happen. This means that the above q failed,
  # but it wasn't because of a duplicate mail entry
  elsif (!($DBI::errstr =~ /Duplicate entry /))
  {
    die "FATAL: Got error :$DBI::errstr\nAttempted query was: $q\n";
  }
  else
  {
    $mail_duplicates++;
    print "Duplicate mail: query: $q\n" if ($opt_debug);
  }
  return;
}
|
|
523 |
||
524 |
####
|
|
525 |
#### In case you have two identical messages we wanted to identify them
|
|
526 |
#### and remove additionals; We do this by calculating a hash number of the
|
|
527 |
#### message and ignoring messages with the same from, date and hash.
|
|
528 |
#### This function calculates a simple 32 bit hash value for the message.
|
|
529 |
####
|
|
530 |
||
531 |
# Returns a simple checksum of $txt that always fits in 31 bits.
# For every character the running value is shifted left by one, the
# character code is added, plus 1 if bit 30 of the previous value was set,
# and the result is cut back to 31 bits.
sub checksum
{
  my ($txt)= @_;
  my $mask = (1 << 31) - 1;
  my $crc = 0;

  foreach my $pos (0 .. length($txt) - 1)
  {
    my $carry = ($crc & (1 << 30)) ? 1 : 0;
    $crc = (($crc << 1) + ord(substr($txt, $pos, 1)) + $carry) & $mask;
  }
  return $crc;
}
|
|
544 |
||
545 |
####
|
|
546 |
#### my_which is used, because we can't assume that every system has the
|
|
547 |
#### which -command. my_which can take only one argument at a time.
|
|
548 |
#### Return values: requested system command with the first found path,
|
|
549 |
#### or undefined, if not found.
|
|
550 |
####
|
|
551 |
||
552 |
# Looks up an executable in the same spirit as which(1).
# Returns $command itself if it already names an executable regular file,
# otherwise the first "<dir>/$command" along $ENV{PATH} that does (an empty
# PATH entry stands for "."), or undef if nothing is found.
sub my_which
{
  my ($command) = @_;

  return $command if (-f $command && -x $command);

  foreach my $dir (split(':', $ENV{'PATH'}))
  {
    my $candidate = ($dir eq "" ? "." : $dir) . "/$command";
    return $candidate if (-f $candidate && -x $candidate);
  }
  return undef();
}
|
|
567 |
||
568 |
####
|
|
569 |
#### usage and version
|
|
570 |
####
|
|
571 |
||
572 |
# Prints the version line (when --version was given) or the complete help
# text, then exits with status 0.
#   $VER  version string shown in the output
sub usage
{
  my ($VER)= @_;

  if ($opt_version)
  {
    print "$progname version $VER\n";
    exit(0);
  }

  print <<EOF;
$progname version $VER

Description: Insert mails from inbox file(s) into a table. This program
can read group [mail_to_db] from the my.cnf file. You may want to have db
and table set there at least.

Usage: $progname [options] file1 [file2 file3 ...]
or: $progname [options] --create [file1 file2...]
or: cat inbox | $progname [options] --stdin

The last example can be used to read mails from standard input and can
useful when inserting mails to database via a program 'on-the-fly'.
The filename will be 'READ-FROM-STDIN' in this case.

Options:
--help Show this help and exit.
--version Show the version number and exit.
--debug Print some extra information during the run.
--host=... Hostname to be used.
--port=# TCP/IP port to be used with connection.
--socket=... MySQL UNIX socket to be used with connection.
--db=... Database to be used.
--user=... Username for connecting.
--password=... Password for the user.
--stdin Read mails from stdin.
--max_mail_size=# Maximum size of a mail in bytes.
Beware of the downside letting this variable be too big;
you may easily end up inserting a lot of attached
binary files (like MS Word documents etc), which take
space, make the database slower and are not really
searchable anyway. (Default $opt_max_mail_size)
--create Create the mails table. This can be done with the first run.
--test Dry run. Print the queries and the result as it would be.
--no_path When inserting the file name, leave out any paths of
the name.
--stop_on_error Stop the run, if an unexpected, but not fatal error occurs
during the run. Without this option some fields may get
unwanted values. --debug will also report about these.
EOF
  exit(0);
}
|