继续抛砖头,perl脚本试了一下:
# Per-sentence word and punctuation counter.
#
# Input:  input.txt  -- one sentence per line (save the output of the
#                       sentence splitter under this name first).
# Output: output.txt -- for every input line, a block of the form
#
#     In sentence number N
#     The number of <name>(s) is <count>
#     ...
#     <blank line>
#
# A "word" is any whitespace-separated token.  A token that ends in one of
# the marks listed in @punctuation is additionally counted under that
# mark's name.  Dies if either file cannot be opened or the report cannot
# be written completely.
use strict;
use warnings;

# Trailing punctuation marks to recognise, as [ report name, pattern ] pairs.
# The first pattern that matches a token wins.  To count another mark, add a
# pair to this list, for example:  [ 'question_mark', qr/[?]$/ ],
my @punctuation = (
    [ 'full_stop', qr/[.]$/ ],
    [ 'comma',     qr/[,]$/ ],
);

open(my $in, '<', 'input.txt')
    or die "Cannot open input.txt: $!";
open(my $out, '>', 'output.txt')
    or die "Cannot open output.txt: $!";

my $sentence_number = 1;
while (my $line = <$in>) {
    chomp $line;

    # Fresh counters for every sentence.
    my %count_of;

    # split ' ' (unlike split /\s+/) skips leading whitespace, so an indented
    # line does not produce an empty first token that is counted as a word.
    for my $token (split ' ', $line) {
        ++$count_of{word};
        for my $mark (@punctuation) {
            my ($name, $pattern) = @{$mark};
            if ($token =~ $pattern) {
                ++$count_of{$name};
                last;
            }
        }
    }

    print {$out} "In sentence number $sentence_number\n";

    # Ascending by count; ties are broken by name so the report is identical
    # on every run (Perl randomises hash key order between runs).
    my @names = sort { $count_of{$a} <=> $count_of{$b} or $a cmp $b }
        keys %count_of;
    for my $name (@names) {
        print {$out} "The number of ${name}(s) is $count_of{$name}\n";
    }
    print {$out} "\n";

    ++$sentence_number;
}

close($in);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so this close must be checked.
close($out) or die "Cannot finish writing output.txt: $!";