スクリプトと同じディレクトリに、shiftjis、jis の混在したテキストファイル「popfilecd.msg」があるという条件で検証。
#!/usr/bin/perl
use strict;
use Encode;
use Encode::Guess;
use Text::Kakasi;
# Candidate encodings offered to Encode::Guess, keyed by language name.
my %encoding_candidates = (
    Nihongo => [ qw(shiftjis euc-jp 7bit-jis) ],
);

# Configure Kakasi before any text is processed.
# NOTE(review): HAS_ENCODE = 0 presumably stops Text::Kakasi from doing its
# own Encode-based conversion, since this script converts to EUC-JP itself --
# confirm against the installed Text::Kakasi version.
$Text::Kakasi::HAS_ENCODE = 0;

# The options ask Kakasi for "-w" output with EUC-JP input and output.
Text::Kakasi::getopt_argv( 'kakasi', '-w -ieuc -oeuc' );

# Run the check, then release the Kakasi dictionary.
kakasi_test();

Text::Kakasi::close_kanwadict();
# Read ./popfilecd.msg line by line and, for every line, print the line
# converted to EUC-JP followed by the result of passing it through Kakasi.
#
# Takes no arguments.  Dies with a diagnostic if the input file cannot be
# opened (previously a missing file was silently treated as empty).
sub kakasi_test {
    my $path = './popfilecd.msg';

    # Three-argument open with a lexical handle: the mode cannot be injected
    # through the file name and the handle does not leak into the package.
    open( my $fh, '<', $path ) or die "Cannot open $path: $!";

    while ( my $line = <$fh> ) {
        # '7bit-jis' is the canonical Encode name for JIS.  It is used both as
        # the declared source encoding and as the fallback when guessing fails.
        $line = convert_encoding(
            $line, '7bit-jis', 'euc-jp', '7bit-jis',
            @{ $encoding_candidates{'Nihongo'} }
        );
        print $line;

        # Kakasi was configured for EUC-JP in and out, so feed it the
        # converted line and print what it returns.
        $line = Text::Kakasi::do_kakasi($line);
        print $line;
    }

    close($fh);
}
# Convert $string into the $to encoding and return the result.
#
# The source encoding is chosen in this order:
#   1. the encoding Encode::Guess picks from @candidates, when the guess
#      yields a single encoding object;
#   2. $from, when Encode can resolve that name;
#   3. $default.
#
# Parameters:
#   $string     - octets to convert (the caller's variable is not modified)
#   $from       - declared source encoding; may be invalid or undefined
#   $to         - target encoding name
#   $default    - source encoding used when $from is unusable
#   @candidates - encoding names passed to Encode::Guess as suspects
#
# Returns the converted octets.  The input is returned unchanged when the
# source and target encodings are the same, when no source encoding could be
# determined, or when the conversion throws.
#
# Side effect: prints "Guessed encoding : NAME" to STDOUT when the guess
# succeeds.
sub convert_encoding
{
    my ( $string, $from, $to, $default, @candidates ) = @_;

    require Encode;
    require Encode::Guess;

    # guess_encoding returns an encoding object on success and a plain
    # error string otherwise, so ref() tells the two apart.
    my $enc = Encode::Guess::guess_encoding( $string, @candidates );

    if ( ref $enc ) {
        $from = $enc->name;
        print "Guessed encoding : $from\n";
    }
    elsif ( !defined $from || !Encode::resolve_alias($from) ) {
        # The declared encoding is missing or unknown: use the default.
        $from = $default;
    }

    # Nothing to do without a source encoding or when it equals the target.
    return $string if !defined $from || $from eq $to;

    # Convert a copy so the original survives a failed conversion.  Success
    # is taken from the eval's own return value rather than from $@, which
    # can be reset before it is inspected.
    my $converted = $string;
    my $ok = eval {
        Encode::from_to( $converted, $from, $to );
        1;
    };

    return $ok ? $converted : $string;
}