スクリプトと同じディレクトリに、shiftjis、jis の混在したテキストファイル「popfilecd.msg」があるという条件で検証。
#!/usr/bin/perl
#
# Test harness: reads ./popfilecd.msg line by line, converts each line to
# EUC-JP with convert_encoding(), then runs the converted line through
# Text::Kakasi. Each line is printed once after conversion and once after
# do_kakasi().

use strict;
use warnings;

use Encode;
use Encode::Guess;
use Text::Kakasi;

# Candidate encodings handed to Encode::Guess, keyed by language name.
my %encoding_candidates = (
    'Nihongo' => [ 'shiftjis', 'euc-jp', '7bit-jis' ],
);

# NOTE(review): presumably this stops Text::Kakasi from doing its own
# Encode-based conversion so that raw EUC-JP bytes are passed through --
# confirm against the installed Text::Kakasi version.
$Text::Kakasi::HAS_ENCODE = 0;

Text::Kakasi::getopt_argv("kakasi", "-w -ieuc -oeuc");

kakasi_test();

Text::Kakasi::close_kanwadict();

# kakasi_test()
#
# Reads ./popfilecd.msg (resolved relative to the current working
# directory), converts every line to EUC-JP and prints it, then prints the
# output of Text::Kakasi::do_kakasi() for that line.
#
# Dies with the system error message if the file cannot be opened.
sub kakasi_test {
    my $path = "./popfilecd.msg";

    # Three-argument open with a lexical handle; a missing input file is
    # reported instead of silently producing no output.
    open(my $db, '<', $path) or die "Cannot open $path: $!\n";

    while (my $line = <$db>) {
        # NOTE(review): '7bitjis' (used as both $from and $default) is not
        # the '7bit-jis' spelling used in the candidate list; if Encode
        # cannot resolve it, the fallback conversion fails and the line is
        # passed through unconverted. Kept as-is -- confirm the intent.
        $line = convert_encoding(
            $line, '7bitjis', 'euc-jp', '7bitjis',
            @{ $encoding_candidates{'Nihongo'} }
        );
        print $line;

        $line = Text::Kakasi::do_kakasi($line);
        print $line;
    }

    close($db);
    return;
}

# convert_encoding( $string, $from, $to, $default, @candidates )
#
# Converts $string (a byte string) from one encoding to another.
#
#   $string     - the data to convert
#   $from       - encoding to assume when guessing fails
#   $to         - target encoding
#   $default    - replacement for $from when $from is not a name Encode
#                 can resolve
#   @candidates - encodings passed to Encode::Guess::guess_encoding()
#
# Returns the converted string, or the original string unchanged when the
# source and target encodings are equal or the conversion throws.
sub convert_encoding {
    my ( $string, $from, $to, $default, @candidates ) = @_;

    require Encode;
    require Encode::Guess;

    # First, guess the encoding. guess_encoding() yields an object on
    # success and a non-reference value otherwise.
    my $enc = Encode::Guess::guess_encoding( $string, @candidates );

    if ( ref $enc ) {
        $from = $enc->name;
        print "Guessed encoding : $from\n";
    }
    elsif ( !Encode::resolve_alias($from) ) {
        # The guess did not work and $from is not a valid encoding name,
        # so fall back to $default.
        $from = $default;
    }

    return $string if $from eq $to;

    # Workaround for Encode::Unicode error bug: from_to() converts in
    # place and may die, so convert a copy and keep the original on
    # failure.
    my $converted = $string;
    my $ok = eval { Encode::from_to( $converted, $from, $to ); 1 };

    return $ok ? $converted : $string;
}