my $s_6 = test1_6($text); print length($s_6), '/'; |
if ( $s ne $s_6 ) { print length($s) . "\n"; print length($s_6) . "\n"; print "Error in test1_6.\n"; } |
'v1_6' => 'test1_6', |
$s =~ s/\G($euc_jp*)(($euc_jp)|[\x80-\xFF])?/$1$3/g; |
$s =~ s/\G($euc_jp*)(?:(?=$euc_jp)|[\x80-\xFF])?/$1/g; |
$s =~ s/\G((?:$euc_jp)*)(($euc_jp)|[\x80-\xFF])?/$1$3/og; |
$s =~ s/\G((?:$euc_jp)*)(?:(?=$euc_jp)|[\x80-\xFF])?/$1/og; |
$s =~ s/\G((?:$euc_jp){0,300})(($euc_jp)|[\x80-\xFF])?/$1$3/og; |
$s =~ s/\G((?:$euc_jp){0,300})(?:(?=$euc_jp)|[\x80-\xFF])?/$1/og; |
while ( $s =~ s/^($euc_jp{0,1000})(($euc_jp)|[\x80-\xFF])?//o ) { $result .= $1 . $3; |
while ( $s =~ s/^($euc_jp{0,1000})(?:(?=$euc_jp)|[\x80-\xFF])?//o ) { $result .= $1; |
while ( $s =~ /($euc_jp{0,300})(($euc_jp)|[\x80-\xFF])?/gso ) { $result .= $1 . $3; |
while ( $s =~ /($euc_jp{0,300})(?:($euc_jp)|[\x80-\xFF])?/gso ) { $result .= $1 . $2; } return $result; } sub test1_6 { my $s = $text; my $result = ''; while ( $s =~ /((?:$euc_jp){0,300})(?:(?=$euc_jp)|[\x80-\xFF])?/gso ) { $result .= $1; |
</pre> |
</pre> |
#!/usr/bin/perl use strict; use Benchmark qw(timethese cmpthese); # These are used for Japanese support # ASCII characters my $ascii = '[\x00-\x7F]'; # EUC-JP 2 byte characters my $two_bytes_euc_jp = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])'; # EUC-JP 3 byte characters my $three_bytes_euc_jp = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])'; # EUC-JP characters my $euc_jp = "(?:$ascii|$two_bytes_euc_jp|$three_bytes_euc_jp)"; my $text = '"テスト文字列" <test@example.com>,'x6600; my $text_bak = $text; print length($text), "\n"; for (1..1000) { substr($text,int(rand(length($text))),1) = chr(128+int(rand(128))); } print "NOT " if $text ne $text_bak; print "SAME\n"; for my $c (1..20) { print "Test count: $c ... "; for (1..100) { substr($text,int(rand(length($text))),1) = chr(128+int(rand(128))); } my $s = test1($text); print length($s), '/'; my $s_1 = test1_1($text); print length($s_1), '/'; my $s_2 = test1_2($text); print length($s_2), '/'; my $s_3 = test1_3($text); print length($s_3), '/'; my $s_4 = test1_4($text); print length($s_4), '/'; my $s_5 = test1_5($text); print length($s_5), '/'; my $s_6 = test1_6($text); print length($s_6), '/'; print "\n"; # print test2($s); if ( $s ne $s_1 ) { print length($s) . "\n"; print length($s_1) . "\n"; print "Error in test1_1.\n"; } if ( $s ne $s_2 ) { print length($s) . "\n"; print length($s_2) . "\n"; print "Error in test1_2.\n"; } if ( $s ne $s_3 ) { print length($s) . "\n"; print length($s_3) . "\n"; print "Error in test1_3.\n"; } if ( $s ne $s_4 ) { print length($s) . "\n"; print length($s_4) . "\n"; print "Error in test1_4.\n"; } if ( $s ne $s_5 ) { print length($s) . "\n"; print length($s_5) . "\n"; print "Error in test1_5.\n"; } if ( $s ne $s_6 ) { print length($s) . "\n"; print length($s_6) . "\n"; print "Error in test1_6.\n"; } } for (1..10) { $text = $text_bak; substr($text,int(rand(length($text))),1) = chr(128+int(rand(128))); timethese (100, { 'v1' => 'test1', 'v1_1' => 'test1_1', 'v1_2' => 'test1_2', 'v1_3' => 'test1_3', 'v1_4' => 'test1_4', 'v1_5' => 'test1_5', 'v1_6' => 'test1_6', } ); print "\n"; } timethese (10000, { 'test2_v1' => 'test2', 'test2_v1+' => 'test2_1', } ); sub test1 { my $s = $text; $s =~ s/\G((?:$euc_jp)*)([\x80-\xFF](?=(?:$euc_jp)*))?/$1/og; $s; } sub test1_1 { my $s = $text; $s =~ s/\G($euc_jp*)(?:(?=$euc_jp)|[\x80-\xFF])?/$1/g; $s; } sub test1_2 { my $s = $text; $s =~ s/\G((?:$euc_jp)*)(?:(?=$euc_jp)|[\x80-\xFF])?/$1/og; $s; } sub test1_3 { my $s = $text; $s =~ s/\G((?:$euc_jp){0,300})(?:(?=$euc_jp)|[\x80-\xFF])?/$1/og; $s; } sub test1_4 { my $s = $text; my $result = ''; while ( $s =~ s/^($euc_jp{0,1000})(?:(?=$euc_jp)|[\x80-\xFF])?//o ) { $result .= $1; last if $s eq ''; } return $result; } sub test1_5 { my $s = $text; my $result = ''; while ( $s =~ /($euc_jp{0,300})(?:($euc_jp)|[\x80-\xFF])?/gso ) { $result .= $1 . $2; } return $result; } sub test1_6 { my $s = $text; my $result = ''; while ( $s =~ /((?:$euc_jp){0,300})(?:(?=$euc_jp)|[\x80-\xFF])?/gso ) { $result .= $1; } return $result; } sub test2 { my $s = shift; for my $length (39..60) { $s =~ /(.{$length})/; $1 =~ /((?:$euc_jp)*)/o; # print "$1...\n"; } } sub test2_1 { my $s = shift; for my $length (39..60) { $s =~ /(.{$length})/; $1 =~ /($euc_jp*)/o; # print "$1...\n"; } }