#!/usr/bin/perl
#
# Micro-benchmark comparing equivalent spellings of two regex operations
# built on an EUC-JP character pattern:
#   * test1 / test1_1 / test1_2: strip bytes that do not form a valid
#     character from a deliberately corrupted string;
#   * test2 / test2_1: match the leading run of whole characters in a
#     fixed-length prefix.
# The variants are first checked against each other on randomly corrupted
# input, then timed with Benchmark.
use strict;
use warnings;
use Benchmark qw(timethese cmpthese);

# These are used for Japanese support.  They are kept as plain strings so
# that they can be interpolated into larger patterns.
# ASCII characters
my $ascii = '[\x00-\x7F]';
# EUC-JP 2 byte characters
my $two_bytes_euc_jp = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])';
# EUC-JP 3 byte characters
my $three_bytes_euc_jp = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])';
# EUC-JP characters
my $euc_jp = "(?:$ascii|$two_bytes_euc_jp|$three_bytes_euc_jp)";

# Sample text, repeated to get a reasonably long input, plus a pristine copy
# used only to report whether the corruption below changed anything.
my $text = '適当なテキスト、メールアドレスなど' x 200;
my $text_bak = $text;

# Overwrite 500 randomly chosen positions with a random byte in 0x80-0xFF.
for (1 .. 500) {
    my $junk = chr(128 + int(rand(128)));
    my $pos  = int(rand(length($text)));
    substr($text, $pos, 1) = $junk;
}
print "NOT " if $text ne $text_bak;
print "SAME\n";

# Consistency check: corrupt one more position per round and verify that the
# alternative implementations agree with the reference test1.
for my $c (1 .. 100) {
    print "Test count: $c ... \n";
    substr($text, int(rand(length($text))), 1) = chr(128 + int(rand(128)));
    my $s   = test1($text);
    my $s_1 = test1_1($text);
    my $s_2 = test1_2($text);
    # print "\n";
    # print test2($s);
    for my $variant ( [ 'test1_1', $s_1 ], [ 'test1_2', $s_2 ] ) {
        my ($name, $got) = @{$variant};
        next if $s eq $got;
        print length($s) . "\n";
        print length($got) . "\n";
        print "Error in $name.\n";
    }
}

# Time the clean-up variants.  Code refs are used instead of code strings so
# that each call visibly receives its argument.
timethese(100, {
    'test_v1'   => sub { test1($text) },
    'test_v1+'  => sub { test1_1($text) },
    'test_v1++' => sub { test1_2($text) },
});
print "\n";

# test2/test2_1 take the string to work on as their argument.  Passing the
# bare sub names as code strings called them with no argument at all, so the
# benchmark never ran the prefix match against real data.  Feed them the
# cleaned-up text instead.
my $sanitized = test1($text);
timethese(10000, {
    'test2_v1'  => sub { test2($sanitized) },
    'test2_v1+' => sub { test2_1($sanitized) },
});
# Reference clean-up implementation.
#
# Works on a copy of the file-level $text (any arguments are ignored).
# Starting where the previous match ended (\G), each substitution keeps a
# run of characters matching $euc_jp and drops one following byte in
# 0x80-0xFF if there is one.  The look-ahead after that byte can match the
# empty string, so it never rejects a match.
#
# Returns the cleaned copy; $text itself is left untouched.
sub test1 {
    (my $cleaned = $text) =~ s/\G((?:$euc_jp)*)([\x80-\xFF](?=(?:$euc_jp)*))?/$1/og;
    return $cleaned;
}
# Clean-up variant "v1+".
#
# Same job as test1, with a shorter pattern: $euc_jp is quantified directly
# (it is already a non-capturing group) and the look-ahead is dropped.
# Works on a copy of the file-level $text (any arguments are ignored) and
# returns the cleaned copy.
sub test1_1 {
    (my $cleaned = $text) =~ s/\G($euc_jp*)[\x80-\xFF]?/$1/og;
    return $cleaned;
}
# Clean-up variant "v1++".
#
# Like test1_1, but $euc_jp is wrapped in an extra non-capturing group
# before being quantified.  Works on a copy of the file-level $text (any
# arguments are ignored) and returns the cleaned copy.
sub test1_2 {
    (my $cleaned = $text) =~ s/\G((?:$euc_jp)*)[\x80-\xFF]?/$1/og;
    return $cleaned;
}
# Benchmark body, variant "v1".
#
# For every length from 39 to 60, grabs the first run of that many
# characters from the argument (using ".", so newlines are never included)
# and matches the leading sequence of $euc_jp characters in that run with
# the "((?:$euc_jp)*)" spelling of the pattern.
#
# Argument: the string to take prefixes from.
# Returns:  nothing; the sub exists only to be timed.
sub test2
{
    my $s = shift;
    return unless defined $s;
    for my $length (39..60) {
        # Capture variables keep their previous value when a match fails,
        # so only go on when this match actually succeeded.
        next unless $s =~ /(.{$length})/;
        # Copy the capture: the next match would otherwise use the magic
        # variable $1 as its own target.
        my $prefix = $1;
        $prefix =~ /((?:$euc_jp)*)/o;
        # print "$1...\n";
    }
    return;
}
# Benchmark body, variant "v1+".
#
# For every length from 39 to 60, grabs the first run of that many
# characters from the argument (using ".", so newlines are never included)
# and matches the leading sequence of $euc_jp characters in that run with
# the shorter "($euc_jp*)" spelling of the pattern.
#
# Argument: the string to take prefixes from.
# Returns:  nothing; the sub exists only to be timed.
sub test2_1
{
    my $s = shift;
    return unless defined $s;
    for my $length (39..60) {
        # Capture variables keep their previous value when a match fails,
        # so only go on when this match actually succeeded.
        next unless $s =~ /(.{$length})/;
        # Copy the capture: the next match would otherwise use the magic
        # variable $1 as its own target.
        my $prefix = $1;
        $prefix =~ /($euc_jp*)/o;
        # print "$1...\n";
    }
    return;
}