[Home]POPFile/V1FreezeUI

Amatubu_Wiki | POPFile | RecentChanges | Preferences

#!/usr/bin/perl

use strict;
use Benchmark qw(timethese cmpthese);

# These are used for Japanese support

# ASCII characters
my $ascii = '[\x00-\x7F]';

# EUC-JP 2 byte characters
my $two_bytes_euc_jp = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])';

# EUC-JP 3 byte characters
my $three_bytes_euc_jp = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])';

# EUC-JP characters
my $euc_jp = "(?:$ascii|$two_bytes_euc_jp|$three_bytes_euc_jp)";


my $text = '"テスト文字列" <test@example.com>,'x6600;
my $text_bak = $text;
print length($text), "\n";

for (1..1000) {
    substr($text,int(rand(length($text))),1) = chr(128+int(rand(128)));
}
print "NOT " if $text ne $text_bak;
print "SAME\n";

for my $c (1..20) {

    print "Test count: $c ... ";
    for (1..100) {
        substr($text,int(rand(length($text))),1) = chr(128+int(rand(128)));
    }

    my $s = test1($text); print length($s), '/';
    my $s_1 = test1_1($text); print length($s_1), '/';
    my $s_2 = test1_2($text); print length($s_2), '/';
    my $s_3 = test1_3($text); print length($s_3), '/';
    my $s_4 = test1_4($text); print length($s_4), '/';
    my $s_5 = test1_5($text); print length($s_5), '/';
    my $s_6 = test1_6($text); print length($s_6), '/';
    print "\n";
    # print test2($s);

    if ( $s ne $s_1 ) {
	    print length($s) . "\n";
	    print length($s_1) . "\n";
	    print "Error in test1_1.\n";
    }
    if ( $s ne $s_2 ) {
	    print length($s) . "\n";
	    print length($s_2) . "\n";
	    print "Error in test1_2.\n";
    }
    if ( $s ne $s_3 ) {
	    print length($s) . "\n";
	    print length($s_3) . "\n";
	    print "Error in test1_3.\n";
    }
    if ( $s ne $s_4 ) {
	    print length($s) . "\n";
	    print length($s_4) . "\n";
	    print "Error in test1_4.\n";
    }
    if ( $s ne $s_5 ) {
	    print length($s) . "\n";
	    print length($s_5) . "\n";
	    print "Error in test1_5.\n";
    }
    if ( $s ne $s_6 ) {
	    print length($s) . "\n";
	    print length($s_6) . "\n";
	    print "Error in test1_6.\n";
    }

}

for (1..10) {
    $text = $text_bak;
    substr($text,int(rand(length($text))),1) = chr(128+int(rand(128)));
timethese (100, { 
	'v1' => 'test1', 
	'v1_1' => 'test1_1', 
	'v1_2' => 'test1_2', 
	'v1_3' => 'test1_3', 
	'v1_4' => 'test1_4',
	'v1_5' => 'test1_5',
	'v1_6' => 'test1_6',
} );

print "\n";
}

timethese (10000, { 
	'test2_v1' => 'test2', 
	'test2_v1+' => 'test2_1', 
} );

sub test1
{
    my $s = $text;
    $s =~ s/\G((?:$euc_jp)*)([\x80-\xFF](?=(?:$euc_jp)*))?/$1/og;
    $s;
}

sub test1_1
{
    my $s = $text;
    $s =~ s/\G($euc_jp*)(?:(?=$euc_jp)|[\x80-\xFF])?/$1/g;
    $s;
}

sub test1_2
{
    my $s = $text;
    $s =~ s/\G((?:$euc_jp)*)(?:(?=$euc_jp)|[\x80-\xFF])?/$1/og;
    $s;
}

sub test1_3
{
    my $s = $text;
    $s =~ s/\G((?:$euc_jp){0,300})(?:(?=$euc_jp)|[\x80-\xFF])?/$1/og;
    $s;
}

sub test1_4
{
    my $s = $text;
    my $result = '';
    while ( $s =~ s/^($euc_jp{0,1000})(?:(?=$euc_jp)|[\x80-\xFF])?//o ) {
        $result .= $1;
        last if $s eq '';
    }
    return $result;
}

sub test1_5
{
    my $s = $text;
    my $result = '';
    while ( $s =~ /($euc_jp{0,300})(?:($euc_jp)|[\x80-\xFF])?/gso ) {
        $result .= $1 . $2;
    }
    return $result;
}

sub test1_6
{
    my $s = $text;
    my $result = '';
    while ( $s =~ /((?:$euc_jp){0,300})(?:(?=$euc_jp)|[\x80-\xFF])?/gso ) {
        $result .= $1;
    }
    return $result;
}

sub test2
{
    my $s = shift;
    for my $length (39..60) {

        $s =~ /(.{$length})/;
        $1 =~ /((?:$euc_jp)*)/o;
#        print "$1...\n";
    }
}

sub test2_1
{
    my $s = shift;
    for my $length (39..60) {

        $s =~ /(.{$length})/;
        $1 =~ /($euc_jp*)/o;
#        print "$1...\n";
    }
}

Amatubu_Wiki | POPFile | RecentChanges | Preferences
This page is read-only | View other revisions
Last edited January 22, 2008 17:08 by Amatubu (diff)
Search:

Copyright (c) 1996-2019 naoki iimura e-mail