| File | /usr/share/perl/5.10/utf8_heavy.pl |
| Statements Executed | 12028 |
| Total Time | 0.0297188000000036 seconds |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 16 | 10 | 7 | 25.8ms | 25.8ms | utf8::SWASHNEW |
| 0 | 0 | 0 | 0s | 0s | utf8::BEGIN |
| 0 | 0 | 0 | 0s | 0s | utf8::DESTROY |
| 0 | 0 | 0 | 0s | 0s | utf8::croak |
| Line | Stmts. | Exclusive Time | Avg. | Code |
|---|---|---|---|---|
| 1 | package utf8; | |||
| 2 | 3 | 31µs | 10µs | use strict; # spent 12µs making 1 call to strict::import |
| 3 | 3 | 354µs | 118µs | use warnings; # spent 26µs making 1 call to warnings::import |
| 4 | ||||
| 5 | sub DEBUG () { 0 } | |||
| 6 | ||||
| 7 | sub DESTROY {} | |||
| 8 | ||||
| 9 | 1 | 300ns | 300ns | my %Cache; |
| 10 | ||||
| 11 | 1 | 600ns | 600ns | our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map); |
| 12 | ||||
| 13 | sub croak { require Carp; Carp::croak(@_) } | |||
| 14 | ||||
| 15 | ## | |||
| 16 | ## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape. | |||
| 17 | ## It's a data structure that encodes a set of Unicode characters. | |||
| 18 | ## | |||
| 19 | ||||
| 20 | # spent 25.8ms (25.8+0ns) within utf8::SWASHNEW which was called 16 times, avg 1.61ms/call:
# 6 times (6.81ms+-6.81ms) by utf8::SWASHNEW at line 250, avg 0s/call
# 2 times (108µs+0s) by DBD::mysql::_OdbcParse at line 49 of /usr/lib/perl5/DBD/mysql.pm, avg 54µs/call
# once (15.0ms+0s) by C4::Context::db_scheme2dbi at line 267 of /home/chris/git/koha.git/C4/Context.pm
# once (3.54ms+0s) by DBI::connect at line 576 of /usr/lib/perl5/DBI.pm
# once (69µs+3.39ms) by C4::ClassSortRoutine::Dewey::get_class_sort_key at line 67 of /home/chris/git/koha.git/C4/ClassSortRoutine/Dewey.pm
# once (55µs+3.28ms) by C4::ClassSortRoutine::Dewey::get_class_sort_key at line 80 of /home/chris/git/koha.git/C4/ClassSortRoutine/Dewey.pm
# once (68µs+61µs) by C4::ClassSortRoutine::LCC::get_class_sort_key at line 56 of /home/chris/git/koha.git/C4/ClassSortRoutine/LCC.pm
# once (59µs+59µs) by C4::ClassSortRoutine::Generic::get_class_sort_key at line 62 of /home/chris/git/koha.git/C4/ClassSortRoutine/Generic.pm
# once (46µs+26µs) by C4::ClassSortRoutine::LCC::get_class_sort_key at line 64 of /home/chris/git/koha.git/C4/ClassSortRoutine/LCC.pm
# once (34µs+0s) by DBD::mysql::_OdbcParse at line 56 of /usr/lib/perl5/DBD/mysql.pm | |||
| 21 | 12010 | 27.8ms | 2µs | my ($class, $type, $list, $minbits, $none) = @_; |
| 22 | local $^D = 0 if $^D; | |||
| 23 | ||||
| 24 | print STDERR "SWASHNEW @_\n" if DEBUG; | |||
| 25 | ||||
| 26 | ## | |||
| 27 | ## Get the list of codepoints for the type. | |||
| 28 | ## Called from swash_init (see utf8.c) or SWASHNEW itself. | |||
| 29 | ## | |||
| 30 | ## Callers of swash_init: | |||
| 31 | ## op.c:pmtrans -- for tr/// and y/// | |||
| 32 | ## regexec.c:regclass_swash -- for /[]/, \p, and \P | |||
| 33 | ## utf8.c:is_utf8_common -- for common Unicode properties | |||
| 34 | ## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i | |||
| 35 | ## | |||
| 36 | ## Given a $type, our goal is to fill $list with the set of codepoint | |||
| 37 | ## ranges. If $type is false, $list passed is used. | |||
| 38 | ## | |||
| 39 | ## $minbits: | |||
| 40 | ## For binary properties, $minbits must be 1. | |||
| 41 | ## For character mappings (case and transliteration), $minbits must | |||
| 42 | ## be a number except 1. | |||
| 43 | ## | |||
| 44 | ## $list (or that filled according to $type): | |||
| 45 | ## Refer to perlunicode.pod, "User-Defined Character Properties." | |||
| 46 | ## | |||
| 47 | ## For binary properties, only characters with the property value | |||
| 48 | ## of True should be listed. The 3rd column, if any, will be ignored. | |||
| 49 | ## | |||
| 50 | ## To make the parsing of $type clear, this code takes the a rather | |||
| 51 | ## unorthodox approach of last'ing out of the block once we have the | |||
| 52 | ## info we need. Were this to be a subroutine, the 'last' would just | |||
| 53 | ## be a 'return'. | |||
| 54 | ## | |||
| 55 | my $file; ## file to load data from, and also part of the %Cache key. | |||
| 56 | my $ListSorted = 0; | |||
| 57 | ||||
| 58 | if ($type) | |||
| 59 | { | |||
| 60 | $type =~ s/^\s+//; | |||
| 61 | $type =~ s/\s+$//; | |||
| 62 | ||||
| 63 | print STDERR "type = $type\n" if DEBUG; | |||
| 64 | ||||
| 65 | GETFILE: | |||
| 66 | { | |||
| 67 | ## | |||
| 68 | ## It could be a user-defined property. | |||
| 69 | ## | |||
| 70 | ||||
| 71 | my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1); | |||
| 72 | ||||
| 73 | if (defined $caller1 && $type =~ /^(?:\w+)$/) { | |||
| 74 | my $prop = "${caller1}::$type"; | |||
| 75 | if (exists &{$prop}) { | |||
| 76 | 3 | 555µs | 185µs | no strict 'refs'; # spent 23µs making 1 call to strict::unimport |
| 77 | ||||
| 78 | $list = &{$prop}; | |||
| 79 | last GETFILE; | |||
| 80 | } | |||
| 81 | } | |||
| 82 | ||||
| 83 | my $wasIs; | |||
| 84 | ||||
| 85 | ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i) | |||
| 86 | or | |||
| 87 | $type =~ s/^(?:(?:General(?:\s+|_)?)?Category|gc)\s*[:=]\s*//i | |||
| 88 | or | |||
| 89 | $type =~ s/^(?:Script|sc)\s*[:=]\s*//i | |||
| 90 | or | |||
| 91 | $type =~ s/^Block\s*[:=]\s*/In/i; | |||
| 92 | ||||
| 93 | ||||
| 94 | ## | |||
| 95 | ## See if it's in some enumeration. | |||
| 96 | ## | |||
| 97 | require "unicore/PVA.pl"; | |||
| 98 | if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) { | |||
| 99 | my ($enum, $val) = (lc $1, lc $2); | |||
| 100 | $enum =~ tr/ _-//d; | |||
| 101 | $val =~ tr/ _-//d; | |||
| 102 | ||||
| 103 | my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum}; | |||
| 104 | my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val}; | |||
| 105 | ||||
| 106 | if ($pa and $f) { | |||
| 107 | $pa = "gc_sc" if $pa eq "gc" or $pa eq "sc"; | |||
| 108 | $file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl"; | |||
| 109 | last GETFILE; | |||
| 110 | } | |||
| 111 | } | |||
| 112 | else { | |||
| 113 | my $t = lc $type; | |||
| 114 | $t =~ tr/ _-//d; | |||
| 115 | ||||
| 116 | if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) { | |||
| 117 | $file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl"; | |||
| 118 | last GETFILE; | |||
| 119 | } | |||
| 120 | } | |||
| 121 | ||||
| 122 | ## | |||
| 123 | ## See if it's in the direct mapping table. | |||
| 124 | ## | |||
| 125 | require "unicore/Exact.pl"; | |||
| 126 | if (my $base = $utf8::Exact{$type}) { | |||
| 127 | $file = "unicore/lib/gc_sc/$base.pl"; | |||
| 128 | last GETFILE; | |||
| 129 | } | |||
| 130 | ||||
| 131 | ## | |||
| 132 | ## If not there exactly, try the canonical form. The canonical | |||
| 133 | ## form is lowercased, with any separators (\s+|[-_]) removed. | |||
| 134 | ## | |||
| 135 | my $canonical = lc $type; | |||
| 136 | $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g; | |||
| 137 | print STDERR "canonical = $canonical\n" if DEBUG; | |||
| 138 | ||||
| 139 | require "unicore/Canonical.pl"; | |||
| 140 | if (my $base = ($utf8::Canonical{$canonical} || $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) { | |||
| 141 | $file = "unicore/lib/gc_sc/$base.pl"; | |||
| 142 | last GETFILE; | |||
| 143 | } | |||
| 144 | ||||
| 145 | ## | |||
| 146 | ## See if it's a user-level "To". | |||
| 147 | ## | |||
| 148 | ||||
| 149 | my $caller0 = caller(0); | |||
| 150 | ||||
| 151 | if (defined $caller0 && $type =~ /^To(?:\w+)$/) { | |||
| 152 | my $map = $caller0 . "::" . $type; | |||
| 153 | ||||
| 154 | if (exists &{$map}) { | |||
| 155 | 3 | 264µs | 88µs | no strict 'refs'; # spent 18µs making 1 call to strict::unimport |
| 156 | ||||
| 157 | $list = &{$map}; | |||
| 158 | last GETFILE; | |||
| 159 | } | |||
| 160 | } | |||
| 161 | ||||
| 162 | ## | |||
| 163 | ## Last attempt -- see if it's a standard "To" name | |||
| 164 | ## (e.g. "ToLower") ToTitle is used by ucfirst(). | |||
| 165 | ## The user-level way to access ToDigit() and ToFold() | |||
| 166 | ## is to use Unicode::UCD. | |||
| 167 | ## | |||
| 168 | if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) { | |||
| 169 | $file = "unicore/To/$1.pl"; | |||
| 170 | ## would like to test to see if $file actually exists.... | |||
| 171 | last GETFILE; | |||
| 172 | } | |||
| 173 | ||||
| 174 | ## | |||
| 175 | ## If we reach this line, it's because we couldn't figure | |||
| 176 | ## out what to do with $type. Ouch. | |||
| 177 | ## | |||
| 178 | ||||
| 179 | return $type; | |||
| 180 | } | |||
| 181 | ||||
| 182 | if (defined $file) { | |||
| 183 | print STDERR "found it (file='$file')\n" if DEBUG; | |||
| 184 | ||||
| 185 | ## | |||
| 186 | ## If we reach here, it was due to a 'last GETFILE' above | |||
| 187 | ## (exception: user-defined properties and mappings), so we | |||
| 188 | ## have a filename, so now we load it if we haven't already. | |||
| 189 | ## If we have, return the cached results. The cache key is the | |||
| 190 | ## class and file to load. | |||
| 191 | ## | |||
| 192 | my $found = $Cache{$class, $file}; | |||
| 193 | if ($found and ref($found) eq $class) { | |||
| 194 | print STDERR "Returning cached '$file' for \\p{$type}\n" if DEBUG; | |||
| 195 | return $found; | |||
| 196 | } | |||
| 197 | ||||
| 198 | $list = do $file; die $@ if $@; | |||
| 199 | } | |||
| 200 | ||||
| 201 | $ListSorted = 1; ## we know that these lists are sorted | |||
| 202 | } | |||
| 203 | ||||
| 204 | my $extras; | |||
| 205 | my $bits = $minbits; | |||
| 206 | ||||
| 207 | my $ORIG = $list; | |||
| 208 | if ($list) { | |||
| 209 | my @tmp = split(/^/m, $list); | |||
| 210 | my %seen; | |||
| 211 | 3 | 675µs | 225µs | no warnings; # spent 26µs making 1 call to warnings::unimport |
| 212 | $extras = join '', grep /^[^0-9a-fA-F]/, @tmp; | |||
| 213 | $list = join '', | |||
| 214 | map { $_->[1] } | |||
| 215 | sort { $a->[0] <=> $b->[0] } | |||
| 216 | map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] } | |||
| 217 | grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right | |||
| 218 | } | |||
| 219 | ||||
| 220 | if ($none) { | |||
| 221 | my $hextra = sprintf "%04x", $none + 1; | |||
| 222 | $list =~ s/\tXXXX$/\t$hextra/mg; | |||
| 223 | } | |||
| 224 | ||||
| 225 | if ($minbits != 1 && $minbits < 32) { # not binary property | |||
| 226 | my $top = 0; | |||
| 227 | while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) { | |||
| 228 | my $min = CORE::hex $1; | |||
| 229 | my $max = defined $2 ? CORE::hex $2 : $min; | |||
| 230 | my $val = defined $3 ? CORE::hex $3 : 0; | |||
| 231 | $val += $max - $min if defined $3; | |||
| 232 | $top = $val if $val > $top; | |||
| 233 | } | |||
| 234 | my $topbits = | |||
| 235 | $top > 0xffff ? 32 : | |||
| 236 | $top > 0xff ? 16 : 8; | |||
| 237 | $bits = $topbits if $bits < $topbits; | |||
| 238 | } | |||
| 239 | ||||
| 240 | my @extras; | |||
| 241 | for my $x ($extras) { | |||
| 242 | pos $x = 0; | |||
| 243 | while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) { | |||
| 244 | my $char = $1; | |||
| 245 | my $name = $2; | |||
| 246 | print STDERR "$1 => $2\n" if DEBUG; | |||
| 247 | if ($char =~ /[-+!&]/) { | |||
| 248 | my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really | |||
| 249 | my $subobj; | |||
| 250 | if ($c eq 'utf8') { # spent 6.81ms making 6 calls to utf8::SWASHNEW, avg 0s/call, max recursion depth 1 | |||
| 251 | $subobj = utf8->SWASHNEW($t, "", $minbits, 0); | |||
| 252 | } | |||
| 253 | elsif (exists &$name) { | |||
| 254 | $subobj = utf8->SWASHNEW($name, "", $minbits, 0); | |||
| 255 | } | |||
| 256 | elsif ($c =~ /^([0-9a-fA-F]+)/) { | |||
| 257 | $subobj = utf8->SWASHNEW("", $c, $minbits, 0); | |||
| 258 | } | |||
| 259 | return $subobj unless ref $subobj; | |||
| 260 | push @extras, $name => $subobj; | |||
| 261 | $bits = $subobj->{BITS} if $bits < $subobj->{BITS}; | |||
| 262 | } | |||
| 263 | } | |||
| 264 | } | |||
| 265 | ||||
| 266 | print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; | |||
| 267 | ||||
| 268 | my $SWASH = bless { | |||
| 269 | TYPE => $type, | |||
| 270 | BITS => $bits, | |||
| 271 | EXTRAS => $extras, | |||
| 272 | LIST => $list, | |||
| 273 | NONE => $none, | |||
| 274 | @extras, | |||
| 275 | } => $class; | |||
| 276 | ||||
| 277 | if ($file) { | |||
| 278 | $Cache{$class, $file} = $SWASH; | |||
| 279 | } | |||
| 280 | ||||
| 281 | return $SWASH; | |||
| 282 | } | |||
| 283 | ||||
| 284 | # Now SWASHGET is recasted into a C function S_swash_get (see utf8.c). | |||
| 285 | ||||
| 286 | 1 | 5µs | 5µs | 1; |