Filename | /usr/local/perls/perl-5.26.1/lib/site_perl/5.26.1/URI/Escape.pm |
Statements | Executed 352 statements in 2.22ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
20 | 1 | 1 | 86µs | 96µs | uri_unescape | URI::Escape::
1 | 1 | 1 | 32µs | 74µs | BEGIN@140 | URI::Escape::
1 | 1 | 1 | 32µs | 36µs | BEGIN@3 | URI::Escape::
2 | 1 | 1 | 14µs | 14µs | CORE:qr (opcode) | URI::Escape::
20 | 1 | 1 | 10µs | 10µs | CORE:subst (opcode) | URI::Escape::
1 | 1 | 1 | 9µs | 19µs | BEGIN@4 | URI::Escape::
1 | 1 | 1 | 7µs | 7µs | BEGIN@146 | URI::Escape::
0 | 0 | 0 | 0s | 0s | _fail_hi | URI::Escape::
0 | 0 | 0 | 0s | 0s | escape_char | URI::Escape::
0 | 0 | 0 | 0s | 0s | uri_escape | URI::Escape::
0 | 0 | 0 | 0s | 0s | uri_escape_utf8 | URI::Escape::
Line | Statements |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package URI::Escape; | ||||
2 | |||||
3 | 2 | 40µs | 2 | 40µs | # spent 36µs (32+4) within URI::Escape::BEGIN@3 which was called:
# once (32µs+4µs) by URI::BEGIN@23 at line 3 # spent 36µs making 1 call to URI::Escape::BEGIN@3
# spent 4µs making 1 call to strict::import |
4 | 2 | 128µs | 2 | 29µs | # spent 19µs (9+10) within URI::Escape::BEGIN@4 which was called:
# once (9µs+10µs) by URI::BEGIN@23 at line 4 # spent 19µs making 1 call to URI::Escape::BEGIN@4
# spent 10µs making 1 call to warnings::import |
5 | |||||
6 | =head1 NAME | ||||
7 | |||||
8 | URI::Escape - Percent-encode and percent-decode unsafe characters | ||||
9 | |||||
10 | =head1 SYNOPSIS | ||||
11 | |||||
12 | use URI::Escape; | ||||
13 | $safe = uri_escape("10% is enough\n"); | ||||
14 | $verysafe = uri_escape("foo", "\0-\377"); | ||||
15 | $str = uri_unescape($safe); | ||||
16 | |||||
17 | =head1 DESCRIPTION | ||||
18 | |||||
19 | This module provides functions to percent-encode and percent-decode URI strings as | ||||
20 | defined by RFC 3986. Percent-encoding URIs is informally called "URI escaping". | ||||
21 | This is the terminology used by this module, which predates the formalization of the | ||||
22 | terms by the RFC by several years. | ||||
23 | |||||
24 | A URI consists of a restricted set of characters. The restricted set | ||||
25 | of characters consists of digits, letters, and a few graphic symbols | ||||
26 | chosen from those common to most of the character encodings and input | ||||
27 | facilities available to Internet users. They are made up of the | ||||
28 | "unreserved" and "reserved" character sets as defined in RFC 3986. | ||||
29 | |||||
30 | unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | ||||
31 | reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@" | ||||
32 | / "!" / "$" / "&" / "'" / "(" / ")" | ||||
33 | / "*" / "+" / "," / ";" / "=" | ||||
34 | |||||
35 | In addition, any byte (octet) can be represented in a URI by an escape | ||||
36 | sequence: a triplet consisting of the character "%" followed by two | ||||
37 | hexadecimal digits. A byte can also be represented directly by a | ||||
38 | character, using the US-ASCII character for that octet. | ||||
39 | |||||
40 | Some of the characters are I<reserved> for use as delimiters or as | ||||
41 | part of certain URI components. These must be escaped if they are to | ||||
42 | be treated as ordinary data. Read RFC 3986 for further details. | ||||
43 | |||||
44 | The functions provided (and exported by default) from this module are: | ||||
45 | |||||
46 | =over 4 | ||||
47 | |||||
48 | =item uri_escape( $string ) | ||||
49 | |||||
50 | =item uri_escape( $string, $unsafe ) | ||||
51 | |||||
52 | Replaces each unsafe character in the $string with the corresponding | ||||
53 | escape sequence and returns the result. The $string argument should | ||||
54 | be a string of bytes. The uri_escape() function will croak if given | ||||
55 | characters with code above 255. Use uri_escape_utf8() if you know you | ||||
56 | have such chars and/or want chars in the 128 .. 255 range treated as | ||||
57 | UTF-8. | ||||
58 | |||||
59 | The uri_escape() function takes an optional second argument that | ||||
60 | overrides the set of characters that are to be escaped. The set is | ||||
61 | specified as a string that can be used in a regular expression | ||||
62 | character class (between [ ]). E.g.: | ||||
63 | |||||
64 | "\x00-\x1f\x7f-\xff" # all control and hi-bit characters | ||||
65 | "a-z" # all lower case characters | ||||
66 | "^A-Za-z" # everything not a letter | ||||
67 | |||||
68 | The default set of characters to be escaped is all those which are | ||||
69 | I<not> part of the C<unreserved> character class shown above as well | ||||
70 | as the reserved characters. I.e. the default is: | ||||
71 | |||||
72 | "^A-Za-z0-9\-\._~" | ||||
73 | |||||
74 | =item uri_escape_utf8( $string ) | ||||
75 | |||||
76 | =item uri_escape_utf8( $string, $unsafe ) | ||||
77 | |||||
78 | Works like uri_escape(), but will encode chars as UTF-8 before | ||||
79 | escaping them. This makes this function able to deal with characters | ||||
80 | with code above 255 in $string. Note that chars in the 128 .. 255 | ||||
81 | range will be escaped differently by this function compared to what | ||||
82 | uri_escape() would. For chars in the 0 .. 127 range there is no | ||||
83 | difference. | ||||
84 | |||||
85 | Equivalent to: | ||||
86 | |||||
87 | utf8::encode($string); | ||||
88 | my $uri = uri_escape($string); | ||||
89 | |||||
90 | Note: JavaScript has a function called escape() that produces the | ||||
91 | sequence "%uXXXX" for chars in the 256 .. 65535 range. This function | ||||
92 | has really nothing to do with URI escaping but some folks got confused | ||||
93 | since it "does the right thing" in the 0 .. 255 range. Because of | ||||
94 | this you sometimes see "URIs" with these kinds of escapes. The | ||||
95 | JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). | ||||
96 | |||||
97 | =item uri_unescape($string,...) | ||||
98 | |||||
99 | Returns a string with each %XX sequence replaced with the actual byte | ||||
100 | (octet). | ||||
101 | |||||
102 | This does the same as: | ||||
103 | |||||
104 | $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
105 | |||||
106 | but does not modify the string in-place as this RE would. Using the | ||||
107 | uri_unescape() function instead of the RE might make the code look | ||||
108 | cleaner and is a few characters less to type. | ||||
109 | |||||
110 | In a simple benchmark test I did, calling the function (instead of | ||||
111 | using the inline RE above) was something like 40% slower when a few | ||||
112 | chars were unescaped, and something like 700% slower when none were. If | ||||
113 | you are going to unescape a lot of times it might be a good idea to | ||||
114 | inline the RE. | ||||
115 | |||||
116 | If the uri_unescape() function is passed multiple strings, then each | ||||
117 | one is returned unescaped. | ||||
118 | |||||
119 | =back | ||||
120 | |||||
121 | The module can also export the C<%escapes> hash, which contains the | ||||
122 | mapping from all 256 bytes to the corresponding escape codes. Lookup | ||||
123 | in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))> | ||||
124 | each time. | ||||
125 | |||||
126 | =head1 SEE ALSO | ||||
127 | |||||
128 | L<URI> | ||||
129 | |||||
130 | |||||
131 | =head1 COPYRIGHT | ||||
132 | |||||
133 | Copyright 1995-2004 Gisle Aas. | ||||
134 | |||||
135 | This program is free software; you can redistribute it and/or modify | ||||
136 | it under the same terms as Perl itself. | ||||
137 | |||||
138 | =cut | ||||
139 | |||||
140 | 3 | 132µs | 3 | 116µs | # spent 74µs (32+42) within URI::Escape::BEGIN@140 which was called:
# once (32µs+42µs) by URI::BEGIN@23 at line 140 # spent 74µs making 1 call to URI::Escape::BEGIN@140
# spent 22µs making 1 call to Exporter::import
# spent 20µs making 1 call to version::_VERSION |
141 | our %escapes; | ||||
142 | 1 | 2µs | our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); | ||
143 | 1 | 0s | our @EXPORT_OK = qw(%escapes); | ||
144 | 1 | 1µs | our $VERSION = "3.31"; | ||
145 | |||||
146 | 2 | 690µs | 1 | 7µs | # spent 7µs within URI::Escape::BEGIN@146 which was called:
# once (7µs+0s) by URI::BEGIN@23 at line 146 # spent 7µs making 1 call to URI::Escape::BEGIN@146 |
147 | |||||
148 | # Build a char->hex map | ||||
149 | 1 | 2µs | for (0..255) { | ||
150 | 256 | 1.03ms | $escapes{chr($_)} = sprintf("%%%02X", $_); | ||
151 | } | ||||
152 | |||||
153 | 1 | 0s | my %subst; # compiled patterns | ||
154 | |||||
155 | 1 | 74µs | 2 | 14µs | my %Unsafe = ( # spent 14µs making 2 calls to URI::Escape::CORE:qr, avg 7µs/call |
156 | RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/, | ||||
157 | RFC3986 => qr/[^A-Za-z0-9\-\._~]/, | ||||
158 | ); | ||||
159 | |||||
# uri_escape( $text [, $unsafe] )
#
# Percent-encodes every character of $text that matches the "unsafe" set and
# returns the escaped copy; the caller's string is not modified.
#
#   $text    String of bytes to escape.  undef is passed through as undef.
#   $unsafe  Optional character-class body (the text that would go between
#            [ and ] in a regex), e.g. "^A-Za-z".  When omitted, the
#            RFC 3986 pattern held in $Unsafe{RFC3986} is used.
#
# Croaks via _fail_hi() when a matched character has no usable entry in
# %escapes, and croaks with "uri_escape: ..." when $unsafe does not compile
# as a character class.
sub uri_escape {
    my ($text, $patn) = @_;
    return undef unless defined $text;

    my $unsafe_re;
    if (defined $patn) {
        $unsafe_re = $subst{$patn};
        unless (defined $unsafe_re) {
            # Compile the class with qr// inside a block eval instead of
            # pasting $patn into a string eval: the caller's text is then
            # only ever treated as regex source, never as Perl code.
            $unsafe_re = eval { qr/[$patn]/ };
            Carp::croak("uri_escape: $@") unless defined $unsafe_re;
            $subst{$patn} = $unsafe_re;    # cache, keyed by the pattern string
        }
    }
    else {
        $unsafe_re = $Unsafe{RFC3986};
    }

    $text =~ s/($unsafe_re)/$escapes{$1} || _fail_hi($1)/ge;
    return $text;
}
176 | |||||
# _fail_hi( $char )
#
# Error path for uri_escape(): reports a character that could not be mapped
# to an escape sequence.  Formats the character's ordinal as \x{XXXX} and
# croaks, so control never returns to the caller.
sub _fail_hi {
    my ($offending_char) = @_;
    my $code_point = ord $offending_char;
    Carp::croak(
        sprintf("Can't escape \\x{%04X}, try uri_escape_utf8() instead", $code_point)
    );
}
181 | |||||
# uri_escape_utf8( $text [, $unsafe] )
#
# Same contract as uri_escape(), except that a private copy of $text is
# first encoded to UTF-8 octets.  Any arguments after $text are forwarded
# to uri_escape() untouched.  Returns undef when $text is undef.
sub uri_escape_utf8 {
    my ($text, @escape_args) = @_;
    return undef unless defined $text;

    # In-place encode of the local copy; the caller's scalar is not altered.
    utf8::encode($text);

    return uri_escape($text, @escape_args);
}
188 | |||||
189 | # spent 96µs (86+10) within URI::Escape::uri_unescape which was called 20 times, avg 5µs/call:
# 20 times (86µs+10µs) by URI::_generic::path_segments at line 117 of URI/_generic.pm, avg 5µs/call | ||||
190 | # Note from RFC1630: "Sequences which start with a percent sign | ||||
191 | # but are not followed by two hexadecimal characters are reserved | ||||
192 | # for future extension" | ||||
193 | 20 | 6µs | my $str = shift; | ||
194 | 20 | 5µs | if (@_ && wantarray) { | ||
195 | # list-context call with extra strings: unescape a copy of every argument and return them all (not executed for the common case of a single argument) | ||||
196 | my @str = ($str, @_); # need to copy | ||||
197 | for (@str) { | ||||
198 | s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
199 | } | ||||
200 | return @str; | ||||
201 | } | ||||
202 | 20 | 53µs | 20 | 10µs | $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; # spent 10µs making 20 calls to URI::Escape::CORE:subst, avg 500ns/call |
203 | 20 | 45µs | $str; | ||
204 | } | ||||
205 | |||||
# escape_char( $string )
#
# Returns $string with every character replaced by its entry in
# %URI::Escape::escapes, concatenated in order.  A string flagged as UTF-8
# internally is first converted to its UTF-8 octets (on a copy).
#
# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format.
sub escape_char {
    # Old versions of utf8::is_utf8() didn't properly handle magical vars
    # (e.g. $1), so touch the argument once to force a fetch before testing.
    my $force_fetch = substr($_[0], 0, 0);

    my $octets = $_[0];
    if (utf8::is_utf8($_[0])) {
        utf8::encode($octets);
    }

    my @chars = split //, $octets;
    return join '', @URI::Escape::escapes{@chars};
}
220 | |||||
221 | 1 | 17µs | 1; | ||
# spent 14µs within URI::Escape::CORE:qr which was called 2 times, avg 7µs/call:
# 2 times (14µs+0s) by URI::BEGIN@23 at line 155, avg 7µs/call | |||||
# spent 10µs within URI::Escape::CORE:subst which was called 20 times, avg 500ns/call:
# 20 times (10µs+0s) by URI::Escape::uri_unescape at line 202, avg 500ns/call |