| 1 | <?php |
|---|
| 2 | |
|---|
| 3 | function check_url ($url2check, $types) { |
|---|
| 4 | |
|---|
| 5 | # Be paranoid about using grouping! |
|---|
| 6 | |
|---|
| 7 | $nz_digit = '[1-9]'; |
|---|
| 8 | $nz_digits = "(?:$nz_digit\\d*)"; |
|---|
| 9 | $digits = '(?:\d+)'; |
|---|
| 10 | $space = '(?:%20)'; |
|---|
| 11 | $nl = '(?:%0[Aa])'; |
|---|
| 12 | $dot = '\.'; |
|---|
| 13 | $plus = '\+'; |
|---|
| 14 | $qm = '\?'; |
|---|
| 15 | $ast = '\*'; |
|---|
| 16 | $hex = '[a-fA-F\d]'; |
|---|
| 17 | $alpha = '[a-zA-Z]'; # No, no locale. |
|---|
| 18 | $alphas = "(?:${alpha}+)"; |
|---|
| 19 | $alphanum = '[a-zA-Z\d]'; # Letter or digit. |
|---|
| 20 | $xalphanum = "(?:${alphanum}|%(?:3\\d|[46]$hex|[57][Aa\\d]))"; |
|---|
| 21 | # Letter or digit, or hex escaped letter/digit. |
|---|
| 22 | $alphanums = "(?:${alphanum}+)"; |
|---|
| 23 | $escape = "(?:%$hex\{2})"; |
|---|
| 24 | $safe = '[$\-_.+]'; |
|---|
| 25 | $extra = "[\!*'(),]"; |
|---|
| 26 | $national = '[{}|\\^~[\]`]'; |
|---|
| 27 | $punctuation = '[<>#%"]'; |
|---|
| 28 | $reserved = '[;/?:@&=]'; |
|---|
| 29 | $uchar = "(?:${alphanum}|${safe}|${extra}|${escape})"; |
|---|
| 30 | $xchar = "(?:${alphanum}|${safe}|${extra}|${reserved}|${escape})"; |
|---|
| 31 | $uchar = str_replace (']|[', '', $uchar); // Make string smaller, and speed up regex. |
|---|
| 32 | $uchar = str_replace (']|[', '', $xchar); // Make string smaller, and speed up regex. |
|---|
| 33 | |
|---|
| 34 | # URL schemeparts for ip based protocols: |
|---|
| 35 | $user = "(?:(?:${uchar}|[;?&=])*)"; |
|---|
| 36 | $password = "(?:(?:${uchar}|[;?&=])*)"; |
|---|
| 37 | $hostnumber = "(?:${digits}(?:${dot}${digits}){3})"; |
|---|
| 38 | $toplabel = "(?:${alpha}(?:(?:${alphanum}|-)*${alphanum})?)"; |
|---|
| 39 | $domainlabel = "(?:${alphanum}(?:(?:${alphanum}|-)*${alphanum})?)"; |
|---|
| 40 | $hostname = "(?:(?:${domainlabel}${dot})*${toplabel})"; |
|---|
| 41 | $host = "(?:${hostname}|${hostnumber})"; |
|---|
| 42 | $hostport = "(?:${host}(?::${digits})?)"; |
|---|
| 43 | $login = "(?:(?:${user}(?::${password})?\@)?${hostport})"; |
|---|
| 44 | |
|---|
| 45 | # The predefined schemes: |
|---|
| 46 | |
|---|
| 47 | # FTP (see also RFC959) |
|---|
| 48 | $fsegment = "(?:(?:${uchar}|[?:\@&=])*)"; |
|---|
| 49 | $fpath = "(?:${fsegment}(?:/${fsegment})*)"; |
|---|
| 50 | $ftpurl = "(?:ftp://${login}(?:/${fpath}(?:;type=[AIDaid])?)?)"; |
|---|
| 51 | |
|---|
| 52 | # FILE |
|---|
| 53 | $fileurl = "(?:file://(?:${host}|localhost)?/${fpath})"; |
|---|
| 54 | |
|---|
| 55 | # HTTP |
|---|
| 56 | $hsegment = "(?:(?:${uchar}|[~;:\@&=])*)"; |
|---|
| 57 | $search = "(?:(?:${uchar}|[;:\@&=])*)"; |
|---|
| 58 | $hpath = "(?:${hsegment}(?:/${hsegment})*)"; |
|---|
| 59 | $httpurl = "(?:https?://${hostport}(?:/${hpath}(?:${qm}${search})?)?)"; |
|---|
| 60 | |
|---|
| 61 | # GOPHER (see also RFC1436) |
|---|
| 62 | $gopher_plus = "(?:${xchar}*)"; |
|---|
| 63 | $selector = "(?:${xchar}*)"; |
|---|
| 64 | $gtype = $xchar; // Omitted parens! |
|---|
| 65 | $gopherurl = "(?:gopher://${hostport}(?:/${gtype}(?:${selector}" . |
|---|
| 66 | "(?:%09${search}(?:%09${gopher_plus})?)?)?)?)"; |
|---|
| 67 | |
|---|
| 68 | # MAILTO (see also RFC822) |
|---|
| 69 | $encoded822addr = "(?:$xchar+)"; |
|---|
| 70 | $mailtourl = "(?:mailto:$encoded822addr)"; |
|---|
| 71 | $mailtonpurl = $encoded822addr; |
|---|
| 72 | |
|---|
| 73 | # NEWS (see also RFC1036) |
|---|
| 74 | $article = "(?:(?:${uchar}|[;/?:&=])+\@${host})"; |
|---|
| 75 | $group = "(?:${alpha}(?:${alphanum}|[_.+-])*)"; |
|---|
| 76 | $grouppart = "(?:${article}|${group}|${ast})"; |
|---|
| 77 | $newsurl = "(?:news:${grouppart})"; |
|---|
| 78 | |
|---|
| 79 | # NNTP (see also RFC977) |
|---|
| 80 | $nntpurl = "(?:nntp://${hostport}/${group}(?:/${digits})?)"; |
|---|
| 81 | |
|---|
| 82 | # TELNET |
|---|
| 83 | $telneturl = "(?:telnet://${login}/?)"; |
|---|
| 84 | |
|---|
| 85 | # WAIS (see also RFC1625) |
|---|
| 86 | $wpath = "(?:${uchar}*)"; |
|---|
| 87 | $wtype = "(?:${uchar}*)"; |
|---|
| 88 | $database = "(?:${uchar}*)"; |
|---|
| 89 | $waisdoc = "(?:wais://${hostport}/${database}/${wtype}/${wpath})"; |
|---|
| 90 | $waisindex = "(?:wais://${hostport}/${database}${qm}${search})"; |
|---|
| 91 | $waisdatabase = "(?:wais://${hostport}/${database})"; |
|---|
| 92 | # $waisurl = "(?:${waisdatabase}|${waisindex}|${waisdoc})"; |
|---|
| 93 | # Speed up: the 3 types share a common prefix. |
|---|
| 94 | $waisurl = "(?:wais://${hostport}/${database}" . |
|---|
| 95 | "(?:(?:/${wtype}/${wpath})|${qm}${search})?)"; |
|---|
| 96 | |
|---|
| 97 | # PROSPERO |
|---|
| 98 | $fieldvalue = "(?:(?:${uchar}|[?:\@&])*)"; |
|---|
| 99 | $fieldname = "(?:(?:${uchar}|[?:\@&])*)"; |
|---|
| 100 | $fieldspec = "(?:;${fieldname}=${fieldvalue})"; |
|---|
| 101 | $psegment = "(?:(?:${uchar}|[?:\@&=])*)"; |
|---|
| 102 | $ppath = "(?:${psegment}(?:/${psegment})*)"; |
|---|
| 103 | $prosperourl = "(?:prospero://${hostport}/${ppath}(?:${fieldspec})*)"; |
|---|
| 104 | |
|---|
| 105 | # LDAP (see also RFC1959) |
|---|
| 106 | # First. import stuff from RFC 1779 (Distinguished Names). |
|---|
| 107 | # We've modified things a bit. |
|---|
| 108 | $dn_separator = "(?:[;,])"; |
|---|
| 109 | $dn_optional_space = "(?:${nl}?${space}*)"; |
|---|
| 110 | $dn_spaced_separator = "(?:${dn_optional_space}${dn_separator}" . |
|---|
| 111 | "${dn_optional_space})"; |
|---|
| 112 | $dn_oid = "(?:${digits}(?:${dot}${digits})*)"; |
|---|
| 113 | $dn_keychar = "(?:${xalphanum}|${space})"; |
|---|
| 114 | $dn_key = "(?:${dn_keychar}+|(?:OID|oid)${dot}${dn_oid})"; |
|---|
| 115 | $dn_string = "(?:${uchar}*)"; |
|---|
| 116 | $dn_attribute = "(?:(?:${dn_key}${dn_optional_space}=" . |
|---|
| 117 | "${dn_optional_space})?${dn_string})"; |
|---|
| 118 | $dn_name_component = "(?:${dn_attribute}(?:${dn_optional_space}" . |
|---|
| 119 | "${plus}${dn_optional_space}${dn_attribute})*)"; |
|---|
| 120 | $dn_name = "(?:${dn_name_component}" . |
|---|
| 121 | "(?:${dn_spaced_separator}${dn_name_component})*" . |
|---|
| 122 | "${dn_spaced_separator}?)"; |
|---|
| 123 | |
|---|
| 124 | # RFC 1558 defines the filter syntax, but that requires a PDA to recognize. |
|---|
| 125 | # Since that's too powerful for Perl's REs, we allow any char between the |
|---|
| 126 | # parenthesis (which have to be there.) |
|---|
| 127 | $ldap_filter = "(?:\(${xchar}+\))"; |
|---|
| 128 | |
|---|
| 129 | # This is from RFC 1777. It defines an attributetype as an 'OCTET STRING', |
|---|
| 130 | # whatever that is. |
|---|
| 131 | $ldap_attr_type = "(?:${uchar}+)"; # I'm just guessing here. |
|---|
| 132 | # The RFCs aren't clear. |
|---|
| 133 | |
|---|
| 134 | # Now we are at the grammar of RFC 1959. |
|---|
| 135 | $ldap_attr_list = "(?:${ldap_attr_type}(?:,${ldap_attr_type})*)"; |
|---|
| 136 | $ldap_attrs = "(?:${ldap_attr_list}?)"; |
|---|
| 137 | |
|---|
| 138 | $ldap_scope = "(?:base|one|sub)"; |
|---|
| 139 | $ldapurl = "(?:ldap://(?:${hostport})?/${dn_name}" . |
|---|
| 140 | "(?:${qm}${ldap_attrs}" . |
|---|
| 141 | "(?:${qm}${ldap_scope}(?:${qm}${ldap_filter})?)?)?)"; |
|---|
| 142 | |
|---|
| 143 | |
|---|
| 144 | # RFC 2056 defines the format of URLs for the Z39.50 protocol. |
|---|
| 145 | $z_database = "(?:${uchar}+)"; |
|---|
| 146 | $z_docid = "(?:${uchar}+)"; |
|---|
| 147 | $z_elementset = "(?:${uchar}+)"; |
|---|
| 148 | $z_recordsyntax = "(?:${uchar}+)"; |
|---|
| 149 | $z_scheme = "(?:z39${dot}50[rs])"; |
|---|
| 150 | $z39_50url = "(?:${z_scheme}://${hostport}" . |
|---|
| 151 | "(?:/(?:${z_database}(?:${plus}${z_database})*" . |
|---|
| 152 | "(?:${qm}${z_docid})?)?" . |
|---|
| 153 | "(?:;esn=${z_elementset})?" . |
|---|
| 154 | "(?:;rs=${z_recordsyntax}" . |
|---|
| 155 | "(?:${plus}${z_recordsyntax})*)?))"; |
|---|
| 156 | |
|---|
| 157 | |
|---|
| 158 | # RFC 2111 defines the format for cid/mid URLs. |
|---|
| 159 | $url_addr_spec = "(?:(?:${uchar}|[;?:@&=])*)"; |
|---|
| 160 | $message_id = $url_addr_spec; |
|---|
| 161 | $content_id = $url_addr_spec; |
|---|
| 162 | $cidurl = "(?:cid:${content_id})"; |
|---|
| 163 | $midurl = "(?:mid:${message_id}(?:/${content_id})?)"; |
|---|
| 164 | |
|---|
| 165 | |
|---|
| 166 | # RFC 2122 defines the Vemmi URLs. |
|---|
| 167 | $vemmi_attr = "(?:(?:${uchar}|[/?:@&])*)"; |
|---|
| 168 | $vemmi_value = "(?:(?:${uchar}|[/?:@&])*)"; |
|---|
| 169 | $vemmi_service = "(?:(?:${uchar}|[/?:@&=])*)"; |
|---|
| 170 | $vemmi_param = "(?:;${vemmi_attr}=${vemmi_value})"; |
|---|
| 171 | $vemmiurl = "(?:vemmi://${hostport}" . |
|---|
| 172 | "(?:/${vemmi_service}(?:${vemmi_param}*))?)"; |
|---|
| 173 | |
|---|
| 174 | # RFC 2192 for IMAP URLs. |
|---|
| 175 | # Import from RFC 2060. |
|---|
| 176 | # $imap4_astring = ""; |
|---|
| 177 | # $imap4_search_key = ""; |
|---|
| 178 | # $imap4_section_text = ""; |
|---|
| 179 | $imap4_nz_number = $nz_digits; |
|---|
| 180 | $achar = "(?:${uchar}|[&=~])"; |
|---|
| 181 | $bchar = "(?:${uchar}|[&=~:\@/])"; |
|---|
| 182 | $enc_auth_type = "(?:${achar}+)"; |
|---|
| 183 | $enc_list_mbox = "(?:${bchar}+)"; |
|---|
| 184 | $enc_mailbox = "(?:${bchar}+)"; |
|---|
| 185 | $enc_search = "(?:${bchar}+)"; |
|---|
| 186 | $enc_section = "(?:${bchar}+)"; |
|---|
| 187 | $enc_user = "(?:${achar}+)"; |
|---|
| 188 | $i_auth = "(?:;[Aa][Uu][Tt][Hh]=(?:${ast}|${enc_auth_type}))"; |
|---|
| 189 | $i_list_type = "(?:[Ll](?:[Ii][Ss][Tt]|[Ss][Uu][Bb]))"; |
|---|
| 190 | $i_mailboxlist = "(?:${enc_list_mbox}?;[Tt][Yy][Pp][Ee]=${i_list_type})"; |
|---|
| 191 | $i_uidvalidity = "(?:;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=" . |
|---|
| 192 | "${imap4_nz_number})"; |
|---|
| 193 | $i_messagelist = "(?:${enc_mailbox}(?:${qm}${enc_search})?" . |
|---|
| 194 | "(?:${i_uidvalidity})?)"; |
|---|
| 195 | $i_section = "(?:/;[Ss][Ee][Cc][Tt][Ii][Oo][Nn]=${enc_section})"; |
|---|
| 196 | $i_uid = "(?:/;[Uu][Ii][Dd]=${imap4_nz_number})"; |
|---|
| 197 | $i_messagepart = "(?:${enc_mailbox}(?:${i_uidvalidity})?${i_uid}" . |
|---|
| 198 | "(?:${i_section})?)"; |
|---|
| 199 | $i_command = "(?:${i_mailboxlist}|${i_messagelist}|${i_messagepart})"; |
|---|
| 200 | $i_userauth = "(?:(?:${enc_user}(?:${i_auth})?)|" . |
|---|
| 201 | "(?:${i_auth}(?:${enc_user})?))"; |
|---|
| 202 | $i_server = "(?:(?:${i_userauth}\@)?${hostport})"; |
|---|
| 203 | $imapurl = "(?:imap://${i_server}/(?:$i_command)?)"; |
|---|
| 204 | |
|---|
| 205 | # RFC 2224 for NFS. |
|---|
| 206 | $nfs_mark = '[\$\-_.\!~*\'(),]'; |
|---|
| 207 | $nfs_unreserved = "(?:${alphanum}|${nfs_mark})"; |
|---|
| 208 | $nfs_unreserved = str_replace (']|[', '', $nfs_unreserved); // Make string smaller, and speed up regex. |
|---|
| 209 | $nfs_pchar = "(?:${nfs_unreserved}|${escape}|[:\@&=+])"; |
|---|
| 210 | $nfs_segment = "(?:${nfs_pchar}*)"; |
|---|
| 211 | $nfs_path_segs = "(?:${nfs_segment}(?:/${nfs_segment})*)"; |
|---|
| 212 | $nfs_url_path = "(?:/?${nfs_path_segs})"; |
|---|
| 213 | $nfs_rel_path = "(?:${nfs_path_segs}?)"; |
|---|
| 214 | $nfs_abs_path = "(?:/${nfs_rel_path})"; |
|---|
| 215 | $nfs_net_path = "(?://${hostport}(?:${nfs_abs_path})?)"; |
|---|
| 216 | $nfs_rel_url = "(?:${nfs_net_path}|${nfs_abs_path}|${nfs_rel_path})"; |
|---|
| 217 | $nfsurl = "(?:nfs:${nfs_rel_url})"; |
|---|
| 218 | |
|---|
| 219 | $valid_types = array ( |
|---|
| 220 | 'http', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'wais', 'mailto', |
|---|
| 221 | 'mailtonp', 'file', 'prospero', 'ldap', 'z39_50', 'cid', 'mid', 'vemmi', |
|---|
| 222 | 'imap', 'nfs' |
|---|
| 223 | ); |
|---|
| 224 | |
|---|
| 225 | # Combining all the different URL formats into a single regex. |
|---|
| 226 | |
|---|
| 227 | $valid = false; |
|---|
| 228 | |
|---|
| 229 | if (!is_array ($types)) { |
|---|
| 230 | $types = array ($types); |
|---|
| 231 | } |
|---|
| 232 | |
|---|
| 233 | foreach ($types as $type) { |
|---|
| 234 | if (!in_array ($type, $valid_types)) { |
|---|
| 235 | continue; |
|---|
| 236 | } |
|---|
| 237 | $re = $type.'url'; |
|---|
| 238 | if (preg_match ('!^'.$$re.'$!i', $url2check)) { |
|---|
| 239 | $valid = $type; |
|---|
| 240 | break; |
|---|
| 241 | } |
|---|
| 242 | } |
|---|
| 243 | |
|---|
| 244 | return $valid; |
|---|
| 245 | |
|---|
| 246 | } |
|---|
| 247 | |
|---|
| 248 | ?> |
|---|