#! /usr/local/bin/perl -c package referer_keyword; # ------------------------------------------------------------------- # referer_keywords # ------------------------------------------------------------------- # 与えられた URL から「検索用キーワードらしきもの」を抜き出します # # version: # 2000.11.11 aiba # ------------------------------------------------------------------- # sub get { local($za) = @_; if($za =~ /\?/) { if($za =~ /[\?\&](query|queryterm|general|suche|[Pp]arole|entry)\=[^\&]+/ || $za =~ /[\?\&](word|[Kk]eywords?|searchfor|raw\_keywords)\=[^\&]+/ || $za =~ /[\?\&](fi\_1)\=[^\&]+/ || # excite.com $za =~ /[\?\&](q|qr|qt|s|kw|key|ask|rawq)\=[^\&]+/ || $za =~ /[\?\&](p|r|mt|MT|QRY|KW|AW0|st)\=[^\&]+/ || $za =~ /[\?\&](phrase|QueryString|AK[0-9]|box|RAW\_KEYWORDS)\=[^\&]+/ || $za =~ /[\?\&]([Ss]earch|request|qu|w|LsSearchReq)\=[^\&]+/ || $za =~ /[\?\&](FI\_[0-9]|QUERY|genre|rads|saisie|Parola[0-9])|SearchWd\=[^\&]+/ ) { local($zb) = $&; $zb =~ s/^[^\=]*\=[ \t]*//; if($zb =~ /./) { # 以下はデータの整形部分 $zb = &chr_normalize($zb); $zb = " $zb "; $zb =~ y/A-Z/a-z/; $zb =~ s/[\"\t\r\+\|\/\(\),;:\[\]\<\>\=\+]/ /g; $zb =~ s/\*+/\*/g; $zb =~ s/ \-[^ ]*/ /g; $zb =~ s/ [\'\#\!\\\_]+/ /g; $zb =~ s/[\'\#\!\\\_\~]+ / /g; $zb =~ s/ (AND|and|OR|or|\&) / /g; # 先頭と末尾につけた空白を抜く $zb =~ s/^[ \t]*//; $zb =~ s/[ \t\r\n]*$//; return split(/ +/,$zb); } } } return undef; } # ----------------------------------------------- # 文字列の表記を統一する # sub chr_normalize { local($za) = @_; $za =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack("c",hex($1))/eg; if($za =~ /[^ -~]/) { require 'jcode.pl'; &jcode'convert(*za, 'sjis'); &jcode'h2z_sjis(*za); &jcode'convert(*za, 'euc'); &jcode'tr(*za, "A-z","A-z"); &jcode'tr(*za, "0-9","0-9"); &jcode'tr(*za, " ()[]/¥!?|"," ()[]/\\!?\|"); } return $za; } 1;