search:
code: Select all
#!/bin/bash
#
# search - list the result URLs of a Google query, one URL per line.
#
# usage: search QUERY [PAGES]
#   QUERY   search terms, given as ONE argument; it is URL-encoded with the
#           "urlencode" helper before being placed in the request
#   PAGES   number of result pages to fetch, 10 results each (default: 1)
#
# Requires lynx plus the "urlencode" and "urldecode" helper scripts. The
# helpers are looked up next to this script first, then in the current
# directory, and finally through PATH.

# Print a usage line to stderr and terminate with status 1.
usage() {
  printf 'usage: %s QUERY [PAGES]\n' "${0##*/}" >&2
  exit 1
}

# Print the command to use for helper script $1: the first executable copy
# found beside this script or in the current directory, otherwise the bare
# name so that the shell resolves it through PATH.
find_helper() {
  local name=$1 dir
  for dir in "$(dirname -- "$0")" .; do
    if [[ -f "$dir/$name" && -x "$dir/$name" ]]; then
      printf '%s\n' "$dir/$name"
      return 0
    fi
  done
  printf '%s\n' "$name"
}

[[ $# -ge 1 && -n "$1" ]] || usage

# Without a page count the old "seq 0 -1" produced no request at all;
# fetch one page by default instead.
pages=${2:-1}
[[ "$pages" =~ ^[0-9]+$ ]] || usage
pages=$((10#$pages))    # force base 10 so that "08" is not read as octal

urlencode=$(find_helper urlencode)
urldecode=$(find_helper urldecode)

# Quote the query so that white space and glob characters reach urlencode
# unchanged.
query=$(printf '%s\n' "$1" | "$urlencode") || {
  printf '%s: cannot URL-encode the query\n' "${0##*/}" >&2
  exit 1
}

for ((page = 0; page < pages; page++)); do
  offset=$((page * 10))
  # Result links sit on the lines containing NOBR; keep only what follows
  # HREF= up to the closing ">" and decode it.
  lynx -source "http://www.google.com/ie?q=${query}&start=${offset}" \
    | grep NOBR \
    | sed -e "s/.*HREF=//" -e "s/>.*//" \
    | "$urldecode"
done
#!/bin/bash
#
# search - list the result URLs of a Google query, one URL per line.
#
# usage: search QUERY [PAGES]
#   QUERY   search terms; inserted into the request URL verbatim
#           NOTE(review): no encoding is applied here, so the query
#           presumably has to be URL-safe already (e.g. "foo+bar") - confirm
#   PAGES   number of result pages to fetch, 10 results each (default: 1)
#
# Requires lynx and the "urldecode" helper script, which is looked up next
# to this script first, then in the current directory, then through PATH.

# Print a usage line to stderr and terminate with status 1.
usage() {
  printf 'usage: %s QUERY [PAGES]\n' "${0##*/}" >&2
  exit 1
}

[[ $# -ge 1 && -n "$1" ]] || usage
query=$1

# Without a page count the old "seq 0 -1" produced no request at all;
# fetch one page by default instead.
pages=${2:-1}
[[ "$pages" =~ ^[0-9]+$ ]] || usage
pages=$((10#$pages))    # force base 10 so that "08" is not read as octal

# Prefer a urldecode that lives beside this script (or in the current
# directory); fall back to PATH lookup as before.
urldecode=urldecode
for dir in "$(dirname -- "$0")" .; do
  if [[ -f "$dir/urldecode" && -x "$dir/urldecode" ]]; then
    urldecode=$dir/urldecode
    break
  fi
done

for ((page = 0; page < pages; page++)); do
  offset=$((page * 10))
  # Result links sit on the lines containing NOBR; keep only what follows
  # HREF= up to the closing ">" and decode it.
  lynx -source "http://www.google.com/ie?q=${query}&start=${offset}" \
    | grep NOBR \
    | sed -e "s/.*HREF=//" -e "s/>.*//" \
    | "$urldecode"
done
:
##########################################################################
# Shellscript: urlencode - encode URL data
# Version : 1.2
# Author : Heiner Steven (heiner.steven@odn.de)
# Date : 2000-03-15
# Categories : File Conversion, WWW, CGI
# SCCS-Id. : @(#) urlencode 1.2 04/03/03
##########################################################################
# Description
# Encode data according to
# RFC 1738: "Uniform Resource Locators (URL)" and
# RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
#
# This encoding is used e.g. for the MIME type
# "application/x-www-form-urlencoded"
#
# Notes
# o The default behaviour is not to encode the line endings. This
# may not be what was intended, because the result will be
# multiple lines of output (which cannot be used in a URL or an
# HTTP "POST" request). If the desired output should be one
# line, use the "-l" option.
#
# o The "-l" option assumes that the end-of-line is denoted by
# the character LF (ASCII 10). This is not true for Windows or
# Mac systems, where the end of a line is denoted by the two
# characters CR LF (ASCII 13 10).
# We use this for symmetry; data processed in the following way:
# cat | urlencode -l | urldecode -l
# should (and will) result in the original data
#
# o Large lines (or binary files) will break many AWK
# implementations. If you get the message
# awk: record `...' too long
# record number xxx
# consider using GNU AWK (gawk).
#
# o urlencode will always terminate its output with an EOL
# character
#
# See also
# urldecode
##########################################################################
# Name the script was invoked under; used as prefix for all messages.
PN=$(basename "$0")
# Version number shown in the usage text.
VER='1.2'
# awk implementation to run; override through the environment, e.g.
# AWK=gawk. ":=" (instead of "=") also replaces an EMPTY value: with
# AWK="" the later "$AWK '...'" would expand to nothing and the shell
# would try to execute the awk program text itself.
: "${AWK:=awk}"
# Usage - write the help text (program name, version, synopsis) to
# standard error and terminate the script with exit status 1.
# Reads the globals PN and VER.
Usage () {
    cat >&2 <<EOF
$PN - encode URL data, $VER
usage: $PN [-l] [file ...]
-l: encode line endings (result will be one line of output)
The default is to encode each input line on its own.
EOF
    exit 1
}
# Msg - print every argument as a separate line on standard error, each
# one prefixed with the program name (global PN).
Msg () {
    for MsgLine in "$@"
    do
        echo "$PN: $MsgLine"
    done >&2
}
# Fatal - hand all arguments to Msg for printing, then terminate the
# script with exit status 1.
Fatal () {
    Msg "$@"
    exit 1
}
# ParseOptions - evaluate the command line options.
#   -l    encode the line endings too (EncodeEOL=yes)
#   -h    print the usage text; any unknown option does the same
# Results (globals):
#   EncodeEOL    "yes" or "no"
#   ArgsToSkip   number of leading arguments that were options; the
#                caller shifts them away so only file names remain
#
# The "getopts" builtin replaces the former "set -- `getopt ...` || Usage":
# the exit status of "set" masked the one of "getopt", so the "|| Usage"
# could never fire and an invalid option could pass unreported, and the
# unquoted getopt output split file names at white space.
ParseOptions () {
    EncodeEOL=no
    OPTIND=1
    # The leading ":" selects silent mode: unknown options are returned
    # as "?" without a message from getopts itself.
    while getopts :hl Option
    do
        case "$Option" in
            l)  EncodeEOL=yes;;
            *)  Usage;;         # -h or an unknown option
        esac
    done
    ArgsToSkip=$((OPTIND - 1))
}
ParseOptions "$@"
shift "$ArgsToSkip"             # "$@" now holds the file names only
# EncodeData - URL-encode the files named as arguments (standard input if
# there are none) and write the result to standard output.
#
# Letters, digits, "." and "-" are copied unchanged, a blank becomes "+",
# every other character becomes "%XX" (two upper-case hex digits).
# With the global EncodeEOL set to "yes" each line end is written as "%0A"
# and no newline is printed; otherwise every input line yields one output
# line.
#
# awk runs with LC_ALL=C so that the input is processed byte by byte. In a
# multibyte locale (e.g. UTF-8) some awk implementations work on whole
# characters: a character above code point 255 then has no entry in ord[]
# and was written as "%00", and characters 128-255 were written with their
# code point instead of their bytes.
EncodeData () {
    LC_ALL=C ${AWK:-awk} '
    BEGIN {
        # We assume an awk implementation that is just plain dumb.
        # We will convert a character to its ASCII value with the
        # table ord[], and produce two-digit hexadecimal output
        # without the printf("%02X") feature.
        EOL = "%0A"         # "end of line" string (encoded)
        # hextab[n] is the hex digit for the value n (0..15)
        split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
        hextab [0] = 0
        for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
        # The shell variable is spliced into the program text (instead of
        # using "-v") to stay compatible with very old awk versions.
        if ("'"${EncodeEOL:-no}"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
        encoded = ""
        for ( i=1; i<=length ($0); ++i ) {
            c = substr ($0, i, 1)
            if ( c ~ /[a-zA-Z0-9.-]/ ) {
                encoded = encoded c         # safe character
            } else if ( c == " " ) {
                encoded = encoded "+"       # special handling
            } else {
                # unsafe character, encode it as a two-digit hex-number
                lo = ord [c] % 16
                hi = int (ord [c] / 16);
                encoded = encoded "%" hextab [hi] hextab [lo]
            }
        }
        if ( EncodeEOL ) {
            printf ("%s", encoded EOL)
        } else {
            print encoded
        }
    }
    ' "$@"
}
EncodeData "$@"
:
##########################################################################
# Title : urldecode - decode URL data
# Author : Heiner Steven (heiner.steven@odn.de)
# Date : 2000-03-15
# Categories : File Conversion, WWW, CGI
# SCCS-Id. : @(#) urldecode 1.4 04/03/03
##########################################################################
# Description
# Decode data according to
# RFC 1738: "Uniform Resource Locators (URL)" and
# RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
# RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax"
#
# This encoding is used e.g. for the MIME type
# "application/x-www-form-urlencoded"
#
# Notes
# o The default behaviour is to decode each line independently of the
# other, and print each result on its own line. If the line
# endings are encoded, too (i.e. "%0A" or "%0D%0A"), use the
# "-l" option to prevent urldecode from adding additional line
# endings.
#
# o Large lines (or binary files) will break many AWK
# implementations. If you get a message like
# awk: record `%3A%0A%23%23%23%23%2...' too long
# consider using GNU AWK (gawk).
# If the input line was generated using "urlencode -l", try to
# omit the "-l" option, if applicable.
#
# See also
# urlencode
##########################################################################
# Version number shown in the usage text.
VER='1.4'
# Name the script was invoked under; used as prefix for all messages.
PN=$(basename "$0")
# awk implementation to run; "awk" unless the environment provides a
# non-empty AWK (e.g. AWK=gawk).
: "${AWK:=awk}"
# Usage - write the help text (program name, version, synopsis) to
# standard error and terminate the script with exit status 1.
# Reads the globals PN and VER.
Usage () {
    printf '%s\n' \
        "$PN - decode URL data, $VER" \
        "usage: $PN [-l] [file ...]" \
        "-l: single-line input (line endings are encoded)" >&2
    exit 1
}
# Msg - print every argument as a separate line on standard error, each
# one prefixed with the program name (global PN).
Msg () {
    while [ $# -gt 0 ]
    do
        MsgLine=$1
        echo "$PN: $MsgLine" >&2
        shift
    done
}
# Fatal - hand all arguments to Msg for printing, then terminate the
# script with exit status 1.
Fatal () {
    Msg "$@"
    exit 1
}
# ParseOptions - evaluate the command line options.
#   -l    the input is a single line with encoded line endings
#         (EncodedLF=yes)
#   -h    print the usage text; any unknown option does the same
# Results (globals):
#   EncodedLF    "yes" or "no"
#   ArgsToSkip   number of leading arguments that were options; the
#                caller shifts them away so only file names remain
#
# The "getopts" builtin replaces the former "set -- `getopt ...` || Usage":
# the exit status of "set" masked the one of "getopt", so the "|| Usage"
# could never fire and an invalid option could pass unreported, and the
# unquoted getopt output split file names at white space.
ParseOptions () {
    EncodedLF=no
    OPTIND=1
    # The leading ":" selects silent mode: unknown options are returned
    # as "?" without a message from getopts itself.
    while getopts :hl Option
    do
        case "$Option" in
            l)  EncodedLF=yes;;
            *)  Usage;;         # -h or an unknown option
        esac
    done
    ArgsToSkip=$((OPTIND - 1))
}
ParseOptions "$@"
shift "$ArgsToSkip"             # "$@" now holds the file names only
# DecodeData - URL-decode the files named as arguments (standard input if
# there are none) and write the result to standard output.
#
# "%XX" (two hex digits, either case) becomes the byte with that value and
# "+" becomes a blank. A "%" that is not followed by two hex digits is
# copied unchanged and a warning is written to standard error.
# With the global EncodedLF set to "yes" no newline is added after a
# decoded line; otherwise every input line yields one output line.
#
# awk runs with LC_ALL=C so that sprintf("%c") yields exactly one byte per
# escape. In a multibyte locale (e.g. UTF-8) some awk implementations
# treat the number as a code point and print its multibyte encoding, which
# garbles every decoded byte above 127.
DecodeData () {
    LC_ALL=C ${AWK:-awk} '
    BEGIN {
        # hextab maps a hex digit (either case) to its numeric value
        hextab ["0"] = 0;   hextab ["8"] = 8;
        hextab ["1"] = 1;   hextab ["9"] = 9;
        hextab ["2"] = 2;   hextab ["A"] = hextab ["a"] = 10;
        hextab ["3"] = 3;   hextab ["B"] = hextab ["b"] = 11;
        hextab ["4"] = 4;   hextab ["C"] = hextab ["c"] = 12;
        hextab ["5"] = 5;   hextab ["D"] = hextab ["d"] = 13;
        hextab ["6"] = 6;   hextab ["E"] = hextab ["e"] = 14;
        hextab ["7"] = 7;   hextab ["F"] = hextab ["f"] = 15;
        # The shell variable is spliced into the program text (instead of
        # using "-v") to stay compatible with very old awk versions.
        if ("'"${EncodedLF:-no}"'" == "yes") EncodedLF = 1; else EncodedLF = 0
    }
    {
        decoded = ""
        i = 1
        len = length ($0)
        while ( i <= len ) {
            c = substr ($0, i, 1)
            if ( c == "%" ) {
                if ( i+2 <= len ) {
                    c1 = substr ($0, i+1, 1)
                    c2 = substr ($0, i+2, 1)
                    if ( hextab [c1] == "" || hextab [c2] == "" ) {
                        # not a hex escape: keep the "%" as it is
                        print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2"
                    } else {
                        code = 0 + hextab [c1] * 16 + hextab [c2] + 0
                        c = sprintf ("%c", code)
                        i = i + 2           # skip the two hex digits
                    }
                } else {
                    # Fewer than two characters follow the "%". The warning
                    # goes to stderr like the one above (it used to land in
                    # the decoded output) and shows the complete rest of
                    # the line (the last character used to be cut off).
                    print "WARNING: invalid % encoding: " substr ($0, i) | "cat >&2"
                }
            } else if ( c == "+" ) {        # special handling: "+" means " "
                c = " "
            }
            decoded = decoded c
            ++i
        }
        if ( EncodedLF ) {
            printf "%s", decoded            # no newline on output
        } else {
            print decoded
        }
    }
    ' "$@"
}
DecodeData "$@"

# Installation: stick all of these scripts in one directory and make them
# executable (chmod a+x *).