#!/usr/local/bin/perl ############################################################ # HTTP-LIB.PL # # Change history: # 14jun98 P Gargano proxy and HEAD support, code rationalise # 15may96 G Birznieks created # # This script was originally written by Gunther Birznieks. # # You may copy this under the terms of the GNU General Public # License or the Artistic License which is distributed with # copies of Perl v5.x for UNIX. # # Purpose: Provides a set of library routines to connect as # a browser to another HTTP site and then return the results to # the caller. # # Main Procedures: # connectHTTP - GETs, POSTs or HEADs # # Set the $http_os variable equal to NT if you are on Windows NT perl # Set it to UNIX for normal UNIX operations. # # If you do not have a version of PERL with the Socket.pm, you # can manually define $AF_INET and $SOCK_STREAM to 2 and 1 respectively. # On some systems, SOCK_STREAM may be 2. # ############################################################ # Use the Sockets library for TCP/IP Communications use Socket; $http_os = 'NT'; # set to 'UNIX' for non NT systems $httpBuf = ''; $httpHead = ''; # Header set when error (non 200 status) $httpErrorCode = 0; $proxyHost = ''; # no proxying if eq '' $proxyPort = 80; # default port is 80 $proxyMod = ''; # holds 'If-Modified-Since' info ############################################################ # # subroutine: connectHTTP # Usage: # $buf = &connectHTTP('GET', 'www.eff.org', '/index.html', 80, $vars); # $vars = 'var1=value1&var2=value2&etc=etc'; # # $buf = &connectHTTP ($method, $hostname, $port, $path, $vars); # # Parameters: # $method = 'GET', 'POST', or 'HEAD' # $hostname = HostName of WebSite to Connect To # $port = port to connect to (normally 80) # $path = relative path or full URL (for proxy operation) # $vars = Form Values To Mimic sending to the site # # Output: # $buf = Output from the Web Server. # ############################################################ sub HTTPPost { local ($url, $hostname, $port, $in) = @_; return ConnectHTTP ('POST', $hostname, $url, $port, $in); } sub HTTPGet { local ($url, $hostname, $port, $in) = @_; return ConnectHTTP ('GET', $hostname, $port, $url, $in); } sub HTTPHead { local ($url, $hostname, $port, $in) = @_; return ConnectHTTP ('HEAD', $hostname, $port, $url, $in); } sub HTTPProxy { local ($host, $port) = @_; $proxyHost = $host; $proxyPort = $port; } sub ConnectHTTP { local ($method, $hostname, $port, $path, $vars) = @_; local ($x, $socket); local ($varsLength, $postHeader, $modHeader); $socket = &OpenSocket ($hostname, $port); $vars =~ s/ /%20/g; # should expand this for all values that need massaging $varsLength = length($vars); # form variables $postHeader = ''; $postVars = ''; if ($varsLength) { if ($method eq 'GET') { # data is passed in URL $path .= '?' . $vars; # add ?name=var&... variables } elsif ($method eq 'POST') { # data pased after HTTP header $postHeader = "Content-type: application/x-www-form-urlencoded\r\nContent-length: $varsLength\r\n"; $postVars = $vars; } } # for proxy operation the path is made into a full URL and the # variable $proxyHost and $proxyPort hold the proxy parameters # the RetrieveHTTP subroutine handles the proxy details if ($proxyHost ne '') { $path = "http://$hostname:$port$path"; } if ($proxyMod ne '') { $modHeader = "If-Modified-Since: $proxyMod\r\n"; } # Send HTTP request to server/proxy. $reqTxt = "$method $path HTTP/1.0\r\nAccept: */*\r\nUser-Agent: Tech-Edgilla/0.0 (Perl Win32)\r\n$modHeader$postHeader\r\n$postVars"; print $socket $reqTxt; ##print "~" . $reqTxt . "\n"; #--debug-- $httpErrorCode = &RetrieveHTTP ($socket); # Retrieve HTTP data from server/proxy. return $httpBuf; } #ConnectHTTP ############################################################ # # subroutine: RetrieveHTTP # Usage: # $buf = &RetrieveHTTP($socket_handle); # # Parameters: # $socket = Handle to the socket we are communicating # with # # Output: # $buf = Buffer containing the output of the HTTP Server # ############################################################ sub RetrieveHTTP { local ($socket) = @_; local ($splitIndex, $splitLength); GetSocketData ($socket, 6); # if the buffer has a status code of 200 in it, # then we know its safe to read in the rest of the document if ($httpBuf =~ /200/) { while(<$socket>) { $httpBuf .= $_; } } # The HTTP header is delimited by either # 1. two newlines carriage return pairs (as per RFC 1945) # 2. two newlines (in contravention of RFC 1945) # Different Web Servers use different delimiters, sometimes. $splitIndex = index($httpBuf, "\r\n\r\n"); $splitLength = 4; if ($splitIndex == -1) { $splitIndex = index($httpBuf, "\n\n"); $splitLength = 2; } else { $httpHead = substr($httpBuf, 0, $splitIndex); $httpBuf = substr($httpBuf, $splitIndex + $splitLength); } if ($httpHead =~ /200/) { # status 200 is OK return 0; # Okay } # error, return HTTP error header in buffer $splitIndex = index($httpHead, "\r"); $httpBuf = substr($httpHead, 0, $splitIndex); # return ASCII error code from header return 1; # error } # End of RetrieveHTTP ############################################################ # # subroutine: OpenSocket # Usage: # $socket_handle = &OpenSocket($host, $port); # # Parameters: # $host = host name to connect to # $port = port to connect to # # Output: # Handle to socket that was opened # ############################################################ sub OpenSocket { local($hostname, $port) = @_; local($ipaddress, $fullipaddress, $packconnectip); local($packthishostip); local($AF_INET, $SOCK_STREAM, $SOCK_ADDR); local($PROTOCOL, $HTTP_PORT); # The following variables are set using values defined in # The sockets.pm library. If your version of perl (v4) does # not have the sockets library, you can substitute some # default values such as 2 for AF_INIT, and 1 for SOCK_STREAM. # if 1 does not work for SOCK_STREAM, try using 2. # AF_INET defines the internet class of addressing # # SOCK_STREAM is a variable telling the program to use # a socket connection. This varies from using SOCK_DGRAM # which would send UDP datagrams using a connectionless paradigm # instead. # # PROTOCOL is TCPIP (6). # # PORT Should generally be 80 for HTTP Access, some sites use # alternative ports such as 8080. # # SOCK_ADDR is the packeted format of the full socket address # including the AF_INIT value, HTTP_PORT, and IP ADDRESS in that order # $AF_INET = AF_INET; $SOCK_STREAM = SOCK_STREAM; $SOCK_ADDR = 'S n a4 x8'; # The following routines get the protocol information $HTTP_PORT = ($proxyPort) ? $proxyPort : $port; $PROTOCOL = (getprotobyname('tcp'))[2]; $HTTP_PORT = 80 unless ($HTTP_PORT =~ /^\d+$/); $PROTOCOL = 6 unless ($PROTOCOL =~ /^\d+$/); # Ip address is the Address of the host that we need to connect # to $requestHost = ($proxyHost) ? $proxyHost : $hostname; if ($requestHost =~ m/^\d+\.\d+\.\d+\.\d+$/) { # if n1.n2.n3.n4 $ipaddress = pack('C4', split(/\./, $requestHost)); $fullipaddress = $requestHost; } else { # name is in form 'host.name.com' $ipaddress = (gethostbyname ($requestHost))[4]; $fullipaddress = join('.', unpack('C4', $ipaddress)); } ## print "~info :\tSOCK_ADDR=$SOCK_ADDR\n\tAF_INET=$AF_INET\n\tPort=$HTTP_PORT\n\tIP=$fullipaddress\n"; #--debug-- $packConnectIP = pack($SOCK_ADDR, $AF_INET, $HTTP_PORT, $ipaddress); $packThisHostIP = pack($SOCK_ADDR, $AF_INET, 0, "\0\0\0\0"); # First we allocate the socket socket (S, $AF_INET, $SOCK_STREAM, $PROTOCOL) || &web_error( "Can't make socket:$!\n"); # Then we bind the socket to the local host bind (S, $packThisHostIP) || &web_error( "Can't bind:$!\n"); # Then we connect the socket to the remote host connect (S, $packConnectIP) || &web_error( "Can't connect socket: $!\n"); # selects the socket handle select (S); $| = 1; # and turn off output buffering select (STDOUT); return S; # socket } # End of OpenSocket ############################################################ # # subroutine: GetSocketData # Usage: # &GetSocketDataet(SOCKET_HANDLE, $timeout); # # Parameters: # SOCKET_HANDLE = Handle to an allocated Socket # $timeout = amount of time GetSocketData is allowed to # wait for input before timing out # (measured in seconds) # # Output: # $httpBuf contains what was read from the socket # ############################################################ sub GetSocketData { local ($handle, $endtime) = @_; local ($localbuf); local ($rin, $rout, $nfound); $endtime += time; # Set endtime to be time + endtime. $httpBuf = ''; # Clear buffer $rin = ''; # Clear $rin (Read Input variable) # Set $rin to be a vector of the socket file handle vec($rin, fileno($handle), 1) = 1; # nfound is 0 since we have not read anything yet $nfound = 0; # Loop until we time out or something was read GetSocketDataet: while (($endtime > time) && ($nfound <= 0)) { # Read 1024 bytes at a time $length = 1024; # Preallocate buffer $localbuf = ' ' x 1025; # NT does not support select for polling to see if # There are characters to be received. This is important # Because we dont want to block if there is nothing # being received. $nfound = 1; if ($http_os ne 'NT') { # The following polls to see if there is anything in the input # buffer to read. If there is, we will later call the sysread routine $nfound = select($rout=$rin, undef, undef, .2); } } # If we found something in the read socket, we should # get it using sysread. if ($nfound > 0) { $length = sysread($handle, $localbuf, 1024); if ($length > 0) { $httpBuf .= $localbuf; } } } ############################################################ # # subroutine: web_error # Usage: # &web_error("File xxx could not be opened"); # # Parameters: # $error = Description of Web Error # # Output: # None # ############################################################ sub web_error { local ($error) = @_; $error = "Error Occured: $error"; print "$error\n"; # Die exits the program prematurely and prints an error to # stderr die $error; } # end of web_error 1;