#!/usr/bin/env perl # # -- Copy files from an http URL to STDOUT # # Syntax: httpcat http://hostname/filepath # # This script copies a file from a given URL by opening port 80 (or a # port given in the URL) on the named host and giving a GET request # to the httpd. It's very simple, so it can be used in # non-interactive scripts. It works for binary files too. # # Our motto: "Browser? We don't need no stinkin' browser!" # # Thanks to Larry Wall and Randal L. Schwartz for the useful examples # in their book "Programming Perl" (O'Reilly, 1991), which made this # so easy to implement. # # Here is one simple example of how you might use this: # # httpcat http://www.kgw.com/images/skycam/skycam-8-75.jpg > /tmp/salem.jpg # xv -max -root -quit /tmp/salem.jpg # # Given what happened to Randal Schwartz, those guys probably need # to be watched. Speaking of that... # # This is Shareware. If you use it, please send at least $10 to the # Legal Defense Fund for Randal Schwartz (payable to "Stonehenge") at # Stonehenge Consulting Services, attn: Legal Defense Fund # 4470 SW Hall Suite 107 , Beaverton, Oregon 97005-2122 USA # See http://www.lightlink.com/fors/ for more information. # # This version has been updated to send a simple HTTP/1.1 request # (not just an implied HTTP/0.9 GET) and to receive simple HTTP/1.1 # replies. # # (C) Copyright 1997-1999 by Eric Myers # Department of Physics, University of Michigan, Ann Arbor, MI # @(#) $Id: httpcat,v 2.8 2004/11/24 01:49:14 myers Exp myers $ #======================================================================* chop($prog=`basename $0`); # this program name (probably httpcat) $port = 80; # default port for httpd (usually 80) $HTTP = 1.1; # default HTTP protocol level die "usage: $prog http://host.domain/directory/path/to/filename\n" unless @ARGV; ($URL) = @ARGV; # info about this script and system, for the User-Agent: ($rcsversion) = '$Revision: 2.8 $' =~ m/\s+(\d*\.\d*)/; chop($uname= `uname`); ################ ## Parse the URL for hostname and filepath # http://host/filepath ( $host, $filepath ) = $URL =~ m!http://([-\w\.:]+)(/\S*)$!; # Allow the http: protocol indication to be omitted ( $host, $filepath ) = $URL =~ m!//([-\w\.:]+)(/\S*)$! unless $host; # Allow rcp syntax host:filepath but no alternate port ( $host, $filepath ) = $URL =~ m!([-\w\.]+):(\S+)$! unless $host; # Extract any port number from host field $Host = $host; # save host:port first ($newhost, $newport ) = $host =~ m/^([-\w\.]+):(\d+)/; $port = $newport if $newport; $host = $newhost if $newhost; # make sure filepath begins with / ($filepath =~ m!^/! ) || ( $filepath = "/" . $filepath ); die "$prog: incomplete URL (no host name).\n" unless $host; die "$prog: incomplete URL (no file name).\n" unless $filepath; ## Get full remote host name (and address) ($remotehost, $aliasses, $addrtype, $length, $rmtaddr) = gethostbyname($host); die "$prog: unknown host $host.\n" unless $remotehost; ################### # Socket Plumbing: $AF_INET = 2; $SOCK_STREAM = 1; $sockaddr = 'S n a4 x8'; ## Make the socket filehandle ($name,$aliases,$proto) = getprotobyname('tcp'); ($name,$aliases,$port) = getservbyname($port,'tcp') unless $port =~ /^\d+$/; ( socket(S, $AF_INET, $SOCK_STREAM, $proto) ) || ( die "socket error: $!" ); ## Bind the socket to local address chop($localhost = `hostname`); ($name, $aliasses, $addrtype, $length, $lcladdr) = gethostbyname($localhost); $lclsock = pack($sockaddr, $AF_INET, 0, $lcladdr); ( bind(S, $lclsock) ) || ( die "bind error: $!" ); ## Connect to httpd server at remote address $rmtsock = pack($sockaddr, $AF_INET, $port, $rmtaddr); ( connect(S, $rmtsock) ) || ( die "connect error: $!" ); ## Set the socket to be command buffered (line by line buffering) select(S); $| = 1; select(STDOUT); ## No need to fork here (I hope). Just give the commands ## then read response from the socket until done. ################### ## Send GET request, collect reply if ( $HTTP >= 1.0 ) { print S "GET $filepath HTTP/$HTTP\n"; print S "Host: $host\n"; print S "User-Agent: $prog/$rcsversion (Unix;; $uname)\n"; print S "\n"; } else { print S "GET $filepath \n"; } # First line back should be response, with status code $status = undef; $line = ; ($httpver, $status, $phrase) = $line =~ m/(\S+)\s+(\d\d\d)\s+(.*)$/; # If status was set then assume a Full-Response, so skip headers # Save content-length &tc. for possible future use. if ( $status ) { while() { ($Length) = /Content-length:\s*(\d+)/i unless $Length; ($Server) = /Server:\s(.*)/i unless $Server; last if ( /^\s*$/ ); # blank line ends headers } # Unless the status code was 200 we didn't get the file, so quit if ( $status != 200 ) { print STDERR "$prog: errror $status - $phrase\n"; $e2 = int($status/100 ) * 100; $errno = $status - $e2 + ($e2/10); exit $errno; } } # If no status then this is assumed raw HTTP/0.9 response (body) # and so the first line is part of that body if ( ! $status ) { print $line; } # read and echo the body (or rest of it) $body_cnt = 0; while() { print; $body_cnt += length(); last if ( $body_cnt >= $Length); } close(S); exit 0;