#!/usr/bin/perl -w # $Id: edgeproxy,v 1.18 2006-03-13 14:05:49 goodell Exp $ $license = < \$REMOTE, "local=s" => \$LOCAL, "service=s" => \$SERVICE, "debug=s" => \$DEBUG, "prefix=s" => \$PREFIX, ) or die < 'tcp', Reuse => 1, Listen => SOMAXCONN, ); push @proxy_server_config, LocalPort => $SERVICE if $SERVICE; push @proxy_server_config, LocalAddr => $LOCAL if $LOCAL; $proxy_server = IO::Socket::INET->new(@proxy_server_config) or die "can't create proxy server: $@"; log_info(1, "[proxy server on " . ($LOCAL || $SERVICE) . " initialized]"); } sub service_clients { my ( $local_client, # someone internal wanting out $lc_info, # local client's name/port information $remote_server, # the socket for escaping out @rs_config, # temp array for remote socket options $rs_info, # remote server's name/port information $kidpid, # spawned child for each connection ); $SIG{CHLD} = \&REAPER; # harvest the moribund accepting(); # an accepted connection here means someone inside wants out while (1) { while ($local_client = $proxy_server->accept()) { $lc_info = peerinfo($local_client); set_state("servicing local $lc_info"); log_info(1, "[received connect from $lc_info]"); @rs_config = ( Proto => 'tcp', PeerAddr => $REMOTE, ); push(@rs_config, PeerPort => $SERVICE) if $SERVICE; log_info(1, "[connecting to $REMOTE]"); set_state("connecting to $REMOTE"); # see below $remote_server = IO::Socket::INET->new(@rs_config); if(not defined $remote_server) { next; } $rs_info = peerinfo($remote_server); set_state("connected to $rs_info"); $kidpid = fork(); die "Cannot fork" unless defined $kidpid; if ($kidpid) { $Children{$kidpid} = time(); # remember his start time close $remote_server; # no use to master close $local_client; # likewise next; # go get another client } # at this point, we are the forked child process dedicated # to the incoming client. but we want a twin to make i/o # easier. close $proxy_server; # no use to slave pipe READER, WRITER; $kidpid = fork(); die "Cannot fork" unless defined $kidpid; # now each twin sits around and ferries lines of data. # see how simple the algorithm is when you can have # multiple threads of control? # this is the fork's parent, the master's child if ($kidpid) { close WRITER; my $proxypath = ""; my $reverseproxy = undef; my $serverhost = ; my $router = ; chomp $serverhost; if($router) { chomp $router; } else { $router = ""; } if($router =~ /\+(\S+)$/) { $proxypath = "/proxy/"; $reverseproxy = $proxypath . "http://$1"; $router =~ s/\+\S+$//; log_info(1, "reverse proxy: $reverseproxy"); } $router = undef if $router eq ""; set_state("$rs_info --> $lc_info"); select($local_client); $| = 1; # Perform substitution for A, IMG, and LINK tags in HTML documents my $html = undef; my $length = undef; my $content = ""; my $headers = ""; my $data = ""; my $type = ""; while(<$remote_server>) { log_info(1, " recv: $_"); if(/^Content-Type: (\S+?)(;.*)?\r$/i) { $type = $1; $html = $type if $type =~ /^text\/html$/; $headers .= $_; } elsif(/^Content-Length: (\S+)\r$/i) { $length = $1; $headers .= $_; } elsif(/^Location: (https?:\/\/)([A-Za-z0-9.-]+)(:[0-9]+)?(\/?)(.*?)\r/i) { my ($pre, $host, $port, $post, $rest) = ($1, $2, $3, $4, $5); $port = "" if not $port; my $before = "$pre$host$port$post"; $host = append_exit($host, $router) if $router; if($PREFIX and $PREFIX ne "$host$port") { $proxypath = "http://$PREFIX/"; $pre = "$proxypath$pre"; } elsif($reverseproxy) { $pre = "$proxypath$pre"; } my $after = "$pre$host$port$post"; log_info(2, "converting: Location: $before --> Location: $after"); $headers .= "Location: $after$rest\r\n"; } elsif(/^Location: \/(.*)\r$/) { my $before = "/$1"; if($PREFIX) { $reverseproxy = "/http://$serverhost"; } if($reverseproxy) { my $after = "$reverseproxy$before"; log_info(2, "converting: Location: $before --> Location: $after"); $headers .= "Location: $after\n"; } } elsif(/^\r$/) { $headers .= $_; last; } else { $headers .= $_; } } if($length) { log_info(0, " data: $type [$length]"); read($remote_server, $data, $length) or die " error: $!"; } else { # chunked transfer coding # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 if($type) { log_info(0, " data: $type [chunked]"); } else { log_info(0, " data: unspecified type"); } while(<$remote_server>) { my ($d, $length) = ("", 0); ($length = $_) =~ s/;.*$//; $length =~ s/\r$//; chomp $length; $length = hex $length; last unless $length; while(length $d < $length) { $d .= <$remote_server>; }; $d =~ s/\r$//; chomp $d; $data .= $d; } $data .= "\r\n"; } log_info(1, sprintf " data: received %d bytes", length $data); if($html) { log_info(0, " data: $type (recognized as HTML)"); my $next = ""; foreach my $line (split /\n/, $data) { unless($router or $reverseproxy or $PREFIX) { $content .= $line; next; } my $space = ""; $space = " " if length $next; $line = "$next$space$line"; chomp $line; $next = ""; while($line) { if($line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)(https?:\/\/)([A-Za-z0-9.-]+)(:[0-9]+)?(\/|\")(.*)$/i) { my ($tag, $attr, $label, $quote, $pre, $host, $port, $post, $rest) = ($1, $2, $3, $4, $5, $6, $7, $8, $9); $port = "" if not $port; my $before = "$tag$attr$label=$quote$pre$host$port"; # normalize $tag =~ y/A-Z/a-z/; $host =~ y/A-Z/a-z/; if($router) { $host = append_exit($host, $router); } if($PREFIX and $PREFIX ne "$host$port") { $proxypath = "http://$PREFIX/"; } my $after = "$tag$attr$label=$quote$proxypath$pre$host$port"; log_info(2, "converting: <$before --> <$after"); $content .= "<$after"; $line = "$post$rest"; } elsif($line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)\/(.*)$/i) { my $after; my ($tag, $attr, $label, $quote, $rest) = ($1, $2, $3, $4, $5); my $before = "$tag$attr$label=$quote/"; # normalize $tag =~ y/A-Z/a-z/; if($PREFIX) { $reverseproxy = "/http://$serverhost"; } if($reverseproxy) { $after = "$tag$attr$label=$quote$reverseproxy/"; log_info(2, "converting: <$before --> <$after"); $content .= "<$after"; $line = $rest; } } elsif($line =~ /^(<.*?>)(.*)$/) { $content .= $1; $line = $2; } elsif($line =~ /^(<.*)$/) { $next = $1; $line = undef; } elsif($line =~ /^(.*?)(<.*)$/) { $content .= $1; $line = $2; } else { $content .= $line; $line = undef; } } $content .= "\n"; } # explicitly set Content-Length my @headerlines = split /\n/, $headers; my @newheaders = (); foreach my $headerline (@headerlines) { next if $headerline =~ /^Transfer-Encoding: chunked/; unless($headerline =~ /^Content-Length: (\S+)\r$/i) { push @newheaders, $headerline if length $headerline > 2; } } push @newheaders, sprintf "Content-Length: %d", length $content; $headers = join "\n", @newheaders; print "$headers\n\n$content"; } else { print "$headers$data"; } kill('TERM', $kidpid); # kill my twin cause we're done } # this is the fork's child, the master's grandchild else { close READER; set_state("$rs_info <-- $lc_info"); select($remote_server); $| = 1; # Perform HTTP Host field substitution my $post = 1; while($post) { my $length = 0; my $reverseproxy = undef; $post = undef; while(<$local_client>) { if(/^Content-Length: (\S+)\r$/i) { $length = $1; print; log_info(1, " send: $_"); } elsif(/^Accept-Encoding: /i) { log_info(1, " suppress: $_"); } elsif(/^Host: (\S+)\r$/i) { my $h = $1; my $router = ""; my $repl = $reverseproxy || $1; if($repl =~ /\.[A-Za-z0-9-]+\.exit(:[0-9]+)?$/) { $repl =~ s/\.([A-Za-z0-9-]+\.exit)((:[0-9]+)?)$/$2/; $router = $1; } if($repl =~ /\.[cq]\.[A-Za-z0-9-.]+\.blossom(:[0-9]+)?$/) { $repl =~ s/\.([cq]\.[A-Za-z0-9-.]+\.blossom)((:[0-9]+)?)$/$2/; $router = $1; } log_info(0, "transmitting router: [$router]"); $router .= "+$reverseproxy" if $reverseproxy; print WRITER "$h\n$router\n"; close WRITER; $repl = "Host: $repl\r\n"; log_info(1, " send: $repl"); print $repl; } elsif(/^(GET|POST) /) { my $line = $_; my $type = $1; if($line =~ /^$type \/http:\/\/([^\/]+)\//) { $reverseproxy = $1; $line =~ s/^$type \//$type /; } print $line; log_info(0, " send: $line"); $post = 1 if $type eq "POST"; } elsif(/^\r$/) { print; last if $post; } else { print; log_info(1, " send: $_"); } } if($post) { my $data = ""; read($local_client, $data, $length) or die " error: $?"; print "$data"; log_info(1, " post: [$length]"); log_info(2, " data: $data"); } } kill('TERM', getppid()); # kill my twin cause we're done } exit; # whoever's still alive bites it } } } # helper function to produce a nice string in the form HOST:PORT sub peerinfo { my $sock = shift; my $hostinfo = undef; my ($peeraddr, $peerport) = ("*", "*"); if($sock->peeraddr) { $hostinfo = gethostbyaddr($sock->peeraddr); if($hostinfo and $hostinfo->name) { $peeraddr = $hostinfo->name; } elsif($hostinfo and $sock->peerhost) { $peeraddr = $sock->peerhost; } elsif($sock->peeraddr and length($sock->peeraddr) == 4) { $peeraddr = inet_ntoa($sock->peeraddr); } } $peerport = $sock->peerport if $sock->peerport; return sprintf "%s:%s", $peeraddr, $peerport; } # reset our $0, which on some systems make "ps" report # something interesting: the string we set $0 to! sub set_state { $0 = "$ME [@_]" } # helper function to call set_state sub accepting { set_state("accepting proxy for " . ($REMOTE || $SERVICE)); } # somebody just died. keep harvesting the dead until # we run out of them. check how long they ran. sub REAPER { my $child; my $start; while (($child = waitpid(-1,WNOHANG)) > 0) { if ($start = $Children{$child}) { my $runtime = time() - $start; my $line = sprintf "process $child completed in %dm %ss\n", $runtime / 60, $runtime % 60; chomp $line; log_info(1, $line); delete $Children{$child}; } else { log_info(1, "process $child exited [$?]"); } } # If I had to choose between System V and 4.2, I'd resign. --Peter Honeyman $SIG{CHLD} = \&REAPER; };