Security warning: Please don't use any version older than 1.11!

The gzip_cnc program

License agreement for gzip_cnc

gzip_cnc is free software. Bug reports and feature suggestions are always welcome.

The exact conditions for usage and redistribution of gzip_cnc are described in the Artistic License.

Copyright holder in the meaning of this license contract is Michael Schröpl.

Source code of gzip_cnc

#!/usr/bin/perl

##################################################################
### gzip_cnc - an Apache CGI handler to deliver gzipped static ###
### content via Content Negotiation, using its own file cache  ###
##################################################################
# by Michael Schröpl (http://www.schroepl.net/projekte/gzip_cnc/)

# =====================================================================
# 'enforce good coding style'
  use strict;
# =====================================================================



#############################
### configuration section ###
#############################

# =====================================================================
# (integer) the compression level to be used (values: [0-9]),
#           regardless whether we use system command or Perl zlib API
# 0 = largest  file (=worst compression), but faster (=less CPU load)
# 9 = smallest file (=best  compression), but slower (=more CPU load)
  my $gzip_quality           = 9;
# (as we will compress each file only once we will use the best quality)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_QUALITY'}))
     {$gzip_quality          = $ENV{'GZIP_CNC_QUALITY'};}
# ---------------------------------------------------------------------
# find out whether we are able to use a Perl module for compression
  my $use_zlib               = 0;
#
# try to import the 'Compress::Zlib' module ...
  eval "use Compress::Zlib;";
# ... did it work? (has 'eval' set its error variable?)
  if (! $@) { $use_zlib = 1; }
# ---------------------------------------------------------------------
# if we don't have 'Compress::Zlib' available we need to use the 'gzip'
# command which we will invoke via 'system()' call but need to know how
# to exactly do this:
#
# (string) the pathname of some UNIX 'gzip' command capable of compres-
#          sing a file content using the GZIP algorithm
  my $gzip_path              = '/usr/bin/gzip';
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_PROGRAM'}))
     {$gzip_path             = $ENV{'GZIP_CNC_PROGRAM'};}
#
# compose UNIX commandline options
  my $gzip_options           = "-c -n -$gzip_quality";
# '-c' = "send compressed data to stdout, don't change original file"
# '-n' = "don't store original file name inside compressed file"
#
# these settings MAY be obsolete, as we will prefer to use the Perl
# module if we are able to do so, to avoid starting a separate process
# for compressing. we will find out this at runtime, to make this code
# work in as many installations as possible.
# ---------------------------------------------------------------------
# (string) the pathname of the root directory of the cache tree
#          where we have to store the gzip-compressed document versions
  my $cache_directory        = '';
# (setting this to an empty string defaults to '/.gzip_cnc_cache'
#  inside your domain's DOCUMENT_ROOT, which might not be the best
#  choice but at least is very likely to work for you.
#  note that in this case the cache tree will be part of your URL tree
#  and count towards your web space usage limit!)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_CACHE'}))
     {$cache_directory       = $ENV{'GZIP_CNC_CACHE'};}
# ---------------------------------------------------------------------
# (string) the pathname of the log file reporting about all activities
#          from this program
  my $logfile_path           = '';
# (setting this to an empty string disables the logging feature)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_LOGFILE'}))
     {$logfile_path          = $ENV{'GZIP_CNC_LOGFILE'};}
# ---------------------------------------------------------------------
# (string) the absolute URL or file name of our own Error404 handler
  my $error404_handler       = '';
# (setting this to an empty string enables our own tiny 404 error page)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_404_HANDLER'}))
     {$error404_handler      = $ENV{'GZIP_CNC_404_HANDLER'};}
# check whether it is a file or an URL
  my $error404_is_url        = ($error404_handler =~ /^http:/ ? 1 : 0);
# ---------------------------------------------------------------------
# (string) the MIME type of the documents we are serving
  my $mime_type              = 'text/html';
# (this value must be set correctly, as an Apache handler embedded via
#  the 'Action:' interface has no access to the Apache configuration)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_MIMETYPE'}))
     {$mime_type             = $ENV{'GZIP_CNC_MIMETYPE'};}
# ---------------------------------------------------------------------
# (flag)   send additional (proprietary) HTTP headers?
# ('0' or empty string is 'No', every other value is 'Yes')
  my $send_own_headers       = 1;
# (if set to 'Yes' we will send our own program name and version, and
#  also a header containing the size of the original uncompressed file
#  content. this can be helpful for debugging purposes, and be used for
#  bandwidth savings benchmarks by an intelligent client software.)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_OWNHEADERS'}))
     {$send_own_headers      = $ENV{'GZIP_CNC_OWNHEADERS'};}
# ---------------------------------------------------------------------
# (flag)   enable self-testing mode for this script?
# ('0' or empty string is 'No', every other value is 'Yes')
  my $enable_self_test_mode  = 1;
  if (defined ($ENV{'GZIP_CNC_SELFTEST'}))
     {$enable_self_test_mode = $ENV{'GZIP_CNC_SELFTEST'};}
# (if set to 'Yes' the script will display selftest messages in the
#  browser if invoked directly via URL; it will then tell about real
#  absolute path names on your server, therefore you may want to
#  disable this feature here, e. g. after successful configuration)
#
# override this value by environment variable
# ---------------------------------------------------------------------
# (number) of seconds while the served page should be cached by the
#          client without asking for its content to be validated
# this has nothing to do with gzipping directly, but we are about to
# save bandwidth, and the cheapest HTTP requests are those that are not
# sent at all ...
  my $cache_expire_seconds   = 86400;
# (setting this to zero disables this feature)
#
# override this value by environment variable
  if (defined ($ENV{'GZIP_CNC_EXPIRES'}))
     {$cache_expire_seconds  = $ENV{'GZIP_CNC_EXPIRES'};}
# =====================================================================
# (you shouldn't normally have to change anything below this line!)



########################
### global variables ###
########################

# =====================================================================
# HTTP standard line separator
  my $crlf                    = "\015\012";
# ---------------------------------------------------------------------
# URL for the requested file
  my $url                     = '';
# ---------------------------------------------------------------------
# path names for requested and gzipped file
  my $pathname_requested      = '';
  my $pathname_gzipped        = '';
  my $pathname_to_serve       = '';
# ---------------------------------------------------------------------
# are we able to use our compressed cache content?
  my $use_cache_content       = 0;
  my $cache_status            = '01:OKAY';
# ---------------------------------------------------------------------
# file attributes for requested and gzipped file
  my $file_size_uncompressed  = undef;
  my $file_size_compressed    = undef;
  my $file_age_uncompressed   = undef;
  my $file_age_compressed     = undef;
# ---------------------------------------------------------------------
# strings for date formatting in HTTP headers according to RFC 822
  my @rfc822_weekdays         = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
  my @rfc822_months           = ('Jan','Feb','Mar','Apr','May','Jun',
                                 'Jul','Aug','Sep','Oct','Nov','Dec');
# ---------------------------------------------------------------------
# program version identifiers
  my $program_version         = '1.11';
  my $program_date            = '2002-09-05';
# ---------------------------------------------------------------------
# cache root directory default value, if none has been specified
  my $cache_default_directory = $ENV{'DOCUMENT_ROOT'} . '.gzip_cnc_cache';
# =====================================================================



##############################################################
### function: format a date/time value according to RFC822 ###
##############################################################

# =====================================================================
  sub date_rfc822 ($)
  {
    # =================================================================
    # take parameter value
      my ($time_stamp) = @_;
    # =================================================================
    # split time value into Greenwich time
      my ($sec,$min,$hour,$mday,$mon,$year,$week_day,$yday,$dst)
         = gmtime ($time_stamp);
    # -----------------------------------------------------------------
    # create a RFC822 compatible time string
      return sprintf ('%s,' . ' %d %s %d %02d:%02d:%02d GMT',
                      $rfc822_weekdays[$week_day], $mday,
                      $rfc822_months[$mon],    $year+1900,
                      $hour, $min, $sec)
    # =================================================================
  }
# =====================================================================



#########################################
### function: create a directory path ###
#########################################

# =====================================================================
  sub make_path ($)
  {
    # =================================================================
    # take parameter values
      my ($path_name) = @_;
    # =================================================================
    # separate between prefix an last directory name
      my ($prefix, $dir_name) = ('', $path_name);
      if ($path_name =~ /^(.*)\/([^\/]+)$/)
         { ($prefix, $dir_name) = ($1, $2); }
    # -----------------------------------------------------------------
    # does the prefix already exist?
      if (! -d $prefix)
         {
           # ----------------------------------------------------------
           # try to recursively create this one first ... did it work?
             if (make_path ($prefix) != 0) { return; }
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # now create the last directory of this path ... did it work?
      if (mkdir ($path_name, 0755) != 0) { return; }
    # (give write access to user only)
    # -----------------------------------------------------------------
    # everything's fine
      return 0;
    # =================================================================
  }
# =====================================================================



#################################################################
### function: write a log file entry and terminate processing ###
#################################################################
# we want to know what happened with our pages ...

# =====================================================================
  sub terminate ($$)
  {
    # -----------------------------------------------------------------
    # do we really want the logging feature?
      if (! $logfile_path) { exit (0); }
    # =================================================================
    # take parameter value
      my ($status_code, $path_name) = @_;
    # =================================================================
    # calculate saved volume
      my $saved_volume = (  $use_cache_content
                          ? (  $file_size_uncompressed
                             - $file_size_compressed)
                          : 0);
    # -----------------------------------------------------------------
    # compute printable date and time
      my ($sec,$min,$hour,$mday,$mon,$year,$week_day,$yday,$dst) =
         localtime (time());
      my $date_format = '%04d-%02d-%02d_%02d:%02d:%02d';
    # -----------------------------------------------------------------
    # calculate the total computing time
      my @cpu_time = times();
    # -----------------------------------------------------------------
    # calculate savings rate
      my $savings_rate = ($file_size_uncompressed > 0
                       ? (100 * $saved_volume) / $file_size_uncompressed
                       : '0');
    # -----------------------------------------------------------------
    # check whether the log file already exists
      if (! -f $logfile_path)
         {
           # ----------------------------------------------------------
           # split path name into directory and filename part
             if ($logfile_path !~ /^(.*)\/([^\/]+)$/) { return; }
             my $log_dir = $1;
           # ----------------------------------------------------------
           # check whether the directory for the log file already exists
             if (! -d $log_dir)
                {
                  # ---------------------------------------------------
                  # so let's try to create this directory
                    if (make_path ($log_dir) != 0)
                       {
                         # --------------------------------------------
                         # we are not able to open a log file here ...
                           return;
                         # --------------------------------------------
                       }
                  # ---------------------------------------------------
                }
           # (at this point we hope to be able to write a logfile entry)
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # open log file in extend mode
      if (open (LOG, ">>$logfile_path"))
         {
           # ----------------------------------------------------------
           # replace undefined values
             if (! defined ($file_size_uncompressed))
                { $file_size_uncompressed = '-'; }
             if (! defined ($file_size_compressed))
                { $file_size_compressed = '-'; }
           # ----------------------------------------------------------
           # write log entry
             print LOG sprintf ("$date_format %s: %s -> %s (%.2f%%) %.2f sec %s\n",
                                $year+1900, $mon+1, $mday, $hour, $min, $sec,
                                ($status_code ? $status_code : $cache_status),
                                $file_size_uncompressed,
                                $file_size_compressed,
                                $savings_rate,
                                $cpu_time[0]+$cpu_time[1]+$cpu_time[2]+$cpu_time[3],
                                $path_name);
           # ----------------------------------------------------------
           # close the log file
             close (LOG);
           # (again, noone would hear our last cry if this fails ...)
           # ----------------------------------------------------------
         }
    # (if we can't open the file, where should we write the message to?)
    # -----------------------------------------------------------------
    # finally do what the name of this function suggests us to ...
      exit (0);
    # =================================================================
  }
# =====================================================================



#######################################
### function: handle a 404 response ###
#######################################

# =====================================================================
# the requested document may not even exist - Apache didn't check for
# that before activating our handler, and we don't seem to have a
# chance to forward this duty back to the server.
  sub handle_404 ($$)
  {
    # =================================================================
    # take parameter value
      my ($gzipcnc_status_code, $path_name) = @_;
    # ----------------------------------------------------------------
    # if no value given then substitute a default value for display
      if (! $path_name) { $path_name = '-'; }
    # =================================================================
    # do we know about some 404 handler to forward this request to?
      if ($error404_is_url)
         {
           # ----------------------------------------------------------
           # redirect the request to the handler's URL
           # (and append the URL of the requested file)
             print        'Status: 302 Redirected',
                   $crlf, 'Location: ', $error404_handler,
                          ($error404_handler =~ /\?/ ? '&' : '?'),
                          'url=', $ENV{'PATH_INFO'},
                   $crlf,
                   $crlf;
           # ----------------------------------------------------------
           # write log entry and terminate program
             terminate ($gzipcnc_status_code, $path_name);
           # ----------------------------------------------------------
         }
    # ----------------------------------------------------------------
    # if we arrive here, it seems to be a file rather than an URL
      binmode (STDOUT);
      if ($error404_handler)
         {
           # ----------------------------------------------------------
           # try to open this file
             if (open (FILE_HANDLE, $error404_handler))
                {
                  # ---------------------------------------------------
                  # use input file handle in binary mode
                    binmode (FILE_HANDLE);
                  # (without this it won't work on Windows machines)
                  # ---------------------------------------------------
                  # set the input line separator to 'undef' and
                  # read the 404-file's content in one step
                    local $/;
                    my $error_document = <FILE_HANDLE>;
                    close   (FILE_HANDLE);
                  # ---------------------------------------------------
                  # print the error_document
                    print        'Status: 404 Not Found',
                          $crlf, 'Content-Type: text/html',
                          $crlf, 'Content-Length: ', length($error_document),
                          $crlf,
                          $crlf, $error_document;
                  # ---------------------------------------------------
                  # write log entry and terminate program
                    terminate ($gzipcnc_status_code, $path_name);
                  # ---------------------------------------------------
                }
           # ----------------------------------------------------------
         }
    # ----------------------------------------------------------------
    # if we arrive here we didn't (successfully) send the error_document)
    # - so let's send our own little one.
    # one might possibly write some cool 404 handler here;
    # as for now, we just keep it as simple as possible
      my $error_document = '<html><head><title>404 Not Found</title>'
                         . '</head><body><h1>404 Not Found</h1>'
                         . '<p>The requested document <code>' . $url
                         . '</code> was not found on this server.</p>'
                         . '</body></html>';
    # ----------------------------------------------------------------
    # send this error_document in uncompressed form
      print        'Status: 404 Not Found',
            $crlf, 'Content-type: text/html',
            $crlf, 'Content-Length: ', length($error_document),
            $crlf,
            $crlf, $error_document;
    # ----------------------------------------------------------------
    # write log entry and terminate program
      terminate ($gzipcnc_status_code, $path_name);
    # =================================================================
  }
# =====================================================================



##################################################
### function: validate being called as handler ###
##################################################

# =====================================================================
# gzip_cnc mainly relies upon evaluating PATH_INFO and PATH_TRANSLATED
# environment variables being set by its embedding into the Apache
# request chain, using the 'Action' configuration directive.
# but there happens to be another possibility of setting both these
# environment variables: explicitly requesting the CGI script and then
# appending some relative URL to the request path (NOT as query string
# but like it were some subdirectory of gzip_cnc itself) - example:
#     GET /cgi-bin/gzip_cnc.pl/index.html
# this will make gzip_cnc being invoked exactly like if it were set as
# handler for '*.html' and the request
#     GET /index.html
# had been sent to the server.
# and if gzip_cnc would be invoked this way, it would access the file
# that has been requested - regardless of any protection mechanisms
# being applied to the direct URL request! /index.html MAY well be
# protected (by Server Authentication etc.) but if the request would
# ask for the gzip_cnc script then THIS script file's protection is the
# one to be checked ONLY and NOT the protection of the file that would
# now actually be served by gzip_cnc.
# so if invoked directly via URL gzip_cnc would be a perfect 'tunnel'
# through HTTP security of all types - one could even read the source
# code of scripts that are installed inside some (otherwise safe)
# '/cgi-bin' directory that has been specified via the Apache
# 'ScriptAlias' directive.
#
# how can we reliably find out whether gzip_cnc is working as handler
# or invoked via URL?
# as for my own tests, the handler API of Apache will deliver another
# environment variable REDIRECT_URL which will be set in both cases
# but contain the value of the original request. thus it may contain
# either the same value as PATH_INFO or the explicit CGI script URL
# with the additional path to set PATH_TRANSLATED, i. e. the 'attack'.
#
# therefore the function below will check whether REDIRECT_URL and
# PATH_INFO have the same content and reject the access otherwise.
# I am not perfectly sure whether this will reject even too many
# requests, due to some type or URL translation unknown to me ...
#
  sub validate_handler_activation ()
  {
    # =================================================================
    # do PATH_INFO and REDIRECT_URL contain the same value?
      if ($ENV{'PATH_INFO'} eq $ENV{'REDIRECT_URL'})
         {
           # ----------------------------------------------------------
           # it looks like we really have been invoked as a handler
           # -> pass this test and continue operation
             return;
           # ----------------------------------------------------------
         }
    # ----------------------------------------------------------------
    # if we arrive here, we have to reject the request
      my $error_document = '<html><head><title>403 Forbidden</title>'
                         . '</head><body><h1>403 Forbidden</h1>'
                         . '<p>You are not entitled to invoke this '
                         . 'script the way you just tried to.</p>'
                         . '</body></html>';
      print        'Status: 403 Forbidden',
            $crlf, 'Content-Type: text/html',
            $crlf, 'Content-Length: ', length($error_document),
            $crlf,
            $crlf, $error_document;
    # ----------------------------------------------------------------
    # write log entry and terminate program
      terminate ('97:SELF_TEST_MISUSE', $ENV{'PATH_INFO'});
    # =================================================================
  }
# =====================================================================



####################################
### function: serve file content ###
####################################

# =====================================================================
  sub serve_file ($$)
  {
    # =================================================================
    # take parameter values
      my ($status_code, $pathname_to_log) = @_;
    # =================================================================
    # open the document file for reading
      if (open (FILE_HANDLE, $pathname_to_serve))
         {
           # ----------------------------------------------------------
           # use input file handle and STDOUT in binary mode
             binmode (FILE_HANDLE);
             binmode (STDOUT);
           # (without this it won't work on Windows machines)
           # ----------------------------------------------------------
           # we don't really know which MIME type we are handling
           # - as a first demo version we assume 'text/html' only
             my $now = time ();
             print        'Status: 200 Okay',
                   $crlf, 'Date: ', date_rfc822 ($now),
                   $crlf, 'Vary: Accept-Encoding',
                   $crlf, 'Last-Modified: ',
                            ($use_cache_content
                           ? date_rfc822 ($file_age_compressed)
                           : date_rfc822 ($file_age_uncompressed)),
                   $crlf, 'Content-Type: ', $mime_type,
                   $crlf, 'Content-Length: ',
                            ($use_cache_content
                           ? $file_size_compressed
                           : $file_size_uncompressed);
           # ----------------------------------------------------------
           # encourage browser caching, if activated by configuration
             if ($cache_expire_seconds)
                {
                  # ---------------------------------------------------
                  # sent corresponding HTTP headers
                    print $crlf, 'Cache-Control: public,max-age=',
                                 $cache_expire_seconds,
                          $crlf, 'Expires: ',
                                 date_rfc822 ($now + $cache_expire_seconds);
                  # ---------------------------------------------------
                }
           # ----------------------------------------------------------
           # send additional HTTP headers if cache content is used
             if ($use_cache_content)
                {
                  # ---------------------------------------------------
                  # tell the client about the encoding we have applied
                    print $crlf, 'Content-Encoding: gzip';
                  # ---------------------------------------------------
                  # send our own headers as well?
                    if ($send_own_headers)
                       {
                         # --------------------------------------------
                           my $x = 'X-Gzipcnc-';
                         # --------------------------------------------
                         # tell the client about the original file size
                           print $crlf, $x, 'Original-File-Size: ',
                                        $file_size_uncompressed,
                         # (this may be used by some benchmark tools)
                         # --------------------------------------------
                         # tell the client about the Apache handler used
                                 $crlf, $x, 'Version: ',
                                        $program_version, ' ',
                                        "($program_date)";
                         # (this is for information only)
                         # --------------------------------------------
                         # is the self-testing mode enabled?
                           if ($enable_self_test_mode)
                              {
                                # -------------------------------------
                                # supply two more diagnostic headers
                                  print $crlf, $x, 'Path-Info: ',
                                               $ENV{'PATH_INFO'};
                                  print $crlf, $x, 'Path-Translated: ',
                                               $ENV{'PATH_TRANSLATED'};
                                # -------------------------------------
                              }
                         # --------------------------------------------
                       }
                  # ---------------------------------------------------
                }
           # ----------------------------------------------------------
           # empty line to terminate the HTTP headers section
             print $crlf,
                   $crlf;
           # ----------------------------------------------------------
           # set the input line separator to 'undef'
             local $/;
           # (to read the original file's content in one step)
           # ----------------------------------------------------------
           # now we deliver the document's content
             print      <FILE_HANDLE>;
           # ----------------------------------------------------------
           # and finally close the document again
             close      (FILE_HANDLE);
           # ----------------------------------------------------------
           # write log entry and terminate program
             terminate  ($status_code, $pathname_to_log);
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # we could not open the file, but:
      if ($use_cache_content)
         {
           # ----------------------------------------------------------
           # if this happened during a cache access
           # we can still serve the original file:
             $pathname_to_serve = $pathname_requested;
             $use_cache_content = 0;
             serve_file ('21:FOPEN_FAILED_GZIP', $pathname_gzipped);
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # otherwise, we simply fail
      handle_404 ('22:FOPEN_FAILED_ORIGINAL', $pathname_requested);
    # =================================================================
  }
# =====================================================================



###################################
### function: create cache file ###
###################################
# gzip a copy of the original file and move the result into the cache

# =====================================================================
  sub make_cache_entry ($)
  {
    # =================================================================
    # take parameter values
      my ($status_code) = @_;
    # =================================================================
    # separate between path name and file name
      if ($pathname_gzipped !~ /^(.*)\/([^\/]*)$/)
         { serve_file ('41:PATHNAME_BROKEN', $pathname_gzipped); }
      my ($path_name, $file_name) = ($1, $2);
    # -----------------------------------------------------------------
    # does the target directory already exist?
      if (! -d $path_name)
         {
           # ----------------------------------------------------------
           # try to create the file inside the target cache directory
             if (make_path ($path_name) != 0)
                { serve_file ('42:MKDIR_FAILED', $path_name); }
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # create some unique temporary file name
      my $unique_tmp_pathname = "$path_name/$file_name.$$";
    # -----------------------------------------------------------------
    # are we entitled to use a Perl API to 'zlib'?
      if ($use_zlib)
              {
                # -----------------------------------------------------
                # try to open a temporary output file
                  if (! open (GZIP_HANDLE, ">$unique_tmp_pathname"))
                     { serve_file ('51:FOPEN_FAILED', $path_name); }
                # -----------------------------------------------------
                # use this file handle in binary mode
                  binmode (GZIP_HANDLE);
                # (without this it won't work on Windows machines)
                # -----------------------------------------------------
                # choose compression level and strategy
                  my $zlib_mode = 'wb' . $gzip_quality;
                # ("write binary, use default strategy"
                # - i. e. "try to match strings", not "Huffman only";
                # we expect to repeatedly match long strings in HTML)
                # -----------------------------------------------------
                # try to make the zlib API use this file ...
                  my $gz = Compress::Zlib::gzopen (*GZIP_HANDLE, $zlib_mode);
                # ... and create a zlib object '$gz' in case of success
                  if (! $gz)
                     {
                       # ----------------------------------------------
                         close      (GZIP_HANDLE);
                         unlink     ($unique_tmp_pathname);
                         serve_file ('52:ZLIBOPEN_FAILED',
                                     $unique_tmp_pathname);
                       # ----------------------------------------------
                     }
                # -----------------------------------------------------
                # set the input line separator to 'undef'
                  local $/;
                # (to read the original file's content in one step)
                # -----------------------------------------------------
                # open the original file
                  if (! defined (open (DAT, $pathname_requested)))
                     {
                       # ----------------------------------------------
                         close      (GZIP_HANDLE);
                         unlink     ($unique_tmp_pathname);
                         serve_file ('53:READ_FAILED',
                                     $pathname_requested);
                       # ----------------------------------------------
                     }
                # -----------------------------------------------------
                # read the content of the original file ...
                  my $content = <DAT>;
                # ... and close the file
                  close DAT;
                # -----------------------------------------------------
                # compress the original file content ...
                  $gz->gzwrite ($content);
                # ... and close the output file
                  $gz->gzclose ();
                # -----------------------------------------------------
              }
         else
              {
                # -----------------------------------------------------
                # form a system command to compress a file content
                  my $command = "$gzip_path $gzip_options "
                              . "$pathname_requested >$unique_tmp_pathname";
                # -----------------------------------------------------
                # convert path separators (if running on Windows)
                  if ($^O =~ /Win32/i)
                     { $command =~ tr!/!\\!; }
                # (although Perl itself can handle both types of path
                #  separators, the "system()" function cannot do so)
                # -----------------------------------------------------
                # try to compress the original file into another file
                # via 'gzip' system command ... did it work?
                  my $rc = system ($command);
                  if ($rc != 0)
                     {
                       # ----------------------------------------------
                         unlink     ($unique_tmp_pathname);
                         serve_file ('44:GZIPFILE_FAILED',
                                     $unique_tmp_pathname);
                       # ----------------------------------------------
                     }
                # -----------------------------------------------------
              }
    # -----------------------------------------------------------------
    # rename the file to its final name inside the cache
      if (! rename ($unique_tmp_pathname, $pathname_gzipped))
         {
           # ----------------------------------------------------------
             unlink     ($unique_tmp_pathname);
             serve_file ('45:RENAME_FAILED', $pathname_gzipped);
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # collect the attributes of the gzipped file
      ($file_size_compressed, $file_age_compressed)
        = (stat ($pathname_gzipped)) [7,9];
      if (! $file_age_compressed)
         {
           # ----------------------------------------------------------
             unlink     ($pathname_gzipped);
             serve_file ('46:STAT_FAILED_CACHE', $pathname_gzipped);
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # set the status variable
      $cache_status = $status_code;
    # =================================================================
  }
# =====================================================================



################################################################
### function: self-test in case of direct invocation via URL ###
################################################################

# =====================================================================
  sub self_test ()
  {
    # =================================================================
    # print HTTP headers
      print        'Status: 200 Okay',
            $crlf, 'Content-type: text/html',
            $crlf,
            $crlf;
    # -----------------------------------------------------------------
    # are we entitled to do the self test?
      if (! $enable_self_test_mode)
         {
           # ----------------------------------------------------------
           # display only a minimal output message
             print       '<html>',
                   "\n", '<head>',
                   "\n", '<title>gzip_cnc self test (disabled)</title>',
                   "\n", '</head>',
                   "\n", '<body>',
                   "\n", '<p><i>gzip_cnc</i> self test mode ',
                             'is disabled by configuration</p>',
                   "\n", '</body>',
                   "\n", '</html>';
           # ----------------------------------------------------------
           # write log entry and terminate program
             terminate ('98:SELF_TEST_DISABLED', '-');
           # ----------------------------------------------------------
         }
    # -----------------------------------------------------------------
    # otherwise: open XHTML document
      print       '<?xml version="1.0" encoding="iso-8859-1" ?>',
            "\n", '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
            "\n", '<html xmlns="http://www.w3.org/1999/xhtml">',
            "\n", '<head>',
            "\n", '<title>gzip_cnc self test</title>',
            "\n", '<style type="text/css">',
            "\n", '<!--',
            "\n", 'body{color:#000;background-color:#eee;font-size:16px;}',
            "\n", 'em{color:#909;}',
            "\n", 'tt{color:#00f;background-color:#fff;font-size:18px;}',
            "\n", 'strong{color:#003366;}',
            "\n", 'small{font-size:75%;}',
            "\n", '.okay,.error{color:#fff;padding:2px;}',
            "\n", '.okay{background-color:#090;}',
            "\n", '.warning{background-color:#ff0;}',
            "\n", '.error{background-color:#f00;}',
            "\n", 'em,tt,.okay,.warning,.error{font-weight:bold;}',
            "\n", 'em,strong{background-color:transparent;}',
            "\n", '//-->',
            "\n", '</style>',
            "\n", '</head>',
            "\n", '<body>',
            "\n", '<h1><a title="link to gzip_cnc project site" ',
                         'href="http://www.schroepl.net/projekte/gzip_cnc/">',
                         '<i>gzip_cnc</i></a> ',
                  $program_version, ' <small>(', $program_date, ')</small></h1>',
            "\n", '<p>this program is <strong>running in CGI mode</strong> ',
                     '- it has been<br /><em>invoked via URL</em>, ',
                     'i. e. <em>not</em> as an Apache handler,<br />and will ',
                     'now <strong>check its configuration</strong>.</p>';
    # -----------------------------------------------------------------
    # display path names
      print "\n", '<p><strong>this CGI script file</strong> has been ',
                      'installed at<br />&quot;<tt>',
                      $ENV{'SCRIPT_FILENAME'}, '</tt>&quot;;<br />',
                      'the path name of your <strong>document root ',
                      'directory</strong> is<br />&quot;<tt>',
                      $ENV{'DOCUMENT_ROOT'}, '</tt>&quot;',
                  '</p>';
    # -----------------------------------------------------------------
    # check GZIP quality
      print "\n", '<p>selected <strong>compression quality level</strong> ',
                      'for gzip: &quot;<tt>', $gzip_quality, '</tt>&quot;',
                  '</p>';
    # -----------------------------------------------------------------
    # check GZIP mode
      print "\n", '<p>checking which <strong>compression tool</strong> ',
                     'will be used:<br />&quot;',
                  ($use_zlib ? '<tt>zlib, via Perl module API</tt>&quot;'
                             . '<br /><small><em>(the Compress::Zlib package '
                             . 'is installed and usable)</em></small> '
                             . '<span class="okay">&nbsp;OKAY&nbsp;</span>'
                             : '<tt>gzip, via system() call</tt>&quot;'
                             . '<br /><small><em>(the Compress::Zlib package '
                             . 'is not accessible)</em></small> '),
                  '</p>';
    # -----------------------------------------------------------------
    # check GZIP command binary if necessary
      if ($use_zlib)
              {
                # -----------------------------------------------------
                # just a short message
                  print "\n", "<p>thus we <em>don't need to check</em> ",
                              'the setting for the <i>gzip</i> ',
                              '<strong>system command</strong><br />',
                              '&quot;<tt>', $gzip_path, '</tt>&quot; ',
                              "which won't be used now</p>";
                # -----------------------------------------------------
              }
         else
              {
                # -----------------------------------------------------
                # display and test gzip system command binary
                  print "\n", '<p><strong>gzip system command</strong>: ',
                                  '&quot;<tt>', $gzip_path, ' ',
                                  $gzip_options, '</tt>&quot;';
                # -----------------------------------------------------
                # does this file exist, and is it executable for us?
                  print '<br /><small><em>(this file ';
                  if    (! -f $gzip_path)
                        { print 'does not exist!)</em></small> ',
                                '<span class="error">&nbsp;ERROR'; }
                  elsif (! -x $gzip_path)
                        { print 'is not executable!)</em></small> ',
                                '<span class="error">&nbsp;ERROR'; }
                  else  { print 'exists and is executable)</em></small> ',
                                '<span class="okay">&nbsp;OKAY'; }
                  print '&nbsp;</span></p>';
                # -----------------------------------------------------
                # display server operating system
                  my $is_windows = ($^O =~ /Win32/i);
                  print "\n", '<p><strong>operating system</strong> ',
                                 'running on this machine: ',
                              '&quot;<tt>', $^O, '</tt>&quot;',
                              '<br /><small><em>(path separators for ',
                                     'the gzip system command will ',
                              ($is_windows ? 'be translated from '
                                           . '&quot;<tt>/</tt>&quot; to '
                                           . '&quot;<tt>\\</tt>&quot;'
                                           : 'remain unchanged'),
                              ')</em></small></p>';
                # -----------------------------------------------------
              }
    # -----------------------------------------------------------------
    # check cache root directory
      print "\n", '<p><strong>cache root directory</strong>:<br />',
                  ($cache_directory eq $cache_default_directory
                  ? '[none specified] -&gt; ' : ''),
                  "&quot;<tt>$cache_directory</tt>&quot;";
    # is this the default value?
      if ($cache_directory eq $cache_default_directory)
         { print ',<br />using the <em>default value</em> ',
                 'as none has been selected by the user'; }
    # does this directory already exist?
      print       '<br /><small><em>(this directory ';
      if (! -d $cache_directory)
               { print 'does not exist! gzip_cnc will try to create it ',
                       "when needed<br />but doesn't know right now ",
                       'whether this will work)</em></small> ',
                       '<span class="warning">&nbsp;WARNING'; }
         else  { print 'exists already)</em></small> ',
                       '<span class="okay">&nbsp;OKAY'; }
      print       '&nbsp;</span></p>';
    # -----------------------------------------------------------------
    # display gzip_cnc log file to be created
      print "\n", "<p><i>gzip_cnc</i>'s own <strong>log file</strong>:<br />",
                  ($logfile_path ? "&quot;<tt>$logfile_path</tt>&quot;"
                                 : '[none specified]'),
                  '<br /><small><em>(<i>gzip_cnc</i> will ';
      if ($logfile_path)
               { print 'try to write log messages into this file)</em></small></p>'; }
         else  { print 'not write any log messages)</em></small></p>'; }
    # -----------------------------------------------------------------
    # display error404 handler
      print "\n", '<p><strong>HTTP error 404 handling</strong>:<br />';
      if    (! $error404_handler)
            {
              # -------------------------------------------------------
              # no handler specified
                print '[none specified]<br /><small><em>(<i>gzip_cnc</i> ',
                      'will serve its own little error 404 document';
              # -------------------------------------------------------
            }
      elsif ($error404_is_url)
            {
              # -------------------------------------------------------
              # redirection required
                print '&quot;<tt><a href="' . $error404_handler . '">'
                    . $error404_handler . '</a></tt>&quot;<br />'
                    . '<small><em>(<i>gzip_cnc</i> will redirect to '
                    . 'this URL in case of requests for missing files';
              # -------------------------------------------------------
            }
      else
            {
              # -------------------------------------------------------
              # serve file
                print '&quot;<tt>' . $error404_handler . '</tt>&quot;<br />'
                    . '<small><em>(<i>gzip_cnc</i> will serve the content '
                    . 'of this file in case of requests for missing files';
              # -------------------------------------------------------
            }
      print       ')</em></small></p>';
    # -----------------------------------------------------------------
    # display MIME type of documents to be compressed
      print "\n", '<p><strong>MIME type</strong> of documents to be served: ',
                  '&quot;<tt>', $mime_type, '</tt>&quot;</p>';
    # -----------------------------------------------------------------
    # display whether we are sending proprietary HTTP headers
      print "\n", '<p>sending additional <strong>HTTP headers</strong>: ',
                  '&quot;<tt>', ($send_own_headers ? 'yes' : 'no'),
                  '</tt>&quot;</p>';
    # -----------------------------------------------------------------
    # display whether we are sending proprietary HTTP headers
      print "\n", '<p><strong>expiration interval</strong> for served pages: ',
                  '&quot;<tt>', $cache_expire_seconds, '</tt>&quot; seconds</p>';
    # -----------------------------------------------------------------
    # close HTML document
      print "\n", '</body></html>';
    # ----------------------------------------------------------------
    # write log entry and terminate program
      terminate ('99:SELF_TEST_OKAY', '-');
    # =================================================================
  }
# =====================================================================



#@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@
#@@@ main program @@@
#@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@

#######################
### parameter check ###
#######################

# =====================================================================
# cache tree root default
  if (! $cache_directory)
     { $cache_directory = $cache_default_directory; }
# ---------------------------------------------------------------------
# standardize directory separator in requested URL path
  my $path_translated = $ENV{'PATH_TRANSLATED'} || '';
  if ($^O =~ /Win32/i)
     { $path_translated =~ tr!\\!/!; }
#
# split translated path into directory and filename
  if ($path_translated !~ /^(.*)\/([^\/]+)$/)
     {
       # --------------------------------------------------------------
       # (if we aren't a handler we display diagnostic infos to stdout)
         self_test ();
       # --------------------------------------------------------------
     }
  my ($dir_translated, $file_translated) = ($1, $2);
# if we are an Apache handler, we should have got the (already
# translated) pathname (that may already be the result of a content
# negotiation!) via the CGI environment
# ---------------------------------------------------------------------
# but are we really an Apache handler? we must not allow direct
# invocations of our script that still set PATH_TRANSLATED:
  validate_handler_activation ();
# if we pass this, let's really start working.
# ---------------------------------------------------------------------
# requested file
  $pathname_requested = $path_translated;
# ---------------------------------------------------------------------
# in nearly all cases we will serve the original file
  $pathname_to_serve  = $pathname_requested;
# ---------------------------------------------------------------------
# split requested URL path into directory and filename
  if ($ENV{'PATH_INFO'} !~ /^(.*)\/([^\/]+)$/)
     {
       # --------------------------------------------------------------
       # serve original file if we can't address its cache instance
         serve_file ('03:NO_PATH_INFO', $pathname_requested);
       # --------------------------------------------------------------
     }
  my ($dir_info, $file_info) = ($1, $2);
# ---------------------------------------------------------------------
# pathname of the gzipped cache file corresponding to this request
  $url                = $ENV{'PATH_INFO'};
  $pathname_gzipped   = $cache_directory . $dir_info
                      . '/' . $file_translated . '.gz';
# =====================================================================



#######################################
### try to access the original file ###
#######################################
# we are doing this very early, as we need modification time and file
# size of the original file to compute appropriate HTTP header fields
# in case we have to serve the original file due to some other problem

# =====================================================================
# collect the attributes of the requested file
  ($file_size_uncompressed, $file_age_uncompressed)
    = (stat ($pathname_requested)) [7,9];
# ---------------------------------------------------------------------
# did we get this information?
  if (! $file_age_uncompressed)
     { handle_404 ('04:STAT_FAILED_ORIGINAL', $pathname_requested); }
# =====================================================================



###########################
### Content Negotiation ###
###########################
# check whether the client explicitly allowed us to serve gzipped content

# =====================================================================
# is the client willing to 'Accept-Encoding: gzip'-ped content?
  my $client_accepts_gzip
     = ($ENV {'HTTP_ACCEPT_ENCODING'} =~ /\bgzip\b/ ? 1 : 0);
# ---------------------------------------------------------------------
# if not, we will just do what Apache would have done as well
  if (! $client_accepts_gzip)
     { serve_file ('05:NO_ACCEPT_ENCODING', $url); }
# =====================================================================



######################################
### try to access the gzipped file ###
######################################

# =====================================================================
# collect the attributes of the gzipped file
  ($file_size_compressed, $file_age_compressed)
     = (stat ($pathname_gzipped)) [7,9];
# ---------------------------------------------------------------------
# did we get this information?
  if (! $file_age_compressed)
     { make_cache_entry ($use_zlib ? '06:CREATED_ZLIB'
                                   : '07:CREATED_GZIP'); }
# at this point we have the cache file and know its age and size
# ---------------------------------------------------------------------
# both files are on auction - but is the cache content valid?
  if ($file_age_compressed < $file_age_uncompressed)
     { make_cache_entry ($use_zlib ? '08:UPDATED_ZLIB'
                                   : '09:UPDATED_GZIP'); }
# ---------------------------------------------------------------------
# the cache content is valid - but does it pay to use it?
  if ($file_size_compressed > $file_size_uncompressed)
     { serve_file ('10:ORIGINAL_SMALLER', $url); }
# ---------------------------------------------------------------------
# we finally know we do want to serve the cache file
  $pathname_to_serve = $pathname_gzipped;
  $use_cache_content = 1;
  serve_file ('', $url);
# =====================================================================

(Michael Schröpl, 2002-09-08)