#! /usr/bin/env python
"""
Run logresolve and webalizer on all of your virtual web sites.

Bruce Perens, March 1999.

Copyright 1999 Bruce Perens. This is free software under the GNU General
Public License published by the Free Software Foundation, version 2 or
any later version.

This program will move all of your log files to files suffixed ".today"
and then will restart your httpd daemon. It will run logresolve on all of
the access logs and append the compressed output to a file: for example
/etc/httpd/logs/perens.com-access_log.today would have its IP addresses
expanded to domain names, and it would be compressed and appended to
/etc/httpd/logs/perens.com-access_log.gz .

This program will then create the directory "statistics" under each
virtual web site's document root, and will run Webalizer to fill that
directory with HTML pages containing the statistics.

Edit the configuration variables for the specifics of your system,
including where the log files are stored, where the document roots are,
etc.
"""

import glob
import os
import re
import stat
import sys
import types  # no longer used below; kept so existing users of this module are unaffected

# CONFIGURATION VARIABLES

# Shell path for system().
Path = "/bin:/usr/bin:/sbin:/usr/sbin"

# LogPattern is a Perl-style regular expression that must match all of the
# access log files of your virtual web sites, and nothing else. The
# parenthesized part of the pattern must start with "?P<site>", which
# names this part of the match for later retrieval, and must match the
# portion of the name that says what virtual site this log is for.
# That substring must be repeated within the name of the document root for
# that virtual site. For example, the log file for the virtual site
# perens.com is /etc/httpd/logs/perens.com-access_log, and the document
# root is /home/httpd/perens.com/html . The parenthesized part of the
# expression matches "perens.com".
LogPattern = r"^/etc/httpd/logs/(?P<site>.*)-access_log$"

# LogGlob is a Shell-style pattern used to find files that match
# LogPattern, above. This will go away if I re-write the "glob" module to
# use Perl-style regular expressions and return match objects.
LogGlob = "/etc/httpd/logs/*-access_log"  # Shell pattern to find logs

# DocumentRootPattern is a printf pattern that is used to match the document
# roots of your virtual sites. The embedded %s will expand to the
# parenthesized part of LogPattern.
DocumentRootPattern = "/home/httpd/%s/html"

# Restart command is run to re-start your web server. This version is for
# graceful restart of Apache 1.3 and higher, including Red Hat Secure Web
# Server (which calls itself "httpsd"). For lower versions of Apache, kill
# and restart the server.
RestartCommand = "killall -q -USR1 httpd httpsd; true"

WebalizerConf = "/home/httpd/webalizer.conf"  # Webalizer conf file.

# Niceness is passed to os.nice() before running logresolve and webalizer.
Niceness = 10

# Incremental is non-zero if webalizer incremental processing should be used.
Incremental = 0

# Paths of external programs.
Webalizer = "/usr/bin/webalizer"
Gzip = "/bin/gzip"

# The program you use to resolve web log IP addresses to domain names.
# It must be capable of running in a pipeline.
LogResolve = "/usr/sbin/logresolve"

# End of configuration variables.


class RunException(Exception):
    """
    RunException is thrown by the run() and system() routines of this
    module upon the failure of an external program to execute properly.

    Superclass: Exception

    Constructor: RunException(command, exitStatus, signal, dumpedCore)

    Arguments / members:
        command: the external program that failed. run() passes the full
            argument vector (argv[0] is the program's pathname).
        exitStatus: the exit status of the external program.
        signal: the signal number, if the external program got one.
        dumpedCore: non-zero if the external program dumped core.
    """

    command = ""
    exitStatus = 0
    signal = 0
    dumpedCore = 0

    def __init__(self, command, exitStatus, signal, dumpedCore):
        # Hand the real values (not the class-level defaults) to Exception
        # so that self.args reflects what actually happened.
        Exception.__init__(self, command, exitStatus, signal, dumpedCore)
        self.command = command
        self.exitStatus = exitStatus
        self.signal = signal
        self.dumpedCore = dumpedCore

    def __repr__(self):
        return "RunException(" + self.__str__() + ")"

    def __str__(self):
        return "command=%s, exitStatus=%d, signal=%d, dumpedCore=%d" \
            % (self.command, self.exitStatus, self.signal, self.dumpedCore)


class Site:
    # I just use this as a dictionary.
    pass


def run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr,
        wait=1, beforeExec=None, env=os.environ, arg=None):
    """
    Execute an external command, and throw RunException if the command
    fails to execute properly (exits with non-zero status or is killed by
    a signal).

    Arguments:
        command: full pathname of the external program, this should be a
            valid first argument to os.execve().
        arg: a list of arguments to the program (or a single string for
            one argument). This is passed as the value of argv[1] and
            subsequent arguments, _not_ argv[0]. The value of argv[0] is
            taken from "command", above. The caller's list is not modified.
        env: a mapping containing the environment to be passed to the
            program.
        stdin: a file to use for standard input, or a string containing
            the name of a file to be opened for input and used for the
            standard input.
        stdout: a file to use for standard output, or a string containing
            the name of a file to be opened for output and used for the
            standard output.
        stderr: a file to use for standard error output, or a string
            containing the name of a file to be opened for output and used
            for the standard error output.
        beforeExec: function to run after fork(), but before execve(). It
            is called with one argument (the child's fork() result, 0).
            If you want to change user-ID, group-ID, or any process
            parameter at all in the child process, put code to do that in
            this function.
        wait: if non-zero, this function will wait for the program to
            finish executing, and will raise an exception if the program
            exits with a status other than 0, or is killed by a signal.

    Return:
        A process id that can be passed to os.waitpid() if wait==0,
        otherwise does not return any value.

    Raises:
        RunException: if wait is non-zero and the program fails. A child
            that could not exec the program at all exits with status 255.
    """
    # Build argv on a private copy so the caller's list is left untouched,
    # and put the command where execve() expects it: at argv[0].
    if arg is None:
        argv = []
    elif isinstance(arg, str):
        argv = [arg]
    else:
        argv = list(arg)
    argv.insert(0, command)

    opened = []  # files opened here (from names) that we must close again
    try:
        if isinstance(stdin, str):
            stdin = open(stdin, "r")
            opened.append(stdin)
        if isinstance(stdout, str):
            stdout = open(stdout, "w")
            opened.append(stdout)
        if isinstance(stderr, str):
            stderr = open(stderr, "w")
            opened.append(stderr)

        pid = os.fork()
        if pid == 0:
            # Child process. Whatever happens, it must never return into
            # the parent's code: the finally clause guarantees it exits.
            try:
                # Descriptors 0, 1 and 2 are the standard streams of the
                # program about to be executed.
                if stdin is not sys.stdin:
                    os.dup2(stdin.fileno(), 0)
                if stdout is not sys.stdout:
                    os.dup2(stdout.fileno(), 1)
                if stderr is not sys.stderr:
                    os.dup2(stderr.fileno(), 2)
                if beforeExec is not None:
                    beforeExec(pid)
                os.execve(command, argv, env)
            except OSError as err:
                sys.stderr.write("%s: %s\n" % (command, err))
                sys.stderr.flush()
            finally:
                os._exit(255)
    finally:
        # Parent (or a failed open()/fork()): release our copies of any
        # files we opened by name; the child holds its own descriptors.
        for stream in opened:
            stream.close()

    if wait == 0:
        return pid

    status = os.waitpid(pid, 0)[1]
    if status != 0:
        # Traditional wait() status layout: high byte is the exit code,
        # low 7 bits the terminating signal, bit 0x80 the core-dump flag.
        raise RunException(command=argv,
                           exitStatus=(status >> 8) & 0xff,
                           signal=status & 0x7f,
                           dumpedCore=status & 0x80)


def system(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr,
           wait=1, beforeExec=None, env=os.environ):
    """
    Execute a string as a /bin/sh command, and throw RunException if the
    shell fails to execute properly (exits with non-zero status or gets
    killed by a signal).

    Arguments:
        command: A string to be executed by the shell.
        env: a mapping containing the environment to be passed to the
            program.
        stdin: a file to use for standard input, or a string containing
            the name of a file to be opened for input and used for the
            standard input.
        stdout: a file to use for standard output, or a string containing
            the name of a file to be opened for output and used for the
            standard output.
        stderr: a file to use for standard error output, or a string
            containing the name of a file to be opened for output and used
            for the standard error output.
        beforeExec: function to run after fork(), but before execve(). If
            you want to change user-ID, group-ID, or any process parameter
            at all in the child process, put code to do that in this
            function.
        wait: if non-zero, this function will wait for the program to
            finish executing, and will raise an exception if the program
            exits with a status other than 0, or is killed by a signal.

    Return:
        A process id that can be passed to os.waitpid() if wait==0,
        otherwise does not return any value.
    """
    return run("/bin/sh", arg=["-c", command], stdin=stdin, stdout=stdout,
               stderr=stderr, wait=wait, beforeExec=beforeExec, env=env)


def main():
    """
    Rotate every matching access log, restart the web server, then
    resolve, compress and analyse the rotated logs for each site.
    """
    os.environ["PATH"] = Path
    pattern = re.compile(LogPattern)

    # Pass 1: rotate the log of every site whose document root exists.
    siteList = []
    for logfile in glob.glob(LogGlob):
        match = pattern.match(logfile)
        if match is None:
            sys.stderr.write(
                "Not processing %s because it doesn't match the pattern %s.\n"
                % (logfile, LogPattern))
            continue

        site = Site()
        site.name = match.group("site")
        site.logFile = logfile
        site.documentRoot = DocumentRootPattern % site.name
        if not os.path.exists(site.documentRoot):
            sys.stderr.write(
                "Not processing %s because %s does not exist\n"
                % (site.logFile, site.documentRoot))
            continue

        os.rename(site.logFile, site.logFile + ".today")
        siteList.append(site)

    # The server must reopen its logs before the rotated files are final.
    system(RestartCommand)

    os.nice(Niceness)

    # Pass 2: resolve addresses and append to the compressed archive.
    for site in siteList:
        statsDir = site.documentRoot + "/statistics"
        today = site.logFile + ".today"
        resolved = site.logFile + ".resolved"
        archive = site.logFile + ".gz"

        if not os.path.exists(statsDir):
            os.mkdir(statsDir, 0o755)

        if not os.path.exists(today):
            continue

        if os.stat(today)[stat.ST_SIZE] > 0:
            if Incremental:
                # Resolve once, feed webalizer only today's entries, then
                # archive the resolved log.
                run(LogResolve, stdin=today, stdout=resolved)
                run(Webalizer,
                    arg=["-n", site.name, "-c", WebalizerConf,
                         "-o", statsDir],
                    stdin=resolved)
                with open(archive, "ab") as out:
                    run(Gzip, arg="-9", stdin=resolved, stdout=out)
                os.unlink(resolved)
            else:
                with open(archive, "ab") as out:
                    system(LogResolve + " | " + Gzip + " -9",
                           stdin=today, stdout=out)

        # NOTE(review): the original indentation was lost; removing the
        # rotated file here (even when it was empty) is the assumed intent.
        os.unlink(today)

    # Pass 3 (non-incremental only): rebuild statistics from the whole
    # accumulated archive of each site.
    if not Incremental:
        for site in siteList:
            archive = site.logFile + ".gz"
            if os.path.exists(archive):
                statsDir = site.documentRoot + "/statistics"
                system("gunzip | " + Webalizer + " -n " + site.name
                       + " -c " + WebalizerConf + " -o " + statsDir,
                       stdin=archive)

    sys.exit(0)


if __name__ == "__main__":
    main()