# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/
#
# Copyright (C) 1999-2018 Peter Thoeny, peter[at]thoeny.org
# and TWiki Contributors. All Rights Reserved. TWiki Contributors
# are listed in the AUTHORS file in the root of this distribution.
# NOTE: Please extend that file, not this notice.
#
# Additional copyrights apply to some or all of the code in this
# file as follows:
# Copyright (C) 2002 Richard Donkin, rdonkin@bigfoot.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version. For
# more details read LICENSE in the root of this distribution.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# As per the GPL, removal of this notice is prohibited.
package ParaLogReader;
# An internal class to read TWiki log entries in the chronological order from
# multiple log files.
# Usage:
#
# my $reader = ParaLogReader->new(@logFiles);
# while ( my $line = $reader->getline() ) {
# ...
# }
#
use strict;
use IO::File;
sub insert {
my ($arrayRef, $val) = @_;
my $idx = 0;
while ( $idx < @$arrayRef ) {
last if ( $val lt $arrayRef->[$idx] );
$idx++;
}
splice(@$arrayRef, $idx, 0, $val);
$idx;
}
sub readEnt {
my $fh = shift;
my $line;
while ( $line = <$fh> ) {
last if ( $line =~ /^\| \d{4}-\d\d-\d\d - \d\d:\d\d/ );
}
$line;
}
sub new {
my ($class, @files) = @_;
my $self = bless {}, $class;
my @fhs;
my @heads;
for my $i ( @files ) {
my $fh = IO::File->new("< $i");
if ( $fh ) {
my $line = readEnt($fh);
if ( $line ) {
splice(@fhs, insert(\@heads, $line), 0, $fh);
}
else {
$fh->close();
}
}
}
$self->{fhs} = \@fhs;
$self->{heads} = \@heads;
$self;
}
sub getline {
my ($self) = @_;
my $fhsRef = $self->{fhs};
my $headsRef = $self->{heads};
return undef if ( @$headsRef == 0 );
my $val = shift @$headsRef;
my $fh = shift @$fhsRef;
my $next = readEnt($fh);
if ( $next ) {
splice(@$fhsRef, insert($headsRef, $next), 0, $fh);
}
else {
$fh->close();
}
$val;
}
#===========================================================
=begin twiki
---+ package TWiki::UI::Statistics
Statistics extraction and presentation
=cut
package TWiki::UI::Statistics;
use strict;
use Assert;
use File::Temp qw/ :seekable /;
use Error qw( :try );
require TWiki;
require TWiki::Sandbox;
my $debug = 0;
BEGIN {
# Do a dynamic 'use locale' for this module
if( $TWiki::cfg{UseLocale} ) {
require locale;
import locale();
}
}
=pod
---++ StaticMethod statistics( $session )
=statistics= command handler.
This method is designed to be
invoked via the =UI::run= method.
Generate statistics topic.
If a web is specified in the session object, generate WebStatistics
topic update for that web. Otherwise do it for all webs
=cut
#===========================================================
sub statistics {
my $session = shift;
my $webName = $session->{webName};
# web to redirect to after finishing
my $logDate = $session->{request}->param( 'logdate' ) || '';
$logDate =~ s/[^0-9]//g; # remove all non numerals
$debug = $session->{request}->param( 'debug' );
unless( $session->inContext( 'command_line' )) {
# running from CGI
$session->generateHTTPHeaders();
$session->{response}->body(
CGI::start_html( -title => 'TWiki: Create Usage Statistics' ) );
if ( $TWiki::cfg{Stats}{DisableInvocationFromBrowser} ) {
_printMsg( $session,
'This script is not for interactive use from browser' );
$session->{response}->body( $session->{response}->body .
CGI::end_html() );
return;
}
}
# Initial messages
_printMsg( $session, 'TWiki: Create Usage Statistics' );
_printMsg( $session, '!Do not interrupt this script!' );
_printMsg( $session, '(Please wait until page download has finished)' );
require TWiki::Time;
my $currentMonth = TWiki::Time::formatTime( time(), '$year$mo', 'gmtime' );
unless( $logDate ) {
$logDate = $currentMonth;
}
my $logMon;
my $logMo;
my $logYear;
if ( $logDate =~ /^(\d\d\d\d)(\d\d)$/ ) {
$logYear = $1;
$logMo = $2;
$logMon = $TWiki::Time::ISOMONTH[ ( $logMo % 12 ) - 1 ];
$currentMonth = ( $logDate eq $currentMonth ) ? 1 : 0;
} else {
_printMsg( $session, "!Error in date $logDate - must be YYYY-MM or YYYYMM" );
return;
}
my $logMonYear = "$logMon $logYear";
my $logYearMo = "$logYear-$logMo";
_printMsg( $session, "* Statistics for $logYearMo" );
_printMsg( $session, '* Executed by ' . $session->{users}->getWikiName( $session->{user} ) );
my @logFiles;
if ( my $logFileGlob = $TWiki::cfg{Stats}{LogFileGlob} ) {
$logFileGlob =~ s/%DATE%/$logDate/g;
@logFiles = glob $logFileGlob;
if ( @logFiles == 0 ) {
_printMsg( $session, "!Log files $logFileGlob do not exist; aborting" );
return;
}
}
else {
my $logFile = $TWiki::cfg{LogFileName};
$logFile =~ s/%DATE%/$logDate/g;
unless( -e $logFile ) {
_printMsg( $session, "!Log file $logFile does not exist; aborting" );
return;
}
@logFiles = ($logFile);
}
# Do a single data collection pass on the temporary copy of logfile,
# then process each web once.
my $stats = _collectLogData( $session, @logFiles );
_printMsg( $session, "* Finished collecting log data at " .
TWiki::Time::formatTime(time()) );
my %roWeb;
my $roWebStatsOn;
my @weblist;
my $webSet = TWiki::Sandbox::untaintUnchecked($session->{request}->param( 'webs' ))
|| $session->{requestedWebName};
if( $webSet) {
# do specific webs
push( @weblist,
grep { TWiki::Func::webExists( $_ ) }
map { s/$TWiki::cfg{NameFilter}//go; $_; }
split( /,\s*/, $webSet )
);
} else {
# otherwise do all user webs:
@weblist = $session->{store}->getListOfWebs( 'user,writable' );
# add read-only webs if appropriate
if ( $TWiki::cfg{Stats}{ReadOnlyWebs} ) {
if ( $roWebStatsOn = $TWiki::cfg{Stats}{ReadOnlyWebStatsOn} ) {
my %included = map { $_ => 1 } @weblist;
if ( $included{$roWebStatsOn} ) {
%roWeb = map { $_ => 1 }
@{$TWiki::cfg{Stats}{ReadOnlyWebs}};
push(@weblist, @{$TWiki::cfg{Stats}{ReadOnlyWebs}});
}
}
}
if ( $TWiki::cfg{Stats}{ExcludedWebRegex} ) {
@weblist = grep { !/$TWiki::cfg{Stats}{ExcludedWebRegex}/ }
@weblist;
}
}
# do site statistics (only if no specific webs selected, or if force update from SiteStatistics)
my $siteStatsTopic = $TWiki::cfg{Stats}{SiteStatsTopicName} || 'SiteStatistics';
# append YYYY to the end of SiteStatistics if $cfg{Stats}{TopicPerYear} = 1
$siteStatsTopic .= $logYear
if ( $TWiki::cfg{Stats}{TopicPerYear} );
my $usersWebMode = $session->getContentMode($TWiki::cfg{UsersWebName});
if( (!$webSet || $session->{topicName} eq $siteStatsTopic) &&
($usersWebMode eq 'local' || $usersWebMode eq 'master')
# Site statistics written on the {UsersWebName} web, if the web is not
# writable on this site, it's no use doing site statisitcs
) {
try {
my $siteStats = _collectSiteStats( $session, $currentMonth,
$logYearMo, $stats );
_processSiteStats( $session, $logYear, $logYearMo, $logMonYear, $siteStats );
} catch TWiki::AccessControlException with {
_printMsg( $session, ' - ERROR: no permission to CHANGE site statistics topic');
}
}
foreach my $web ( @weblist ) {
my $webToSave;
if ( $roWeb{$web} ) {
$webToSave = $roWebStatsOn;
}
try {
_processWeb( $session, $web, $logYear, $logYearMo, $logMonYear,
$stats, $webToSave );
} catch TWiki::AccessControlException with {
_printMsg( $session, ' - ERROR: no permission to CHANGE statistics topic in '.$web);
}
}
if( !$session->inContext( 'command_line' ) ) {
my $web = $session->{webName};
my $topic = $session->{topicName};
if( $topic eq $TWiki::cfg{HomeTopicName} ) {
$web = $TWiki::cfg{UsersWebName};
$topic = $siteStatsTopic;
}
my $url = $session->getScriptUrl( 0, 'view', $web, $topic );
_printMsg( $session, '* Go to '
. CGI::a( { href => $url,
rel => 'nofollow' }, "$web.$topic") );
}
_printMsg( $session, 'End creating usage statistics' );
$session->{response}->body( $session->{response}->body . CGI::end_html() )
unless ( $session->inContext('command_line') );
}
#===========================================================
# Debug only
# Print all entries in a view or contrib hash, sorted by web and item name
#===========================================================
sub _debugPrintHash {
my ($statsRef) = @_;
# print "Main.WebHome views = " . ${$statsRef}{'Main'}{'WebHome'}."\n";
# print "Main web, TWikiGuest contribs = " . ${$statsRef}{'Main'}{'Main.TWikiGuest'}."\n";
foreach my $web ( sort keys %$statsRef) {
my $count = 0;
print $web,' web:',"\n";
# Get reference to the sub-hash for this web
my $webhashref = ${$statsRef}{$web};
# print 'webhashref is ' . ref ($webhashref) ."\n";
# Items can be topics (for view hash) or users (for contrib hash)
foreach my $item ( sort keys %$webhashref ) {
print " $item = ",( ${$webhashref}{$item} || 0 ),"\n";
$count += ${$webhashref}{$item};
}
print " WEB TOTAL = $count\n";
}
}
#===========================================================
# Process the whole log file and collect information in hash tables.
# Must build stats for all webs, to handle case of renames into web
# requested for a single-web statistics run.
#
# Main hash tables are divided by web:
#
# $view{$web}{$TopicName} == number of views, by topic
# $contrib{$web}{"Main.".$WikiName} == number of saves/uploads, by user
#===========================================================
sub _collectLogData {
my( $session, @logFiles ) = @_;
# Log file format:
# | date | user | operation | web.topic | notes | ip address |
# date, such as "2011-04-17 - 02:43" (or "03 Feb 2000 - 02:43" up to TWiki-4.2)
# user, such as "Main.PeterThoeny" (legacy)
# user, such as "PeterThoeny" (TWiki internal authentication)
# user, such as "peter" (intranet login)
# operation, such as "view", "edit", "save"
# web.topic, such as "MyWeb.MyTopic"
# notes, such as "minor", "not on thursdays"
# ip address, such as "127.0.0.5"
my %view; # Hash of hashes, counts topic views by (web, topic)
my %save; # Hash of hashes, counts topic saves by (web, topic)
my %contrib; # Hash of hashes, counts uploads/saves by (web, user)
my %viewer; # Hash of hashes, counts views by (web, user)
# Hashes for each type of statistic, one hash entry per web
my %statViews;
my %statViewsBreakdown;
my %statViewsUnique;
my %statSaves;
my %statSavesBreakdown;
my %statSavesUnique;
my %statUploads;
my %statUploadsBreakdown;
my %statUploadsUnique;
my $users = $session->{users};
# Copy the log file to temp file, since analysis could take some time
my $tmpFileHandle;
if ( @logFiles > 1 ) {
$tmpFileHandle = ParaLogReader->new(@logFiles);
}
else {
$tmpFileHandle = new File::Temp(
DIR => $session->{store}->getWorkArea( 'CoreStatistics' ),
TEMPLATE => 'twiki-stats-XXXXXXXXXX',
SUFFIX => '.txt',
# UNLINK => 0 # To debug, uncomment this to keep the temp file
);
# Don't use File::Copy, it does not work with File::Temp older than 0.22
_copy( $logFiles[0], $tmpFileHandle ) or throw Error::Simple( 'Cannot backup log file: '.$! );
# Seek to start of temp file
$tmpFileHandle->seek( 0, 0 );
}
my $breakdown = $TWiki::cfg{Stats}{Breakdown};
my %u2a;
my $excluded;
if ( $TWiki::cfg{Stats}{ExcludedWebRegex} ) {
$excluded = qr/$TWiki::cfg{Stats}{ExcludedWebRegex}/;
}
else {
$excluded = qr/^$/; # a regex not matching any web
}
# main log file loop, line by line
while ( my $line = $tmpFileHandle->getline ) {
my @fields = split( /\s*\|\s*/, $line );
my( $date, $logFileUserName, $affiliation );
while( !$date && scalar( @fields )) {
$date = shift @fields;
}
while( !$logFileUserName && scalar( @fields )) {
$logFileUserName = shift @fields;
$logFileUserName = TWiki::Func::getCanonicalUserID($logFileUserName);
}
if ( $breakdown ) {
if ( !defined($affiliation = $u2a{$logFileUserName}) ) {
$affiliation = $users->getAffiliation($logFileUserName);
$affiliation = 'Unknown' if ( !defined($affiliation) );
$u2a{$logFileUserName} = $affiliation;
}
}
my( $opName, $webTopic, $notes, $ip ) = @fields;
# ignore minor changes - not statistically helpful
next if( $notes && $notes =~ /(minor|dontNotify)/ );
# ignore op names we don't need
next unless( $opName && $opName =~ /^(view|save|upload|rename)$/ );
# .+ is used because topics name can contain stuff like !, (, ), =, -, _ and they should have stats anyway
if( $opName && $webTopic =~ /(^$TWiki::regex{webNameRegex})\.(.+)/ ) {
my $webName = $1;
my $topicName = $2;
# ignore excluded webs
next if( $webName =~ $excluded );
my $fqWikiName = $users->webDotWikiName($logFileUserName);
if( $opName eq 'view' ) {
next if( $topicName =~ /^(WebAtom|WebRss|WebStatistics\d*)$/ );
next if( $notes &&
($notes =~ /\(not exist\)/ ||
$notes =~ /\(web doesn\'t exist\)/) );
$statViews{$webName}++;
$statViewsBreakdown{$webName}{$affiliation}++
if ( $breakdown );
$statViewsUnique{$webName}{$logFileUserName} = 1;
$view{$webName}{$topicName}++;
$viewer{$webName}{$fqWikiName}++;
} elsif( $opName eq 'save' ) {
next if( $topicName =~ /^(SiteStatistics|WebStatistics)\d*$/ );
$statSaves{$webName}++;
$statSavesBreakdown{$webName}{$affiliation}++
if ( $breakdown );
$statSavesUnique{$webName}{$logFileUserName} = 1;
$save{$webName}{$topicName}++;
$contrib{$webName}{$fqWikiName}++;
} elsif( $opName eq 'upload' ) {
$statUploads{$webName}++;
$statUploadsBreakdown{$webName}{$affiliation}++
if ( $breakdown );
$statUploadsUnique{$webName}{$logFileUserName} = 1;
$contrib{$webName}{$fqWikiName}++;
} elsif( $opName eq 'rename' ) {
# Pick up the old and new topic names
$notes =~/moved to ($TWiki::regex{webNameRegex})\.($TWiki::regex{wikiWordRegex}|$TWiki::regex{abbrevRegex}|\w+)/o;
my $newTopicWeb = $1;
my $newTopicName = $2;
# Get number of views for old topic this month (may be zero)
my $oldViews = $view{$webName}{$topicName} || 0;
# Transfer views from old to new topic
$view{$newTopicWeb}{$newTopicName} = $oldViews
unless ( $newTopicWeb =~ $excluded );
delete $view{$webName}{$topicName};
# Transfer views from old to new web
if ( $newTopicWeb ne $webName ) {
$statViews{$webName} -= $oldViews;
$statViews{$newTopicWeb} += $oldViews;
}
}
} else {
$session->writeDebug('WebStatistics: Bad logfile line '.$line)
if ( $debug && $webTopic !~ /^_/ );
}
}
# Note: No need to close $tmpFileHandle, temp file is removed by destructor
return {
view => \%view,
viewer => \%viewer,
views => \%statViews,
viewsb => \%statViewsBreakdown,
viewsu => \%statViewsUnique,
save => \%save,
saves => \%statSaves,
savesu => \%statSavesUnique,
savesb => \%statSavesBreakdown,
uploads => \%statUploads,
uploadsb => \%statUploadsBreakdown,
uploadsu => \%statUploadsUnique,
contrib => \%contrib,
};
}
#===========================================================
sub _copy {
my( $fromFile, $toHandle ) = @_;
open( FROM, '<', $fromFile ) or return;
binmode( FROM ); # $toHandle is already binmode
my $buff;
while( read( FROM, $buff, 8 * 2**10 ) ) {
print $toHandle $buff;
}
close( FROM ) or return; # do not close $toHandle
return 1;
}
#===========================================================
sub _getDiskUse {
my( $session, $dir ) = @_;
my $diskUse = 0;
my $cmd = $TWiki::cfg{Stats}{dfCmd} || 'df %DIRECTORY|F%';
my( $output, $exit );
try {
( $output, $exit ) = $TWiki::sandbox->sysCommand( $cmd, DIRECTORY => $dir );
if( $exit ) {
_printMsg( $session, " - ERROR: $cmd of $dir failed: $exit $output" );
return 0;
} elsif( $output =~ /^.*[ \t]([0-9\.]+)\%.*?$/s ) {
return $1;
}
return 0;
} catch Error::Simple with {
my $message = shift->{-text};
_printMsg( $session, " - ERROR: $cmd of $dir failed: $message" );
return 0;
}
}
#===========================================================
sub _getDirSize {
my( $dir ) = @_;
my $size = 0;
opendir( DIR, $dir ) || return $size;
my @files = map { $dir . '/' . $_ } # create full path
grep { !/^\.\.?$/ } # omit . and .. files
readdir( DIR );
closedir( DIR );
foreach my $f ( @files ) {
if( -d $f ) {
$size += _getDirSize( $f );
} else {
$size += ( -s $f || 0 );
}
}
return $size;
}
# Align numbers by putting two s for a digit. This is crude but
# works way better than single &nbps; per diget.
sub _alignNums {
defined($_[0]) or return ();
$_[0] =~ /^(\d*)/;
my $width = length($1);
return
map { s/^(\d+)/(' ' x (2*($width - length($1)))) . $1/e; $_ } @_;
}
sub _nbsp {
my $str = shift;
$str =~ s/\&/&/g;
$str =~ s/ / /g;
return $str;
}
sub _numberUniqueBreakdown {
my ($what, $stats, $maxAffiliations, $web) = @_;
my ($n, $nu);
if ( $web ) {
$n = $stats->{$what}{$web} || 0;
my $ref = $stats->{$what.'u'}{$web};
$nu = ref $ref eq 'HASH' ? scalar keys %$ref : 0;
}
else {
$n = 0;
foreach my $v ( values %{$stats->{$what}} ) {
$n += ( $v || 0 );
}
my %unique;
foreach my $v ( values %{$stats->{$what.'u'}} ) {
@unique{keys %$v} = (1) x (scalar keys %$v);
}
$nu = scalar keys %unique;
}
my $result = $n . CGI::br() . "($nu unique users)";
if ( $TWiki::cfg{Stats}{Breakdown} && $n ) {
my $breakdown;
if ( $web ) {
$breakdown = $stats->{$what.'b'}{$web};
}
else {
my %bd;
foreach my $v ( values %{$stats->{$what.'b'}} ) {
foreach my $w ( keys %$v ) {
$bd{$w} += ( $v->{$w} || 0 );
}
}
$breakdown = \%bd;
}
my $i = 0;
my @list =
grep { $i++ < $maxAffiliations }
map { _nbsp("$breakdown->{$_} $_") }
sort { $breakdown->{$b} <=> $breakdown->{$a} }
keys %$breakdown;
$result .= CGI::br() . join(CGI::br(), _alignNums(@list));
}
unless ( $TWiki::cfg{Stats}{Breakdown} ) {
$result = ' ' . $result;
# to make the cell right aligned.
}
return $result;
}
sub _extractNumber {
my $nub = shift;
$nub =~ /^(\d*)/;
return $1;
}
#===========================================================
sub _collectSiteStats {
my( $session, $currentMonth, $logYearMo, $stats) = @_;
_printMsg( $session, '* Reporting overall statistics' );
my $siteStats;
my $site = $TWiki::cfg{DefaultUrlHost} . $TWiki::cfg{ScriptUrlPath};
my $ff = chr(255) x length( $site );
$site = $site ^ $ff; # obfuscate site name
$siteStats->{statSite} = uc( unpack( "H*", $site ) ); # hex encode
$siteStats->{statVersion} = $TWiki::VERSION;
$siteStats->{statVersion} =~ s/[, ].*//;
$siteStats->{statVersion} = '' unless( $currentMonth );
$siteStats->{statDate} = $logYearMo;
my @weblist = $session->{store}->getListOfWebs( 'user' );
if ( $TWiki::cfg{Stats}{ExcludedWebRegex} ) {
@weblist = grep { !/$TWiki::cfg{Stats}{ExcludedWebRegex}/ } @weblist;
}
$siteStats->{statWebs} = scalar @weblist;
$siteStats->{statWebs} = 0 unless( $currentMonth );
$siteStats->{statTopics} = 0;
$siteStats->{statAttachments} = 0;
if( $currentMonth ) {
foreach my $w ( @weblist ) {
my @topics = $session->{store}->getTopicNames( $w );
# add number of topics in web
$siteStats->{statTopics} += scalar @topics;
# add number of attachments in web using a search
my $result = $session->{store}->searchInWebContent(
'[%]META:FILEATTACHMENT{', $w, \@topics,
{ type => 'regex' }
);
foreach my $topic ( keys %$result ) {
$siteStats->{statAttachments} += scalar @{$result->{$topic}};
}
}
_printMsg( $session,
" - webs: " . _extractNumber($siteStats->{statWebs}) .
", topics: " . _extractNumber($siteStats->{statTopics}) .
", attachments: " . _extractNumber($siteStats->{statAttachments}) );
}
my $ma = $TWiki::cfg{Stats}{SiteTopAffiliation}; # max affiliations
$siteStats->{statViews} = _numberUniqueBreakdown('views', $stats, $ma);
$siteStats->{statSaves} = _numberUniqueBreakdown('saves', $stats, $ma);
$siteStats->{statUploads} = _numberUniqueBreakdown('uploads', $stats, $ma);
my $statWebsViewed = scalar keys %{$stats->{view}};
if ( $TWiki::cfg{Stats}{SiteTopViews} ) {
my @topViews =
_getTopList($TWiki::cfg{Stats}{SiteTopViews}, $stats->{views}, 1);
if ( @topViews ) {
$statWebsViewed .= CGI::br() . join(CGI::br(), @topViews);
$topViews[0] =~ s/^.*\]\[([^\]]*).*$/$1/;
_printMsg( $session, ' - top web viewed: '.$topViews[0] );
}
}
else {
$statWebsViewed = ' ' . $statWebsViewed;
}
$siteStats->{statWebsViewed} = $statWebsViewed;
$siteStats->{statTopicsViewed} = 0;
foreach my $v ( values %{$stats->{view}} ) {
$siteStats->{statTopicsViewed} += scalar keys %$v
if ( ref $v eq 'HASH' );
}
my $statWebsUpdated = scalar keys %{$stats->{save}};
if ( $TWiki::cfg{Stats}{SiteTopUpdates} ) {
my @topUpdates =
_getTopList($TWiki::cfg{Stats}{SiteTopUpdates}, $stats->{saves}, 1);
if ( @topUpdates ) {
$statWebsUpdated .= CGI::br() . join(CGI::br(), @topUpdates);
$topUpdates[0] =~ s/^.*\]\[([^\]]*).*$/$1/;
_printMsg( $session, ' - top web updated: '.$topUpdates[0] );
}
}
else {
$statWebsUpdated = ' ' . $statWebsUpdated;
}
$siteStats->{statWebsUpdated} = $statWebsUpdated;
$siteStats->{statTopicsUpdated} = 0;
foreach my $v ( values %{$stats->{save}} ) {
$siteStats->{statTopicsUpdated} += scalar keys %$v
if ( ref $v eq 'HASH' );
}
_printMsg( $session,
" - view: " . _extractNumber($siteStats->{statViews}) .
", save: " . _extractNumber($siteStats->{statSaves}) .
", upload: " . _extractNumber($siteStats->{statUploads}) );
$siteStats->{statDataSize} = 0;
$siteStats->{statPubSize} = 0;
if( $currentMonth ) {
my $dataSize = 0;
my $pubSize = 0;
for my $diskID ( $session->getDiskList() ) {
$dataSize +=
_getDirSize($session->getDataDir('', $diskID)) / ( 1024 * 1024 );
$pubSize +=
_getDirSize($session->getPubDir('', $diskID)) / ( 1024 * 1024 );
}
$siteStats->{statDataSize} = sprintf("%0.1f", $dataSize );
$siteStats->{statPubSize} = sprintf("%0.1f", $pubSize );
_printMsg( $session, " - data size: " . $siteStats->{statDataSize} .
" MB, pub size: " . $siteStats->{statPubSize} . " MB" );
}
$siteStats->{statDiskUse} = 0;
if( $currentMonth ) {
my $totalDiskUse = '';
for my $diskID ( $session->getDiskList() ) {
my $dataUse =
_getDiskUse( $session, $session->getDataDir('', $diskID) );
my $pubUse =
_getDiskUse( $session, $session->getPubDir('', $diskID) );
if( $pubUse > $dataUse ) {
# pub is mounted on different disk, report this one as the more
# critical one
$dataUse = $pubUse;
}
if ( $diskID ) {
$totalDiskUse .= '
' . $dataUse . '%';
}
else {
$totalDiskUse = $dataUse . '%';
}
}
$siteStats->{statDiskUse} = $totalDiskUse;
_printMsg( $session, " - disk use: " . $siteStats->{statDiskUse} );
}
$siteStats->{statUsers} = 0;
$siteStats->{statGroups} = 0;
if( $currentMonth ) {
my $it = $session->{users}->eachUser();
$it->{process} = sub { return 1; };
while( $it->hasNext() ) {
$siteStats->{statUsers} += $it->next();
}
$it = $session->{users}->eachGroup();
$it->{process} = sub { return 1; };
while( $it->hasNext() ) {
$siteStats->{statGroups} += $it->next();
}
_printMsg( $session, " - users: " . $siteStats->{statUsers} .
", groups: " . $siteStats->{statGroups} );
}
$siteStats->{statPlugins} = 0;
if( $currentMonth ) {
$siteStats->{statPlugins} = scalar @{$session->{plugins}{plugins}};
unless( $TWiki::cfg{Stats}{DontContactTWikiOrg} ) {
my $url = 'http://twiki.org/cgi-bin/pluginstats?';
while ( my( $key, $val ) = each( %$siteStats ) ) {
$val = TWiki::urlEncode( $val );
$url .= "$key=" . $val . ";";
}
my $response = TWiki::Func::getExternalResource( $url );
if( $response->is_error() ) {
my $msg = "Code " . $response->code() . ": " . $response->message();
$msg =~ s/[\n\r]/ /gos;
_printMsg( $session, "! ERROR: $msg" );
} else {
my $text = $response->content();
if( $text =~ /plugins: ?([0-9]+)/ ) {
$siteStats->{statPlugins} .= " of $1";
}
}
}
_printMsg( $session, " - plugins: " . $siteStats->{statPlugins} );
}
$siteStats->{statTopViewers} = '';
my %viewers;
for my $v ( values %{$stats->{viewer}} ) {
while ( my ($user, $count) = each %$v ) {
$viewers{$user} += $count;
}
}
my @topViewers =
_getTopList($TWiki::cfg{Stats}{SiteTopViewers}, \%viewers);
if( @topViewers ) {
$siteStats->{statTopViewers} = join( CGI::br(), @topViewers );
$topViewers[0] =~ s/^.*\]\[([^\]]*).*$/$1/;
_printMsg( $session, ' - top viewer: '.$topViewers[0] );
}
$siteStats->{statTopContributors} = '';
my %contribs;
for my $v ( values %{$stats->{contrib}} ) {
while ( my ($user, $count) = each %$v ) {
$contribs{$user} += $count;
}
}
my @topContribs =
_getTopList($TWiki::cfg{Stats}{SiteTopContrib}, \%contribs);
if( @topContribs ) {
$siteStats->{statTopContributors} = join( CGI::br(), @topContribs );
$topContribs[0] =~ s/^.*\]\[([^\]]*).*$/$1/;
_printMsg( $session, ' - top contributor: '.$topContribs[0] );
}
_printMsg( $session, " - Finished collecting data at " .
TWiki::Time::formatTime(time()) );
# use Data::Dumper;
# print STDERR "=====\n" . Dumper($siteStats) . "=====\n";
return $siteStats;
}
#===========================================================
sub _processSiteStats {
my( $session, $logYear, $logYearMo, $logMonYear, $siteStats ) = @_;
# Update the SiteStatistics topic
my $web = $TWiki::cfg{UsersWebName};
my $statsTopic = $TWiki::cfg{Stats}{SiteStatsTopicName} || 'SiteStatistics';
$statsTopic .= $logYear if ( $TWiki::cfg{Stats}{TopicPerYear} );
my( $meta, $text );
if( $session->{store}->topicExists( $web, $statsTopic ) ) {
( $meta, $text ) = $session->{store}->readTopic( undef, $web, $statsTopic );
} else {
( $meta, $text ) = $session->{store}->readTopic(
undef, $TWiki::cfg{SystemWebName}, 'SiteStatisticsTemplate' );
$text = $session->expandVariablesOnTopicCreation( $text, $session->{user} );
}
my $line;
my @lines = split( /\r?\n/, $text );
my $statLine;
my $idxStat = -1;
my $idxTmpl = -1;
my $oldStats;
for( my $x = 0; $x < @lines; $x++ ) {
$line = $lines[$x];
# Check for existing line for this month+year in new and legacy format
if( $line =~ /^\| ($logYearMo|$logMonYear) / ) {
my @items = split( / *\| */, $line );
if( scalar @items >= 12 ) {
$oldStats->{statWebs} = $items[2];
$oldStats->{statTopics} = $items[5];
$oldStats->{statAttachments} = $items[8];
$oldStats->{statDataSize} = $items[12];
$oldStats->{statPubSize} = $items[13];
$oldStats->{statDiskUse} = $items[14];
$oldStats->{statUsers} = $items[15];
$oldStats->{statGroups} = $items[16];
$oldStats->{statPlugins} = $items[17];
}
$idxStat = $x;
} elsif( $line =~ /<\!\-\-statDate\-\->/ ) {
$statLine = $line;
$idxTmpl = $x;
}
}
if( ! $statLine ) {
$statLine = '| | | '
. '| | '
. '| | '
. '| | | '
. '| | | '
. '| | | '
. '| '
. '| | |';
}
# update statistics line with collected values
$statLine =~ s/<\!\-\-([^\-]+)\-\->/$siteStats->{$1} || $oldStats->{$1} || 0/ge;
if( $idxStat >= 0 ) {
# entry already exists, need to update
$lines[$idxStat] = $statLine;
} elsif( $idxTmpl >= 0 ) {
# entry does not exist, add after line
$lines[$idxTmpl] = "$lines[$idxTmpl]\n$statLine";
} else {
# entry does not exist, add at the end
$lines[@lines] = $statLine;
}
$text = join( "\n", @lines );
$text .= "\n";
$session->{store}->saveTopic( $session->{user}, $web, $statsTopic,
$text, $meta,
{ minor => 1,
dontlog => 1 } );
_printMsg( $session, " - Topic $web.$statsTopic updated" );
}
#===========================================================
sub _processWeb {
my( $session, $web, $logYear, $logYearMo, $logMonYear, $stats, $webToSave )
= @_;
_printMsg( $session, "* Reporting on $web web" );
my $ma = $TWiki::cfg{Stats}{TopAffiliation}; # max affiliations
my $statViews = _numberUniqueBreakdown('views', $stats, $ma, $web);
my $statSaves = _numberUniqueBreakdown('saves', $stats, $ma, $web);
my $statUploads = _numberUniqueBreakdown('uploads', $stats, $ma, $web);
_printMsg( $session,
" - view: " . _extractNumber($statViews) .
", save: " . _extractNumber($statSaves) .
", upload: " . _extractNumber($statUploads) );
# Get the top N views and contribs in this web
my @topViews =
_getTopList($TWiki::cfg{Stats}{TopViews}, $stats->{view}{$web});
my @topViewers =
_getTopList($TWiki::cfg{Stats}{TopContrib}, $stats->{viewer}{$web});
my @topContribs =
_getTopList($TWiki::cfg{Stats}{TopContrib}, $stats->{contrib}{$web});
# Print information to stdout
my $statTopViews = '';
my $statTopViewers = '';
my $statTopContributors = '';
if( @topViews ) {
$statTopViews = join( CGI::br(), @topViews );
$topViews[0] =~ s/[\[\]]*//g;
_printMsg( $session, ' - top view: '.$topViews[0] );
}
if( @topViewers ) {
$statTopViewers = join( CGI::br(), @topViewers );
$topViewers[0] =~ s/^.*\]\[([^\]]*).*$/$1/;
_printMsg( $session, ' - top viewer: '.$topViewers[0] );
}
if( @topContribs ) {
$statTopContributors = join( CGI::br(), @topContribs );
$topContribs[0] =~ s/^.*\]\[([^\]]*).*$/$1/;
_printMsg( $session, ' - top contributor: '.$topContribs[0] );
}
# Update the WebStatistics topic
my $line;
my $statsTopicTmpl = $TWiki::cfg{Stats}{TopicName};
my $statsTopic = $statsTopicTmpl;
$statsTopic .= $logYear if ( $TWiki::cfg{Stats}{TopicPerYear} );
if ( $webToSave ) {
$statsTopic = $web . $statsTopic;
}
else {
$webToSave = $web;
}
my( $meta, $text );
if( $session->{store}->topicExists( $webToSave, $statsTopic ) ) {
( $meta, $text ) = $session->{store}->readTopic( undef, $webToSave, $statsTopic );
} else {
( $meta, $text ) = $session->{store}->readTopic( undef, '_default', $statsTopicTmpl );
}
unless( $text ) {
_printMsg( $session, " - WARNING: No updates done, topic $web.$statsTopic and"
. " _default.$statsTopic do not exist." );
return;
}
my @lines = split( /\r?\n/, $text );
my $statLine;
my $idxStat = -1;
my $idxTmpl = -1;
for( my $x = 0; $x < @lines; $x++ ) {
$line = $lines[$x];
# Check for existing line for this month+year in new and legacy format
if( $line =~ /^\| ($logYearMo|$logMonYear) / ) {
$idxStat = $x;
} elsif( $line =~ /<\!\-\-statDate\-\->/ ) {
$statLine = $line;
$idxTmpl = $x;
}
}
if( ! $statLine ) {
$statLine =
'| ' .
'| | | ' .
'| | | |';
}
$statLine =~ s/<\!\-\-statDate\-\->/$logYearMo/;
$statLine =~ s/<\!\-\-statViews\-\->/$statViews/;
$statLine =~ s/<\!\-\-statSaves\-\->/$statSaves/;
$statLine =~ s/<\!\-\-statUploads\-\->/$statUploads/;
$statLine =~ s/<\!\-\-statTopViews\-\->/$statTopViews/;
$statLine =~ s/<\!\-\-statTopViewers\-\->/$statTopViewers/;
$statLine =~ s/<\!\-\-statTopContributors\-\->/$statTopContributors/;
if( $idxStat >= 0 ) {
# entry already exists, need to update
$lines[$idxStat] = $statLine;
} elsif( $idxTmpl >= 0 ) {
# entry does not exist, add after line
$lines[$idxTmpl] = "$lines[$idxTmpl]\n$statLine";
} else {
# entry does not exist, add at the end
$lines[@lines] = $statLine;
}
$text = join( "\n", @lines );
$text .= "\n";
unless ( $session->{store}->webExists($web) ) {
# There is a chance for the web to be delete after the web list is
# generated
_printMsg( $session,
" - Topic $statsTopic not updated because the $web web has been deleted" );
return;
}
$session->{store}->saveTopic( $session->{user}, $webToSave, $statsTopic,
$text, $meta,
{ minor => 1,
dontlog => 1 } );
_printMsg( $session, " - Topic $statsTopic updated" );
}
sub _itemLink {
my ( $item, $isWeb ) = @_;
if ( $isWeb ) {
return "[[$item.WebHome][$item]]";
}
else {
if ( $item =~ /^(.*)\.(.*)$/ ) {
return "[[$item][$2]]";
}
else {
return "[[$item]]";
}
}
}
#===========================================================
# Get the items with top N frequency counts
# Items can be topics (for view hash) or users (for contrib hash)
#===========================================================
sub _getTopList
{
my( $theMaxNum, $itemsRef, $isWeb ) = @_;
my $i = 0;
my @list =
grep { $i++ < $theMaxNum }
map { _nbsp($itemsRef->{$_} . ' ' . _itemLink($_, $isWeb)) }
sort { $itemsRef->{$b} <=> $itemsRef->{$a} }
keys %$itemsRef;
@list = _alignNums(@list);
return @list;
}
#===========================================================
sub _printMsg {
my( $session, $msg ) = @_;
if( $session->inContext('command_line') ) {
$msg =~ s/ / /go;
print $msg, "\n";
} else {
if( $msg =~ s/^\!// ) {
$msg = CGI::h4( CGI::span( { class=>'twikiAlert' }, $msg ));
} elsif( $msg =~ /^[A-Z]/ ) {
# SMELL: does not support internationalised script messages
$msg =~ s/^([A-Z].*)/CGI::h3($1)/ge;
} else {
$msg =~ s/(\*\*\*.*)/CGI::span( { class=>'twikiAlert' }, $1 )/ge;
$msg =~ s/^\s\s/ /go;
$msg =~ s/^\s/ /go;
$msg .= CGI::br();
}
$msg =~ s/==([A-Z]*)==/'=='.CGI::span( { class=>'twikiAlert' }, $1 ).'=='/ge;
$session->{response}->body( ($session->{response}->body || '') . $msg . "\n" );
}
}
#===========================================================
1;