Fix for bug 72721 (duplicates.cgi performs poorly with lots of bugs) and bug…

Fix for bug 72721 (duplicates.cgi performs poorly with lots of bugs) and bug 69054 (DB_File not portable): the dependence on DB_File has been removed; the code now uses AnyDBM_File, which comes standard with Perl. duplicates.cgi now runs its queries against the shadow database if it's available, among many other improvements. Patch by Gervase Markham, r=justdave.
......@@ -24,7 +24,7 @@
# Run me out of cron at midnight to collect Bugzilla statistics.
use DB_File;
use AnyDBM_File;
use diagnostics;
use strict;
use vars @::legal_product;
......@@ -125,16 +125,16 @@ sub calculate_dupes {
my $key;
my $changed = 1;
my $today = &today;
my $today = &today_dash;
# Save % count here in a date-named file
# so we can read it back in to do changed counters
# First, delete it if it exists, so we don't add to the contents of an old file
if (-e "data/mining/dupes$today.db") {
system("rm -f data/mining/dupes$today.db");
if (-e "data/mining/dupes$today") {
system("rm -f data/mining/dupes$today");
dbmopen(%count, "data/mining/dupes$today.db", 0644) || die "Can't open DBM dupes file: $!";
dbmopen(%count, "data/mining/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
# Create a hash with key "a bug number", value "bug which that bug is a
# direct dupe of" - straight from the duplicates table.
......@@ -194,3 +194,8 @@ sub today {
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
sub today_dash {
my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
......@@ -120,10 +120,6 @@ sub check_shadowdb {
# t -- A short text entry field (suitable for a single line)
# l -- A long text field (suitable for many lines)
# b -- A boolean value (either 1 or 0)
# i -- An integer.
# defenum -- This param defines an enum that defines a column in one of
# the database tables. The name of the parameter is of the form
# "tablename.columnname".
"The email address of the person who maintains this installation of Bugzilla.",
......@@ -334,6 +330,10 @@ additional data you may have.</li>
"The minimum number of duplicates a bug needs to show up on the <A HREF=\"duplicates.cgi\">most frequently reported bugs page</a>. If you have a large database and this page takes a long time to load, try increasing this number.",
"This is the URL to use to bring up a simple 'all of my bugs' list for a user. %userid% will get replaced with the login name of a user.",
......@@ -25,82 +25,119 @@
use diagnostics;
use strict;
use CGI "param";
use DB_File;
use AnyDBM_File;
require "";
require "";
my %dbmcount;
my %count;
my $dobefore = 0;
my $before = "";
my %before;
my $changedsince;
my $maxrows = 500; # arbitrary limit on max number of rows
# Get params from URL
my $today = &days_ago(0);
my $changedsince = 7; # default one week
my $maxrows = 100; # arbitrary limit on max number of rows
my $sortby = "dup_count"; # default to sorting by dup count
if (defined(param("sortby")))
$sortby = param("sortby");
# Check for changedsince param, and see if it's a positive integer
if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/)
$changedsince = param("changedsince");
# check for max rows param, and see if it's a positive integer
if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/)
$maxrows = param("maxrows");
# Start the page
print "Content-type: text/html\n";
print "\n";
PutHeader("Most Frequently Reported Bugs");
# Open today's record of dupes
my $today = &days_ago(0);
if (-e "data/mining/dupes$today.db")
dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open today's dupes file: $!";
dbmopen(%dbmcount, "data/mining/dupes$today", 0644) ||
&die_politely("Can't open today's dupes file: $!");
# Try yesterday's, then (in case today's hasn't been created yet) :-)
# Try yesterday's, then (in case today's hasn't been created yet)
$today = &days_ago(1);
if (-e "data/mining/dupes$today.db")
dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open yesterday's dupes file: $!";
dbmopen(%dbmcount, "data/mining/dupes$today", 0644) ||
&die_politely("Can't open yesterday's dupes file: $!");
die "There are no duplicate statistics for today or yesterday.";
&die_politely("There are no duplicate statistics for today or yesterday.");
# Check for changedsince param, and see if it's a positive integer
if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/)
$changedsince = param("changedsince");
# Copy hash (so we don't mess up the on-disk file when we remove entries)
%count = %dbmcount;
my $key;
my $value;
my $threshold = Param("mostfreqthreshold");
# Remove all those dupes under the threshold (for performance reasons)
while (($key, $value) = each %count)
# Otherwise, default to one week
$changedsince = "7";
if ($value < $threshold)
delete $count{$key};
# Try and open the database from "changedsince" days ago
$before = &days_ago($changedsince);
# check for max rows parameter
if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/)
if (-e "data/mining/dupes$before.db")
$maxrows = param("maxrows");
dbmopen(%before, "data/mining/dupes$before", 0644) && ($dobefore = 1);
if (-e "data/mining/dupes${before}.db")
dbmopen(%before, "data/mining/dupes${before}.db", 0644) && ($dobefore = 1);
print "Content-type: text/html\n";
print "\n";
PutHeader("Most Frequently Reported Bugs");
print Param("mostfreqhtml");
print "
<table BORDER>
<td><center><b>Bug #</b></center></td>
<a href=\"duplicates.cgi?sortby=bug_no&maxrows=$maxrows&changedsince=$changedsince\">Bug #</a>
<a href=\"duplicates.cgi?sortby=dup_count&maxrows=$maxrows&changedsince=$changedsince\">Dupe<br>Count</a>
my %delta;
if ($dobefore)
print "<td><center><b>Change in last<br>$changedsince day(s)</b></center></td> ";
print "<td><center><b>
<a href=\"duplicates.cgi?sortby=delta&maxrows=$maxrows&changedsince=$changedsince\">Change in
last<br>$changedsince day(s)</a></b></center></td>";
# Calculate the deltas if we are doing a "before"
foreach (keys(%count))
$delta{$_} = $count{$_} - $before{$_};
print "
......@@ -111,25 +148,18 @@ print "
my %delta;
# Calculate the deltas if we are doing a "before"
if ($dobefore)
foreach (keys(%count))
$delta{$_} = $count{$_} - $before{$_};
# Offer the option of sorting on total count, or on the delta
# Sort, if required
my @sortedcount;
if (defined(param("sortby")) && param("sortby") == "delta")
if ($sortby eq "delta")
@sortedcount = sort by_delta keys(%count);
elsif ($sortby eq "bug_no")
@sortedcount = sort by_bug_no keys(%count);
elsif ($sortby eq "dup_count")
@sortedcount = sort by_dup_count keys(%count);
......@@ -170,6 +200,11 @@ print "</table><br><br>";
sub by_bug_no
return ($a <=> $b);
sub by_dup_count
return -($count{$a} <=> $count{$b});
......@@ -183,6 +218,24 @@ sub by_delta
sub days_ago
my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5];
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
sub die_politely {
my $msg = shift;
print <<FIN;
<table border=1 cellpadding=10>
<td align=center>
<font color=blue>$msg</font>
