Fix for bug 72721 (duplicates.cgi performs poorly with lots of bugs) and bug…

Fix for bug 72721 (duplicates.cgi performs poorly with lots of bugs) and bug 69054 (DB_File not portable): dependence on DB_File removed, now uses AnyDBM_File which comes standard with Perl. Duplicates.cgi now runs its queries against the shadow database if it's available, among many other improvements. Patch by gervase.markham@univ.ox.ac.uk (Gervase Markham) r= justdave
parent 9818692e
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
# Run me out of cron at midnight to collect Bugzilla statistics. # Run me out of cron at midnight to collect Bugzilla statistics.
use DB_File; use AnyDBM_File;
use diagnostics; use diagnostics;
use strict; use strict;
use vars @::legal_product; use vars @::legal_product;
...@@ -125,16 +125,16 @@ sub calculate_dupes { ...@@ -125,16 +125,16 @@ sub calculate_dupes {
my $key; my $key;
my $changed = 1; my $changed = 1;
my $today = &today; my $today = &today_dash;
# Save % count here in a date-named file # Save % count here in a date-named file
# so we can read it back in to do changed counters # so we can read it back in to do changed counters
# First, delete it if it exists, so we don't add to the contents of an old file # First, delete it if it exists, so we don't add to the contents of an old file
if (-e "data/mining/dupes$today.db") { if (-e "data/mining/dupes$today") {
system("rm -f data/mining/dupes$today.db"); system("rm -f data/mining/dupes$today");
} }
dbmopen(%count, "data/mining/dupes$today.db", 0644) || die "Can't open DBM dupes file: $!"; dbmopen(%count, "data/mining/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
# Create a hash with key "a bug number", value "bug which that bug is a # Create a hash with key "a bug number", value "bug which that bug is a
# direct dupe of" - straight from the duplicates table. # direct dupe of" - straight from the duplicates table.
...@@ -194,3 +194,8 @@ sub today { ...@@ -194,3 +194,8 @@ sub today {
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
} }
# Return the current local date as a dash-separated "YYYY-MM-DD" string.
sub today_dash {
    my @now = localtime(time);

    my $day   = $now[3];
    my $month = $now[4] + 1;       # localtime() months are zero-based
    my $year  = $now[5] + 1900;    # localtime() years are counted from 1900

    return sprintf("%04d-%02d-%02d", $year, $month, $day);
}
...@@ -120,10 +120,6 @@ sub check_shadowdb { ...@@ -120,10 +120,6 @@ sub check_shadowdb {
# t -- A short text entry field (suitable for a single line) # t -- A short text entry field (suitable for a single line)
# l -- A long text field (suitable for many lines) # l -- A long text field (suitable for many lines)
# b -- A boolean value (either 1 or 0) # b -- A boolean value (either 1 or 0)
# i -- An integer.
# defenum -- This param defines an enum that defines a column in one of
# the database tables. The name of the parameter is of the form
# "tablename.columnname".
DefParam("maintainer", DefParam("maintainer",
"The email address of the person who maintains this installation of Bugzilla.", "The email address of the person who maintains this installation of Bugzilla.",
...@@ -334,6 +330,10 @@ additional data you may have.</li> ...@@ -334,6 +330,10 @@ additional data you may have.</li>
<br> <br>
}); });
# Declares the "mostfreqthreshold" parameter, the minimum duplicate count a
# bug needs before duplicates.cgi lists it.
# NOTE(review): the arguments appear to be (name, description, type, default);
# confirm against DefParam's definition. Type "t" is the short text entry field
# in this file's type legend, so the default is given as the string "2".
DefParam("mostfreqthreshold",
"The minimum number of duplicates a bug needs to show up on the <A HREF=\"duplicates.cgi\">most frequently reported bugs page</a>. If you have a large database and this page takes a long time to load, try increasing this number.",
"t",
"2");
DefParam("mybugstemplate", DefParam("mybugstemplate",
"This is the URL to use to bring up a simple 'all of my bugs' list for a user. %userid% will get replaced with the login name of a user.", "This is the URL to use to bring up a simple 'all of my bugs' list for a user. %userid% will get replaced with the login name of a user.",
......
...@@ -25,82 +25,119 @@ ...@@ -25,82 +25,119 @@
use diagnostics; use diagnostics;
use strict; use strict;
use CGI "param"; use CGI "param";
use DB_File; use AnyDBM_File;
require "globals.pl"; require "globals.pl";
require "CGI.pl"; require "CGI.pl";
ConnectToDatabase(); ConnectToDatabase(1);
GetVersionTable(); GetVersionTable();
my %dbmcount;
my %count; my %count;
my $dobefore = 0; my $dobefore = 0;
my $before = ""; my $before = "";
my %before; my %before;
my $changedsince; # Get params from URL
my $maxrows = 500; # arbitrary limit on max number of rows
my $today = &days_ago(0); my $changedsince = 7; # default one week
my $maxrows = 100; # arbitrary limit on max number of rows
my $sortby = "dup_count"; # default to sorting by dup count
# Open today's record of dupes if (defined(param("sortby")))
if (-e "data/mining/dupes$today.db")
{
dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open today's dupes file: $!";
}
else
{ {
# Try yesterday's, then (in case today's hasn't been created yet) :-) $sortby = param("sortby");
$today = &days_ago(1);
if (-e "data/mining/dupes$today.db")
{
dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open yesterday's dupes file: $!";
}
else
{
die "There are no duplicate statistics for today or yesterday.";
}
} }
# Check for changedsince param, and see if it's a positive integer # Check for changedsince param, and see if it's a positive integer
if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/) if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/)
{ {
$changedsince = param("changedsince"); $changedsince = param("changedsince");
} }
else
# check for max rows param, and see if it's a positive integer
if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/)
{ {
# Otherwise, default to one week $maxrows = param("maxrows");
$changedsince = "7";
} }
$before = &days_ago($changedsince); # Start the page
print "Content-type: text/html\n";
print "\n";
PutHeader("Most Frequently Reported Bugs");
# check for max rows parameter # Open today's record of dupes
if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/) my $today = &days_ago(0);
if (-e "data/mining/dupes$today.db")
{ {
$maxrows = param("maxrows"); dbmopen(%dbmcount, "data/mining/dupes$today", 0644) ||
&die_politely("Can't open today's dupes file: $!");
} }
else
{
# Try yesterday's, then (in case today's hasn't been created yet)
$today = &days_ago(1);
if (-e "data/mining/dupes$today.db")
{
dbmopen(%dbmcount, "data/mining/dupes$today", 0644) ||
&die_politely("Can't open yesterday's dupes file: $!");
}
else
{
&die_politely("There are no duplicate statistics for today or yesterday.");
}
}
# Copy hash (so we don't mess up the on-disk file when we remove entries)
%count = %dbmcount;
my $key;
my $value;
my $threshold = Param("mostfreqthreshold");
if (-e "data/mining/dupes${before}.db") # Remove all those dupes under the threshold (for performance reasons)
while (($key, $value) = each %count)
{ {
dbmopen(%before, "data/mining/dupes${before}.db", 0644) && ($dobefore = 1); if ($value < $threshold)
{
delete $count{$key};
}
} }
print "Content-type: text/html\n"; # Try and open the database from "changedsince" days ago
print "\n"; $before = &days_ago($changedsince);
PutHeader("Most Frequently Reported Bugs");
if (-e "data/mining/dupes$before.db")
{
dbmopen(%before, "data/mining/dupes$before", 0644) && ($dobefore = 1);
}
print Param("mostfreqhtml"); print Param("mostfreqhtml");
print " print "
<table BORDER> <table BORDER>
<tr BGCOLOR=\"#CCCCCC\"> <tr BGCOLOR=\"#CCCCCC\">
<td><center><b>Bug #</b></center></td>
<td><center><b>Dupe<br>Count</b></center></td>\n"; <td><center><b>
<a href=\"duplicates.cgi?sortby=bug_no&maxrows=$maxrows&changedsince=$changedsince\">Bug #</a>
</b></center></td>
<td><center><b>
<a href=\"duplicates.cgi?sortby=dup_count&maxrows=$maxrows&changedsince=$changedsince\">Dupe<br>Count</a>
</b></center></td>\n";
my %delta;
if ($dobefore) if ($dobefore)
{ {
print "<td><center><b>Change in last<br>$changedsince day(s)</b></center></td> "; print "<td><center><b>
<a href=\"duplicates.cgi?sortby=delta&maxrows=$maxrows&changedsince=$changedsince\">Change in
last<br>$changedsince day(s)</a></b></center></td>";
# Calculate the deltas if we are doing a "before"
foreach (keys(%count))
{
$delta{$_} = $count{$_} - $before{$_};
}
} }
print " print "
...@@ -111,78 +148,94 @@ print " ...@@ -111,78 +148,94 @@ print "
<td><center><b>Summary</b></center></td> <td><center><b>Summary</b></center></td>
</tr>\n\n"; </tr>\n\n";
my %delta; # Sort, if required
my @sortedcount;
# Calculate the deltas if we are doing a "before" if ($sortby eq "delta")
if ($dobefore)
{ {
foreach (keys(%count)) @sortedcount = sort by_delta keys(%count);
{
$delta{$_} = $count{$_} - $before{$_};
}
} }
elsif ($sortby eq "bug_no")
# Offer the option of sorting on total count, or on the delta
my @sortedcount;
if (defined(param("sortby")) && param("sortby") == "delta")
{ {
@sortedcount = sort by_delta keys(%count); @sortedcount = sort by_bug_no keys(%count);
} }
else elsif ($sortby eq "dup_count")
{ {
@sortedcount = sort by_dup_count keys(%count); @sortedcount = sort by_dup_count keys(%count);
} }
my $i = 0; my $i = 0;
foreach (@sortedcount) foreach (@sortedcount)
{ {
my $id = $_; my $id = $_;
SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " . SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " .
" FROM bugs WHERE bug_id = $id"); " FROM bugs WHERE bug_id = $id");
my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData(); my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData();
next unless $groupset == 0; next unless $groupset == 0;
$summary = html_quote($summary); $summary = html_quote($summary);
print "<tr>"; print "<tr>";
print '<td><center><A HREF="show_bug.cgi?id=' . $id . '">'; print '<td><center><A HREF="show_bug.cgi?id=' . $id . '">';
print $id . "</A></center></td>"; print $id . "</A></center></td>";
print "<td><center>$count{$id}</center></td>"; print "<td><center>$count{$id}</center></td>";
if ($dobefore) if ($dobefore)
{ {
print "<td><center>$delta{$id}</center></td>"; print "<td><center>$delta{$id}</center></td>";
} }
print "<td>$component</td>\n "; print "<td>$component</td>\n ";
print "<td><center>$severity</center></td>"; print "<td><center>$severity</center></td>";
print "<td><center>$op_sys</center></td>"; print "<td><center>$op_sys</center></td>";
print "<td><center>$milestone</center></td>"; print "<td><center>$milestone</center></td>";
print "<td>$summary</td>"; print "<td>$summary</td>";
print "</tr>\n"; print "</tr>\n";
$i++; $i++;
if ($i == $maxrows) if ($i == $maxrows)
{ {
last; last;
} }
} }
print "</table><br><br>"; print "</table><br><br>";
PutFooter(); PutFooter();
# Sort comparator: orders bug numbers numerically, smallest first.
# Reads the two items under comparison from sort's package variables $a and $b.
sub by_bug_no
{
    my $order = $a <=> $b;
    return $order;
}
sub by_dup_count sub by_dup_count
{ {
return -($count{$a} <=> $count{$b}); return -($count{$a} <=> $count{$b});
} }
sub by_delta sub by_delta
{ {
return -($delta{$a} <=> $delta{$b}); return -($delta{$a} <=> $delta{$b});
} }
sub days_ago sub days_ago
{ {
my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5]; my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5];
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
} }
# Show an error message inside a bordered HTML table, emit the page footer,
# and end the script.
#
# Parameters:
#   $msg - text of the message; interpolated into the HTML as-is.
#
# Does not return: the script exits after the footer is printed.
sub die_politely {
    my ($msg) = @_;

    # The heredoc body and its terminator stay at column 0 so the emitted
    # HTML is unchanged.
    print <<FIN;
<p>
<table border=1 cellpadding=10>
<tr>
<td align=center>
<font color=blue>$msg</font>
</td>
</tr>
</table>
<p>
FIN

    PutFooter();
    exit;
}
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment