author | Jesse Morgan <jesse@jesterpm.net> | 2016-12-17 21:28:53 -0800
committer | Jesse Morgan <jesse@jesterpm.net> | 2016-12-17 21:28:53 -0800
commit | 54df2afaa61c6a03cbb4a33c9b90fa572b6d07b8 (patch)
tree | 18147b92b969d25ffbe61935fb63035cac820dd0 /db-4.8.30/test/db_reptest.tcl
Berkeley DB 4.8 with rust build script for linux.
Diffstat (limited to 'db-4.8.30/test/db_reptest.tcl')
-rw-r--r-- | db-4.8.30/test/db_reptest.tcl | 778
1 files changed, 778 insertions, 0 deletions
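The patch below adds three user-level entry points (db_reptest, basic_db_reptest, restore_db_reptest), documented in the file's opening comments. A minimal usage sketch follows, assuming the test suite has been bootstrapped from the build directory in the usual way; the source path and the iteration count are illustrative, not part of the patch.

    # Hypothetical tclsh session; requires the db_reptest utility to be built.
    source ./test.tcl                 ;# assumed test-suite bootstrap
    basic_db_reptest                  ;# basic0 smoke test: 2 sites, site 1 starts as master
    basic_db_reptest 1                ;# basic1: 3 sites, all start as clients, elections
    db_reptest 5                      ;# 5 randomized iterations (no count = infinite loop)
    restore_db_reptest TESTDIR        ;# replay the configuration saved in TESTDIR/SAVE_RUN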
diff --git a/db-4.8.30/test/db_reptest.tcl b/db-4.8.30/test/db_reptest.tcl new file mode 100644 index 0000000..fcd2ec8 --- /dev/null +++ b/db-4.8.30/test/db_reptest.tcl @@ -0,0 +1,778 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1999,2009 Oracle. All rights reserved. +# +# $Id$ +# +# TEST db_reptest +# TEST Wrapper to configure and run the db_reptest program. + +# +# TODO: +# late client start. +# Number of message proc threads. +# + +global last_nsites +set last_nsites 0 + +# +# There are 3 user-level procs that the user may invoke. +# 1. db_reptest - Runs randomized configurations in a loop. +# 2. basic_db_reptest - Runs a simple set configuration once, +# as a smoke test. +# 3. restore_db_reptest 'dir' - Runs the configuration given in 'dir' +# in a loop. The purpose is either to reproduce a problem +# that some configuration encountered, or test a fix. +# + +# +# db_reptest - Run a randomized configuration. Run the test +# 'count' times in a loop, or if no count it given, it is +# an infinite loop. +# +proc db_reptest { {count -1} } { + global rand_init + + berkdb srand $rand_init + set cmd "db_reptest_int random" + db_reptest_loop $cmd $count +} + +# +# Run a basic reptest. The types are: +# Basic 0 - Two sites, start with site 1 as master, 5 worker threads, btree, +# run 100 seconds, onesite remote knowledge. +# Basic 1 - Three sites, all sites start as client, 5 worker threads, btree +# run 150 seconds, full remote knowledge. +# +proc basic_db_reptest { { basic 0 } } { + global util_path + + if { [file exists $util_path/db_reptest] == 0 } { + puts "Skipping db_reptest. Is it built?" + return + } + if { $basic == 0 } { + db_reptest_int basic0 + } + if { $basic == 1 } { + db_reptest_int basic1 + } +} + +# +# Restore a configuration from the given directory and +# run that configuration in a loop 'count' times. +# +proc restore_db_reptest { restoredir { count -1 } } { + set cmd "db_reptest_int restore $restoredir/SAVE_RUN" + db_reptest_loop $cmd $count +} + +# +# Wrapper to run the command in a loop, 'count' times. +# +proc db_reptest_loop { cmd count } { + global util_path + + if { [file exists $util_path/db_reptest] == 0 } { + puts "Skipping db_reptest. Is it built?" + return + } + set iteration 1 + while { 1 } { + puts -nonewline "ITERATION $iteration: " + puts [clock format [clock seconds] -format "%H:%M %D"] + + # + eval $cmd + + puts -nonewline "COMPLETED $iteration: " + puts [clock format [clock seconds] -format "%H:%M %D"] + incr iteration + if { $count > 0 && $iteration > $count } { + break + } + } +} + +# +# Internal version of db_reptest that all user-level procs +# eventually call. It will configure a single run of +# db_reptest based on the configuration type specified +# in 'cfgtype'. This proc will: +# Configure a run of db_reptest +# Run db_reptest +# Verify the sites after db_reptest completes. +# +proc db_reptest_int { cfgtype { restoredir NULL } } { + source ./include.tcl + global rporttype + + env_cleanup $testdir + + set savedir TESTDIR/SAVE_RUN + reptest_cleanup $savedir + + # + # Get all the default or random values needed for the test + # and its args first. + # + set runtime 0 + set kill 0 + # + # Get number of sites first because pretty much everything else + # after here depends on how many sites there are. + # + set num_sites [get_nsites $cfgtype $restoredir] + set use_lease [get_lease $cfgtype $restoredir] + # + # Only use kill if we have > 2 sites. 
+ # Returns the site number of the site to kill, or 0 + # if this will not be a kill test. + # + if { $num_sites > 2 } { + set kill [get_kill $cfgtype $restoredir $num_sites] + } + if { $cfgtype != "restore" } { + if { $use_lease } { + set use_master 0 + } else { + set use_master [get_usemaster $cfgtype] + } + set master_site [get_mastersite $cfgtype $use_master $num_sites] + set workers [get_workers $cfgtype $use_lease] + set dbtype [get_dbtype $cfgtype] + set runtime [get_runtime $cfgtype] + set use_peers [get_peers $cfgtype] + puts -nonewline "Running: $num_sites sites, $runtime seconds " + if { $kill } { + puts -nonewline "kill site $kill " + } + if { $use_lease } { + puts "with leases" + } elseif { $use_master } { + puts "master site $master_site" + } else { + puts "no master" + } + } + set baseport 6100 + set rporttype NULL + # + # This loop sets up the args to the invocation of db_reptest + # for each site. + # + for { set i 1 } {$i <= $num_sites } { incr i } { + set envdirs($i) TESTDIR/ENV$i + reptest_cleanup $envdirs($i) + # + # If we are restoring the args, just read them from the + # saved location for this sites. Otherwise build up + # the args for each piece we need. + # + if { $cfgtype == "restore" } { + set cid [open $restoredir/DB_REPTEST_ARGS.$i r] + set prog_args($i) [read $cid] + close $cid + if { $runtime == 0 } { + set runtime [parse_runtime $prog_args($i)] + puts "Runtime: $runtime" + } + } else { + set prog_args($i) \ + "-v -c $workers -t $dbtype -T $runtime " + set prog_args($i) \ + [concat $prog_args($i) "-h $envdirs($i)"] + # + # Add in if this site should kill itself. + # + if { $kill == $i } { + set prog_args($i) [concat $prog_args($i) "-k"] + } + # + # Add in if this site starts as a master or client. + # + if { $i == $master_site } { + set state($i) MASTER + set prog_args($i) [concat $prog_args($i) "-M"] + } else { + set state($i) CLIENT + # + # If we have a master, then we just want to + # start as a client. Otherwise start with + # elections. + # + if { $use_master } { + set prog_args($i) \ + [concat $prog_args($i) "-C"] + } else { + set prog_args($i) \ + [concat $prog_args($i) "-E"] + } + } + # + # Add in host:port configuration, both this site's + # local address and any remote addresses it knows. + # + set lport($i) [expr $baseport + $i] + set prog_args($i) \ + [concat $prog_args($i) "-l localhost:$lport($i)"] + set rport($i) [get_rport $baseport $i \ + $num_sites $cfgtype] + if { $use_peers } { + set remote_arg "-R" + } else { + set remote_arg "-r" + } + foreach p $rport($i) { + set prog_args($i) \ + [concat $prog_args($i) $remote_arg \ + "localhost:$p"] + } + } + save_db_reptest $savedir ARGS $i $prog_args($i) + } + + # Now make the DB_CONFIG file for each site. + reptest_make_config $savedir $num_sites envdirs state \ + $use_lease $cfgtype $restoredir + + # Run the test + run_db_reptest $savedir $num_sites $runtime + puts "Test run complete. Verify." + + # Verify the test run. + verify_db_reptest $num_sites envdirs $kill + +} + +# +# Make a DB_CONFIG file for all sites in the group +# +proc reptest_make_config { savedir nsites edirs st lease cfgtype restoredir } { + upvar $edirs envdirs + upvar $st state + + # + # Generate global config values that should be the same + # across all sites, such as number of sites and log size, etc. 
+ # + set default_cfglist { + { "rep_set_nsites" $nsites } + { "rep_set_request" "150000 2400000" } + { "rep_set_timeout" "db_rep_checkpoint_delay 0" } + { "rep_set_timeout" "db_rep_connection_retry 2000000" } + { "rep_set_timeout" "db_rep_heartbeat_monitor 1000000" } + { "rep_set_timeout" "db_rep_heartbeat_send 500000" } + { "set_cachesize" "0 536870912 1" } + { "set_lg_max" "131072" } + { "set_lk_detect" "db_lock_default" } + { "set_verbose" "db_verb_recovery" } + { "set_verbose" "db_verb_replication" } + } + + set acks { db_repmgr_acks_all db_repmgr_acks_all_peers \ + db_repmgr_acks_none db_repmgr_acks_one db_repmgr_acks_one_peer \ + db_repmgr_acks_quorum } + + # + # Ack policy must be the same on all sites. + # + if { $cfgtype == "random" } { + if { $lease } { + set ackpolicy db_repmgr_acks_quorum + } else { + set done 0 + while { $done == 0 } { + set acksz [expr [llength $acks] - 1] + set myack [berkdb random_int 0 $acksz] + set ackpolicy [lindex $acks $myack] + # + # Only allow the "none" policy with 2 sites + # otherwise it can overwhelm the system and + # it is a rarely used option. + # + if { $ackpolicy == "db_repmgr_acks_none" && \ + $nsites > 2 } { + continue + } + set done 1 + } + } + } else { + set ackpolicy db_repmgr_acks_one + } + for { set i 1 } { $i <= $nsites } { incr i } { + # + # If we're restoring we just need to copy it. + # + if { $cfgtype == "restore" } { + file copy $restoredir/DB_CONFIG.$i \ + $envdirs($i)/DB_CONFIG + file copy $restoredir/DB_CONFIG.$i \ + $savedir/DB_CONFIG.$i + continue + } + # + # Otherwise set up per-site config information + # + set cfglist $default_cfglist + + # + # Add lease configuration if needed. We're running all + # locally, so there is no clock skew. + # + if { $lease } { + # + # We need to have an ack timeout > lease timeout. + # Otherwise txns can get committed without waiting + # long enough for leases to get granted. + # + lappend cfglist { "rep_set_config" "db_rep_conf_lease" } + lappend cfglist { "rep_set_timeout" \ + "db_rep_lease_timeout 10000000" } + lappend cfglist \ + { "rep_set_timeout" "db_rep_ack_timeout 20000000" } + } else { + lappend cfglist \ + { "rep_set_timeout" "db_rep_ack_timeout 5000000" } + } + + # + # Priority + # + if { $state($i) == "MASTER" } { + lappend cfglist { "rep_set_priority" 100 } + } else { + if { $cfgtype == "random" } { + set pri [berkdb random_int 10 25] + } else { + set pri 20 + } + lappend cfglist { "rep_set_priority" $pri } + } + # + # Others: limit size, bulk, 2site strict, + # + if { $cfgtype == "random" } { + set limit_sz [berkdb random_int 15000 1000000] + set bulk [berkdb random_int 0 1] + if { $bulk } { + lappend cfglist \ + { "rep_set_config" "db_rep_conf_bulk" } + } + if { $nsites == 2 } { + set strict [berkdb random_int 0 1] + if { $strict } { + lappend cfglist { "rep_set_config" \ + "db_repmgr_conf_2site_strict" } + } + } + } else { + set limit_sz 100000 + } + lappend cfglist { "rep_set_limit" "0 $limit_sz" } + lappend cfglist { "repmgr_set_ack_policy" $ackpolicy } + set cid [open $envdirs($i)/DB_CONFIG a] + foreach c $cfglist { + set carg [subst [lindex $c 0]] + set cval [subst [lindex $c 1]] + puts $cid "$carg $cval" + } + close $cid + set cid [open $envdirs($i)/DB_CONFIG r] + set cfg [read $cid] + close $cid + + save_db_reptest $savedir CONFIG $i $cfg + } + +} + +proc reptest_cleanup { dir } { + # + # For now, just completely remove it all. We might want + # to use env_cleanup at some point in the future. 
+ # + fileremove -f $dir + file mkdir $dir +} + + +proc save_db_reptest { savedir op site savelist } { + # + # Save a copy of the configuration and args used to run this + # instance of the test. + # + if { $op == "CONFIG" } { + set outfile $savedir/DB_CONFIG.$site + } else { + set outfile $savedir/DB_REPTEST_ARGS.$site + } + set cid [open $outfile a] + puts -nonewline $cid $savelist + close $cid +} + +proc run_db_reptest { savedir numsites runtime } { + source ./include.tcl + global killed_procs + + set pids {} + for {set i 1} {$i <= $numsites} {incr i} { + lappend pids [exec $tclsh_path $test_path/wrap_reptest.tcl \ + $savedir/DB_REPTEST_ARGS.$i $savedir/site$i.log &] + tclsleep 1 + } + watch_procs $pids 15 [expr $runtime * 3] + set killed [llength $killed_procs] + if { $killed > 0 } { + error "Processes $killed_procs never finished" + } +} + +proc verify_db_reptest { num_sites edirs kill } { + upvar $edirs envdirs + + set startenv 1 + set cmpeid 2 + if { $kill == 1 } { + set startenv 2 + set cmpeid 3 + } + set envbase [berkdb_env_noerr -home $envdirs($startenv)] + for { set i $cmpeid } { $i <= $num_sites } { incr i } { + if { $i == $kill } { + continue + } + set cmpenv [berkdb_env_noerr -home $envdirs($i)] + puts "Compare $envdirs($startenv) with $envdirs($i)" + # + # Compare 2 envs. We assume the name of the database that + # db_reptest creates and know it is 'am1.db'. + # We want as other args: + # 0 - compare_shared_portion + # 1 - match databases + # 0 - don't compare logs (for now) + rep_verify $envdirs($startenv) $envbase $envdirs($i) $cmpenv \ + 0 1 0 am1.db + $cmpenv close + } + $envbase close +} + +proc get_nsites { cfgtype restoredir } { + global last_nsites + + # + # The number of sites must be the same for all. Read the + # first site's saved DB_CONFIG file if we're restoring since + # we only know we have at least 1 site. + # + if { $cfgtype == "restore" } { + set cid [open $restoredir/DB_CONFIG.1 r] + while { [gets $cid cfglist] } { + puts "Read in: $cfglist" + set cfg [lindex $cfglist 0] + if { $cfg == "rep_set_nsites" } { + set num_sites [lindex $cfglist 1] + break; + } + } + close $cid + return $num_sites + } + if { $cfgtype == "random" } { + # + # Sometimes 'random' doesn't seem to do a good job. I have + # seen on all iterations after the first one, nsites is + # always 2, 100% of the time. Add this bit to make sure + # this nsites values is different from the last iteration. + # + set n [berkdb random_int 2 5] + while { $n == $last_nsites } { + set n [berkdb random_int 2 5] +puts "Getting random nsites between 2 and 5. Got $n, last_nsites $last_nsites" + } + set last_nsites $n + return $n +# return [berkdb random_int 2 5] + } + if { $cfgtype == "basic0" } { + return 2 + } + if { $cfgtype == "basic1" } { + return 3 + } + return -1 +} + +# +# Run with master leases? 25%/75% (use a master lease 25% of the time). +# +proc get_lease { cfgtype restoredir } { + # + # The number of sites must be the same for all. Read the + # first site's saved DB_CONFIG file if we're restoring since + # we only know we have at least 1 site. 
+ # + if { $cfgtype == "restore" } { + set use_lease 0 + set cid [open $restoredir/DB_CONFIG.1 r] + while { [gets $cid cfglist] } { +# puts "Read in: $cfglist" + if { [llength $cfglist] == 0 } { + break; + } + set cfg [lindex $cfglist 0] + if { $cfg == "rep_set_config" } { + set lease [lindex $cfglist 1] + if { $lease == "db_rep_conf_lease" } { + set use_lease 1 + break; + } + } + } + close $cid + return $use_lease + } + if { $cfgtype == "random" } { + set leases { 1 0 0 0 } + set len [expr [llength $leases] - 1] + set i [berkdb random_int 0 $len] + return [lindex $leases $i] + } + if { $cfgtype == "basic0" } { + return 0 + } + if { $cfgtype == "basic1" } { + return 0 + } +} + +# +# Do a kill test about half the time. We randomly choose a +# site number to kill, it could be a master or a client. +# Return 0 if we don't kill any site. +# +proc get_kill { cfgtype restoredir num_sites } { + if { $cfgtype == "restore" } { + set ksite 0 + for { set i 1 } { $i <= $num_sites } { incr i } { + set cid [open $restoredir/DB_REPTEST_ARGS.$i r] + # + # !!! + # We currently assume the args file is 1 line. + # We assume only 1 site can get killed. So, if we + # find one, we break the loop and don't look further. + # + gets $cid arglist + close $cid +# puts "Read in: $arglist" + set dokill [lsearch $arglist "-k"] + if { $dokill != -1 } { + set ksite $i + break + } + } + return $ksite + } + if { $cfgtype == "random" } { + set k { 0 0 0 1 1 1 0 1 1 0 } + set len [expr [llength $k] - 1] + set i [berkdb random_int 0 $len] + if { [lindex $k $i] == 1 } { + set ksite [berkdb random_int 1 $num_sites] + } else { + set ksite 0 + } + return $ksite + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return 0 + } else { + error "Get_kill: Invalid config type $cfgtype" + } +} + +# +# Use peers or only the master for requests? 25%/75% (use a peer 25% +# of the time and master 75%) +# +proc get_peers { cfgtype } { + if { $cfgtype == "random" } { + set peer { 0 0 0 1 } + set len [expr [llength $peer] - 1] + set i [berkdb random_int 0 $len] + return [lindex $peer $i] + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return 0 + } +} + +# +# Start with a master or all clients? 25%/75% (use a master 25% +# of the time and have all clients 75%) +# +proc get_usemaster { cfgtype } { + if { $cfgtype == "random" } { + set mst { 1 0 0 0 } + set len [expr [llength $mst] - 1] + set i [berkdb random_int 0 $len] + return [lindex $mst $i] + } + if { $cfgtype == "basic0" } { + return 1 + } + if { $cfgtype == "basic1" } { + return 0 + } +} + +# +# If we use a master, which site? This proc will return +# the site number of the mastersite, or it will return +# 0 if no site should start as master. Sites are numbered +# starting at 1. +# +proc get_mastersite { cfgtype usemaster nsites } { + if { $usemaster == 0 } { + return 0 + } + if { $cfgtype == "random" } { + return [berkdb random_int 1 $nsites] + } + if { $cfgtype == "basic0" } { + return 1 + } + if { $cfgtype == "basic1" } { + return 0 + } +} + +# +# This is the number of worker threads performing the workload. +# This is not the number of message processing threads. +# +# Scale back the number of worker threads if leases are in use. +# The timing with leases can be fairly sensitive and since all sites +# run on the local machine, too many workers on every site can +# overwhelm the system, causing lost messages and delays that make +# the tests fail. Rather than try to tweak timeouts, just reduce +# the workloads a bit. 
+# +proc get_workers { cfgtype lease } { + if { $cfgtype == "random" } { + if { $lease } { + return [berkdb random_int 2 4] + } else { + return [berkdb random_int 2 8] + } + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return 5 + } +} + +proc get_dbtype { cfgtype } { + if { $cfgtype == "random" } { + # + # 50% btree, 25% queue 12.5% hash 12.5% recno + # We favor queue only because there is special handling + # for queue in internal init. + # +# set methods {btree btree btree btree queue queue hash recno} + set methods {btree btree btree btree hash recno} + set len [expr [llength $methods] - 1] + set i [berkdb random_int 0 $len] + return [lindex $methods $i] + } + if { $cfgtype == "basic0" || $cfgtype == "basic1" } { + return btree + } +} + +proc get_runtime { cfgtype } { + if { $cfgtype == "random" } { + return [berkdb random_int 100 500] + } + if { $cfgtype == "basic0" } { + return 100 + } + if { $cfgtype == "basic1" } { + return 150 + } +} + +proc get_rport { baseport i num_sites cfgtype} { + global rporttype + + if { $cfgtype == "random" && $rporttype == "NULL" } { + # + # The circular comm choices seem problematic. + # Remove them for now. + # +# set types {backcirc forwcirc full onesite} + set types {full onesite} + set len [expr [llength $types] - 1] + set rindex [berkdb random_int 0 $len] + set rporttype [lindex $types $rindex] + } + if { $cfgtype == "basic0" } { + set rporttype onesite + } + if { $cfgtype == "basic1" } { + set rporttype full + } + # + # This produces a circular knowledge ring. Either forward + # or backward. In the forwcirc, ENV1 knows (via -r) about + # ENV2, ENV2 knows about ENV3, ..., ENVX knows about ENV1. + # + if { $rporttype == "forwcirc" } { + if { $i != $num_sites } { + return [list [expr $baseport + $i + 1]] + } else { + return [list [expr $baseport + 1]] + } + } + if { $rporttype == "backcirc" } { + if { $i != 1 } { + return [list [expr $baseport + $i - 1]] + } else { + return [list [expr $baseport + $num_sites]] + } + } + # + # This produces a configuration where site 1 does not know + # about any other site and every other site knows about site 1. + # + if { $rporttype == "onesite" } { + if { $i == 1 } { + return {} + } else { + return [list [expr $baseport + 1]] + } + } + # + # This produces a fully connected configuration + # + if { $rporttype == "full" } { + set rlist {} + for { set site 1 } { $site <= $num_sites } { incr site } { + if { $site != $i } { + lappend rlist [expr $baseport + $site] + } + } + return $rlist + } +} + +proc parse_runtime { progargs } { + set i [lsearch $progargs "-T"] + set val [lindex $progargs [expr $i + 1]] + return $val +} |
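For reference, the remote-knowledge topologies that get_rport selects (forwcirc, backcirc, onesite, full) can be reproduced outside the harness. The sketch below is a hypothetical standalone re-implementation of the same port arithmetic, not part of the patch; it assumes a 4-site group on baseport 6100 and prints each site's remote port list under three of the policies.

    # demo_rport: illustrative copy of the get_rport arithmetic (assumption:
    # baseport 6100, 4 sites). Local port for site i is baseport + i.
    proc demo_rport { baseport i num_sites rporttype } {
        switch -- $rporttype {
            forwcirc {
                # Circular ring: ENV1 knows ENV2, ..., ENVn knows ENV1.
                if { $i != $num_sites } {
                    return [list [expr {$baseport + $i + 1}]]
                } else {
                    return [list [expr {$baseport + 1}]]
                }
            }
            onesite {
                # Site 1 knows no one; every other site knows only site 1.
                if { $i == 1 } {
                    return {}
                } else {
                    return [list [expr {$baseport + 1}]]
                }
            }
            full {
                # Fully connected: every site knows every other site.
                set rlist {}
                for { set s 1 } { $s <= $num_sites } { incr s } {
                    if { $s != $i } {
                        lappend rlist [expr {$baseport + $s}]
                    }
                }
                return $rlist
            }
        }
    }

    foreach policy { forwcirc onesite full } {
        for { set i 1 } { $i <= 4 } { incr i } {
            puts "$policy: site $i -> [demo_rport 6100 $i 4 $policy]"
        }
    }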