Comparison of Techniques used in Q3B

Table of Contents

1 CSV files

All statistics are computed from the BenchExec CSV file results.csv.

1.1 Convert to a correct CSV

First, we need to replace tabs by commas:

sed -i 's/\t/,/g' results.csv

And then we remove the standard BenchExec header rows and replace them by more readable ones. The header is located in the file results_header.csv.

cat results_header.csv > results.out.csv
tail -n +4 results.csv >> results.out.csv

1.2 Split into benchmark sets

We split benchmark name into family and name.

sed -i 's#/#,##' results.out.csv
sed -i 's#20170501-Heizmann#heizmann#' results.out.csv
sed -i 's#UltimateAutomizer#ua#' results.out.csv

2 R environment

2.1 Load necessary libraries

library(dplyr)
library(ggplot2)
library(scales)
library(colorspace)
library(RColorBrewer)
library(xtable)
options("scipen"=100, "digits"=0)

2.2 Load all CSV files

res = list()
res <- read.csv("results.out.csv", header=TRUE, stringsAsFactors=FALSE)
configurations = c('none', 'noneUnconstrained', 'noneGoalUnconstrained', 'variables', 'variablesUnconstrained', 'variablesGoalUnconstrained', 'both', 'bothUnconstrained', 'bothGoalUnconstrained')
labels = c(none = 'none', noneUnconstrained = 'none-u', noneGoalUnconstrained='none-gu', variables='approx', variablesUnconstrained='approx-u', variablesGoalUnconstrained='approx-gu', both='full', bothUnconstrained='full-u', bothGoalUnconstrained='full-gu')

We need to modify all times for results other than sat or unsat to have the maximal values of walltime and CPU-time.

timeout <- 1200

res[['trivial']] <- TRUE

for (c in configurations)
{
  res[[paste(c, 'solved', sep='.')]] <- res[[paste(c, 'result', sep='.')]] == "sat" | res[[paste(c, 'result', sep='.')]] == "unsat"
  res[[paste(c, 'cputime', sep='.')]][!res[[paste(c, 'solved', sep='.')]]] <- timeout
  res[[paste(c, 'walltime', sep='.')]][!res[[paste(c, 'solved', sep='.')]]] <- timeout
  res[['trivial']] <- res[['trivial']] & res[[paste(c, 'cputime', sep='.')]] < 0.1
}

2.3 Check correctness of results

We need to check that no two solvers disagreed on the status of a benchmark (i.e. sat vs unsat). The result of the following command should be empty!

res %>%
    mutate(
    sat = ((none.result == "sat") | (noneUnconstrained.result == "sat") | (noneGoalUnconstrained.result == "sat") |
    (variables.result == "sat") | (variablesUnconstrained.result == "sat") | (variablesGoalUnconstrained.result == "sat") |
    (both.result == "sat") | (bothUnconstrained.result == "sat") | (bothGoalUnconstrained.result == "sat")),
    unsat = ((none.result == "unsat") | (noneUnconstrained.result == "unsat") | (noneGoalUnconstrained.result == "unsat") |
    (variables.result == "unsat") | (variablesUnconstrained.result == "unsat") | (variablesGoalUnconstrained.result == "unsat") |
    (both.result == "unsat") | (bothUnconstrained.result == "unsat") | (bothGoalUnconstrained.result == "unsat")),
    ) %>%
    filter(sat & unsat) %>%
    select(benchmark, family)
benchmark family

3 Statistics

3.1 Numbers of solved formulas

3.1.1 Helper function to generate tables

getTable <- function()
{
  counts <- list()

  for (c in configurations)
  {
    counts[[c]] <- count_(res, paste(c, 'result', sep='.'))
    colnames(counts[[c]]) <- c('result', c)
  }

  return(Reduce(function(...) merge(..., all=TRUE), counts))
}

3.1.2 Total

getTable()
result none noneUnconstrained noneGoalUnconstrained variables variablesUnconstrained variablesGoalUnconstrained both bothUnconstrained bothGoalUnconstrained
OUT OF MEMORY nil 19 20 nil nil nil nil nil nil
sat 319 331 331 596 600 600 616 617 617
TIMEOUT (timeout) 785 616 581 460 330 296 287 181 146
unsat 4647 4785 4819 4695 4821 4855 4848 4953 4988

3.2 Solved formulas in benchmark families

table <- res %>%
    group_by(family) %>%
    summarise(total = sum(n()),
              none = sum(none.solved),
              variables = sum(variables.solved),
              both = sum(both.solved),
              bothUnconstrained = sum(bothUnconstrained.solved),
              variablesUnconstrained = sum(variablesUnconstrained.solved))

table <- rbind(table, data.frame(family='Total',
   total = sum(table$total),
   none = sum(table$none),
   variables = sum(table$variables),
   both = sum(table$both),
   bothUnconstrained = sum(table$bothUnconstrained),
   variablesUnconstrained = sum(table$variablesUnconstrained)))

table %>% select(family, total, none, variables, variablesUnconstrained, both, bothUnconstrained)
family total none variables variablesUnconstrained both bothUnconstrained
2017-Preiner-keymaera 4035 3788 3886 3890 4009 4009
2017-Preiner-psyco 194 118 179 180 181 182
2017-Preiner-scholl-smt08 374 172 270 288 311 317
2017-Preiner-tptp 73 67 73 73 73 73
2017-Preiner-ua 153 153 153 153 153 153
2018-Preiner-cav18 600 435 448 531 453 530
heizmann-ua 131 103 98 122 102 124
wintersteiger 191 130 184 184 182 182
Total 5751 4966 5291 5421 5464 5570

3.2.1 Only sat

satTable <- res %>%
    filter(none.result == 'sat' | variables.result == 'sat' | variablesUnconstrained.result == 'sat' | both.result == 'sat' | bothUnconstrained.result == 'sat' | bothGoalUnconstrained.result == 'sat') %>%
    group_by(family) %>%
    summarise(total = sum(n()),
              result = '',
              none = sum(none.solved),
              variables = sum(variables.solved),
              both = sum(both.solved),
              bothUnconstrained = sum(bothUnconstrained.solved),
              bothGoalUnconstrained = sum(bothGoalUnconstrained.solved),
              variablesUnconstrained = sum(variablesUnconstrained.solved),
              )

satTable <- rbind(satTable, data.frame(family='Total SAT',
   result = 'SAT',
   total = sum(satTable$total),
   none = sum(satTable$none),
   variables = sum(satTable$variables),
   both = sum(satTable$both),
   bothUnconstrained = sum(satTable$bothUnconstrained),
   bothGoalUnconstrained = sum(satTable$bothGoalUnconstrained),
   variablesUnconstrained = sum(satTable$variablesUnconstrained)
   ))

satTable %>% select(result, family, total, none, variables, variablesUnconstrained, both, bothUnconstrained, bothGoalUnconstrained)
result family total none variables variablesUnconstrained both bothUnconstrained bothGoalUnconstrained
  2017-Preiner-keymaera 104 7 103 104 104 104 104
  2017-Preiner-psyco 126 74 124 124 123 123 123
  2017-Preiner-scholl-smt08 248 136 226 229 246 247 247
  2017-Preiner-tptp 17 13 17 17 17 17 17
  2017-Preiner-ua 16 16 16 16 16 16 16
  heizmann-ua 20 18 18 18 20 20 20
  wintersteiger 92 55 92 92 90 90 90
SAT Total SAT 623 319 596 600 616 617 617

3.2.2 Only unsat

unsatTable <- res %>%
    filter(none.result == 'unsat' | variables.result == 'unsat' | variablesUnconstrained.result == 'unsat' | both.result == 'unsat' | bothUnconstrained.result == 'unsat' | bothGoalUnconstrained.result == 'unsat') %>%
    group_by(family) %>%
    summarise(total = sum(n()),
              result = '',
              none = sum(none.solved),
              variables = sum(variables.solved),
              both = sum(both.solved),
              bothUnconstrained = sum(bothUnconstrained.solved),
              bothGoalUnconstrained = sum(bothGoalUnconstrained.solved),
              variablesUnconstrained = sum(variablesUnconstrained.solved))

unsatTable <- rbind(unsatTable, data.frame(family='Total UNSAT',
   result = 'UNSAT',
   total = sum(unsatTable$total),
   none = sum(unsatTable$none),
   variables = sum(unsatTable$variables),
   both = sum(unsatTable$both),
   bothUnconstrained = sum(unsatTable$bothUnconstrained),
   bothGoalUnconstrained = sum(unsatTable$bothGoalUnconstrained),
   variablesUnconstrained = sum(unsatTable$variablesUnconstrained)))

unsatTable %>% select(result, family, total, none, variables, variablesUnconstrained, both, bothGoalUnconstrained)
result family total none variables variablesUnconstrained both bothGoalUnconstrained
  2017-Preiner-keymaera 3905 3781 3783 3786 3905 3905
  2017-Preiner-psyco 59 44 55 56 58 59
  2017-Preiner-scholl-smt08 71 36 44 59 65 70
  2017-Preiner-tptp 56 54 56 56 56 56
  2017-Preiner-ua 137 137 137 137 137 137
  2018-Preiner-cav18 565 435 448 531 453 565
  heizmann-ua 107 85 80 104 82 104
  wintersteiger 93 75 92 92 92 92
UNSAT Total UNSAT 4993 4647 4695 4821 4848 4988

3.2.3 Both

bothTable <- rbind(unsatTable, satTable)
bothTable <- rbind(bothTable, data.frame(family='Total',
     result = '',
     total = sum(res %>% summarize(n())),
     none = sum(res$none.solved),
     variables = sum(res$variables.solved),
     variablesUnconstrained = sum(res$variablesUnconstrained.solved),
     both = sum(res$both.solved),
     bothUnconstrained = sum(res$bothUnconstrained.solved),
     bothGoalUnconstrained = sum(res$bothGoalUnconstrained.solved)))

bothTable <- bothTable %>% transmute(
     Result = result,
     Family = family,
     Total = total,
     none = none,
     approx = variables,
     approxu = variablesUnconstrained,
     full = both,
     fullu = bothUnconstrained,
     fullgu = bothGoalUnconstrained)

xt <- xtable(bothTable, type = "latex", label="tbl:experiments:q3bSolved",
             caption = "Numbers of benchmarks solved by the individual Q3B configurations divided by the satisfiability/unsatisfiability and the benchmark family.")
align(xt) <- c('l', 'l', 'l', 'r', 'r', 'r', 'r', 'r', 'r', 'r')
print(xt,
      file = "Tables/solved.tex", rotate.colnames=TRUE, include.rownames=FALSE, booktabs=TRUE, hline.after = c(-1, 0, 9, nrow(bothTable)-1, nrow(bothTable)), table.placement="tbp",
      sanitize.text.function=function(x) x)

bothTable
Result Family Total none approx approxu full fullu fullgu
  2017-Preiner-keymaera 3905 3781 3783 3786 3905 3905 3905
  2017-Preiner-psyco 59 44 55 56 58 59 59
  2017-Preiner-scholl-smt08 71 36 44 59 65 70 70
  2017-Preiner-tptp 56 54 56 56 56 56 56
  2017-Preiner-ua 137 137 137 137 137 137 137
  2018-Preiner-cav18 565 435 448 531 453 530 565
  heizmann-ua 107 85 80 104 82 104 104
  wintersteiger 93 75 92 92 92 92 92
UNSAT Total UNSAT 4993 4647 4695 4821 4848 4953 4988
  2017-Preiner-keymaera 104 7 103 104 104 104 104
  2017-Preiner-psyco 126 74 124 124 123 123 123
  2017-Preiner-scholl-smt08 248 136 226 229 246 247 247
  2017-Preiner-tptp 17 13 17 17 17 17 17
  2017-Preiner-ua 16 16 16 16 16 16 16
  heizmann-ua 20 18 18 18 20 20 20
  wintersteiger 92 55 92 92 90 90 90
SAT Total SAT 623 319 596 600 616 617 617
  Total 5751 4966 5291 5421 5464 5570 5605

3.3 Cross comparison

3.3.1 Helper function to generate tables

First we need a function which for given configurations computes a number of benchmarks that the first configuration has solved, but the second one has not.

firstIsBetter <- function(c1, c2)
{
  c1Solved <- res[[paste(c1, 'solved', sep='.')]]
  c2Solved <- res[[paste(c2, 'solved', sep='.')]]

  onlyC1Solved <- c1Solved & !(c2Solved)
  return(onlyC1Solved)
}

formulasFirstIsBetter <- function(c1, c2)
{
  return(res[firstIsBetter(c1, c2)])
}

compareConfigurations <- function(c1, c2)
{
  return(sum(firstIsBetter(c1, c2)))
}

We can use this function to generate the cross table.

crossTable <- function()
{
  results <- c()
  for (c1 in configurations)
  {
    for (c2 in configurations)
    {
      results <- c(results, compareConfigurations(c1, c2))
    }
  }

  results.table <- matrix(results, ncol=9,byrow=TRUE)
  colnames(results.table) <- configurations
  rownames(results.table) <- configurations
  out <- as.data.frame(results.table)
  return(out)
}

3.3.2 Results

        table <- crossTable()
        #table <- merge(table, uniqSolvedSum, by='row.names')
#        colnames(table) <- c("", "Boolector", "CVC4", "Q3B", "Z3", "Uniquely solved")
        table
  none noneUnconstrained noneGoalUnconstrained variables variablesUnconstrained variablesGoalUnconstrained both bothUnconstrained bothGoalUnconstrained
none 0 2 2 13 9 8 9 8 6
noneUnconstrained 152 0 0 134 7 6 112 7 5
noneGoalUnconstrained 186 34 0 168 41 7 146 41 6
variables 338 309 309 0 0 0 6 6 5
variablesUnconstrained 464 312 312 130 0 0 111 6 5
variablesGoalUnconstrained 497 345 312 164 34 0 145 40 5
both 507 460 460 179 154 154 0 0 0
bothUnconstrained 612 461 461 285 155 155 106 0 0
bothGoalUnconstrained 645 494 461 319 189 155 141 35 0
xt <- xtable(table, type = "latex", label="tbl:cross",
             caption = "The table shows cross-comparison of solved benchmarks by all pairs of the solvers. Each cell shows the number of benchmarks that were solved by the solver in the corresponding row, but not by the solver by the corresponding column. The column \\emph{Uniquely solved} shows the number of benchmarks that were solved only by the given solver.")
align(xt) <- "llrrrr|r"
print(xt,
      file = "Tables/cross.tex", include.rownames=FALSE, booktabs=TRUE, table.placement="tbp")

4 Plots

4.1 Unsolved benchmarks

plotUnsolved <- function(cs)
{
    cs <- rev(cs)
    unsolved = data.frame(family=character(),
                          configuration=character(),
                          stringsAsFactors=TRUE)
    for (c in cs)
    {
        cUnsolved <- res[["family"]][!res[[paste(c, 'solved', sep='.')]]]
        cUnsolved <- data.frame(
            family = cUnsolved,
            configuration = labels[c])
        unsolved <- rbind(unsolved, cUnsolved)
    }

    print(levels(unsolved$family))
    unsolved$family<- factor(unsolved$family, levels=sort(levels(unsolved$family)))
    print(levels(unsolved$family))

    chart.data <- unsolved %>%
                    arrange(family) %>%
                    group_by(family, configuration) %>%
                    summarize(freq = n()) %>%
                    group_by(configuration) %>%
                    mutate(pos = cumsum(freq) - (0.5 * freq))

    ggplot(data = chart.data, aes(x = configuration, y = freq, fill = family)) +
        geom_bar(stat="identity", position=position_stack(reverse=TRUE)) +
        coord_flip() +
        geom_text(data=chart.data, aes(x = configuration, y = pos, label = ifelse(freq < 20, "", freq)), size=3) +
        labs(y = "Number of unsolved benchmarks (less is better)", x = NULL, fill = "Benchmark family") +
        scale_fill_brewer(palette = "Set2") +
        theme(legend.position="bottom") +
        guides(fill = guide_legend(ncol = 3))
}
plotUnsolved(c('none', 'noneUnconstrained', 'noneGoalUnconstrained', 'variables', 'variablesUnconstrained', 'variablesGoalUnconstrained', 'both', 'bothUnconstrained', 'bothGoalUnconstrained'))

Sorry, your browser does not support SVG.

    pdf(file="Figures/unsolved.pdf",width=7,height=8); tryCatch({
plotUnsolved(c('none', 'noneUnconstrained', 'noneGoalUnconstrained', 'variables', 'variablesUnconstrained', 'variablesGoalUnconstrained', 'both', 'bothUnconstrained', 'bothGoalUnconstrained'))
    },error=function(e){plot(x=-1:1, y=-1:1, type='n', xlab='', ylab='', axes=FALSE); text(x=0, y=0, labels=e$message, col='red'); paste('ERROR', e$message, sep=' : ')}); dev.off()

Author: Martin Jonáš

Created: 2019-06-12 Wed 10:12