#!/bin/sh

############################################################################
#
# Script for testing block device I/O performance. Running this script on a
# block device that is connected to a remote SCST target device makes it
# possible to test the performance of the transport protocols implemented in
# SCST. This script operates much like iozone but is easier to use.
#
# Copyright (C) 2009 Bart Van Assche <bvanassche@acm.org>.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, version 2
# of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
############################################################################

#########################
# Function definitions  #
#########################

# Print the command-line synopsis followed by a one-line description of every
# option to stdout. The synopsis lists every option accepted by the getopt
# option string "adfhi:jm:M:nrs:t:w".
usage() {
  cat <<EOF
Usage: $0 [-a] [-d] [-f] [-h] [-i <i>] [-j] [-m <l2min>] [-M <l2max>] [-n] [-r] [-s <l2s>] [-t [user@]host] [-w] <dev>
        -a - use asynchronous (buffered) I/O.
        -d - use direct (non-buffered) I/O.
        -f - force -- run the test even if there is still data present.
        -h - display this help text.
        -i - number of times each test is iterated.
        -j - use fio instead of dd for benchmarking.
        -m <l2min> - log2 of the smallest block size to use.
        -M <l2max> - log2 of the largest block size to use.
        -n - do not verify the data on <dev> before overwriting it.
        -r - only perform the read test.
        -s - logarithm base two of the I/O size.
        -t - username and hostname of the target to drop the caches on.
        -w - only perform the write test.
        <dev> - block device to run the I/O performance test on.
EOF
}

# Compute two raised to the power $1 and print the result on stdout.
#
# $1 must be a decimal integer between 0 and 62 (larger results do not fit in
# signed 64-bit shell arithmetic). For any other argument an error message is
# written to stderr, nothing is written to stdout and the exit status is 1.
# The computation is an in-shell loop, so no subshells are forked.
pow2() {
  local exponent="$1"
  local result=1
  local i=0

  case "$exponent" in
    '' | *[!0-9]* | ???*)
      # Empty, not purely digits, or three or more digits (always > 62).
      echo "pow2: exponent must be an integer between 0 and 62: '$exponent'" >&2
      return 1
      ;;
  esac
  if [ "$exponent" -gt 62 ]; then
    echo "pow2: exponent must be an integer between 0 and 62: '$exponent'" >&2
    return 1
  fi

  # Count upwards and compare with test(1), which parses its operands as
  # decimal numbers; this way a leading zero is not mistaken for octal.
  while [ "$i" -lt "$exponent" ]; do
    result=$((result * 2))
    i=$((i + 1))
  done
  echo "$result"
}

# Succeed (exit status zero) if and only if the current user is allowed to
# write to /proc/sys/vm/drop_caches, i.e. is able to drop the VM caches.
can_drop_cache() {
  test -w /proc/sys/vm/drop_caches
}

# Flush dirty data with sync and, if permitted, write 3 to
# /proc/sys/vm/drop_caches on the local system. If ${target_login} is not
# empty, run the same sequence on that host via ssh.
drop_caches() {
  sync
  if can_drop_cache; then
    echo 3 > /proc/sys/vm/drop_caches
  fi
  if [ -n "${target_login}" ]; then
    # -n keeps ssh from reading stdin, which may be a pipeline of the caller.
    # The login is quoted so that it is always passed to ssh as one word.
    ssh -n "${target_login}" 'sync; if [ -w /proc/sys/vm/drop_caches ]; then echo 3 > /proc/sys/vm/drop_caches; fi'
  fi
}

# Set the scaling governor, minimum and maximum frequency of every CPU to $1,
# $2 and $3 respectively. When no arguments have been provided, disable
# frequency scaling by selecting the "userspace" governor and by setting both
# the minimum and the maximum frequency to cpu0's cpuinfo_max_freq.
#
# The settings are only written when cpu0's scaling_governor is writable.
# Prints "<governor> <min_freq> <max_freq>" as read from cpu0 *before* any
# change, so that the output can be passed back as arguments later on to
# restore the previous state. Prints nothing when the cpufreq sysfs
# directories do not exist.
set_frequency_scaling() {
  local syscpu=/sys/devices/system/cpu
  local cpu0="$syscpu/cpu0/cpufreq"
  local governor cpuinfo_max_freq scaling_min_freq scaling_max_freq d

  if [ ! -e "$syscpu/cpufreq" ] || [ ! -e "$cpu0" ]; then
    return 0
  fi

  # cat is used instead of the bash-only $(<file) construct, which silently
  # expands to an empty string in other /bin/sh implementations.
  governor=$(cat "$cpu0/scaling_governor")
  cpuinfo_max_freq=$(cat "$cpu0/cpuinfo_max_freq")
  scaling_min_freq=$(cat "$cpu0/scaling_min_freq")
  scaling_max_freq=$(cat "$cpu0/scaling_max_freq")

  if [ -w "$cpu0/scaling_governor" ]; then
    for d in "$syscpu"/cpu*/cpufreq
    do
      echo "${1:-userspace}"         >"$d/scaling_governor"
      echo "${2:-$cpuinfo_max_freq}" >"$d/scaling_min_freq"
      echo "${3:-$cpuinfo_max_freq}" >"$d/scaling_max_freq"
    done
  fi
  printf '%s %s %s\n' "$governor" "$scaling_min_freq" "$scaling_max_freq"
}

# Read elapsed times in seconds from stdin, one number per line, and echo each
# of them using printf format $1. After the last line, also print (with the
# same format) the average transfer rate in MB/s, its standard deviation in
# MB/s and the number of I/O operations per second. $2 is the total I/O size
# in bytes and $3 the block transfer size in bytes. A time of zero is echoed
# but is left out of the statistics.
echo_and_calc_avg() {
  awk -v fmt="$1" -v iosize="$2" -v blocksize="$3" '
    BEGIN {
      bytes_per_mb = 1024 * 1024
    }

    {
      # Zero means "no measurement"; it would also cause a division by zero.
      if ($1 != 0) {
        n++
        sum += iosize / $1
        sumsq += iosize * iosize / ($1 * $1)
      }
      printf fmt, $1
    }

    END {
      if (n > 0) {
        variance = sumsq / n - sum * sum / n / n
        mean = sum / n
      } else {
        variance = 0
        mean = 0
      }
      # Rounding errors can make the variance slightly negative.
      if (variance > 0)
        stddev = sqrt(variance)
      else
        stddev = 0
      iops = mean / blocksize
      printf fmt fmt fmt, mean / bytes_per_mb, stddev / bytes_per_mb, iops
    }'
}

# Write $2 blocks of $1 bytes each to ${device} and print the elapsed time in
# seconds on stdout.
#
# Uses fio when ${use_fio} is "true" and dd otherwise; ${iotype} selects
# between direct and buffered I/O. Exactly one line is always printed: when
# no elapsed time could be extracted from the tool output (e.g. because the
# tool failed), "0" is printed, which echo_and_calc_avg excludes from its
# statistics.
time_write() {
  local fio_flags dd_oflags elapsed

  if [ "${use_fio}" = "true" ]; then
    if [ "${iotype}" = "direct" ]; then
      fio_flags="--direct=1"
    else
      fio_flags="--direct=0"
    fi
    # Older fio versions report "runt= <n>msec" per job, newer versions
    # report "run=<min>-<max>msec" per group; accept either and use the
    # first match only.
    elapsed=$(fio --rw=write --filename="${device}" --bs="$1" \
                  --size="$(($1 * $2))" --ioengine=psync --end_fsync=1 \
                  --invalidate=1 "${fio_flags}" --name=writeperftest \
              | sed -n -e 's/.*runt= *\([0-9]*\)msec.*/\1/p' \
                       -e 's/.*run=\([0-9]*\)-[0-9]*msec.*/\1/p' \
              | awk 'NR == 1 { print $1 / 1000 }')
  else
    drop_caches
    if [ "${iotype}" = "direct" ]; then
      dd_oflags="oflag=direct conv=notrunc"
    else
      dd_oflags=""
    fi
    # LC_ALL=C keeps the dd summary line in the format the sed expression
    # expects. ${dd_oflags} is unquoted on purpose: it holds zero or more
    # separate dd operands.
    elapsed=$( { LC_ALL=C dd if=/dev/zero of="${device}" bs="$1" count="$2" \
                   ${dd_oflags} 2>&1; sync; } \
              | sed -n -e 's/.* \([0-9.]*\) s[econds]*,.*/\1/p' \
              | awk 'NR == 1')
  fi
  echo "${elapsed:-0}"
}

# Read $2 blocks of $1 bytes each from ${device} and print the elapsed time
# in seconds on stdout.
#
# Uses fio when ${use_fio} is "true" and dd otherwise; ${iotype} selects
# between direct and buffered I/O. Exactly one line is always printed: when
# no elapsed time could be extracted from the tool output (e.g. because the
# tool failed), "0" is printed, which echo_and_calc_avg excludes from its
# statistics.
time_read() {
  local fio_flags dd_iflags elapsed

  if [ "${use_fio}" = "true" ]; then
    if [ "${iotype}" = "direct" ]; then
      fio_flags="--direct=1"
    else
      fio_flags="--direct=0"
    fi
    # Older fio versions report "runt= <n>msec" per job, newer versions
    # report "run=<min>-<max>msec" per group; accept either and use the
    # first match only.
    elapsed=$(fio --rw=read --filename="${device}" --bs="$1" \
                  --size="$(($1 * $2))" --ioengine=psync --end_fsync=1 \
                  --invalidate=1 "${fio_flags}" --name=readperftest \
              | sed -n -e 's/.*runt= *\([0-9]*\)msec.*/\1/p' \
                       -e 's/.*run=\([0-9]*\)-[0-9]*msec.*/\1/p' \
              | awk 'NR == 1 { print $1 / 1000 }')
  else
    drop_caches
    if [ "${iotype}" = "direct" ]; then
      dd_iflags="iflag=direct"
    else
      dd_iflags=""
    fi
    # LC_ALL=C keeps the dd summary line in the format the sed expression
    # expects. ${dd_iflags} is unquoted on purpose: it holds zero or more
    # separate dd operands.
    elapsed=$(LC_ALL=C dd if="${device}" of=/dev/null bs="$1" count="$2" \
                ${dd_iflags} 2>&1 \
              | sed -n -e 's/.* \([0-9.]*\) s[econds]*,.*/\1/p' \
              | awk 'NR == 1')
  fi
  echo "${elapsed:-0}"
}


#########################
# Default settings      #
#########################

# I/O mode ("direct" or "buffered") and benchmark tool (dd unless use_fio is
# "true").
iotype="direct"
use_fio="false"

# Which tests to run and how many times each of them is repeated.
perform_read_test="true"
perform_write_test="true"
iterations=3

# Sizes, expressed as the base-two logarithm of a number of bytes.
log2_io_size=30       # 1 GB
log2_min_blocksize=9  # 512 bytes
log2_max_blocksize=26 # 64 MB

# Safeguards against overwriting a device that still contains data.
force="false"
verify_device_data="true"

# [user@]host on which the caches are dropped too; empty means none.
target_login=""


#########################
# Argument processing   #
#########################

# Normalize the command line with getopt(1). A non-zero exit status means
# that an unknown option or a missing option argument was encountered; stop
# in that case instead of silently running a (possibly destructive) test
# with that option dropped.
if ! parsed_args="$(/usr/bin/getopt "adfhi:jm:M:nrs:t:w" "$@")"; then
  usage
  exit 1
fi
# The getopt output is a whitespace-separated word list and hence must be
# split by the shell; pathname expansion is disabled while doing so to keep
# glob characters in arguments intact.
set -f
set -- ${parsed_args}
set +f
while [ "$1" != "${1#-}" ]
do
  case "$1" in
    '-a') iotype="buffered"; shift;;
    '-d') iotype="direct"; shift;;
    '-f') force="true"; shift;;
    '-h') usage; exit 0;;
    '-i') iterations="$2"; shift; shift;;
    '-j') use_fio=true; shift;;
    '-m') log2_min_blocksize="$2"; shift; shift;;
    '-M') log2_max_blocksize="$2"; shift; shift;;
    '-n') verify_device_data="false"; shift;;
    '-r') perform_write_test="false"; shift;;
    '-s') log2_io_size="$2"; shift; shift;;
    '-t') target_login="$2"; shift; shift;;
    '-w') perform_read_test="false"; shift;;
    '--') shift; break;;   # end of options; what follows is the device
    *)    usage; exit 1;;
  esac
done

# The iteration count and the log2 sizes are used in integer comparisons and
# in shell arithmetic, so reject anything but non-negative decimal integers.
for numeric_setting in "${iterations}" "${log2_io_size}" \
                       "${log2_min_blocksize}" "${log2_max_blocksize}"
do
  case "${numeric_setting}" in
    '' | *[!0-9]*)
      echo "Error: '${numeric_setting}' is not a non-negative integer." >&2
      usage
      exit 1
      ;;
  esac
done

if [ "$#" != 1 ]; then
  usage
  exit 1
fi

device="$1"


####################
# Performance test #
####################

# Sanity checks that are performed before any I/O is started. Error and
# warning messages are written to stderr such that stdout only carries the
# result table.

if [ ! -e "${device}" ]; then
  echo "Error: device ${device} does not exist." >&2
  exit 1
fi

if [ "${perform_write_test}" = "true" ] && [ ! -w "${device}" ]; then
  echo "Error: device ${device} is not writeable." >&2
  exit 1
fi

io_size_bytes="$(pow2 "${log2_io_size}")"

# The sysfs "size" attribute is expressed in 512-byte sectors. The check is
# skipped when that attribute cannot be read, e.g. because ${device} is a
# symbolic link whose name differs from the kernel device name.
device_size_file="/sys/class/block/$(basename "${device}")/size"
if [ "${perform_read_test}" = "true" ] && [ -r "${device_size_file}" ]; then
  device_sectors="$(cat "${device_size_file}")"
  if [ "$((device_sectors * 512))" -lt "${io_size_bytes}" ]; then
    echo "Error: device ${device} contains less than ${io_size_bytes} bytes." >&2
    exit 1
  fi
fi

# Refuse to overwrite a device whose first ${io_size_bytes} bytes are not all
# zero, unless the check has been disabled via verify_device_data or force.
if [ "${perform_write_test}" = "true" ] && [ "${verify_device_data}" = "true" ] \
   && [ "${force}" != "true" ] \
   && ! cmp -s -n "${io_size_bytes}" "${device}" /dev/zero
then
  echo "Error: device ${device} still contains data." >&2
  exit 1
fi

if ! can_drop_cache; then
  {
    echo ""
    echo "WARNING: insufficient privileges to drop the file system cache"
    echo "-- results will be unreliable."
    echo ""
  } >&2
fi

# Disable frequency scaling for the duration of the benchmark. The settings
# that were in effect are saved in frequency_scaling_params, and an EXIT trap
# puts them back such that they are also restored when the run is interrupted
# or terminated early.
if [ -e /sys/devices/system/cpu/cpu0/cpufreq ]; then
  if [ -w /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
    frequency_scaling_params="$(set_frequency_scaling)"
    if [ -n "${frequency_scaling_params}" ]; then
      # ${frequency_scaling_params} is unquoted on purpose: it holds three
      # words (governor, minimum frequency and maximum frequency). Restoring
      # the same settings more than once is harmless.
      trap 'set_frequency_scaling ${frequency_scaling_params} >/dev/null' EXIT
      # Turn termination signals into a regular exit so the EXIT trap runs.
      trap 'exit 1' HUP INT TERM
    fi
  else
    {
      echo ""
      echo "WARNING: insufficient privileges to disable CPU frequency scaling"
      echo "-- results will be unreliable."
      echo ""
    } >&2
  fi
fi

# Print the text $1 once per test iteration, right-aligned in a column of
# eight characters followed by a space. Uses the global counter i.
print_iteration_cells() {
  i=0
  while [ $i -lt ${iterations} ]
  do
    printf "%8s " "$1"
    i=$((i+1))
  done
}

# Header, line 1: column titles for the write (W) and the read (R) results.
printf "%9s " blocksize
print_iteration_cells "W"
printf "%8s %8s %8s " "W(avg," "W(std," "W"
print_iteration_cells "R"
printf "%8s %8s %8s\n" "R(avg," "R(std," "R"

# Header, line 2: the unit of each column.
printf "%9s " "(bytes)"
print_iteration_cells "(s)"
printf "%8s %8s %8s " "MB/s)" "MB/s)" "(IOPS)"
print_iteration_cells "(s)"
printf "%8s %8s %8s\n" "MB/s)" "MB/s)" "(IOPS)"

# Measurements: iterate over the block sizes from the largest down to the
# smallest and print one table row per block size. Block sizes larger than
# the total I/O size are skipped. A disabled test contributes a time of zero
# for each iteration, which keeps the columns of the table aligned.
log2_blocksize=${log2_max_blocksize}
while [ $log2_blocksize -ge $log2_min_blocksize ]
do
  if ! [ $log2_blocksize -gt $log2_io_size ]; then
    iosize=$(pow2 $log2_io_size)
    bs=$(pow2 $log2_blocksize)
    count=$(pow2 $(($log2_io_size - $log2_blocksize)))
    printf "%9d " ${bs}

    # First all write iterations, then all read iterations.
    for direction in write read
    do
      if [ "${direction}" = "write" ]; then
        run_test="${perform_write_test}"
      else
        run_test="${perform_read_test}"
      fi

      i=0
      while [ $i -lt ${iterations} ]
      do
        if [ "${run_test}" != "true" ]; then
          echo " 0 s,"
        elif [ "${direction}" = "write" ]; then
          time_write ${bs} ${count}
        else
          time_read ${bs} ${count}
        fi
        i=$((i+1))
      done | echo_and_calc_avg "%8.3f " ${iosize} ${bs}
    done
    printf "\n"
  fi
  log2_blocksize=$((log2_blocksize - 1))
done

# Restore frequency scaling, but only if settings have been saved. Calling
# set_frequency_scaling without arguments means "disable frequency scaling",
# which must not happen here. ${frequency_scaling_params} is unquoted on
# purpose: it holds the governor, the minimum and the maximum frequency as
# three separate words.
if [ -n "${frequency_scaling_params}" ]; then
  set_frequency_scaling ${frequency_scaling_params} >/dev/null
fi
