#!/usr/bin/env bash
set -e

# Print the command-line help and terminate the script.
# Outputs: the help text on stderr (nothing on stdout).
# Exits:   always, with status 1.
usage() {
    # Quoted delimiter: the text is emitted verbatim, so the "$ c++expr" example
    # lines can never be subject to parameter expansion.
    cat <<'EOF' >&2
USAGE: c++expr [-v] [-k] [-c CXX | -C | -I] [-B BUILD_DIR] [-i INCLUDE] [-l LIB] [-b STEPS] [-j THREADS] [-t TESTS] [-o FILE] [-O CXX_OPTS...] [-g 'GLOBAL CODE'] 'MAIN CODE'
OPTIONS:
    -v              trace the commands executed by this script (set -x)
    -c CXX          use specified c++ compiler
    -C              use cmake
    -k              keep generated worktree
    -I              integrate into ClickHouse build tree in current directory
    -i INC          add #include <INC>
    -l LIB          link against LIB (only for -I or -C)
    -b STEPS_NUM    make program to benchmark specified code snippet and run tests with STEPS_NUM each
    -b perf-top     run infinite benchmark and show perf top
    -j THREADS      Only make sense with -b, run specified number of std::thread in parallel
    -B build-dir    build directory for -I (default: "build")
    -t TESTS_NUM    make program to benchmark specified code snippet and run TESTS_NUM tests
    -o FILE         do not run, just save binary executable file
    -O CXX_OPTS     forward option compiler (e.g. -O "-O3 -std=c++20")
    -g CODE         insert CODE at global scope, before the generated function
EXAMPLES:
  $ c++expr -g 'int fib(int n) { return n < 2 ? n : fib(n-2) + fib(n-1); }' 'OUT(fib(10)) OUT(fib(20)) OUT(fib(30))'
    fib(10) -> 55
    fib(20) -> 6765
    fib(30) -> 832040
  $ c++expr -I -i Interpreters/Context.h 'OUT(sizeof(DB::Context))'
    sizeof(DB::Context) -> 7776
  $ c++expr -I -i Common/Stopwatch.h -b 10000 'Stopwatch sw;'
     Steps per test: 10000
     Test #0:   0.025 us         39.526 Mps
     ...
     Test #4:   0.026 us         38.314 Mps
     MainAvg:   0.026 us         38.373 Mps
EOF
    exit 1
}

# --- Default settings; the option parser below overwrites most of them ---

# Generated translation unit and what gets spliced into it.
SOURCE_FILE='main.cpp'
INCS='vector iostream iomanip typeinfo cstdlib cmath sys/time.h'  # space-separated header list
GLOBAL=''                # code emitted at global scope (-g)

# How the snippet is built.
CXX='g++'                # compiler command (-c)
CXX_OPTS=''              # extra compiler options (-O)
USE_CMAKE=''             # non-empty: build through cmake (-C)
USE_CLICKHOUSE=''        # non-empty: build inside a ClickHouse tree (-I)
BUILD_DIR='build'        # build directory name used with -I (-B)
LIBS=''                  # space-separated libraries to link (-l)

# Benchmark parameters.
BENCHMARK_STEPS=0        # 0 means "no benchmark, run the snippet once" (-b)
BENCHMARK_TESTS=5        # number of timed tests (-t)
THREADS=1                # worker threads (-j)
RUN_PERFTOP=''           # non-empty: run under "perf top" (-b perf-top)

# Running and housekeeping.
OUTPUT_EXECUTABLE=''     # non-empty: save the binary here instead of running it (-o)
CMD_PARAMS=''            # arguments forwarded to the compiled program
KEEP_WORKTREE=0          # 1: do not delete generated files (-k)

#
# Parse command line
#

# Show the help when asked for it explicitly or when called without a first argument.
if [ "$1" == "--help" ] || [ -z "$1" ]; then usage; fi

# Option letters followed by ':' take an argument (delivered in OPTARG).
while getopts "vc:CIi:l:b:j:kB:t:o:O:g:" OPT; do
    case "$OPT" in
    v)      set -x ;;
    c)      CXX="$OPTARG" ;;
    C)      USE_CMAKE=y ;;
    # ClickHouse integration always links clickhouse_common_io.
    I)      USE_CLICKHOUSE=y; LIBS="$LIBS clickhouse_common_io" ;;
    i)      INCS="$INCS $OPTARG" ;;
    l)      LIBS="$LIBS $OPTARG" ;;
    # "perf-top" selects an endless benchmark (-1 steps) observed through perf top.
    b)      if [ "$OPTARG" = perf-top ]; then BENCHMARK_STEPS=-1; RUN_PERFTOP=y; else BENCHMARK_STEPS="$OPTARG"; fi ;;
    # The threaded harness needs these extra standard headers.
    j)      THREADS="$OPTARG"; INCS="$INCS mutex thread barrier" ;;
    B)      BUILD_DIR="$OPTARG" ;;
    k)      KEEP_WORKTREE=1 ;;
    t)      BENCHMARK_TESTS="$OPTARG" ;;
    o)      OUTPUT_EXECUTABLE="$OPTARG" ;;
    O)      CXX_OPTS="$CXX_OPTS $OPTARG" ;;
    g)      GLOBAL="$OPTARG" ;;
    # getopts reports an unknown option or a missing argument as '?'; it has
    # already printed a diagnostic, so show the help and stop instead of
    # carrying on with a half-parsed command line.
    *)      usage ;;
    esac
done
# Drop the options that were consumed; what remains are positional arguments.
shift $(( OPTIND - 1 ))

#
# Positional arguments
#

# The first positional argument is the code to run; it is mandatory.
# The check comes before `shift`: shifting an empty argument list returns a
# non-zero status, which under `set -e` would end the script silently and the
# usage text would never be shown (e.g. for `c++expr -C`).
if [ $# -eq 0 ] || [ -z "$1" ]; then usage; fi

EXPR=$1
shift

#
# Arguments forwarded to program should go after main code and before --
#

# Collect every argument up to (not including) "--" or the first empty
# argument into CMD_PARAMS, as one space-separated string.
while [[ -n "$1" && "$1" != "--" ]]; do
    CMD_PARAMS+=" $1"
    shift
done

# Swallow the separator itself; whatever follows it stays in "$@".
case "$1" in
--) shift ;;
esac

#
# Setup workdir
#

# Locate the ClickHouse source root by walking up from the current directory.
# Every directory visited must contain a CMakeLists.txt; the root is the first
# one whose CMakeLists.txt contains the text "project(ClickHouse".
# Outputs: the root path on stdout; diagnostics on stderr, so that a caller
#          capturing the result never receives an error message as a path.
# Returns: 0 when the root was found, 1 otherwise.
find_clickhouse_root () {
    local DIR
    DIR="$(pwd)"
    # Quoted throughout so directories containing whitespace are handled.
    while [ "$DIR" != "/" ]; do
        if [ ! -e "$DIR/CMakeLists.txt" ]; then
            echo "error: $DIR has no CMakeLists.txt" >&2
            return 1
        fi
        if grep "project(ClickHouse" "$DIR/CMakeLists.txt" >/dev/null 2>&1; then
            printf '%s\n' "$DIR"
            return 0
        fi
        DIR="$(dirname "$DIR")"
    done
    echo "error: unable to find Clickhouse root folder" >&2
    return 1
}

# Locate the build directory of the ClickHouse tree containing the current
# directory: <root>/$BUILD_DIR, accepted only if it holds a CMakeCache.txt.
# Globals:  BUILD_DIR (read)
# Outputs:  the build directory on stdout; diagnostics on stderr.
# Returns:  0 when the build directory exists, 1 otherwise.
find_clickhouse_build () {
    # Declared separately from the assignment: `local X=$(cmd)` would report
    # the status of `local` and hide a failure of the root lookup.
    local CLICKHOUSE_ROOT
    if ! CLICKHOUSE_ROOT="$(find_clickhouse_root)"; then
        # Anything the lookup printed on stdout was captured above; pass it on
        # as a diagnostic instead of treating it as a path.
        if [ -n "$CLICKHOUSE_ROOT" ]; then
            printf '%s\n' "$CLICKHOUSE_ROOT" >&2
        fi
        return 1
    fi
    if [ -e "$CLICKHOUSE_ROOT/$BUILD_DIR/CMakeCache.txt" ]; then
        printf '%s\n' "$CLICKHOUSE_ROOT/$BUILD_DIR"
        return 0
    fi
    echo "error: $CLICKHOUSE_ROOT/$BUILD_DIR/CMakeCache.txt doesn't exist" >&2
    return 1
}

# Directory the script was started from, and the name/location of the binary.
CALL_DIR=$(pwd)
EXECUTABLE=cppexpr_$$
EXECUTABLE_DIR=.

if [ -n "$USE_CLICKHOUSE" ]; then
    # Integration mode: the snippet lives in a sub-directory of the current
    # directory and is hooked into the CMakeLists.txt found there.
    SUBDIR=cppexpr_$$
    WORKDIR=$CALL_DIR/$SUBDIR
    if [ ! -e "$CALL_DIR/CMakeLists.txt" ]; then
        echo "error: $CALL_DIR/CMakeLists.txt is required for integration" >&2
        exit 1
    fi

    # Check each lookup explicitly. Relying on `set -e` alone would end the
    # script without a word, because anything the helper wrote to stdout is
    # captured by the substitution; forward it as a diagnostic instead.
    if ! CLICKHOUSE_ROOT="$(find_clickhouse_root)"; then
        if [ -n "$CLICKHOUSE_ROOT" ]; then printf '%s\n' "$CLICKHOUSE_ROOT" >&2; fi
        exit 1
    fi
    if ! BUILD_ROOT="$(find_clickhouse_build)"; then
        if [ -n "$BUILD_ROOT" ]; then printf '%s\n' "$BUILD_ROOT" >&2; fi
        exit 1
    fi
    # Path of the work directory relative to the source root (with a leading
    # slash); the build tree mirrors it. The root is quoted so it is removed
    # as a literal prefix rather than interpreted as a pattern.
    CLICKHOUSE_PATH=${WORKDIR#"$CLICKHOUSE_ROOT"}
    EXECUTABLE_DIR="${BUILD_ROOT}${CLICKHOUSE_PATH}"

    if [ -z "$CLICKHOUSE_ROOT" ] || [ -z "$BUILD_ROOT" ] || [ -z "$CLICKHOUSE_PATH" ]; then
        echo "error: unable to locate ClickHouse" >&2
        exit 1
    fi

    # Restore the original CMakeLists.txt and, unless -k was given, delete the
    # generated source and build directories. Safe to call more than once: the
    # backup is only moved back while it still exists.
    cleanup() {
        if [ -e "$CALL_DIR/CMakeLists.txt.backup.$$" ]; then
            mv "$CALL_DIR/CMakeLists.txt.backup.$$" "$CALL_DIR/CMakeLists.txt"
        fi
        if [ "$KEEP_WORKTREE" -eq 0 ]; then
            rm -rf "$WORKDIR"
            rm -rf "${BUILD_ROOT}${CLICKHOUSE_PATH}"
        fi
    }
    # Installed before CMakeLists.txt is touched, so that every way of leaving
    # the script (including an abort caused by `set -e`) undoes the change.
    trap cleanup EXIT

    cp "$CALL_DIR/CMakeLists.txt" "$CALL_DIR/CMakeLists.txt.backup.$$"
    echo "add_subdirectory ($SUBDIR)" >>"$CALL_DIR/CMakeLists.txt"
else
    # Stand-alone mode: work in a freshly created private directory. mktemp
    # picks an unpredictable name, so nobody can prepare the directory ahead of
    # time. /var/tmp is preferred; the usual temp location is the fallback.
    if ! WORKDIR=$(mktemp -d /var/tmp/cppexpr_XXXXXX 2>/dev/null) &&
       ! WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/cppexpr_XXXXXX"); then
        echo "error: unable to create a temporary work directory" >&2
        exit 1
    fi

    # Delete the work directory unless -k was given. Safe to call repeatedly.
    cleanup() {
        if [ "$KEEP_WORKTREE" -eq 0 ]; then
            rm -rf "$WORKDIR"
        fi
    }
    trap cleanup EXIT
fi

# Create the work directory if it does not exist yet and make it the current
# directory; all generated files below use relative names. Quoted so that a
# path containing whitespace is passed as a single argument.
mkdir -p "$WORKDIR"
cd "$WORKDIR"

#
# Generate CMakeLists.txt
#
# With -C, describe a stand-alone CMake project that builds the snippet.
# cmake_minimum_required comes first, ahead of project(), as CMake expects.
# The version is a range: CMake releases that understand the "...<max>" form
# get a policy version of 3.5, which releases that no longer accept a bare 2.8
# require; older releases ignore the upper bound and keep treating 2.8 as the
# minimum.
if [ -n "$USE_CMAKE" ]; then
    cat <<EOF >>CMakeLists.txt
cmake_minimum_required(VERSION 2.8...3.5)
project(CppExpr)
SET(PROJECT_NAME CppExpr)
SET(CMAKE_INCLUDE_CURRENT_DIR TRUE)
set(CMAKE_CXX_FLAGS -fPIC)
set(CMAKE_C_FLAGS -fPIC)
set(CMAKE_BUILD_TYPE Release)
set(SOURCES $SOURCE_FILE)
add_executable($EXECUTABLE \${SOURCES})
EOF
fi

#
# Generate CMakeLists.txt for integration
#
# With -I, the snippet is declared as one more executable target; the line is
# appended to the CMakeLists.txt in the current directory.
if [[ -n "$USE_CLICKHOUSE" ]]; then
    printf 'add_executable(%s %s)\n' "$EXECUTABLE" "$SOURCE_FILE" >>CMakeLists.txt
fi

#
# Add libraries to CMakeLists.txt
#
# When any library was requested, append the link directive for the target.
# LIBS is inserted exactly as accumulated (space-separated, leading space kept).
if [[ -n "$LIBS" ]]; then
    printf 'target_link_libraries(%s PRIVATE %s)\n' "$EXECUTABLE" "$LIBS" >>CMakeLists.txt
fi

#
# Generate source code
#
# Start from an empty source file, then write one #include line per entry of
# INCS (a space-separated list, hence the deliberately unquoted expansion).
: >$SOURCE_FILE
for INC in $INCS; do
    printf '#include <%s>\n' "$INC" >>$SOURCE_FILE
done

# Helper macros, benchmark parameters, the user's global code and the opening
# of work(). The here-document is unquoted: $THREADS, $BENCHMARK_TESTS,
# $BENCHMARK_STEPS and $GLOBAL are substituted by the shell while writing.
cat >>$SOURCE_FILE <<EOF

#define RESULT(label, us, mps) label << ": " << std::fixed << std::setw(7) << std::setfill(' ') << std::setprecision(3) << (us) << " us\t" << std::setw(7) << std::setfill(' ') << std::setprecision(3) << (mps) << " Mps"
#define OUT(expr) std::cout << #expr << " -> " << (expr) << std::endl;
#if $THREADS == 1
#define SYNC()
#define LOG(msg) std::cout << "     " << msg << std::endl
#define LOG_MAIN(msg) LOG(msg)
#define AVERAGE "MainAvg"
#else
std::barrier barrier($THREADS + 1);
std::mutex log_mutex;
#define SYNC() barrier.arrive_and_wait()
#define LOG(msg) do { std::scoped_lock lock{log_mutex}; std::cout << "\033[01;3" << (1 + thread_id % 8) << "m[" << std::setw(2) << std::setfill('0') << thread_id << "] " << msg << "\033[00m" << std::endl; } while (false)
#define LOG_MAIN(msg) do { std::scoped_lock lock{log_mutex}; std::cout << "     " << msg << std::endl; } while (false)
#define AVERAGE "Average"
#endif
size_t max_tests = $BENCHMARK_TESTS;
size_t max_steps = $BENCHMARK_STEPS;

$GLOBAL
int work(int thread_id = 0) {
  (void)thread_id;
  try {
EOF

# Body of work(): either the snippet on its own (no benchmark requested), or
# the snippet wrapped in the timing loops.
if [ $BENCHMARK_STEPS -eq 0 ]; then
    # Plain run: the user's code, indented by four spaces.
    printf '    %s\n' "$EXPR" >>$SOURCE_FILE
else
    # Benchmark run: max_tests timed tests of max_steps iterations each. Every
    # test is bracketed by SYNC() and its result is reported through LOG().
    cat >>$SOURCE_FILE <<EOF
    double total = 0.0;
    for (size_t test = 0; test < max_tests; test++) {
      SYNC(); // sync with all threads before test
      timeval beg, end;
      gettimeofday(&beg, nullptr);
      for (size_t step = 0; step < max_steps; step++) {
        asm volatile("" ::: "memory");
        $EXPR
      }
      gettimeofday(&end, nullptr);
      SYNC(); // sync with all threads after test
      double interval = (end.tv_sec - beg.tv_sec)*1e6 + (end.tv_usec - beg.tv_usec);
      LOG(RESULT("Test #" << test, interval / max_steps, max_steps / interval));
      total += interval;
    }
    SYNC(); // sync for avg printing
    LOG(RESULT(AVERAGE, total / max_tests / max_steps, max_steps / (total / max_tests)));
    SYNC(); // sync for avg printing
EOF
fi

# Close work() (status 0 on success, 1 after reporting an exception) and open
# main().
cat >>$SOURCE_FILE <<EOF
    return 0;
  } catch (std::exception& e) {
    std::cerr << "unhandled exception (" << typeid(e).name() << "):" << e.what() << std::endl;
  } catch (...) {
    std::cerr << "unknown unhandled exception\n";
  }
  return 1;
}
int main(int argc, char** argv) {
  (void)argc; (void)argv;
  if (max_steps == 0)
    max_steps = 1;
  else
    LOG_MAIN("Steps per test: " << max_steps);
EOF

# Rest of main(): a multi-threaded benchmark driver when both more than one
# thread and a benchmark were requested, a direct call of work() otherwise.
if [ $THREADS -gt 1 ] && [ $BENCHMARK_STEPS -ne 0 ]; then
    # The main thread starts the workers and takes part in every SYNC() they
    # perform, timing each test from the outside.
    cat >>$SOURCE_FILE <<EOF
  LOG_MAIN("Threads: " << $THREADS);
  std::vector<std::thread> threads;
  for (size_t i = 0; i < $THREADS; ++i)
    threads.emplace_back(work, i);
  timeval main_beg, main_end;
  double main_total = 0.0;
  for (size_t test = 0; test < max_tests; test++) {
    gettimeofday(&main_beg, nullptr);
    SYNC(); // sync with all threads before test
    SYNC(); // sync with all threads after test
    gettimeofday(&main_end, nullptr);
    double interval = (main_end.tv_sec - main_beg.tv_sec)*1e6 + (main_end.tv_usec - main_beg.tv_usec);
    double per_step = interval / (max_steps * $THREADS);
    double mps = max_steps * $THREADS / interval;
    LOG_MAIN(RESULT("Test #" << test, per_step, mps));
    main_total += interval;
  }
  double avg = main_total / (max_tests * max_steps * $THREADS);
  double mps = max_tests * max_steps * $THREADS / main_total;
  SYNC(); // sync for avg printing
  SYNC(); // sync for avg printing
  LOG_MAIN(RESULT("MainAvg", avg, mps));
  for (auto& thread : threads)
    thread.join();
  return 0;
EOF
else
    printf '  return work();\n' >>$SOURCE_FILE
fi

# Close main() and drop the helper macros again.
cat >>$SOURCE_FILE <<EOF
}
#ifdef OUT // just to avoid unused macro warning
#undef OUT
#endif
#undef LOG
#undef LOG_MAIN
#ifdef SYNC
#undef SYNC
#endif
#ifdef AVERAGE
#undef AVERAGE
#endif
#ifdef RESULT
#undef RESULT
#endif
EOF

#
# Compile
#
# Build the generated source with the selected tool chain. On failure the
# numbered source is printed to help locate the error, generated files are
# cleaned up, and the script exits with a non-zero status.
# Path expansions are quoted so directories containing whitespace work.
if [ -n "$USE_CMAKE" ]; then
    # Stand-alone CMake project in the current (work) directory.
    if ! (cmake . && make); then
        cat -n "$SOURCE_FILE"
        cleanup
        exit 1
    fi
elif [ -n "$USE_CLICKHOUSE" ]; then
    # Build only our target inside the ClickHouse build tree. The build output
    # is captured in log files and shown only when the build fails.
    if ! (cd "$BUILD_ROOT" && ninja "$EXECUTABLE") >stdout.log 2>stderr.log; then
        cat stdout.log
        cat stderr.log >&2
        cat -n "$SOURCE_FILE"
        cleanup
        exit 1
    fi
else
    # Direct compiler invocation. CXX and CXX_OPTS are expanded unquoted on
    # purpose: each may carry several space-separated words. The directory the
    # script was called from is added to the include path.
    RET=0
    $CXX $CXX_OPTS "-I$CALL_DIR" -o "$EXECUTABLE" "$SOURCE_FILE" || RET=$?
    if [ "$RET" -ne 0 ]; then
        cat -n "$SOURCE_FILE"
        cleanup
        exit "$RET"
    fi
fi

#
# Execute
#
# Run the program, watch it with perf top, or just save the binary (-o).
# "$@" holds whatever followed "--" on the command line and is used as a
# command prefix for the program. CMD_PARAMS is a space-separated string and
# is expanded unquoted on purpose.
# Every failure is recorded in RET instead of tripping `set -e`, so that the
# cleanup step after this section is always reached.
RET=0
if [ -z "$OUTPUT_EXECUTABLE" ]; then
    if [ -z "$RUN_PERFTOP" ]; then
        "$@" "$EXECUTABLE_DIR/$EXECUTABLE" $CMD_PARAMS || RET=$?
    else
        "$@" "$EXECUTABLE_DIR/$EXECUTABLE" $CMD_PARAMS &
        PID=$!
        perf top -p "$PID" || RET=$?
        # The benchmark must not outlive the script even if perf failed. The
        # process may already be gone, which is not an error here.
        kill "$PID" 2>/dev/null || true
    fi
else
    # An absolute destination is used as given; a relative one is resolved
    # against the directory the script was called from (the current directory
    # is the work directory at this point).
    case "$OUTPUT_EXECUTABLE" in
    /*) DEST="$OUTPUT_EXECUTABLE" ;;
    *)  DEST="$CALL_DIR/$OUTPUT_EXECUTABLE" ;;
    esac
    cp "$EXECUTABLE_DIR/$EXECUTABLE" "$DEST" || RET=$?
fi

#
# Cleanup
#
# Remove the generated files, then report the recorded status and exit with it.
cleanup
printf 'Exit code: %s\n' "$RET"
exit $RET
